mirror of
https://github.com/moby/moby.git
synced 2026-01-11 18:51:37 +00:00
daemon: Discover devices and include in system info
Add ability for the device driver to implement a device discovery mechanism and expose discovered devices in the `docker info` output. Currently it's only implemented for CDI devices. Signed-off-by: Paweł Gronowski <pawel.gronowski@docker.com>
This commit is contained in:
@@ -127,6 +127,9 @@ func (s *systemRouter) getInfo(ctx context.Context, w http.ResponseWriter, r *ht
|
||||
if versions.GreaterThanOrEqualTo(version, "1.42") {
|
||||
info.KernelMemory = false
|
||||
}
|
||||
if versions.LessThan(version, "1.50") {
|
||||
info.DiscoveredDevices = nil
|
||||
}
|
||||
return info, nil
|
||||
})
|
||||
return httputils.WriteJSON(w, http.StatusOK, info)
|
||||
|
||||
@@ -2956,6 +2956,23 @@ definitions:
|
||||
progressDetail:
|
||||
$ref: "#/definitions/ProgressDetail"
|
||||
|
||||
DeviceInfo:
|
||||
type: "object"
|
||||
description: |
|
||||
DeviceInfo represents a device that can be used by a container.
|
||||
properties:
|
||||
Source:
|
||||
type: "string"
|
||||
example: "cdi"
|
||||
description: |
|
||||
The origin device driver.
|
||||
ID:
|
||||
type: "string"
|
||||
example: "vendor.com/gpu=0"
|
||||
description: |
|
||||
The unique identifier for the device within its source driver.
|
||||
For CDI devices, this would be a fully-qualified device name like "vendor.com/gpu=0".
|
||||
|
||||
ErrorDetail:
|
||||
type: "object"
|
||||
properties:
|
||||
@@ -6858,6 +6875,15 @@ definitions:
|
||||
example: "24"
|
||||
FirewallBackend:
|
||||
$ref: "#/definitions/FirewallInfo"
|
||||
DiscoveredDevices:
|
||||
description: |
|
||||
List of devices discovered by device drivers.
|
||||
|
||||
Each device includes the name of its source driver and the unique
|
||||
identifier of the device within that driver.
|
||||
type: "array"
|
||||
items:
|
||||
$ref: "#/definitions/DeviceInfo"
|
||||
Warnings:
|
||||
description: |
|
||||
List of warnings / informational messages about missing features, or
|
||||
|
||||
@@ -75,6 +75,7 @@ type Info struct {
|
||||
DefaultAddressPools []NetworkAddressPool `json:",omitempty"`
|
||||
FirewallBackend *FirewallInfo `json:"FirewallBackend,omitempty"`
|
||||
CDISpecDirs []string
|
||||
DiscoveredDevices []DeviceInfo `json:",omitempty"`
|
||||
|
||||
Containerd *ContainerdInfo `json:",omitempty"`
|
||||
|
||||
@@ -160,3 +161,12 @@ type FirewallInfo struct {
|
||||
// Info is a list of label/value pairs, containing information related to the firewall.
|
||||
Info [][2]string `json:"Info,omitempty"`
|
||||
}
|
||||
|
||||
// DeviceInfo describes a single device reported by a device driver.
type DeviceInfo struct {
	// Source is the name of the device driver the device originates from.
	Source string `json:"Source"`
	// ID uniquely identifies the device. The format is driver-specific; for
	// CDI it is a fully-qualified device name such as "vendor.com/gpu=0".
	ID string `json:"ID"`
}
|
||||
|
||||
@@ -75,3 +75,53 @@ func TestInfo(t *testing.T) {
|
||||
t.Fatalf("expected 3 containers, got %d", info.Containers)
|
||||
}
|
||||
}
|
||||
|
||||
func TestInfoWithDiscoveredDevices(t *testing.T) {
|
||||
expectedURL := "/info"
|
||||
client := &Client{
|
||||
client: newMockClient(func(req *http.Request) (*http.Response, error) {
|
||||
if !strings.HasPrefix(req.URL.Path, expectedURL) {
|
||||
return nil, fmt.Errorf("Expected URL '%s', got '%s'", expectedURL, req.URL)
|
||||
}
|
||||
info := &system.Info{
|
||||
ID: "daemonID",
|
||||
Containers: 3,
|
||||
DiscoveredDevices: []system.DeviceInfo{
|
||||
{
|
||||
Source: "cdi",
|
||||
ID: "vendor.com/gpu=0",
|
||||
},
|
||||
{
|
||||
Source: "cdi",
|
||||
ID: "vendor.com/gpu=1",
|
||||
},
|
||||
},
|
||||
}
|
||||
b, err := json.Marshal(info)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &http.Response{
|
||||
StatusCode: http.StatusOK,
|
||||
Body: io.NopCloser(bytes.NewReader(b)),
|
||||
}, nil
|
||||
}),
|
||||
}
|
||||
|
||||
info, err := client.Info(context.Background())
|
||||
assert.NilError(t, err)
|
||||
|
||||
assert.Check(t, is.Equal(info.ID, "daemonID"))
|
||||
assert.Check(t, is.Equal(info.Containers, 3))
|
||||
|
||||
assert.Check(t, is.Len(info.DiscoveredDevices, 2))
|
||||
|
||||
device0 := info.DiscoveredDevices[0]
|
||||
assert.Check(t, is.Equal(device0.Source, "cdi"))
|
||||
assert.Check(t, is.Equal(device0.ID, "vendor.com/gpu=0"))
|
||||
|
||||
device1 := info.DiscoveredDevices[1]
|
||||
assert.Check(t, is.Equal(device1.Source, "cdi"))
|
||||
assert.Check(t, is.Equal(device1.ID, "vendor.com/gpu=1"))
|
||||
}
|
||||
|
||||
@@ -3,8 +3,11 @@ package daemon
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/containerd/log"
|
||||
"github.com/docker/docker/api/types/system"
|
||||
"github.com/docker/docker/daemon/config"
|
||||
"github.com/docker/docker/errdefs"
|
||||
"github.com/hashicorp/go-multierror"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
@@ -40,6 +43,11 @@ func newCDIDeviceDriver(cdiSpecDirs ...string) *deviceDriver {
|
||||
}
|
||||
return &deviceDriver{
|
||||
updateSpec: errorOnUpdateSpec,
|
||||
ListDevices: func(ctx context.Context, cfg *config.Config) (deviceListing, error) {
|
||||
return deviceListing{
|
||||
Warnings: []string{fmt.Sprintf("CDI cache initialization failed: %v", err)},
|
||||
}, nil
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -49,7 +57,8 @@ func newCDIDeviceDriver(cdiSpecDirs ...string) *deviceDriver {
|
||||
}
|
||||
|
||||
return &deviceDriver{
|
||||
updateSpec: c.injectCDIDevices,
|
||||
updateSpec: c.injectCDIDevices,
|
||||
ListDevices: c.listDevices,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -105,3 +114,39 @@ func (c *cdiHandler) getErrors() error {
|
||||
}
|
||||
return err.ErrorOrNil()
|
||||
}
|
||||
|
||||
// listDevices uses the CDI cache to list all discovered CDI devices.
|
||||
// It conforms to the deviceDriver.ListDevices function signature.
|
||||
func (c *cdiHandler) listDevices(ctx context.Context, cfg *config.Config) (deviceListing, error) {
|
||||
var out deviceListing
|
||||
|
||||
// Collect global errors from the CDI cache (e.g., issues with spec files themselves).
|
||||
for specPath, specErrs := range c.registry.GetErrors() {
|
||||
for _, err := range specErrs {
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
continue
|
||||
}
|
||||
out.Warnings = append(out.Warnings, fmt.Sprintf("CDI: Error associated with spec file %s: %v", specPath, err))
|
||||
}
|
||||
}
|
||||
|
||||
qualifiedDeviceNames := c.registry.ListDevices()
|
||||
if len(qualifiedDeviceNames) == 0 {
|
||||
return out, nil
|
||||
}
|
||||
|
||||
for _, qdn := range qualifiedDeviceNames {
|
||||
device := c.registry.GetDevice(qdn)
|
||||
if device == nil {
|
||||
log.G(ctx).WithField("device", qdn).Warn("CDI: Cache.GetDevice() returned nil for a listed device, skipping.")
|
||||
out.Warnings = append(out.Warnings, fmt.Sprintf("CDI: Device %s listed but not found by GetDevice()", qdn))
|
||||
continue
|
||||
}
|
||||
|
||||
out.Devices = append(out.Devices, system.DeviceInfo{
|
||||
ID: qdn,
|
||||
})
|
||||
}
|
||||
|
||||
return out, nil
|
||||
}
|
||||
|
||||
@@ -1,16 +1,31 @@
|
||||
package daemon // import "github.com/docker/docker/daemon"
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/docker/docker/api/types/container"
|
||||
"github.com/docker/docker/api/types/system"
|
||||
"github.com/docker/docker/daemon/config"
|
||||
"github.com/docker/docker/daemon/internal/capabilities"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
)
|
||||
|
||||
var deviceDrivers = map[string]*deviceDriver{}
|
||||
|
||||
type deviceListing struct {
|
||||
Devices []system.DeviceInfo
|
||||
Warnings []string
|
||||
}
|
||||
|
||||
type deviceDriver struct {
|
||||
capset capabilities.Set
|
||||
updateSpec func(*specs.Spec, *deviceInstance) error
|
||||
|
||||
// ListDevices returns a list of discoverable devices provided by this
|
||||
// driver, any warnings encountered during the discovery, and an error if
|
||||
// the overall listing operation failed.
|
||||
// Can be nil if the driver does not provide a device listing.
|
||||
ListDevices func(ctx context.Context, cfg *config.Config) (deviceListing, error)
|
||||
}
|
||||
|
||||
type deviceInstance struct {
|
||||
|
||||
@@ -94,6 +94,7 @@ func (daemon *Daemon) SystemInfo(ctx context.Context) (*system.Info, error) {
|
||||
daemon.fillLicense(v)
|
||||
daemon.fillDefaultAddressPools(ctx, v, &cfg.Config)
|
||||
daemon.fillFirewallInfo(v)
|
||||
daemon.fillDiscoveredDevicesFromDrivers(ctx, v, &cfg.Config)
|
||||
|
||||
return v, nil
|
||||
}
|
||||
@@ -388,3 +389,40 @@ func promoteNil[S ~[]E, E any](s S) S {
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
// fillDiscoveredDevicesFromDrivers iterates over registered device drivers
|
||||
// and calls their ListDevices method (if available) to populate system info.
|
||||
func (daemon *Daemon) fillDiscoveredDevicesFromDrivers(ctx context.Context, v *system.Info, cfg *config.Config) {
|
||||
ctx, span := tracing.StartSpan(ctx, "daemon.fillDiscoveredDevicesFromDrivers")
|
||||
defer span.End()
|
||||
|
||||
// Make sure v.DiscoveredDevices is initialized to an empty slice instead of nil.
|
||||
// This ensures that the JSON output is always a valid array, even if no devices are discovered.
|
||||
v.DiscoveredDevices = []system.DeviceInfo{}
|
||||
|
||||
for driverName, driver := range deviceDrivers {
|
||||
if driver.ListDevices == nil {
|
||||
log.G(ctx).WithField("driver", driverName).Trace("Device driver does not implement ListDevices method.")
|
||||
continue
|
||||
}
|
||||
|
||||
ls, err := driver.ListDevices(ctx, cfg)
|
||||
if err != nil {
|
||||
log.G(ctx).WithFields(log.Fields{
|
||||
"driver": driverName,
|
||||
"error": err,
|
||||
}).Warn("Failed to list devices for driver")
|
||||
v.Warnings = append(v.Warnings, fmt.Sprintf("Failed to list devices from driver '%s': %v", driverName, err))
|
||||
continue
|
||||
}
|
||||
|
||||
if len(ls.Warnings) > 0 {
|
||||
v.Warnings = append(v.Warnings, ls.Warnings...)
|
||||
}
|
||||
|
||||
for _, device := range ls.Devices {
|
||||
device.Source = driverName
|
||||
v.DiscoveredDevices = append(v.DiscoveredDevices, device)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,6 +13,15 @@ keywords: "API, Docker, rcli, REST, documentation"
|
||||
will be rejected.
|
||||
-->
|
||||
|
||||
## v1.50 API changes
|
||||
|
||||
[Docker Engine API v1.50](https://docs.docker.com/reference/api/engine/version/v1.50/) documentation
|
||||
|
||||
* `GET /info` now includes a `DiscoveredDevices` field. This is an array of
|
||||
`DeviceInfo` objects, each providing details about a device discovered by a
|
||||
device driver.
|
||||
Currently only the CDI device driver is supported.
|
||||
|
||||
## v1.49 API changes
|
||||
|
||||
[Docker Engine API v1.49](https://docs.docker.com/reference/api/engine/version/v1.49/) documentation
|
||||
|
||||
@@ -9,6 +9,7 @@ import (
|
||||
"testing"
|
||||
|
||||
containertypes "github.com/docker/docker/api/types/container"
|
||||
"github.com/docker/docker/api/types/system"
|
||||
"github.com/docker/docker/integration/internal/container"
|
||||
"github.com/docker/docker/pkg/stdcopy"
|
||||
"github.com/docker/docker/testutil"
|
||||
@@ -152,3 +153,51 @@ func TestCDISpecDirsAreInSystemInfo(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCDIInfoDiscoveredDevices(t *testing.T) {
|
||||
skip.If(t, testEnv.IsRemoteDaemon, "cannot run daemon when remote daemon")
|
||||
skip.If(t, testEnv.DaemonInfo.OSType == "windows", "CDI not supported on Windows")
|
||||
|
||||
ctx := testutil.StartSpan(baseContext, t)
|
||||
|
||||
// Create a sample CDI spec file
|
||||
specContent := `{
|
||||
"cdiVersion": "0.5.0",
|
||||
"kind": "test.com/device",
|
||||
"devices": [
|
||||
{
|
||||
"name": "mygpu0",
|
||||
"containerEdits": {
|
||||
"deviceNodes": [
|
||||
{"path": "/dev/null"}
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
cdiDir := testutil.TempDir(t)
|
||||
specFilePath := filepath.Join(cdiDir, "test-device.json")
|
||||
|
||||
err := os.WriteFile(specFilePath, []byte(specContent), 0644)
|
||||
assert.NilError(t, err, "Failed to write sample CDI spec file")
|
||||
|
||||
d := daemon.New(t)
|
||||
d.Start(t, "--feature", "cdi", "--cdi-spec-dir="+cdiDir)
|
||||
defer d.Stop(t)
|
||||
|
||||
c := d.NewClientT(t)
|
||||
info, err := c.Info(ctx)
|
||||
assert.NilError(t, err)
|
||||
|
||||
assert.Check(t, is.Len(info.CDISpecDirs, 1))
|
||||
assert.Check(t, is.Equal(info.CDISpecDirs[0], cdiDir))
|
||||
|
||||
expectedDevice := system.DeviceInfo{
|
||||
Source: "cdi",
|
||||
ID: "test.com/device=mygpu0",
|
||||
}
|
||||
|
||||
assert.Check(t, is.Equal(len(info.DiscoveredDevices), 1), "Expected one discovered device")
|
||||
assert.Check(t, is.DeepEqual(info.DiscoveredDevices, []system.DeviceInfo{expectedDevice}))
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user