daemon: Discover devices and include in system info

Add ability for the device driver to implement a device discovery
mechanism and expose discovered devices in the `docker info` output.

Currently it's only implemented for CDI devices.

Signed-off-by: Paweł Gronowski <pawel.gronowski@docker.com>
This commit is contained in:
Paweł Gronowski
2025-05-14 11:03:09 +02:00
parent f95a7c47e8
commit 9095698a5c
9 changed files with 246 additions and 1 deletions

View File

@@ -127,6 +127,9 @@ func (s *systemRouter) getInfo(ctx context.Context, w http.ResponseWriter, r *ht
if versions.GreaterThanOrEqualTo(version, "1.42") {
info.KernelMemory = false
}
if versions.LessThan(version, "1.50") {
info.DiscoveredDevices = nil
}
return info, nil
})
return httputils.WriteJSON(w, http.StatusOK, info)

View File

@@ -2956,6 +2956,23 @@ definitions:
progressDetail:
$ref: "#/definitions/ProgressDetail"
DeviceInfo:
type: "object"
description: |
DeviceInfo represents a device that can be used by a container.
properties:
Source:
type: "string"
example: "cdi"
description: |
The origin device driver.
ID:
type: "string"
example: "vendor.com/gpu=0"
description: |
The unique identifier for the device within its source driver.
For CDI devices, this would be an FQDN like "vendor.com/gpu=0".
ErrorDetail:
type: "object"
properties:
@@ -6858,6 +6875,15 @@ definitions:
example: "24"
FirewallBackend:
$ref: "#/definitions/FirewallInfo"
DiscoveredDevices:
description: |
List of devices discovered by device drivers.
Each device includes information about its source driver and its
unique identifier within that driver.
type: "array"
items:
$ref: "#/definitions/DeviceInfo"
Warnings:
description: |
List of warnings / informational messages about missing features, or

View File

@@ -75,6 +75,7 @@ type Info struct {
DefaultAddressPools []NetworkAddressPool `json:",omitempty"`
FirewallBackend *FirewallInfo `json:"FirewallBackend,omitempty"`
CDISpecDirs []string
DiscoveredDevices []DeviceInfo `json:",omitempty"`
Containerd *ContainerdInfo `json:",omitempty"`
@@ -160,3 +161,12 @@ type FirewallInfo struct {
// Info is a list of label/value pairs, containing information related to the firewall.
Info [][2]string `json:"Info,omitempty"`
}
// DeviceInfo represents a discoverable device from a device driver.
//
// It is the element type of Info.DiscoveredDevices and is encoded to JSON
// with the keys "Source" and "ID".
type DeviceInfo struct {
// Source indicates the origin device driver.
// Example: "cdi" for devices coming from the CDI device driver.
Source string `json:"Source"`
// ID is the unique identifier for the device.
// Example: CDI FQDN like "vendor.com/gpu=0", or other driver-specific device ID
ID string `json:"ID"`
}

View File

@@ -75,3 +75,53 @@ func TestInfo(t *testing.T) {
t.Fatalf("expected 3 containers, got %d", info.Containers)
}
}
func TestInfoWithDiscoveredDevices(t *testing.T) {
expectedURL := "/info"
client := &Client{
client: newMockClient(func(req *http.Request) (*http.Response, error) {
if !strings.HasPrefix(req.URL.Path, expectedURL) {
return nil, fmt.Errorf("Expected URL '%s', got '%s'", expectedURL, req.URL)
}
info := &system.Info{
ID: "daemonID",
Containers: 3,
DiscoveredDevices: []system.DeviceInfo{
{
Source: "cdi",
ID: "vendor.com/gpu=0",
},
{
Source: "cdi",
ID: "vendor.com/gpu=1",
},
},
}
b, err := json.Marshal(info)
if err != nil {
return nil, err
}
return &http.Response{
StatusCode: http.StatusOK,
Body: io.NopCloser(bytes.NewReader(b)),
}, nil
}),
}
info, err := client.Info(context.Background())
assert.NilError(t, err)
assert.Check(t, is.Equal(info.ID, "daemonID"))
assert.Check(t, is.Equal(info.Containers, 3))
assert.Check(t, is.Len(info.DiscoveredDevices, 2))
device0 := info.DiscoveredDevices[0]
assert.Check(t, is.Equal(device0.Source, "cdi"))
assert.Check(t, is.Equal(device0.ID, "vendor.com/gpu=0"))
device1 := info.DiscoveredDevices[1]
assert.Check(t, is.Equal(device1.Source, "cdi"))
assert.Check(t, is.Equal(device1.ID, "vendor.com/gpu=1"))
}

View File

@@ -3,8 +3,11 @@ package daemon
import (
"context"
"fmt"
"os"
"github.com/containerd/log"
"github.com/docker/docker/api/types/system"
"github.com/docker/docker/daemon/config"
"github.com/docker/docker/errdefs"
"github.com/hashicorp/go-multierror"
"github.com/opencontainers/runtime-spec/specs-go"
@@ -40,6 +43,11 @@ func newCDIDeviceDriver(cdiSpecDirs ...string) *deviceDriver {
}
return &deviceDriver{
updateSpec: errorOnUpdateSpec,
ListDevices: func(ctx context.Context, cfg *config.Config) (deviceListing, error) {
return deviceListing{
Warnings: []string{fmt.Sprintf("CDI cache initialization failed: %v", err)},
}, nil
},
}
}
@@ -49,7 +57,8 @@ func newCDIDeviceDriver(cdiSpecDirs ...string) *deviceDriver {
}
return &deviceDriver{
updateSpec: c.injectCDIDevices,
updateSpec: c.injectCDIDevices,
ListDevices: c.listDevices,
}
}
@@ -105,3 +114,39 @@ func (c *cdiHandler) getErrors() error {
}
return err.ErrorOrNil()
}
// listDevices reports every device currently known to the CDI cache.
// Its signature matches the deviceDriver.ListDevices field.
//
// Errors the cache recorded for individual spec files are returned as
// warnings, except those matching os.ErrNotExist, which are ignored. A device
// name that the cache lists but cannot resolve is logged and also reported as
// a warning. Returned devices only have their ID set. The returned error is
// always nil; cfg is not used.
func (c *cdiHandler) listDevices(ctx context.Context, cfg *config.Config) (deviceListing, error) {
	var listing deviceListing

	// Surface per-spec-file errors held by the cache as warnings.
	for path, errs := range c.registry.GetErrors() {
		for _, specErr := range errs {
			if !errors.Is(specErr, os.ErrNotExist) {
				listing.Warnings = append(listing.Warnings, fmt.Sprintf("CDI: Error associated with spec file %s: %v", path, specErr))
			}
		}
	}

	// Ranging over an empty name list is a no-op, so no explicit length
	// check is required before the loop.
	for _, name := range c.registry.ListDevices() {
		if c.registry.GetDevice(name) != nil {
			listing.Devices = append(listing.Devices, system.DeviceInfo{
				ID: name,
			})
			continue
		}

		// The name was listed but could not be looked up: skip it.
		log.G(ctx).WithField("device", name).Warn("CDI: Cache.GetDevice() returned nil for a listed device, skipping.")
		listing.Warnings = append(listing.Warnings, fmt.Sprintf("CDI: Device %s listed but not found by GetDevice()", name))
	}

	return listing, nil
}

View File

@@ -1,16 +1,31 @@
package daemon // import "github.com/docker/docker/daemon"
import (
"context"
"github.com/docker/docker/api/types/container"
"github.com/docker/docker/api/types/system"
"github.com/docker/docker/daemon/config"
"github.com/docker/docker/daemon/internal/capabilities"
"github.com/opencontainers/runtime-spec/specs-go"
)
// deviceDrivers maps a device driver name to its deviceDriver.
// NOTE(review): where entries are added is not visible here — presumably
// drivers register themselves during initialization; confirm.
var deviceDrivers = map[string]*deviceDriver{}
// deviceListing is the result returned by a deviceDriver's ListDevices
// function.
type deviceListing struct {
// Devices holds the devices the driver discovered.
Devices []system.DeviceInfo
// Warnings holds messages about problems encountered during discovery that
// did not cause the listing as a whole to fail.
Warnings []string
}
// deviceDriver bundles the hooks a device driver provides to the daemon.
type deviceDriver struct {
// capset is the capability set associated with the driver.
// NOTE(review): presumably used to match device requests to this driver;
// confirm against the callers.
capset capabilities.Set
// updateSpec takes an OCI runtime spec and a device instance and returns
// an error on failure.
// NOTE(review): presumably it modifies the spec so the container gets
// access to the device; confirm.
updateSpec func(*specs.Spec, *deviceInstance) error
// ListDevices returns a list of discoverable devices provided by this
// driver, any warnings encountered during the discovery, and an error if
// the overall listing operation failed.
// Can be nil if the driver does not provide a device listing.
ListDevices func(ctx context.Context, cfg *config.Config) (deviceListing, error)
}
type deviceInstance struct {

View File

@@ -94,6 +94,7 @@ func (daemon *Daemon) SystemInfo(ctx context.Context) (*system.Info, error) {
daemon.fillLicense(v)
daemon.fillDefaultAddressPools(ctx, v, &cfg.Config)
daemon.fillFirewallInfo(v)
daemon.fillDiscoveredDevicesFromDrivers(ctx, v, &cfg.Config)
return v, nil
}
@@ -388,3 +389,40 @@ func promoteNil[S ~[]E, E any](s S) S {
}
return s
}
// fillDiscoveredDevicesFromDrivers populates v.DiscoveredDevices with the
// devices reported by every entry of deviceDrivers that provides a
// ListDevices function.
//
// Each reported device has its Source overwritten with the name of the driver
// that listed it. Warnings returned by a driver are appended to v.Warnings.
// When a driver's listing fails, the failure is logged, recorded in
// v.Warnings, and that driver is skipped. Drivers are visited in map
// iteration order, so the order of the results is not fixed.
func (daemon *Daemon) fillDiscoveredDevicesFromDrivers(ctx context.Context, v *system.Info, cfg *config.Config) {
	ctx, span := tracing.StartSpan(ctx, "daemon.fillDiscoveredDevicesFromDrivers")
	defer span.End()

	// Start from a non-nil, empty slice rather than nil.
	// NOTE(review): the field appears to be tagged `json:",omitempty"`, so an
	// empty slice would still be dropped from the encoded JSON — confirm
	// whether an explicit empty array is expected in the API response.
	v.DiscoveredDevices = make([]system.DeviceInfo, 0)

	for name, drv := range deviceDrivers {
		list := drv.ListDevices
		if list == nil {
			log.G(ctx).WithField("driver", name).Trace("Device driver does not implement ListDevices method.")
			continue
		}

		listing, listErr := list(ctx, cfg)
		if listErr != nil {
			log.G(ctx).WithFields(log.Fields{
				"driver": name,
				"error":  listErr,
			}).Warn("Failed to list devices for driver")
			v.Warnings = append(v.Warnings, fmt.Sprintf("Failed to list devices from driver '%s': %v", name, listErr))
			continue
		}

		// Appending an empty list leaves v.Warnings untouched, so no length
		// guard is needed.
		v.Warnings = append(v.Warnings, listing.Warnings...)

		for i := range listing.Devices {
			// Work on a copy so the driver's own slice is not modified.
			dev := listing.Devices[i]
			dev.Source = name
			v.DiscoveredDevices = append(v.DiscoveredDevices, dev)
		}
	}
}

View File

@@ -13,6 +13,15 @@ keywords: "API, Docker, rcli, REST, documentation"
will be rejected.
-->
## v1.50 API changes
[Docker Engine API v1.50](https://docs.docker.com/reference/api/engine/version/v1.50/) documentation
* `GET /info` now includes a `DiscoveredDevices` field. This is an array of
`DeviceInfo` objects, each providing details about a device discovered by a
device driver.
Currently only the CDI device driver is supported.
## v1.49 API changes
[Docker Engine API v1.49](https://docs.docker.com/reference/api/engine/version/v1.49/) documentation

View File

@@ -9,6 +9,7 @@ import (
"testing"
containertypes "github.com/docker/docker/api/types/container"
"github.com/docker/docker/api/types/system"
"github.com/docker/docker/integration/internal/container"
"github.com/docker/docker/pkg/stdcopy"
"github.com/docker/docker/testutil"
@@ -152,3 +153,51 @@ func TestCDISpecDirsAreInSystemInfo(t *testing.T) {
})
}
}
// TestCDIInfoDiscoveredDevices starts a daemon with the CDI feature enabled
// and a spec directory holding a single sample spec, then checks that the
// system info reports that directory and exactly one discovered device whose
// source is "cdi" and whose ID is "test.com/device=mygpu0".
func TestCDIInfoDiscoveredDevices(t *testing.T) {
	skip.If(t, testEnv.IsRemoteDaemon, "cannot run daemon when remote daemon")
	skip.If(t, testEnv.DaemonInfo.OSType == "windows", "CDI not supported on Windows")

	ctx := testutil.StartSpan(baseContext, t)

	// Sample CDI spec declaring one device, "mygpu0", of kind
	// "test.com/device".
	sampleSpec := `{
"cdiVersion": "0.5.0",
"kind": "test.com/device",
"devices": [
{
"name": "mygpu0",
"containerEdits": {
"deviceNodes": [
{"path": "/dev/null"}
]
}
}
]
}`

	specDir := testutil.TempDir(t)
	writeErr := os.WriteFile(filepath.Join(specDir, "test-device.json"), []byte(sampleSpec), 0644)
	assert.NilError(t, writeErr, "Failed to write sample CDI spec file")

	// Start the daemon only after the spec file is in place.
	d := daemon.New(t)
	d.Start(t, "--feature", "cdi", "--cdi-spec-dir="+specDir)
	defer d.Stop(t)

	apiClient := d.NewClientT(t)
	info, err := apiClient.Info(ctx)
	assert.NilError(t, err)

	assert.Check(t, is.Len(info.CDISpecDirs, 1))
	assert.Check(t, is.Equal(info.CDISpecDirs[0], specDir))

	wantDevices := []system.DeviceInfo{
		{Source: "cdi", ID: "test.com/device=mygpu0"},
	}
	assert.Check(t, is.Equal(len(info.DiscoveredDevices), 1), "Expected one discovered device")
	assert.Check(t, is.DeepEqual(info.DiscoveredDevices, wantDevices))
}