mirror of
https://github.com/moby/moby.git
synced 2026-01-11 02:31:44 +00:00
Added support for AMD GPUs in "docker run --gpus".
Added backend code to support the exact same interface used today for Nvidia GPUs, allowing customers to use the same docker commands for both Nvidia and AMD GPUs. Signed-off-by: Sudheendra Gopinath <sudheendra.gopinath@amd.com> Reused common functions from nvidia_linux.go. Removed duplicate code in amd_linux.go by reusing the init() and countToDevices() functions in nvidia_linux.go. AMD driver is registered in init(). Signed-off-by: Sudheendra Gopinath <sudheendra.gopinath@amd.com> Renamed amd-container-runtime constant. Signed-off-by: Sudheendra Gopinath <sudheendra.gopinath@amd.com> Removed empty branch to keep linter happy. Also renamed amd_linux.go to gpu_amd_linux.go. Signed-off-by: Sudheendra Gopinath <sudheendra.gopinath@amd.com> Renamed nvidia_linux.go and gpu_amd_linux.go. Signed-off-by: Sudheendra Gopinath <sudheendra.gopinath@amd.com>
This commit is contained in:
27
daemon/devices_amd_linux.go
Normal file
27
daemon/devices_amd_linux.go
Normal file
@@ -0,0 +1,27 @@
|
||||
package daemon
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
)
|
||||
|
||||
func setAMDGPUs(s *specs.Spec, dev *deviceInstance) error {
|
||||
req := dev.req
|
||||
if req.Count != 0 && len(req.DeviceIDs) > 0 {
|
||||
return errConflictCountDeviceIDs
|
||||
}
|
||||
|
||||
switch {
|
||||
case len(req.DeviceIDs) > 0:
|
||||
s.Process.Env = append(s.Process.Env, "AMD_VISIBLE_DEVICES="+strings.Join(req.DeviceIDs, ","))
|
||||
case req.Count > 0:
|
||||
s.Process.Env = append(s.Process.Env, "AMD_VISIBLE_DEVICES="+countToDevices(req.Count))
|
||||
case req.Count < 0:
|
||||
s.Process.Env = append(s.Process.Env, "AMD_VISIBLE_DEVICES=all")
|
||||
case req.Count == 0:
|
||||
s.Process.Env = append(s.Process.Env, "AMD_VISIBLE_DEVICES=void")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -17,7 +17,10 @@ import (
|
||||
|
||||
var errConflictCountDeviceIDs = errors.New("cannot set both Count and DeviceIDs on device request")
|
||||
|
||||
const nvidiaHook = "nvidia-container-runtime-hook"
|
||||
const (
|
||||
nvidiaHook = "nvidia-container-runtime-hook"
|
||||
amdContainerRuntimeExecutableName = "amd-container-runtime"
|
||||
)
|
||||
|
||||
// These are NVIDIA-specific capabilities stolen from github.com/containerd/containerd/contrib/nvidia.allCaps
|
||||
var allNvidiaCaps = map[nvidia.Capability]struct{}{
|
||||
@@ -30,19 +33,29 @@ var allNvidiaCaps = map[nvidia.Capability]struct{}{
|
||||
}
|
||||
|
||||
func init() {
|
||||
if _, err := exec.LookPath(nvidiaHook); err != nil {
|
||||
// do not register Nvidia driver if helper binary is not present.
|
||||
// Register Nvidia driver if Nvidia helper binary is present.
|
||||
if _, err := exec.LookPath(nvidiaHook); err == nil {
|
||||
capset := capabilities.Set{"gpu": struct{}{}, "nvidia": struct{}{}}
|
||||
for c := range allNvidiaCaps {
|
||||
capset[string(c)] = struct{}{}
|
||||
}
|
||||
registerDeviceDriver("nvidia", &deviceDriver{
|
||||
capset: capset,
|
||||
updateSpec: setNvidiaGPUs,
|
||||
})
|
||||
return
|
||||
}
|
||||
capset := capabilities.Set{"gpu": struct{}{}, "nvidia": struct{}{}}
|
||||
nvidiaDriver := &deviceDriver{
|
||||
capset: capset,
|
||||
updateSpec: setNvidiaGPUs,
|
||||
|
||||
// Register AMD driver if AMD helper binary is present.
|
||||
if _, err := exec.LookPath(amdContainerRuntimeExecutableName); err == nil {
|
||||
registerDeviceDriver("amd", &deviceDriver{
|
||||
capset: capabilities.Set{"gpu": struct{}{}, "amd": struct{}{}},
|
||||
updateSpec: setAMDGPUs,
|
||||
})
|
||||
return
|
||||
}
|
||||
for c := range allNvidiaCaps {
|
||||
nvidiaDriver.capset[string(c)] = struct{}{}
|
||||
}
|
||||
registerDeviceDriver("nvidia", nvidiaDriver)
|
||||
|
||||
// No "gpu" capability
|
||||
}
|
||||
|
||||
func setNvidiaGPUs(s *specs.Spec, dev *deviceInstance) error {
|
||||
Reference in New Issue
Block a user