Files
moby/daemon/create.go
Sebastiaan van Stijn 83510a26b3 api/types: move backend types to daemon/server
The "backend" types in API were designed to decouple the API server
implementation from the daemon, or other parts of the code that
back the API server. This would allow the daemon to evolve (e.g.
functionality moved to different subsystems) without that impacting
the API server's implementation.

Now that the API server is no longer part of the API package (module),
there is no benefit to having it in the API module. The API server
may evolve (and require changes in the backend), which has no direct
relation with the API module (types, responses); the backend definition
is, however, coupled to the API server implementation.

It's worth noting that, while "technically" possible to use the API
server package, and implement an alternative backend implementation,
this has never been a prime objective. The backend definition was
never considered "stable", and we don't expect external users to
(attempt) to use it as such.

This patch moves the backend types to the daemon/server package,
so that they can evolve with the daemon and API server implementation
without that impacting the API module (which we intend to be stable,
following SemVer).

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
2025-07-28 00:03:04 +02:00

393 lines
13 KiB
Go

// TODO(thaJeztah): remove once we are a module; the go:build directive prevents go from downgrading language version to go1.16:
//go:build go1.23
package daemon
import (
"context"
"errors"
"fmt"
"os"
"strings"
"sync"
"time"
"github.com/containerd/log"
"github.com/containerd/platforms"
"github.com/docker/docker/daemon/config"
"github.com/docker/docker/daemon/container"
"github.com/docker/docker/daemon/images"
"github.com/docker/docker/daemon/internal/image"
"github.com/docker/docker/daemon/internal/metrics"
"github.com/docker/docker/daemon/internal/multierror"
"github.com/docker/docker/daemon/internal/otelutil"
"github.com/docker/docker/daemon/server/backend"
"github.com/docker/docker/errdefs"
"github.com/docker/docker/runconfig"
containertypes "github.com/moby/moby/api/types/container"
"github.com/moby/moby/api/types/events"
networktypes "github.com/moby/moby/api/types/network"
"github.com/moby/sys/user"
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/opencontainers/selinux/go-selinux"
"github.com/tonistiigi/go-archvariant"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
)
type createOpts struct {
params backend.ContainerCreateConfig
managed bool
ignoreImagesArgsEscaped bool
}
// CreateManagedContainer creates a container that is managed by a Service
func (daemon *Daemon) CreateManagedContainer(ctx context.Context, params backend.ContainerCreateConfig) (containertypes.CreateResponse, error) {
return daemon.containerCreate(ctx, daemon.config(), createOpts{
params: params,
managed: true,
})
}
// ContainerCreate creates a regular container
func (daemon *Daemon) ContainerCreate(ctx context.Context, params backend.ContainerCreateConfig) (containertypes.CreateResponse, error) {
return daemon.containerCreate(ctx, daemon.config(), createOpts{
params: params,
})
}
// ContainerCreateIgnoreImagesArgsEscaped creates a regular container. This is called from the builder RUN case
// and ensures that we do not take the images ArgsEscaped
func (daemon *Daemon) ContainerCreateIgnoreImagesArgsEscaped(ctx context.Context, params backend.ContainerCreateConfig) (containertypes.CreateResponse, error) {
return daemon.containerCreate(ctx, daemon.config(), createOpts{
params: params,
ignoreImagesArgsEscaped: true,
})
}
func (daemon *Daemon) containerCreate(ctx context.Context, daemonCfg *configStore, opts createOpts) (_ containertypes.CreateResponse, retErr error) {
ctx, span := otel.Tracer("").Start(ctx, "daemon.containerCreate", trace.WithAttributes(
labelsAsOTelAttributes(opts.params.Config.Labels)...,
))
defer func() {
otelutil.RecordStatus(span, retErr)
span.End()
}()
start := time.Now()
if opts.params.Config == nil {
return containertypes.CreateResponse{}, errdefs.InvalidParameter(runconfig.ErrEmptyConfig)
}
// Normalize some defaults. Doing this "ad-hoc" here for now, as there's
// only one field to migrate, but we should consider having a better
// location for this (and decide where in the flow would be most appropriate).
//
// TODO(thaJeztah): we should have a more visible, more canonical location for this.
if opts.params.HostConfig != nil && opts.params.HostConfig.RestartPolicy.Name == "" {
// Set the default restart-policy ("none") if no restart-policy was set.
opts.params.HostConfig.RestartPolicy.Name = containertypes.RestartPolicyDisabled
}
warnings, err := daemon.verifyContainerSettings(daemonCfg, opts.params.HostConfig, opts.params.Config, false)
if err != nil {
return containertypes.CreateResponse{Warnings: warnings}, errdefs.InvalidParameter(err)
}
if opts.params.Platform == nil && opts.params.Config.Image != "" {
img, err := daemon.imageService.GetImage(ctx, opts.params.Config.Image, backend.GetImageOpts{})
if err != nil {
return containertypes.CreateResponse{}, err
}
if img != nil {
p := maximumSpec()
imgPlat := ocispec.Platform{
OS: img.OS,
Architecture: img.Architecture,
Variant: img.Variant,
}
if !images.OnlyPlatformWithFallback(p).Match(imgPlat) {
warnings = append(warnings, fmt.Sprintf("The requested image's platform (%s) does not match the detected host platform (%s) and no specific platform was requested", platforms.FormatAll(imgPlat), platforms.FormatAll(p)))
}
}
}
err = daemon.validateNetworkingConfig(opts.params.NetworkingConfig)
if err != nil {
return containertypes.CreateResponse{Warnings: warnings}, errdefs.InvalidParameter(err)
}
if opts.params.HostConfig == nil {
opts.params.HostConfig = &containertypes.HostConfig{}
}
err = daemon.adaptContainerSettings(&daemonCfg.Config, opts.params.HostConfig)
if err != nil {
return containertypes.CreateResponse{Warnings: warnings}, errdefs.InvalidParameter(err)
}
ctr, err := daemon.create(ctx, &daemonCfg.Config, opts)
if err != nil {
return containertypes.CreateResponse{Warnings: warnings}, err
}
metrics.ContainerActions.WithValues("create").UpdateSince(start)
if warnings == nil {
warnings = make([]string, 0) // Create an empty slice to avoid https://github.com/moby/moby/issues/38222
}
return containertypes.CreateResponse{ID: ctr.ID, Warnings: warnings}, nil
}
var (
containerLabelsFilter []string
containerLabelsFilterOnce sync.Once
)
func labelsAsOTelAttributes(labels map[string]string) []attribute.KeyValue {
containerLabelsFilterOnce.Do(func() {
containerLabelsFilter = strings.Split(os.Getenv("DOCKER_CONTAINER_LABELS_FILTER"), ",")
})
// This env var is a comma-separated list of labels to be included in the
// OTel span attributes. The labels are prefixed with "label." to avoid
// collision with other attributes.
//
// Note that, this is an experimental env var that might be removed
// unceremoniously at any point in time.
attrs := make([]attribute.KeyValue, 0, len(containerLabelsFilter))
for _, k := range containerLabelsFilter {
if v, ok := labels[k]; ok {
attrs = append(attrs, attribute.String("label."+k, v))
}
}
return attrs
}
// Create creates a new container from the given configuration with a given name.
func (daemon *Daemon) create(ctx context.Context, daemonCfg *config.Config, opts createOpts) (retC *container.Container, retErr error) {
var (
ctr *container.Container
img *image.Image
imgManifest *ocispec.Descriptor
imgID image.ID
err error
platform = platforms.DefaultSpec()
)
if opts.params.Config.Image != "" {
img, err = daemon.imageService.GetImage(ctx, opts.params.Config.Image, backend.GetImageOpts{Platform: opts.params.Platform})
if err != nil {
return nil, err
}
if img.Details != nil {
imgManifest = img.Details.ManifestDescriptor
}
platform = img.Platform()
imgID = img.ID()
} else if isWindows {
platform.OS = "linux" // 'scratch' case.
}
// On WCOW, if are not being invoked by the builder to create this container (where
// ignoreImagesArgEscaped will be true) - if the image already has its arguments escaped,
// ensure that this is replicated across to the created container to avoid double-escaping
// of the arguments/command line when the runtime attempts to run the container.
if platform.OS == "windows" && !opts.ignoreImagesArgsEscaped && img != nil && img.RunConfig().ArgsEscaped {
opts.params.Config.ArgsEscaped = true
}
if err := daemon.mergeAndVerifyConfig(opts.params.Config, img); err != nil {
return nil, errdefs.InvalidParameter(err)
}
if err := daemon.mergeAndVerifyLogConfig(&opts.params.HostConfig.LogConfig); err != nil {
return nil, errdefs.InvalidParameter(err)
}
if ctr, err = daemon.newContainer(opts.params.Name, platform, opts.params.Config, opts.params.HostConfig, imgID, opts.managed); err != nil {
return nil, err
}
defer func() {
if retErr != nil {
err = daemon.cleanupContainer(ctr, backend.ContainerRmConfig{
ForceRemove: true,
RemoveVolume: true,
})
if err != nil {
log.G(ctx).WithFields(log.Fields{
"error": err,
"container": ctr.ID,
}).Errorf("failed to cleanup container on create error")
}
}
}()
if err := daemon.setSecurityOptions(daemonCfg, ctr, opts.params.HostConfig); err != nil {
return nil, err
}
ctr.HostConfig.StorageOpt = opts.params.HostConfig.StorageOpt
ctr.ImageManifest = imgManifest
// Set RWLayer for container after mount labels have been set
rwLayer, err := daemon.imageService.CreateLayer(ctr, setupInitLayer(daemon.idMapping.RootPair()))
if err != nil {
return nil, errdefs.System(err)
}
ctr.RWLayer = rwLayer
cuid := os.Getuid()
_, gid := daemon.IdentityMapping().RootPair()
if err := user.MkdirAndChown(ctr.Root, 0o710, cuid, gid); err != nil {
return nil, err
}
if err := user.MkdirAndChown(ctr.CheckpointDir(), 0o700, cuid, os.Getegid()); err != nil {
return nil, err
}
if err := daemon.setHostConfig(ctr, opts.params.HostConfig, opts.params.DefaultReadOnlyNonRecursive); err != nil {
return nil, err
}
if err := daemon.createContainerOSSpecificSettings(ctx, ctr, opts.params.Config, opts.params.HostConfig); err != nil {
return nil, err
}
var endpointsConfigs map[string]*networktypes.EndpointSettings
if opts.params.NetworkingConfig != nil {
endpointsConfigs = opts.params.NetworkingConfig.EndpointsConfig
}
// Make sure NetworkMode has an acceptable value. We do this to ensure
// backwards API compatibility.
if ctr.HostConfig != nil && ctr.HostConfig.NetworkMode == "" {
ctr.HostConfig.NetworkMode = networktypes.NetworkDefault
}
daemon.updateContainerNetworkSettings(ctr, endpointsConfigs)
if err := daemon.register(ctx, ctr); err != nil {
return nil, err
}
metrics.StateCtr.Set(ctr.ID, "stopped")
daemon.LogContainerEvent(ctr, events.ActionCreate)
return ctr, nil
}
func toHostConfigSelinuxLabels(labels []string) []string {
for i, l := range labels {
labels[i] = "label=" + l
}
return labels
}
func (daemon *Daemon) generateSecurityOpt(hostConfig *containertypes.HostConfig) ([]string, error) {
for _, opt := range hostConfig.SecurityOpt {
con := strings.Split(opt, "=")
if con[0] == "label" {
// Caller overrode SecurityOpts
return nil, nil
}
}
ipcMode := hostConfig.IpcMode
pidMode := hostConfig.PidMode
privileged := hostConfig.Privileged
if ipcMode.IsHost() || pidMode.IsHost() || privileged {
return toHostConfigSelinuxLabels(selinux.DisableSecOpt()), nil
}
var ipcLabel []string
var pidLabel []string
ipcContainer := ipcMode.Container()
pidContainer := pidMode.Container()
if ipcContainer != "" {
c, err := daemon.GetContainer(ipcContainer)
if err != nil {
return nil, err
}
ipcLabel, err = selinux.DupSecOpt(c.ProcessLabel)
if err != nil {
return nil, err
}
if pidContainer == "" {
return toHostConfigSelinuxLabels(ipcLabel), err
}
}
if pidContainer != "" {
c, err := daemon.GetContainer(pidContainer)
if err != nil {
return nil, err
}
pidLabel, err = selinux.DupSecOpt(c.ProcessLabel)
if err != nil {
return nil, err
}
if ipcContainer == "" {
return toHostConfigSelinuxLabels(pidLabel), err
}
}
if pidLabel != nil && ipcLabel != nil {
for i := 0; i < len(pidLabel); i++ {
if pidLabel[i] != ipcLabel[i] {
return nil, errors.New("--ipc and --pid containers SELinux labels aren't the same")
}
}
return toHostConfigSelinuxLabels(pidLabel), nil
}
return nil, nil
}
func (daemon *Daemon) mergeAndVerifyConfig(config *containertypes.Config, img *image.Image) error {
if img != nil && img.Config != nil {
if err := merge(config, img.Config); err != nil {
return err
}
}
// Reset the Entrypoint if it is [""]
if len(config.Entrypoint) == 1 && config.Entrypoint[0] == "" {
config.Entrypoint = nil
}
if len(config.Entrypoint) == 0 && len(config.Cmd) == 0 {
return errors.New("no command specified")
}
return nil
}
// validateNetworkingConfig checks whether a container's NetworkingConfig is valid.
func (daemon *Daemon) validateNetworkingConfig(nwConfig *networktypes.NetworkingConfig) error {
if nwConfig == nil {
return nil
}
var errs []error
for k, v := range nwConfig.EndpointsConfig {
if v == nil {
errs = append(errs, fmt.Errorf("invalid config for network %s: EndpointsConfig is nil", k))
continue
}
// The referenced network k might not exist when the container is created, so just ignore the error in that case.
nw, _ := daemon.FindNetwork(k)
if err := validateEndpointSettings(nw, k, v); err != nil {
errs = append(errs, fmt.Errorf("invalid config for network %s: %w", k, err))
}
}
if len(errs) > 0 {
return errdefs.InvalidParameter(multierror.Join(errs...))
}
return nil
}
// maximumSpec returns the distribution platform with maximum compatibility for the current node.
func maximumSpec() ocispec.Platform {
p := platforms.DefaultSpec()
if p.Architecture == "amd64" {
p.Variant = archvariant.AMD64Variant()
}
return p
}