mirror of
https://github.com/moby/moby.git
synced 2026-01-11 10:41:43 +00:00
The Container.State struct holds the container's state, and most of
its fields are expected to change dynamically. Some of these state-changes
are explicit, for example, setting the container to be "stopped". Other
state changes can be more implicit, for example due to the container's
process exiting or being "OOM" killed by the kernel.
The distinction between explicit ("desired") state changes and "state"
("actual state") is sometimes vague; for some properties, we clearly
separated them, for example if a user requested the container to be
stopped or restarted, we store state in the Container object itself;
HasBeenManuallyStopped bool // used for unless-stopped restart policy
HasBeenManuallyRestarted bool `json:"-"` // used to distinguish restart caused by restart policy from the manual one
Other properties are more ambiguous, such as "HasBeenStartedBefore" and
"RestartCount", which are stored on the Container (and persisted to
disk), but may be more related to "actual" state, and likely should
not be persisted;
RestartCount int
HasBeenStartedBefore bool
Given that (per the above) concurrency must be taken into account, most
changes to the `container.State` struct should be protected; here's where
things get blurry. While the `State` type provides various accessor methods,
only some of them take concurrency into account; for example, [State.IsRunning]
and [State.GetPID] acquire a lock, whereas [State.ExitCodeValue] does not.
Even the (commonly used) [State.StateString] has no locking at all.
The way to handle this is error-prone; [container.State] contains a mutex,
and it's exported. Given that it's embedded in the [container.Container]
struct, it's also exposed as an exported mutex for the container. The
assumption here is that by "merging" the two, the caller must acquire a lock
when either the container _or_ its state must be mutated. However, because
some methods on `container.State` handle their own locking, consumers must
be deeply familiar with the internals; if both changes to the `Container`
AND `Container.State` must be made. This gets amplified more as some
(exported!) methods, such as [container.SetRunning] mutate multiple fields,
but don't acquire a lock (so expect the caller to hold one), but their
(also exported) counterpart (e.g. [State.IsRunning]) does.
It should be clear from the above, that this needs some architectural
changes; a clearer separation between "desired" and "actual" state (opening
the potential to update the container's config without manually touching
its `State`), possibly a method to obtain a read-only copy of the current
state (for those querying state), and reviewing which fields belong where
(and should be persisted to disk, or only remain in memory).
This PR preserves the status quo; it makes no structural changes, other
than exposing where we access the container's state. Where previously the
State fields and methods were referred to as "part of the container"
(e.g. `ctr.IsRunning()` or `ctr.Running`), we now explicitly reference
the embedded `State` (`ctr.State.IsRunning`, `ctr.State.Running`).
The exception (for now) is the mutex, which is still referenced through
the embedded struct (`ctr.Lock()` instead of `ctr.State.Lock()`), as this
is (mostly) by design to protect the container, and what's in it (including
its `State`).
[State.IsRunning]: c4afa77157/daemon/container/state.go (L205-L209)
[State.GetPID]: c4afa77157/daemon/container/state.go (L211-L216)
[State.ExitCodeValue]: c4afa77157/daemon/container/state.go (L218-L228)
[State.StateString]: c4afa77157/daemon/container/state.go (L102-L131)
[container.State]: c4afa77157/daemon/container/state.go (L15-L23)
[container.Container]: c4afa77157/daemon/container/container.go (L67-L75)
[container.SetRunning]: c4afa77157/daemon/container/state.go (L230-L277)
Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
379 lines
11 KiB
Go
379 lines
11 KiB
Go
package daemon
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"runtime"
|
|
"strings"
|
|
"time"
|
|
|
|
containerd "github.com/containerd/containerd/v2/client"
|
|
"github.com/containerd/log"
|
|
containertypes "github.com/moby/moby/api/types/container"
|
|
"github.com/moby/moby/api/types/events"
|
|
"github.com/moby/moby/v2/daemon/container"
|
|
"github.com/moby/moby/v2/daemon/internal/stream"
|
|
"github.com/moby/moby/v2/daemon/server/backend"
|
|
"github.com/moby/moby/v2/errdefs"
|
|
"github.com/moby/moby/v2/pkg/pools"
|
|
"github.com/moby/sys/signal"
|
|
"github.com/moby/term"
|
|
"github.com/opencontainers/runtime-spec/specs-go"
|
|
"github.com/pkg/errors"
|
|
)
|
|
|
|
func (daemon *Daemon) registerExecCommand(container *container.Container, config *container.ExecConfig) {
|
|
// Storing execs in container in order to kill them gracefully whenever the container is stopped or removed.
|
|
container.ExecCommands.Add(config.ID, config)
|
|
// Storing execs in daemon for easy access via Engine API.
|
|
daemon.execCommands.Add(config.ID, config)
|
|
}
|
|
|
|
// ExecExists looks up the exec instance and returns a bool if it exists or not.
|
|
// It will also return the error produced by `getConfig`
|
|
func (daemon *Daemon) ExecExists(name string) (bool, error) {
|
|
if _, err := daemon.getExecConfig(name); err != nil {
|
|
return false, err
|
|
}
|
|
return true, nil
|
|
}
|
|
|
|
// getExecConfig looks up the exec instance by name. If the container associated
|
|
// with the exec instance is stopped or paused, it will return an error.
|
|
func (daemon *Daemon) getExecConfig(name string) (*container.ExecConfig, error) {
|
|
ec := daemon.execCommands.Get(name)
|
|
if ec == nil {
|
|
return nil, errExecNotFound(name)
|
|
}
|
|
|
|
// If the exec is found but its container is not in the daemon's list of
|
|
// containers then it must have been deleted, in which case instead of
|
|
// saying the container isn't running, we should return a 404 so that
|
|
// the user sees the same error now that they will after the
|
|
// 5 minute clean-up loop is run which erases old/dead execs.
|
|
ctr := daemon.containers.Get(ec.Container.ID)
|
|
if ctr == nil {
|
|
return nil, containerNotFound(name)
|
|
}
|
|
if !ctr.State.IsRunning() {
|
|
return nil, errNotRunning(ctr.ID)
|
|
}
|
|
if ctr.State.IsPaused() {
|
|
return nil, errExecPaused(ctr.ID)
|
|
}
|
|
if ctr.State.IsRestarting() {
|
|
return nil, errContainerIsRestarting(ctr.ID)
|
|
}
|
|
return ec, nil
|
|
}
|
|
|
|
func (daemon *Daemon) unregisterExecCommand(container *container.Container, execConfig *container.ExecConfig) {
|
|
container.ExecCommands.Delete(execConfig.ID)
|
|
daemon.execCommands.Delete(execConfig.ID)
|
|
}
|
|
|
|
func (daemon *Daemon) getActiveContainer(name string) (*container.Container, error) {
|
|
ctr, err := daemon.GetContainer(name)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if !ctr.State.IsRunning() {
|
|
return nil, errNotRunning(ctr.ID)
|
|
}
|
|
if ctr.State.IsPaused() {
|
|
return nil, errExecPaused(name)
|
|
}
|
|
if ctr.State.IsRestarting() {
|
|
return nil, errContainerIsRestarting(ctr.ID)
|
|
}
|
|
return ctr, nil
|
|
}
|
|
|
|
// ContainerExecCreate sets up an exec in a running container.
|
|
func (daemon *Daemon) ContainerExecCreate(name string, options *containertypes.ExecCreateRequest) (string, error) {
|
|
cntr, err := daemon.getActiveContainer(name)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
if user := options.User; user != "" {
|
|
// Lookup the user inside the container before starting the exec to
|
|
// allow for an early exit.
|
|
//
|
|
// Note that "technically" this check may have some TOCTOU issues,
|
|
// because '/etc/passwd' and '/etc/groups' may be mutated by the
|
|
// container in between creating the exec and starting it.
|
|
//
|
|
// This is very likely a corner-case, but something we can consider
|
|
// changing in future (either allow creating an invalid exec, and
|
|
// checking before starting, or checking both before create and
|
|
// before start).
|
|
if _, err := getUser(cntr, user); err != nil {
|
|
return "", errdefs.InvalidParameter(err)
|
|
}
|
|
}
|
|
|
|
keys := []byte{}
|
|
if options.DetachKeys != "" {
|
|
keys, err = term.ToBytes(options.DetachKeys)
|
|
if err != nil {
|
|
err = fmt.Errorf("Invalid escape keys (%s) provided", options.DetachKeys)
|
|
return "", err
|
|
}
|
|
}
|
|
|
|
execConfig := container.NewExecConfig(cntr)
|
|
execConfig.OpenStdin = options.AttachStdin
|
|
execConfig.OpenStdout = options.AttachStdout
|
|
execConfig.OpenStderr = options.AttachStderr
|
|
execConfig.DetachKeys = keys
|
|
execConfig.Entrypoint, execConfig.Args = options.Cmd[0], options.Cmd[1:]
|
|
execConfig.Tty = options.Tty
|
|
execConfig.ConsoleSize = options.ConsoleSize
|
|
execConfig.Privileged = options.Privileged
|
|
execConfig.User = options.User
|
|
execConfig.WorkingDir = options.WorkingDir
|
|
|
|
linkedEnv, err := daemon.setupLinkedContainers(cntr)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
execConfig.Env = container.ReplaceOrAppendEnvValues(cntr.CreateDaemonEnvironment(options.Tty, linkedEnv), options.Env)
|
|
if execConfig.User == "" {
|
|
execConfig.User = cntr.Config.User
|
|
}
|
|
if execConfig.WorkingDir == "" {
|
|
execConfig.WorkingDir = cntr.Config.WorkingDir
|
|
}
|
|
|
|
daemon.registerExecCommand(cntr, execConfig)
|
|
daemon.LogContainerEventWithAttributes(cntr, events.Action(string(events.ActionExecCreate)+": "+execConfig.Entrypoint+" "+strings.Join(execConfig.Args, " ")), map[string]string{
|
|
"execID": execConfig.ID,
|
|
})
|
|
|
|
return execConfig.ID, nil
|
|
}
|
|
|
|
// ContainerExecStart starts a previously set up exec instance. The
// std streams are set up.
// If ctx is cancelled, the process is terminated.
func (daemon *Daemon) ContainerExecStart(ctx context.Context, name string, options backend.ExecStartConfig) (retErr error) {
	var (
		cStdin           io.ReadCloser
		cStdout, cStderr io.Writer
	)

	ec, err := daemon.getExecConfig(name)
	if err != nil {
		return err
	}

	// Claim the exec for this start attempt under the exec's own lock:
	// an exec that has an exit code already ran, and one marked Running
	// is being started by a concurrent caller.
	ec.Lock()
	if ec.ExitCode != nil {
		ec.Unlock()
		return errdefs.Conflict(fmt.Errorf("exec command %s has already run", ec.ID))
	}

	if ec.Running {
		ec.Unlock()
		return errdefs.Conflict(fmt.Errorf("exec command %s is already running", ec.ID))
	}
	ec.Running = true
	ec.Unlock()

	log.G(ctx).Debugf("starting exec command %s in container %s", ec.ID, ec.Container.ID)
	daemon.LogContainerEventWithAttributes(ec.Container, events.Action(string(events.ActionExecStart)+": "+ec.Entrypoint+" "+strings.Join(ec.Args, " ")), map[string]string{
		"execID": ec.ID,
	})

	// On any error below, roll back the claim made above: unregister the
	// exec from the container, clear Running, record a fallback exit
	// code, and close the attached streams.
	defer func() {
		if retErr != nil {
			ec.Lock()
			ec.Container.ExecCommands.Delete(ec.ID)
			ec.Running = false
			if ec.ExitCode == nil {
				// default to `126` (`EACCES`) if we fail to start
				// the exec without setting an exit code.
				exitCode := exitEaccess
				ec.ExitCode = &exitCode
			}
			if err := ec.CloseStreams(); err != nil {
				log.G(ctx).Errorf("failed to cleanup exec %s streams: %s", ec.Container.ID, err)
			}
			ec.Unlock()
		}
	}()

	// Wire up the caller-provided streams to the exec, honoring which
	// streams were requested at create time (OpenStdin/Stdout/Stderr).
	if ec.OpenStdin && options.Stdin != nil {
		// Pump the caller's stdin through a pipe so the copy goroutine
		// can close the write side when the caller's stream ends.
		r, w := io.Pipe()
		go func() {
			defer func() {
				log.G(ctx).Debug("Closing buffered stdin pipe")
				_ = w.Close()
			}()
			_, _ = pools.Copy(w, options.Stdin)
		}()
		cStdin = r
	}
	if ec.OpenStdout {
		cStdout = options.Stdout
	}
	if ec.OpenStderr {
		cStderr = options.Stderr
	}

	if ec.OpenStdin {
		ec.StreamConfig.NewInputPipes()
	} else {
		ec.StreamConfig.NewNopInputPipe()
	}

	// Build the OCI process spec for the exec. On non-Windows platforms
	// the container's stored spec is loaded from containerd so the exec
	// inherits the container's process attributes before the exec-config
	// overrides are applied below.
	p := &specs.Process{}
	if runtime.GOOS != "windows" {
		// TODO(thaJeztah): also enable on Windows;
		// This was added in https://github.com/moby/moby/commit/7603c22c7365d7d7150597fe396e0707d6e561da,
		// which mentions that it failed on Windows "Probably needs to wait for container to be in running state"
		ctr, err := daemon.containerdClient.LoadContainer(ctx, ec.Container.ID)
		if err != nil {
			return err
		}
		md, err := ctr.Info(ctx, containerd.WithoutRefreshedMetadata)
		if err != nil {
			return err
		}

		// Technically, this should be a [specs.Spec], but we're only
		// interested in the Process field.
		spec := struct{ Process *specs.Process }{p}
		if err := json.Unmarshal(md.Spec.GetValue(), &spec); err != nil {
			return err
		}
	}

	// merge/override properties of the container's process with the exec-config.
	p.Args = append([]string{ec.Entrypoint}, ec.Args...)
	p.Env = ec.Env
	p.Cwd = ec.WorkingDir
	p.Terminal = ec.Tty

	consoleSize := options.ConsoleSize
	// If size isn't specified for start, use the one provided for create
	if consoleSize == nil {
		consoleSize = ec.ConsoleSize
	}
	if p.Terminal && consoleSize != nil {
		p.ConsoleSize = &specs.Box{
			Height: consoleSize[0],
			Width:  consoleSize[1],
		}
	}

	if p.Cwd == "" {
		p.Cwd = "/"
	}

	daemonCfg := &daemon.config().Config
	if err := daemon.execSetPlatformOpt(ctx, daemonCfg, ec, p); err != nil {
		return err
	}

	attachConfig := stream.AttachConfig{
		TTY:        ec.Tty,
		UseStdin:   cStdin != nil,
		UseStdout:  cStdout != nil,
		UseStderr:  cStderr != nil,
		Stdin:      cStdin,
		Stdout:     cStdout,
		Stderr:     cStderr,
		DetachKeys: ec.DetachKeys,
		CloseStdin: true,
	}
	ec.StreamConfig.AttachStreams(&attachConfig)
	// using context.Background() so that attachErr does not race ctx.Done().
	copyCtx, cancel := context.WithCancel(context.Background())
	defer cancel()
	attachErr := ec.StreamConfig.CopyStreams(copyCtx, &attachConfig)

	// The container lock (not the exec lock) guards the running task;
	// hold it only for the lookup.
	ec.Container.Lock()
	tsk, err := ec.Container.GetRunningTask()
	ec.Container.Unlock()
	if err != nil {
		return err
	}

	// Synchronize with libcontainerd event loop
	ec.Lock()
	ec.Process, err = tsk.Exec(ctx, ec.ID, p, cStdin != nil, ec.InitializeStdio)
	// the exec context should be ready, or error happened.
	// close the chan to notify readiness
	close(ec.Started)
	if err != nil {
		defer ec.Unlock()
		return setExitCodeFromError(ec.SetExitCode, err)
	}
	ec.Unlock()

	// Block until either the caller's context is cancelled (kill the exec
	// process) or the stream copy finishes (exec ended or client detached).
	select {
	case <-ctx.Done():
		logger := log.G(ctx).WithFields(log.Fields{
			"container": ec.Container.ID,
			"execID":    ec.ID,
		})
		logger.Debug("Sending KILL signal to container process")
		// Use a fresh 30s-timeout context: ctx is already cancelled here.
		sigCtx, cancelFunc := context.WithTimeout(context.Background(), 30*time.Second)
		defer cancelFunc()
		if err := ec.Process.Kill(sigCtx, signal.SignalMap["KILL"]); err != nil {
			logger.WithError(err).Error("Could not send KILL signal to container process")
		}
		return ctx.Err()
	case err := <-attachErr:
		if err != nil {
			// A detach via the configured detach keys is not an error;
			// anything else is surfaced as a system error.
			if _, ok := err.(term.EscapeError); !ok {
				return errdefs.System(errors.Wrap(err, "exec attach failed"))
			}
			daemon.LogContainerEventWithAttributes(ec.Container, events.ActionExecDetach, map[string]string{
				"execID": ec.ID,
			})
		}
	}
	return nil
}
|
|
|
|
// execCommandGC runs a ticker to clean up the daemon references
|
|
// of exec configs that are no longer part of the container.
|
|
func (daemon *Daemon) execCommandGC() {
|
|
for range time.Tick(5 * time.Minute) {
|
|
var (
|
|
cleaned int
|
|
liveExecCommands = daemon.containerExecIds()
|
|
)
|
|
for id, config := range daemon.execCommands.Commands() {
|
|
if config.CanRemove {
|
|
cleaned++
|
|
daemon.execCommands.Delete(id)
|
|
} else {
|
|
if _, exists := liveExecCommands[id]; !exists {
|
|
config.CanRemove = true
|
|
}
|
|
}
|
|
}
|
|
if cleaned > 0 {
|
|
log.G(context.TODO()).Debugf("clean %d unused exec commands", cleaned)
|
|
}
|
|
}
|
|
}
|
|
|
|
// containerExecIds returns a list of all the current exec ids that are in use
|
|
// and running inside a container.
|
|
func (daemon *Daemon) containerExecIds() map[string]struct{} {
|
|
ids := map[string]struct{}{}
|
|
for _, c := range daemon.containers.List() {
|
|
for _, id := range c.ExecCommands.List() {
|
|
ids[id] = struct{}{}
|
|
}
|
|
}
|
|
return ids
|
|
}
|