moby/daemon/kill.go
Sebastiaan van Stijn 0df791cb72 explicitly access Container.State instead of through embedded struct
The Container.State struct holds the container's state, and most of
its fields are expected to change dynamically. Some of these state changes
are explicit, for example, setting the container to be "stopped". Other
state changes are implicit, for example due to the container's process
exiting or being "OOM"-killed by the kernel.

The distinction between explicit ("desired") state changes and "actual"
state is sometimes vague; for some properties we clearly separated them:
for example, if a user requested the container to be stopped or restarted,
we store that on the Container object itself;

    HasBeenManuallyStopped   bool // used for unless-stopped restart policy
    HasBeenManuallyRestarted bool `json:"-"` // used to distinguish restart caused by restart policy from the manual one

Other properties are more ambiguous, such as "HasBeenStartedBefore" and
"RestartCount", which are stored on the Container (and persisted to
disk), but may be more related to "actual" state, and likely should
not be persisted;

    RestartCount             int
    HasBeenStartedBefore     bool
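
For context, here's a simplified sketch (not the exact definitions) of how
these pieces fit together today; `State` is embedded, so its fields, methods,
and mutex are all promoted onto `Container`:

    // Simplified, illustrative sketch; not the exact moby definitions.
    type Container struct {
        *State                   // embedded: ctr.Running, ctr.IsRunning(), ctr.Lock() all resolve here
        HasBeenManuallyStopped   bool
        HasBeenManuallyRestarted bool `json:"-"`
        RestartCount             int
        HasBeenStartedBefore     bool
        // ... config, mounts, etc.
    }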

Given that (per the above) concurrency must be taken into account, most
changes to the `container.State` struct should be protected; here's where
things get blurry. While the `State` type provides various accessor methods,
only some of them take concurrency into account; for example, [State.IsRunning]
and [State.GetPID] acquire a lock, whereas [State.ExitCodeValue] does not.
Even the (commonly used) [State.StateString] has no locking at all.
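
To illustrate the inconsistency, a rough sketch of the two accessor styles
(not the verbatim moby code; type and field names are simplified):

    // Illustrative only: one accessor guards the field with the mutex,
    // the other reads it directly and relies on the caller's discipline.
    type State struct {
        sync.Mutex
        Running  bool
        ExitCode int
    }

    func (s *State) IsRunning() bool {
        s.Lock()
        defer s.Unlock()
        return s.Running
    }

    func (s *State) ExitCodeValue() int {
        return s.ExitCode // no locking; racy if the state is being mutated concurrently
    }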

The way this is currently handled is error-prone; [container.State] contains
a mutex, and it's exported. Given that it's embedded in the [container.Container]
struct, it's also exposed as an exported mutex for the container. The
assumption here is that by "merging" the two, the caller acquires a single
lock when either the container _or_ its state must be mutated. However, because
some methods on `container.State` handle their own locking, consumers must
be deeply familiar with the internals when changes to both the `Container`
AND `Container.State` must be made. This gets amplified further because some
(exported!) methods, such as [container.SetRunning], mutate multiple fields
but don't acquire a lock (and so expect the caller to hold one), whereas their
(also exported) counterparts (e.g. [State.IsRunning]) do.
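
Sketching the mismatch (signatures and fields simplified, not the actual code):
a setter that assumes the caller holds the lock sits next to a getter that takes
the lock itself, so mixing them in one critical section is an easy way to deadlock:

    // SetRunning-style setter: mutates several fields, locking left to the caller.
    func (s *State) SetRunning(pid int) {
        s.Running = true
        s.Paused = false
        s.Pid = pid
    }

    // Caller-side pattern this forces:
    //
    //     ctr.Lock()
    //     ctr.SetRunning(pid)   // fine: we hold the lock
    //     ok := ctr.IsRunning() // deadlock: IsRunning calls Lock() on the same non-reentrant mutex
    //     ctr.Unlock()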

It should be clear from the above that this needs some architectural
changes; a clearer separation between "desired" and "actual" state (opening
the potential to update the container's config without manually touching
its `State`), possibly a method to obtain a read-only copy of the current
state (for those querying state), and reviewing which fields belong where
(and should be persisted to disk, or only remain in memory).
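
One possible shape for that read-only copy (purely hypothetical; names and
fields are made up for illustration):

    // StateSnapshot is a hypothetical value-type copy of the mutable state.
    type StateSnapshot struct {
        Running, Paused, Restarting bool
        ExitCode                    int
    }

    // Snapshot copies the fields under the lock; callers get a consistent
    // view without ever touching the mutex themselves.
    func (s *State) Snapshot() StateSnapshot {
        s.Lock()
        defer s.Unlock()
        return StateSnapshot{
            Running:    s.Running,
            Paused:     s.Paused,
            Restarting: s.Restarting,
            ExitCode:   s.ExitCode,
        }
    }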

This PR preserves the status quo; it makes no structural changes, other
than making it explicit where we access the container's state. Where previously
the State fields and methods were referred to as "part of the container"
(e.g. `ctr.IsRunning()` or `ctr.Running`), we now explicitly reference
the embedded `State` (`ctr.State.IsRunning()`, `ctr.State.Running`).

The exception (for now) is the mutex, which is still referenced through
the embedded struct (`ctr.Lock()` instead of `ctr.State.Lock()`), as this
is (mostly) by design: the lock protects the container and what's in it
(including its `State`).
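
Concretely, the call-site pattern (as used in `killWithSignal` below) now
reads roughly as follows (abridged):

    // Before: fields and methods reached through the embedding.
    ctr.Lock()
    paused := ctr.Paused
    ctr.Unlock()

    // After: explicit State access; the mutex is still reached via the embedding.
    ctr.Lock()
    paused := ctr.State.Paused
    ctr.Unlock()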

[State.IsRunning]: c4afa77157/daemon/container/state.go (L205-L209)
[State.GetPID]: c4afa77157/daemon/container/state.go (L211-L216)
[State.ExitCodeValue]: c4afa77157/daemon/container/state.go (L218-L228)
[State.StateString]: c4afa77157/daemon/container/state.go (L102-L131)
[container.State]: c4afa77157/daemon/container/state.go (L15-L23)
[container.Container]: c4afa77157/daemon/container/container.go (L67-L75)
[container.SetRunning]: c4afa77157/daemon/container/state.go (L230-L277)

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
2025-09-19 16:02:14 +02:00


package daemon

import (
	"context"
	"fmt"
	"runtime"
	"strconv"
	"syscall"
	"time"

	cerrdefs "github.com/containerd/errdefs"
	"github.com/containerd/log"
	containertypes "github.com/moby/moby/api/types/container"
	"github.com/moby/moby/api/types/events"
	containerpkg "github.com/moby/moby/v2/daemon/container"
	"github.com/moby/moby/v2/errdefs"
	"github.com/moby/sys/signal"
	"github.com/pkg/errors"
)

type errNoSuchProcess struct {
	pid    int
	signal syscall.Signal
}

func (e errNoSuchProcess) Error() string {
	return fmt.Sprintf("cannot kill process (pid=%d) with signal %d: no such process", e.pid, e.signal)
}

func (errNoSuchProcess) NotFound() {}
// ContainerKill sends signal to the container
// If no signal is given, then Kill with SIGKILL and wait
// for the container to exit.
// If a signal is given, then just send it to the container and return.
func (daemon *Daemon) ContainerKill(name, stopSignal string) error {
	var (
		err error
		sig = syscall.SIGKILL
	)
	if stopSignal != "" {
		sig, err = signal.ParseSignal(stopSignal)
		if err != nil {
			return errdefs.InvalidParameter(err)
		}
		if !signal.ValidSignalForPlatform(sig) {
			return errdefs.InvalidParameter(errors.Errorf("the %s daemon does not support signal %d", runtime.GOOS, sig))
		}
	}
	container, err := daemon.GetContainer(name)
	if err != nil {
		return err
	}

	if sig == syscall.SIGKILL {
		// perform regular Kill (SIGKILL + wait())
		return daemon.Kill(container)
	}
	return daemon.killWithSignal(container, sig)
}
// killWithSignal sends the container the given signal. This wrapper for the
// host specific kill command prepares the container before attempting
// to send the signal. An error is returned if the container is paused
// or not running, or if there is a problem returned from the
// underlying kill command.
func (daemon *Daemon) killWithSignal(container *containerpkg.Container, stopSignal syscall.Signal) error {
	log.G(context.TODO()).WithFields(log.Fields{
		"signal":    int(stopSignal),
		"container": container.ID,
	}).Debugf("sending signal %[1]d (%[1]s) to container", stopSignal)

	container.Lock()
	defer container.Unlock()

	task, err := container.GetRunningTask()
	if err != nil {
		return err
	}

	var unpause bool
	if container.Config.StopSignal != "" && stopSignal != syscall.SIGKILL {
		containerStopSignal, err := signal.ParseSignal(container.Config.StopSignal)
		if err != nil {
			return err
		}
		if containerStopSignal == stopSignal {
			container.ExitOnNext()
			unpause = container.State.Paused
		}
	} else {
		container.ExitOnNext()
		unpause = container.State.Paused
	}

	if !daemon.IsShuttingDown() {
		container.HasBeenManuallyStopped = true
		if err := container.CheckpointTo(context.WithoutCancel(context.TODO()), daemon.containersReplica); err != nil {
			log.G(context.TODO()).WithFields(log.Fields{
				"error":     err,
				"container": container.ID,
			}).Warn("error checkpointing container state")
		}
	}

	// if the container is currently restarting we do not need to send the signal
	// to the process. Telling the monitor that it should exit on its next event
	// loop is enough
	if container.State.Restarting {
		return nil
	}

	if err := task.Kill(context.Background(), stopSignal); err != nil {
		if cerrdefs.IsNotFound(err) {
			unpause = false
			log.G(context.TODO()).WithFields(log.Fields{
				"error":     err,
				"container": container.ID,
				"action":    "kill",
			}).Debug("container kill failed because of 'container not found' or 'no such process'")

			go func() {
				// We need to clean up this container but it is possible there is a case where we hit here before the exit event is processed
				// but after it was fired off.
				// So let's wait the container's stop timeout amount of time to see if the event is eventually processed.
				// Doing this has the side effect that if no event was ever going to come we are waiting a longer period of time unnecessarily.
				// But this prevents race conditions in processing the container.
				ctx, cancel := context.WithTimeout(context.TODO(), time.Duration(container.StopTimeout())*time.Second)
				defer cancel()
				s := <-container.State.Wait(ctx, containertypes.WaitConditionNotRunning)
				if s.Err() != nil {
					if err := daemon.handleContainerExit(container, nil); err != nil {
						log.G(context.TODO()).WithFields(log.Fields{
							"error":     err,
							"container": container.ID,
							"action":    "kill",
						}).Warn("error while handling container exit")
					}
				}
			}()
		} else {
			return errors.Wrapf(err, "Cannot kill container %s", container.ID)
		}
	}

	if unpause {
		// above kill signal will be sent once resume is finished
		if err := task.Resume(context.Background()); err != nil {
			log.G(context.TODO()).WithFields(log.Fields{
				"error":     err,
				"container": container.ID,
				"action":    "kill",
			}).Warn("cannot unpause container")
		}
	}

	daemon.LogContainerEventWithAttributes(container, events.ActionKill, map[string]string{
		"signal": strconv.Itoa(int(stopSignal)),
	})
	return nil
}
// Kill forcefully terminates a container.
func (daemon *Daemon) Kill(container *containerpkg.Container) error {
	if !container.State.IsRunning() {
		return errNotRunning(container.ID)
	}

	// 1. Send SIGKILL
	if err := daemon.killPossiblyDeadProcess(container, syscall.SIGKILL); err != nil {
		// kill failed, check if process is no longer running.
		if errors.As(err, &errNoSuchProcess{}) {
			return nil
		}
	}

	waitTimeout := 10 * time.Second
	if runtime.GOOS == "windows" {
		waitTimeout = 75 * time.Second // runhcs can be sloooooow.
	}

	ctx, cancel := context.WithTimeout(context.Background(), waitTimeout)
	defer cancel()

	status := <-container.State.Wait(ctx, containertypes.WaitConditionNotRunning)
	if status.Err() == nil {
		return nil
	}

	log.G(ctx).WithFields(log.Fields{"error": status.Err(), "container": container.ID}).Warnf("Container failed to exit within %v of kill - trying direct SIGKILL", waitTimeout)

	if err := killProcessDirectly(container); err != nil {
		if errors.As(err, &errNoSuchProcess{}) {
			return nil
		}
		return err
	}

	// wait for container to exit one last time, if it doesn't then kill didn't work, so return error
	ctx2, cancel2 := context.WithTimeout(context.Background(), 2*time.Second)
	defer cancel2()
	if status := <-container.State.Wait(ctx2, containertypes.WaitConditionNotRunning); status.Err() != nil {
		return errors.New("tried to kill container, but did not receive an exit event")
	}
	return nil
}
// killPossiblyDeadProcess is a wrapper around killWithSignal() suppressing "no such process" errors.
func (daemon *Daemon) killPossiblyDeadProcess(container *containerpkg.Container, sig syscall.Signal) error {
	err := daemon.killWithSignal(container, sig)
	if cerrdefs.IsNotFound(err) {
		err = errNoSuchProcess{container.State.GetPID(), sig}
		log.G(context.TODO()).Debug(err)
		return err
	}
	return err
}