Files
moby/daemon/prune.go
Sebastiaan van Stijn 0df791cb72 explicitly access Container.State instead of through embedded struct
The Container.State struct holds the container's state, and most of
its fields are expected to change dynamically. Some o these state-changes
are explicit, for example, setting the container to be "stopped". Other
state changes can be more explicit, for example due to the containers'
process exiting or being "OOM" killed by the kernel.

The distinction between explicit ("desired") state changes and "state"
("actual state") is sometimes vague; for some properties, we clearly
separated them, for example if a user requested the container to be
stopped or restarted, we store state in the Container object itself;

    HasBeenManuallyStopped   bool // used for unless-stopped restart policy
    HasBeenManuallyRestarted bool `json:"-"` // used to distinguish restart caused by restart policy from the manual one

Other properties are more ambiguous. such as "HasBeenStartedBefore" and
"RestartCount", which are stored on the Container (and persisted to
disk), but may be more related to "actual" state, and likely should
not be persisted;

    RestartCount             int
    HasBeenStartedBefore     bool

Given that (per the above) concurrency must be taken into account, most
changes to the `container.State` struct should be protected; here's where
things get blurry. While the `State` type provides various accessor methods,
only some of them take concurrency into account; for example, [State.IsRunning]
and [State.GetPID] acquire a lock, whereas [State.ExitCodeValue] does not.
Even the (commonly used) [State.StateString] has no locking at all.

The way to handle this is error-prone; [container.State] contains a mutex,
and it's exported. Given that its embedded in the [container.Container]
struct, it's also exposed as an exported mutex for the container. The
assumption here is that by "merging" the two, the caller to acquire a lock
when either the container _or_ its state must be mutated. However, because
some methods on `container.State` handle their own locking, consumers must
be deeply familiar with the internals; if both changes to the `Container`
AND `Container.State` must be made. This gets amplified more as some
(exported!) methods, such as [container.SetRunning] mutate multiple fields,
but don't acquire a lock (so expect the caller to hold one), but their
(also exported) counterpart (e.g. [State.IsRunning]) do.

It should be clear from the above, that this needs some architectural
changes; a clearer separation between "desired" and "actual" state (opening
the potential to update the container's config without manually touching
its `State`), possibly a method to obtain a read-only copy of the current
state (for those querying state), and reviewing which fields belong where
(and should be persisted to disk, or only remain in memory).

This PR preserves the status quo; it makes no structural changes, other
than exposing where we access the container's state. Where previously the
State fields and methods were referred to as "part of the container"
(e.g. `ctr.IsRunning()` or `ctr.Running`), we now explicitly reference
the embedded `State` (`ctr.State.IsRunning`, `ctr.State.Running`).

The exception (for now) is the mutex, which is still referenced through
the embedded struct (`ctr.Lock()` instead of `ctr.State.Lock()`), as this
is (mostly) by design to protect the container, and what's in it (including
its `State`).

[State.IsRunning]: c4afa77157/daemon/container/state.go (L205-L209)
[State.GetPID]: c4afa77157/daemon/container/state.go (L211-L216)
[State.ExitCodeValue]: c4afa77157/daemon/container/state.go (L218-L228)
[State.StateString]: c4afa77157/daemon/container/state.go (L102-L131)
[container.State]: c4afa77157/daemon/container/state.go (L15-L23)
[container.Container]: c4afa77157/daemon/container/container.go (L67-L75)
[container.SetRunning]: c4afa77157/daemon/container/state.go (L230-L277)

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
2025-09-19 16:02:14 +02:00

237 lines
6.6 KiB
Go

package daemon
import (
"context"
"strconv"
"time"
"github.com/containerd/log"
"github.com/moby/moby/api/types/container"
"github.com/moby/moby/api/types/events"
"github.com/moby/moby/api/types/filters"
"github.com/moby/moby/api/types/network"
"github.com/moby/moby/v2/daemon/internal/lazyregexp"
"github.com/moby/moby/v2/daemon/internal/timestamp"
"github.com/moby/moby/v2/daemon/libnetwork"
dnetwork "github.com/moby/moby/v2/daemon/network"
"github.com/moby/moby/v2/daemon/server/backend"
"github.com/moby/moby/v2/errdefs"
"github.com/pkg/errors"
)
var (
// errPruneRunning is returned when a prune request is received while
// one is in progress
errPruneRunning = errdefs.Conflict(errors.New("a prune operation is already running"))
containersAcceptedFilters = map[string]bool{
"label": true,
"label!": true,
"until": true,
}
)
// ContainersPrune removes unused containers
func (daemon *Daemon) ContainersPrune(ctx context.Context, pruneFilters filters.Args) (*container.PruneReport, error) {
if !daemon.pruneRunning.CompareAndSwap(false, true) {
return nil, errPruneRunning
}
defer daemon.pruneRunning.Store(false)
rep := &container.PruneReport{}
// make sure that only accepted filters have been received
err := pruneFilters.Validate(containersAcceptedFilters)
if err != nil {
return nil, err
}
until, err := getUntilFromPruneFilters(pruneFilters)
if err != nil {
return nil, err
}
cfg := &daemon.config().Config
allContainers := daemon.List()
for _, c := range allContainers {
select {
case <-ctx.Done():
log.G(ctx).Debugf("ContainersPrune operation cancelled: %#v", *rep)
return rep, nil
default:
}
if !c.State.IsRunning() {
if !until.IsZero() && c.Created.After(until) {
continue
}
if !matchLabels(pruneFilters, c.Config.Labels) {
continue
}
cSize, _, err := daemon.imageService.GetContainerLayerSize(ctx, c.ID)
if err != nil {
return nil, err
}
// TODO: sets RmLink to true?
err = daemon.containerRm(cfg, c.ID, &backend.ContainerRmConfig{})
if err != nil {
log.G(ctx).Warnf("failed to prune container %s: %v", c.ID, err)
continue
}
if cSize > 0 {
rep.SpaceReclaimed += uint64(cSize)
}
rep.ContainersDeleted = append(rep.ContainersDeleted, c.ID)
}
}
daemon.EventsService.Log(events.ActionPrune, events.ContainerEventType, events.Actor{
Attributes: map[string]string{"reclaimed": strconv.FormatUint(rep.SpaceReclaimed, 10)},
})
return rep, nil
}
// localNetworksPrune removes unused local networks
func (daemon *Daemon) localNetworksPrune(ctx context.Context, pruneFilters dnetwork.Filter) *network.PruneReport {
rep := &network.PruneReport{}
// When the function returns true, the walk will stop.
daemon.netController.WalkNetworks(func(nw *libnetwork.Network) bool {
select {
case <-ctx.Done():
// context cancelled
return true
default:
}
if nw.ConfigOnly() {
return false
}
if !pruneFilters.Matches(nw) {
return false
}
if !nw.IsPruneable() {
return false
}
if len(nw.Endpoints()) > 0 {
return false
}
if err := daemon.DeleteNetwork(nw.ID()); err != nil {
log.G(ctx).Warnf("could not remove local network %s: %v", nw.Name(), err)
return false
}
rep.NetworksDeleted = append(rep.NetworksDeleted, nw.Name())
return false
})
return rep
}
var networkIsInUse = lazyregexp.New(`network ([[:alnum:]]+) is in use`)
// clusterNetworksPrune removes unused cluster networks
func (daemon *Daemon) clusterNetworksPrune(ctx context.Context, pruneFilters dnetwork.Filter) (*network.PruneReport, error) {
rep := &network.PruneReport{}
cluster := daemon.GetCluster()
if !cluster.IsManager() {
return rep, nil
}
networks, err := cluster.GetNetworks(pruneFilters, false)
if err != nil {
return rep, err
}
for _, nw := range networks {
select {
case <-ctx.Done():
return rep, nil
default:
if nw.Ingress {
// Routing-mesh network removal has to be explicitly invoked by user
continue
}
// https://github.com/moby/moby/issues/24186
// `docker network inspect` unfortunately displays ONLY those containers that are local to that node.
// So we try to remove it anyway and check the error
err = cluster.RemoveNetwork(nw.ID)
if err != nil {
// we can safely ignore the "network .. is in use" error
match := networkIsInUse.FindStringSubmatch(err.Error())
if len(match) != 2 || match[1] != nw.ID {
log.G(ctx).Warnf("could not remove cluster network %s: %v", nw.Name, err)
}
continue
}
rep.NetworksDeleted = append(rep.NetworksDeleted, nw.Name)
}
}
return rep, nil
}
// NetworksPrune removes unused networks
func (daemon *Daemon) NetworksPrune(ctx context.Context, filterArgs filters.Args) (*network.PruneReport, error) {
if !daemon.pruneRunning.CompareAndSwap(false, true) {
return nil, errPruneRunning
}
defer daemon.pruneRunning.Store(false)
pruneFilters, err := dnetwork.NewPruneFilter(filterArgs)
if err != nil {
return nil, err
}
rep := &network.PruneReport{}
if clusterRep, err := daemon.clusterNetworksPrune(ctx, pruneFilters); err == nil {
rep.NetworksDeleted = append(rep.NetworksDeleted, clusterRep.NetworksDeleted...)
}
localRep := daemon.localNetworksPrune(ctx, pruneFilters)
rep.NetworksDeleted = append(rep.NetworksDeleted, localRep.NetworksDeleted...)
select {
case <-ctx.Done():
log.G(ctx).Debugf("NetworksPrune operation cancelled: %#v", *rep)
return rep, nil
default:
}
daemon.EventsService.Log(events.ActionPrune, events.NetworkEventType, events.Actor{
Attributes: map[string]string{"reclaimed": "0"},
})
return rep, nil
}
func getUntilFromPruneFilters(pruneFilters filters.Args) (time.Time, error) {
until := time.Time{}
if !pruneFilters.Contains("until") {
return until, nil
}
untilFilters := pruneFilters.Get("until")
if len(untilFilters) > 1 {
return until, errdefs.InvalidParameter(errors.New("more than one until filter specified"))
}
ts, err := timestamp.GetTimestamp(untilFilters[0], time.Now())
if err != nil {
return until, errdefs.InvalidParameter(err)
}
seconds, nanoseconds, err := timestamp.ParseTimestamps(ts, 0)
if err != nil {
return until, errdefs.InvalidParameter(err)
}
until = time.Unix(seconds, nanoseconds)
return until, nil
}
func matchLabels(pruneFilters filters.Args, labels map[string]string) bool {
if !pruneFilters.MatchKVList("label", labels) {
return false
}
// By default MatchKVList will return true if field (like 'label!') does not exist
// So we have to add additional Contains("label!") check
if pruneFilters.Contains("label!") {
if pruneFilters.MatchKVList("label!", labels) {
return false
}
}
return true
}