Files
moby/integration/container/remove_test.go
Albin Kerouanton ec9315cd4f daemon: clean up dead containers on start
Stopping the Engine while a container with autoremove set is running may
leave behind dead containers on disk. These containers aren't reclaimed
on next start, appear as "dead" in `docker ps -a` and can't be
inspected or removed by the user.

This bug has existed for a long time but became user-visible with
9f5f4f5a42. Prior to that commit,
containers with no rwlayer weren't added to the in-memory viewdb, so
they weren't visible in `docker ps -a`. However, some dangling files
would still live on disk (e.g. folder in /var/lib/docker/containers,
mount points, etc).

The underlying issue is that when the daemon stops, it tries to stop all
running containers and then closes the containerd client. This leaves a
small window of time where the Engine might receive 'task stop' events
from containerd, and trigger autoremove. If the containerd client is
closed in parallel, the Engine is unable to complete the removal,
leaving the container in the 'dead' state. In such a case, the Engine logs the
following error:

    cannot remove container "bcbc98b4f5c2b072eb3c4ca673fa1c222d2a8af00bf58eae0f37085b9724ea46": Canceled: grpc: the client connection is closing: context canceled

Solving the underlying issue would require complex changes to the
shutdown sequence. Moreover, the same issue could also happen if the
daemon crashes while it deletes a container. Thus, add a cleanup step
on daemon startup to remove these dead containers.

Signed-off-by: Albin Kerouanton <albin.kerouanton@docker.com>
2025-12-11 13:40:23 -06:00

134 lines
4.1 KiB
Go

package container
import (
"os"
"testing"
cerrdefs "github.com/containerd/errdefs"
containertypes "github.com/moby/moby/api/types/container"
"github.com/moby/moby/client"
"github.com/moby/moby/v2/integration/internal/container"
"github.com/moby/moby/v2/internal/testutil/daemon"
"gotest.tools/v3/assert"
is "gotest.tools/v3/assert/cmp"
"gotest.tools/v3/fs"
"gotest.tools/v3/poll"
"gotest.tools/v3/skip"
)
// getPrefixAndSlashFromDaemonPlatform returns the path prefix and the path
// separator used to build absolute container paths for the daemon under test.
//
// When the daemon reports OSType "windows" it returns "c:" and a backslash;
// for every other OSType it returns an empty prefix and a forward slash.
func getPrefixAndSlashFromDaemonPlatform() (prefix, slash string) {
	switch testEnv.DaemonInfo.OSType {
	case "windows":
		prefix, slash = "c:", `\`
	default:
		prefix, slash = "", "/"
	}
	return prefix, slash
}
// TestRemoveContainerWithRemovedVolume is the test case for #5244: `docker rm`
// fails if bind dir doesn't exist anymore.
//
// It runs a short-lived container with a bind mount backed by a temporary
// directory, waits for the container to exit, deletes the directory, and then
// verifies that removing the container (with RemoveVolumes set) still
// succeeds and that the container can no longer be inspected.
//
// The test is skipped for remote daemons.
// NOTE(review): presumably because the bind source is created and deleted on
// the test's own filesystem — confirm.
func TestRemoveContainerWithRemovedVolume(t *testing.T) {
	skip.If(t, testEnv.IsRemoteDaemon)

	ctx := setupTest(t)
	apiClient := testEnv.APIClient()

	prefix, slash := getPrefixAndSlashFromDaemonPlatform()
	mountTarget := prefix + slash + "test"

	bindDir := fs.NewDir(t, "test-rm-container-with-removed-volume", fs.WithMode(0o755))
	hostPath := bindDir.Path()

	cID := container.Run(ctx, t, apiClient,
		container.WithCmd("true"),
		container.WithBind(hostPath, mountTarget),
	)
	poll.WaitOn(t, container.IsInState(ctx, apiClient, cID, containertypes.StateExited))

	// Pull the bind source out from under the (exited) container before
	// asking the daemon to remove it.
	rmDirErr := os.RemoveAll(hostPath)
	assert.NilError(t, rmDirErr)

	rmOpts := client.ContainerRemoveOptions{RemoveVolumes: true}
	_, err := apiClient.ContainerRemove(ctx, cID, rmOpts)
	assert.NilError(t, err)

	// The container must be gone for good: inspecting it reports "not found".
	_, err = apiClient.ContainerInspect(ctx, cID, client.ContainerInspectOptions{})
	assert.Check(t, is.ErrorType(err, cerrdefs.IsNotFound))
	assert.Check(t, is.ErrorContains(err, "No such container"))
}
// TestRemoveContainerWithVolume is the test case for #2099/#2125.
//
// It runs a container with a single volume, looks up the volume name through
// the container's mounts, force-removes the container with RemoveVolumes set,
// and verifies that the volume no longer exists afterwards.
func TestRemoveContainerWithVolume(t *testing.T) {
	ctx := setupTest(t)
	apiClient := testEnv.APIClient()

	prefix, slash := getPrefixAndSlashFromDaemonPlatform()

	cID := container.Run(ctx, t, apiClient, container.WithVolume(prefix+slash+"srv"))

	inspect, err := apiClient.ContainerInspect(ctx, cID, client.ContainerInspectOptions{})
	assert.NilError(t, err)
	// Must be fatal: the next statement indexes Mounts[0], which would panic
	// (instead of reporting a clean test failure) if the list were empty.
	assert.Assert(t, is.Len(inspect.Container.Mounts, 1))
	volName := inspect.Container.Mounts[0].Name

	// Sanity check: the volume exists while the container is still around.
	_, err = apiClient.VolumeInspect(ctx, volName, client.VolumeInspectOptions{})
	assert.NilError(t, err)

	_, err = apiClient.ContainerRemove(ctx, cID, client.ContainerRemoveOptions{
		Force:         true,
		RemoveVolumes: true,
	})
	assert.NilError(t, err)

	_, err = apiClient.VolumeInspect(ctx, volName, client.VolumeInspectOptions{})
	assert.ErrorType(t, err, cerrdefs.IsNotFound, "Expected anonymous volume to be removed")
}
// TestRemoveContainerRunning verifies that removing a container without the
// Force option is rejected with a conflict error mentioning that the
// container is running.
func TestRemoveContainerRunning(t *testing.T) {
	ctx := setupTest(t)
	apiClient := testEnv.APIClient()

	// Started with the helper's default options; the assertions below expect
	// the container to still be running when the removal is attempted.
	runningID := container.Run(ctx, t, apiClient)

	var rmOpts client.ContainerRemoveOptions
	_, rmErr := apiClient.ContainerRemove(ctx, runningID, rmOpts)

	assert.Check(t, is.ErrorType(rmErr, cerrdefs.IsConflict))
	assert.Check(t, is.ErrorContains(rmErr, "container is running"))
}
// TestRemoveContainerForceRemoveRunning verifies that removing a freshly
// started container succeeds when the Force option is set.
func TestRemoveContainerForceRemoveRunning(t *testing.T) {
	ctx := setupTest(t)
	apiClient := testEnv.APIClient()

	runningID := container.Run(ctx, t, apiClient)

	forceOpts := client.ContainerRemoveOptions{Force: true}
	_, rmErr := apiClient.ContainerRemove(ctx, runningID, forceOpts)
	assert.NilError(t, rmErr)
}
// TestRemoveInvalidContainer verifies that removing the container reference
// "unknown" yields a not-found error containing "No such container".
func TestRemoveInvalidContainer(t *testing.T) {
	ctx := setupTest(t)
	apiClient := testEnv.APIClient()

	const missingRef = "unknown"

	var rmOpts client.ContainerRemoveOptions
	_, rmErr := apiClient.ContainerRemove(ctx, missingRef, rmOpts)

	assert.Check(t, is.ErrorType(rmErr, cerrdefs.IsNotFound))
	assert.Check(t, is.ErrorContains(rmErr, "No such container"))
}
// TestRemoveDeadContainersOnDaemonRestart verifies that a container started
// with autoremove does not survive a daemon restart: one container is listed
// (with All set) before the restart, and none afterwards.
//
// The test starts its own daemon, so it is skipped for remote daemons and on
// Windows (see the FIXME in the skip message).
func TestRemoveDeadContainersOnDaemonRestart(t *testing.T) {
	skip.If(t, testEnv.IsRemoteDaemon)
	skip.If(t, testEnv.DaemonInfo.OSType == "windows", "FIXME: Windows CI does not support multiple daemons yet")

	ctx := setupTest(t)

	// Dedicated daemon so that restarting it does not disturb other tests.
	d := daemon.New(t)
	d.StartWithBusybox(ctx, t)
	defer d.Stop(t)

	apiClient := d.NewClientT(t)
	listAll := client.ContainerListOptions{All: true}

	// Long-running container flagged for autoremove; it is still running
	// when the daemon is restarted below.
	container.Run(ctx, t, apiClient, container.WithCmd("top"), container.WithAutoRemove)

	before, err := apiClient.ContainerList(ctx, listAll)
	assert.NilError(t, err)
	assert.Check(t, is.Len(before.Items, 1))

	d.Restart(t)

	// Nothing may be left behind once the daemon is back up, not even in a
	// non-running state (the listing includes all containers).
	after, err := apiClient.ContainerList(ctx, listAll)
	assert.NilError(t, err)
	assert.Check(t, is.Len(after.Items, 0))
}