daemon: handleContainerExit: ignore networking errors

Prior to commit fe856b9, a container's network sandbox and interfaces were
created before the containerd task. Now, they're created after it.

If this step fails, the containerd task is forcefully deleted and an exit
event is sent to the c8d event monitor, which triggers `handleContainerExit`.
This method then tries to restart the faulty container.

As a result, a container started with `--restart=always` and a published
port that is already in use gets stuck in a tight restart loop until the
port becomes available, needlessly spamming the daemon logs.
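
The symptom is easy to reproduce with two containers publishing the same host
port. A minimal sketch using the Docker Go SDK follows; the image, container
names, and host port 8000 are illustrative and not part of this change:

// Sketch only: reproducing the restart-loop symptom with the Docker Go SDK.
package main

import (
	"context"
	"log"

	"github.com/docker/docker/api/types/container"
	"github.com/docker/docker/client"
	"github.com/docker/go-connections/nat"
)

func main() {
	ctx := context.Background()
	cli, err := client.NewClientWithOpts(client.FromEnv, client.WithAPIVersionNegotiation())
	if err != nil {
		log.Fatal(err)
	}

	cfg := &container.Config{
		Image:        "busybox",
		Cmd:          []string{"top"},
		ExposedPorts: nat.PortSet{"80/tcp": struct{}{}},
	}
	bindings := nat.PortMap{"80/tcp": {{HostPort: "8000"}}}

	// First container takes host port 8000.
	c1, _ := cli.ContainerCreate(ctx, cfg, &container.HostConfig{PortBindings: bindings}, nil, nil, "holder")
	_ = cli.ContainerStart(ctx, c1.ID, container.StartOptions{})

	// Second container publishes the same host port and uses --restart=always.
	// Before the fix, the failed start put it in a tight restart loop; with the
	// fix, the start fails once and the container stays in the 'created' state.
	c2, _ := cli.ContainerCreate(ctx, cfg,
		&container.HostConfig{
			PortBindings:  bindings,
			RestartPolicy: container.RestartPolicy{Name: "always"},
		}, nil, nil, "clasher")
	err = cli.ContainerStart(ctx, c2.ID, container.StartOptions{})
	log.Printf("start error: %v", err) // expected to mention the networking setup failure
}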

Prior to that commit, a published port already in use wouldn't trigger
the restart process.

This commit adds a check to `handleContainerExit` to ignore exit events
if the latest container error is related to networking setup.
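
For context, a small standalone sketch (not daemon code) of why that check
relies on string matching rather than errors.Is/errors.As; the wrapped cause
below is illustrative:

// The error type doesn't survive c.ErrorMsg (a persisted string), so the
// check matches on the errSetupNetworking prefix instead.
package main

import (
	"errors"
	"fmt"
	"strings"
)

const errSetupNetworking = "failed to set up container networking"

func main() {
	// What initializeCreatedTask now produces when allocateNetwork fails.
	cause := errors.New("port is already allocated") // illustrative cause
	wrapped := fmt.Errorf("%s: %w", errSetupNetworking, cause)

	// The daemon stores the message as a plain string, losing the error chain...
	persisted := wrapped.Error()

	// ...so handleContainerExit can only recognize it by substring matching.
	fmt.Println(strings.Contains(persisted, errSetupNetworking)) // true
	fmt.Println(errors.Is(errors.New(persisted), cause))         // false
}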

Signed-off-by: Albin Kerouanton <albinker@gmail.com>

Author: Albin Kerouanton
Date:   2025-02-20 14:43:11 +01:00
Parent: af898abe44
Commit: ac8b4e3e75

4 changed files with 52 additions and 1 deletion

View File

@@ -37,6 +37,8 @@ import (
 	"go.opentelemetry.io/otel/trace"
 )
 
+const errSetupNetworking = "failed to set up container networking"
+
 func ipAddresses(ips []net.IP) []string {
 	var addrs []string
 	for _, ip := range ips {

View File

@@ -3,6 +3,7 @@ package daemon // import "github.com/docker/docker/daemon"
 import (
 	"context"
 	"strconv"
+	"strings"
 	"time"
 
 	"github.com/containerd/log"
@@ -32,6 +33,20 @@ func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontaine
 	var ctrExitStatus container.ExitStatus
 	c.Lock()
 
+	// If the latest container error is related to networking setup, don't try
+	// to restart the container, and don't change the container state to
+	// 'exited'. This happens when, for example, [daemon.allocateNetwork] fails
+	// due to published ports being already in use. In that case, we want to
+	// keep the container in the 'created' state.
+	//
+	// c.ErrorMsg is set by [daemon.containerStart], and doesn't preserve the
+	// error type (because this field is persisted on disk). So, use string
+	// matching instead of usual error comparison methods.
+	if strings.Contains(c.ErrorMsg, errSetupNetworking) {
+		c.Unlock()
+		return nil
+	}
+
 	cfg := daemon.config()
 
 	// Health checks will be automatically restarted if/when the

View File

@@ -34,5 +34,8 @@ func (daemon *Daemon) initializeCreatedTask(
 			return errdefs.System(err)
 		}
 	}
-	return daemon.allocateNetwork(ctx, cfg, ctr)
+	if err := daemon.allocateNetwork(ctx, cfg, ctr); err != nil {
+		return fmt.Errorf("%s: %w", errSetupNetworking, err)
+	}
+	return nil
 }

View File

@@ -19,6 +19,7 @@ import (
 	"github.com/docker/docker/libnetwork/netlabel"
 	"github.com/docker/docker/testutil"
 	"github.com/docker/docker/testutil/daemon"
+	"github.com/docker/go-connections/nat"
 	"github.com/vishvananda/netlink"
 	"gotest.tools/v3/assert"
 	is "gotest.tools/v3/assert/cmp"
@@ -423,3 +424,33 @@ func TestEndpointWithCustomIfname(t *testing.T) {
 	assert.NilError(t, err)
 	assert.Assert(t, strings.Contains(out.Stdout, ": foobar@if"), "expected ': foobar@if' in 'ip link show':\n%s", out.Stdout)
 }
+
+// TestPublishedPortAlreadyInUse checks that a container that can't start
+// because one of its published ports is already in use doesn't end up
+// triggering the restart loop.
+//
+// Regression test for: https://github.com/moby/moby/issues/49501
+func TestPublishedPortAlreadyInUse(t *testing.T) {
+	ctx := setupTest(t)
+	apiClient := testEnv.APIClient()
+
+	ctr1 := ctr.Run(ctx, t, apiClient,
+		ctr.WithCmd("top"),
+		ctr.WithExposedPorts("80/tcp"),
+		ctr.WithPortMap(nat.PortMap{"80/tcp": {{HostPort: "8000"}}}))
+	defer ctr.Remove(ctx, t, apiClient, ctr1, containertypes.RemoveOptions{Force: true})
+
+	ctr2 := ctr.Create(ctx, t, apiClient,
+		ctr.WithCmd("top"),
+		ctr.WithRestartPolicy(containertypes.RestartPolicyAlways),
+		ctr.WithExposedPorts("80/tcp"),
+		ctr.WithPortMap(nat.PortMap{"80/tcp": {{HostPort: "8000"}}}))
+	defer ctr.Remove(ctx, t, apiClient, ctr2, containertypes.RemoveOptions{Force: true})
+
+	err := apiClient.ContainerStart(ctx, ctr2, containertypes.StartOptions{})
+	assert.Assert(t, is.ErrorContains(err, "failed to set up container networking"))
+
+	inspect, err := apiClient.ContainerInspect(ctx, ctr2)
+	assert.NilError(t, err)
+	assert.Check(t, is.Equal(inspect.State.Status, "created"))
+}