Merge pull request #49292 from robmry/bridge_startup_error

Recover from default bridge init failure
This commit is contained in:
Sebastiaan van Stijn
2025-01-18 01:17:02 +01:00
committed by GitHub
3 changed files with 98 additions and 0 deletions

View File

@@ -0,0 +1,66 @@
package bridge
import (
"testing"
"github.com/docker/docker/integration/internal/network"
"github.com/docker/docker/libnetwork/drivers/bridge"
"github.com/docker/docker/testutil/daemon"
"gotest.tools/v3/assert"
is "gotest.tools/v3/assert/cmp"
)
// TestNetworkInitErrorDocker0 checks that, if the default bridge network can't
// be restored on startup, the daemon can still be started once the underlying
// problem has been resolved.
//
// The failure is injected with the DOCKER_TEST_BRIDGE_INIT_ERROR environment
// variable, set to the name of the bridge whose initialisation should fail.
//
// Regression test for https://github.com/moby/moby/issues/49291
func TestNetworkInitErrorDocker0(t *testing.T) {
	const (
		initErrEnv = "DOCKER_TEST_BRIDGE_INIT_ERROR"
		bridgeName = "docker0"
	)

	dmn := daemon.New(t)
	dmn.Start(t)
	// Best-effort shutdown; an error while stopping is deliberately ignored.
	defer func() { _ = dmn.StopWithError() }()

	// With the failure injected for the default bridge, the restart is
	// expected to fail.
	dmn.SetEnvVar(initErrEnv, bridgeName)
	restartErr := dmn.RestartWithError()
	assert.Assert(t, is.ErrorContains(restartErr, "daemon exited during startup"))

	// With the injected failure cleared, the daemon must start again.
	dmn.SetEnvVar(initErrEnv, "")
	dmn.Start(t)
}
// TestNetworkInitErrorUserDefined is the user-defined-network counterpart of
// TestNetworkInitErrorDocker0.
//
// Unlike the default bridge, the daemon doesn't try to delete a user-defined
// network, and the daemon still starts when such a network can't be restored
// on startup. So, this test deletes the network while it is in its
// failed-to-initialise state, then checks that a network with the same name
// and bridge can be created again once the initialisation problem is resolved.
func TestNetworkInitErrorUserDefined(t *testing.T) {
	const (
		initErrEnv = "DOCKER_TEST_BRIDGE_INIT_ERROR"
		netName    = "testnet"
		brName     = "br-" + netName
	)

	ctx := setupTest(t)

	dmn := daemon.New(t)
	dmn.Start(t)
	// Best-effort shutdown; an error while stopping is deliberately ignored.
	defer func() { _ = dmn.StopWithError() }()

	apiClient := dmn.NewClientT(t)
	defer apiClient.Close()

	// Create the network on a bridge with a known name, so that the failure
	// can be targeted at it.
	network.CreateNoError(ctx, t, apiClient, netName, network.WithOption(bridge.BridgeName, brName))
	defer network.RemoveNoError(ctx, t, apiClient, netName)

	// Restart with initialisation of that bridge forced to fail; the daemon
	// is still expected to come up.
	dmn.SetEnvVar(initErrEnv, brName)
	dmn.Restart(t)

	// Removing the network that failed to initialise must succeed.
	removeErr := apiClient.NetworkRemove(ctx, netName)
	assert.NilError(t, removeErr)

	// Clear the injected failure; the same network must be creatable again.
	dmn.SetEnvVar(initErrEnv, "")
	dmn.Restart(t)
	network.CreateNoError(ctx, t, apiClient, netName, network.WithOption(bridge.BridgeName, brName))
}

View File

@@ -5,6 +5,7 @@ import (
"fmt"
"net"
"net/netip"
"os"
"strconv"
"sync"
@@ -966,6 +967,13 @@ func (d *driver) createNetwork(config *networkConfiguration) (err error) {
// Apply the prepared list of steps, and abort at the first error.
bridgeSetup.queueStep(setupDeviceUp)
if v := os.Getenv("DOCKER_TEST_BRIDGE_INIT_ERROR"); v == config.BridgeName {
bridgeSetup.queueStep(func(n *networkConfiguration, b *bridgeInterface) error {
return fmt.Errorf("DOCKER_TEST_BRIDGE_INIT_ERROR is %q", v)
})
}
return bridgeSetup.apply()
}
@@ -985,6 +993,18 @@ func (d *driver) deleteNetwork(nid string) error {
d.Unlock()
if !ok {
// If the network was successfully created by an earlier incarnation of the daemon,
// but it failed to initialise this time, the network is still in the store (in
// case whatever caused the failure can be fixed for a future daemon restart). But,
// it's not in d.networks. To prevent the driver's state from getting out of step
// with its parent, make sure it's not in the store before reporting that it does
// not exist.
if err := d.storeDelete(&networkConfiguration{ID: nid}); err != nil && err != datastore.ErrKeyNotFound {
log.G(context.TODO()).WithFields(log.Fields{
"error": err,
"network": nid,
}).Warnf("Failed to delete network from bridge store")
}
return types.InternalMaskableErrorf("network %s does not exist", nid)
}

View File

@@ -11,6 +11,7 @@ import (
"os/exec"
"os/user"
"path/filepath"
"slices"
"strconv"
"strings"
"syscall"
@@ -842,6 +843,17 @@ func (d *Daemon) ReloadConfig() error {
return nil
}
// SetEnvVar updates the set of extra env variables for the daemon, to take
// effect on the next start/restart.
//
// If an entry for name already exists in the set, its value is replaced in
// place; otherwise a new "name=val" entry is appended. An empty val yields
// the entry "name=".
func (d *Daemon) SetEnvVar(name, val string) {
	prefix := name + "="
	idx := slices.IndexFunc(d.extraEnv, func(ev string) bool {
		return strings.HasPrefix(ev, prefix)
	})
	// IndexFunc reports "not found" as -1, so index 0 is a valid match and
	// must be replaced rather than duplicated by the append below.
	if idx >= 0 {
		d.extraEnv[idx] = prefix + val
		return
	}
	d.extraEnv = append(d.extraEnv, prefix+val)
}
// LoadBusybox image into the daemon
func (d *Daemon) LoadBusybox(ctx context.Context, t testing.TB) {
t.Helper()