mirror of
https://github.com/moby/moby.git
synced 2026-01-11 18:51:37 +00:00
Do not remove kernel-ll addresses from bridges
Make the behaviour enabled by env var DOCKER_BRIDGE_PRESERVE_KERNEL_LL the default... - don't remove kernel assigned link-local addresses - or any address in fe80::/64 - don't assign fe80::1 to a bridge Signed-off-by: Rob Murray <rob.murray@docker.com>
This commit is contained in:
@@ -531,41 +531,35 @@ func TestDefaultBridgeAddresses(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
for _, preserveKernelLL := range []bool{false, true} {
|
||||
var dopts []daemon.Option
|
||||
if preserveKernelLL {
|
||||
dopts = append(dopts, daemon.WithEnvVars("DOCKER_BRIDGE_PRESERVE_KERNEL_LL=1"))
|
||||
}
|
||||
d := daemon.New(t, dopts...)
|
||||
c := d.NewClientT(t)
|
||||
d := daemon.New(t)
|
||||
defer d.Stop(t)
|
||||
c := d.NewClientT(t)
|
||||
|
||||
for _, tc := range testcases {
|
||||
for _, step := range tc.steps {
|
||||
tcName := fmt.Sprintf("kernel_ll_%v/%s/%s", preserveKernelLL, tc.name, step.stepName)
|
||||
t.Run(tcName, func(t *testing.T) {
|
||||
ctx := testutil.StartSpan(ctx, t)
|
||||
// Check that the daemon starts - regression test for:
|
||||
// https://github.com/moby/moby/issues/46829
|
||||
d.StartWithBusybox(ctx, t, "--experimental", "--ipv6", "--ip6tables", "--fixed-cidr-v6="+step.fixedCIDRV6)
|
||||
for _, tc := range testcases {
|
||||
for _, step := range tc.steps {
|
||||
t.Run(tc.name+"/"+step.stepName, func(t *testing.T) {
|
||||
ctx := testutil.StartSpan(ctx, t)
|
||||
// Check that the daemon starts - regression test for:
|
||||
// https://github.com/moby/moby/issues/46829
|
||||
d.StartWithBusybox(ctx, t, "--experimental", "--ipv6", "--ip6tables", "--fixed-cidr-v6="+step.fixedCIDRV6)
|
||||
|
||||
// Start a container, so that the bridge is set "up" and gets a kernel_ll address.
|
||||
cID := container.Run(ctx, t, c)
|
||||
defer c.ContainerRemove(ctx, cID, containertypes.RemoveOptions{Force: true})
|
||||
// Start a container, so that the bridge is set "up" and gets a kernel_ll address.
|
||||
cID := container.Run(ctx, t, c)
|
||||
defer c.ContainerRemove(ctx, cID, containertypes.RemoveOptions{Force: true})
|
||||
|
||||
d.Stop(t)
|
||||
d.Stop(t)
|
||||
|
||||
// Check that the expected addresses have been applied to the bridge. (Skip in
|
||||
// rootless mode, because the bridge is in a different network namespace.)
|
||||
if !testEnv.IsRootless() {
|
||||
res := testutil.RunCommand(ctx, "ip", "-6", "addr", "show", "docker0")
|
||||
assert.Equal(t, res.ExitCode, 0, step.stepName)
|
||||
stdout := res.Stdout()
|
||||
for _, expAddr := range step.expAddrs {
|
||||
assert.Check(t, is.Contains(stdout, expAddr))
|
||||
}
|
||||
// Check that the expected addresses have been applied to the bridge. (Skip in
|
||||
// rootless mode, because the bridge is in a different network namespace.)
|
||||
if !testEnv.IsRootless() {
|
||||
res := testutil.RunCommand(ctx, "ip", "-6", "addr", "show", "docker0")
|
||||
assert.Equal(t, res.ExitCode, 0, step.stepName)
|
||||
stdout := res.Stdout()
|
||||
for _, expAddr := range step.expAddrs {
|
||||
assert.Check(t, is.Contains(stdout, expAddr))
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,7 +5,6 @@ import (
|
||||
"fmt"
|
||||
"net"
|
||||
"net/netip"
|
||||
"os"
|
||||
|
||||
"github.com/containerd/log"
|
||||
"github.com/docker/docker/errdefs"
|
||||
@@ -71,90 +70,66 @@ func (i *bridgeInterface) addresses(family int) ([]netlink.Addr, error) {
|
||||
return addrs, nil
|
||||
}
|
||||
|
||||
func getRequiredIPv6Addrs(config *networkConfiguration) (requiredAddrs map[netip.Addr]netip.Prefix, err error) {
|
||||
requiredAddrs = make(map[netip.Addr]netip.Prefix)
|
||||
|
||||
if os.Getenv("DOCKER_BRIDGE_PRESERVE_KERNEL_LL") != "1" {
|
||||
// Always give the bridge 'fe80::1' - every interface is required to have an
|
||||
// address in 'fe80::/64'. Linux may assign an address, but we'll replace it with
|
||||
// 'fe80::1'. Then, if the configured prefix is 'fe80::/64', the IPAM pool
|
||||
// assigned address will not be a second address in the LL subnet.
|
||||
ra, ok := netiputil.ToPrefix(bridgeIPv6)
|
||||
if !ok {
|
||||
err = fmt.Errorf("Failed to convert Link-Local IPv6 address to netip.Prefix")
|
||||
return nil, err
|
||||
}
|
||||
requiredAddrs[ra.Addr()] = ra
|
||||
}
|
||||
|
||||
ra, ok := netiputil.ToPrefix(config.AddressIPv6)
|
||||
if !ok {
|
||||
err = fmt.Errorf("failed to convert bridge IPv6 address '%s' to netip.Prefix", config.AddressIPv6.String())
|
||||
return nil, err
|
||||
}
|
||||
requiredAddrs[ra.Addr()] = ra
|
||||
|
||||
return requiredAddrs, nil
|
||||
}
|
||||
|
||||
func (i *bridgeInterface) programIPv6Addresses(config *networkConfiguration) error {
|
||||
// Remember the configured addresses.
|
||||
i.bridgeIPv6 = config.AddressIPv6
|
||||
i.gatewayIPv6 = config.AddressIPv6.IP
|
||||
|
||||
addrPrefix, ok := netiputil.ToPrefix(config.AddressIPv6)
|
||||
if !ok {
|
||||
return errdefs.System(
|
||||
fmt.Errorf("failed to convert bridge IPv6 address '%s' to netip.Prefix",
|
||||
config.AddressIPv6.String()))
|
||||
}
|
||||
|
||||
// Get the IPv6 addresses currently assigned to the bridge, if any.
|
||||
existingAddrs, err := i.addresses(netlink.FAMILY_V6)
|
||||
if err != nil {
|
||||
return errdefs.System(err)
|
||||
}
|
||||
|
||||
// Get the list of required IPv6 addresses for this bridge.
|
||||
var requiredAddrs map[netip.Addr]netip.Prefix
|
||||
requiredAddrs, err = getRequiredIPv6Addrs(config)
|
||||
if err != nil {
|
||||
return errdefs.System(err)
|
||||
}
|
||||
i.bridgeIPv6 = config.AddressIPv6
|
||||
i.gatewayIPv6 = config.AddressIPv6.IP
|
||||
|
||||
// Remove addresses that aren't required.
|
||||
for _, existingAddr := range existingAddrs {
|
||||
ea, ok := netip.AddrFromSlice(existingAddr.IP)
|
||||
if !ok {
|
||||
return errdefs.System(fmt.Errorf("Failed to convert IPv6 address '%s' to netip.Addr", config.AddressIPv6))
|
||||
return errdefs.System(
|
||||
fmt.Errorf("Failed to convert IPv6 address '%s' to netip.Addr", config.AddressIPv6))
|
||||
}
|
||||
// Optionally, avoid deleting the kernel-assigned link local address.
|
||||
// (Don't delete fe80::1 either - if it was previously assigned to the bridge, and the
|
||||
// kernel_ll address was deleted, the bridge won't get a new kernel_ll address.)
|
||||
if os.Getenv("DOCKER_BRIDGE_PRESERVE_KERNEL_LL") == "1" {
|
||||
if p, _ := ea.Prefix(64); p == linkLocalPrefix {
|
||||
continue
|
||||
}
|
||||
// Don't delete the kernel-assigned link local address (or fe80::1 - if it was
|
||||
// assigned to the bridge by an older version of the daemon that deleted the
|
||||
// kernel_ll address, the bridge won't get a new kernel_ll address.) But, do
|
||||
// delete unexpected link-local addresses (fe80::/10) that aren't in fe80::/64,
|
||||
// those have been IPAM-assigned.
|
||||
if p, _ := ea.Prefix(64); p == linkLocalPrefix {
|
||||
continue
|
||||
}
|
||||
// Ignore the prefix length when comparing addresses, it's informational
|
||||
// (RFC-5942 section 4), and removing/re-adding an address that's still valid
|
||||
// would disrupt traffic on live-restore.
|
||||
if _, required := requiredAddrs[ea]; !required {
|
||||
if ea != addrPrefix.Addr() {
|
||||
err := i.nlh.AddrDel(i.Link, &existingAddr) //#nosec G601 -- Memory aliasing is not an issue in practice as the &existingAddr pointer is not retained by the callee after the AddrDel() call returns.
|
||||
if err != nil {
|
||||
log.G(context.TODO()).WithFields(log.Fields{"error": err, "address": existingAddr.IPNet}).Warnf("Failed to remove residual IPv6 address from bridge")
|
||||
log.G(context.TODO()).WithFields(log.Fields{
|
||||
"error": err,
|
||||
"address": existingAddr.IPNet},
|
||||
).Warnf("Failed to remove residual IPv6 address from bridge")
|
||||
}
|
||||
}
|
||||
}
|
||||
// Add or update required addresses.
|
||||
for _, addrPrefix := range requiredAddrs {
|
||||
// Using AddrReplace(), rather than AddrAdd(). When the subnet is changed for an
|
||||
// existing bridge in a way that doesn't affect the bridge's assigned address,
|
||||
// the old address has not been removed at this point - because that would be
|
||||
// service-affecting for a running container.
|
||||
//
|
||||
// But if, for example, 'fixed-cidr-v6' is changed from '2000:dbe::/64' to
|
||||
// '2000:dbe::/80', the default bridge will still be assigned address
|
||||
// '2000:dbe::1'. In the output of 'ip a', the prefix length is displayed - and
|
||||
// the user is likely to expect to see it updated from '64' to '80'.
|
||||
// Unfortunately, 'netlink.AddrReplace()' ('RTM_NEWADDR' with 'NLM_F_REPLACE')
|
||||
// doesn't update the prefix length. This is a cosmetic problem, the prefix
|
||||
// length of an assigned address is not used to determine whether an address is
|
||||
// "on-link" (RFC-5942).
|
||||
if err := i.nlh.AddrReplace(i.Link, &netlink.Addr{IPNet: netiputil.ToIPNet(addrPrefix)}); err != nil {
|
||||
return errdefs.System(fmt.Errorf("failed to add IPv6 address %s to bridge: %v", i.bridgeIPv6, err))
|
||||
}
|
||||
// Using AddrReplace(), rather than AddrAdd(). When the subnet is changed for an
|
||||
// existing bridge in a way that doesn't affect the bridge's assigned address,
|
||||
// the old address has not been removed at this point - because that would be
|
||||
// service-affecting for a running container.
|
||||
//
|
||||
// But if, for example, 'fixed-cidr-v6' is changed from '2000:dbe::/64' to
|
||||
// '2000:dbe::/80', the default bridge will still be assigned address
|
||||
// '2000:dbe::1'. In the output of 'ip a', the prefix length is displayed - and
|
||||
// the user is likely to expect to see it updated from '64' to '80'.
|
||||
// Unfortunately, 'netlink.AddrReplace()' ('RTM_NEWADDR' with 'NLM_F_REPLACE')
|
||||
// doesn't update the prefix length. This is a cosmetic problem, the prefix
|
||||
// length of an assigned address is not used to determine whether an address is
|
||||
// "on-link" (RFC-5942).
|
||||
if err := i.nlh.AddrReplace(i.Link, &netlink.Addr{IPNet: netiputil.ToIPNet(addrPrefix)}); err != nil {
|
||||
return errdefs.System(fmt.Errorf("failed to add IPv6 address %s to bridge: %v", i.bridgeIPv6, err))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -2,12 +2,9 @@ package bridge
|
||||
|
||||
import (
|
||||
"net"
|
||||
"net/netip"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/docker/docker/internal/testutils/netnsutils"
|
||||
"github.com/google/go-cmp/cmp"
|
||||
"github.com/vishvananda/netlink"
|
||||
"gotest.tools/v3/assert"
|
||||
is "gotest.tools/v3/assert/cmp"
|
||||
@@ -96,48 +93,6 @@ func TestAddressesNonEmptyInterface(t *testing.T) {
|
||||
assert.Equal(t, addrs[0].IPNet.String(), expAddrV6)
|
||||
}
|
||||
|
||||
func TestGetRequiredIPv6Addrs(t *testing.T) {
|
||||
testcases := []struct {
|
||||
name string
|
||||
addressIPv6 string
|
||||
expReqdAddrs []string
|
||||
}{
|
||||
{
|
||||
name: "Regular address, expect default link local",
|
||||
addressIPv6: "2000:3000::1/80",
|
||||
expReqdAddrs: []string{"fe80::1/64", "2000:3000::1/80"},
|
||||
},
|
||||
{
|
||||
name: "Standard link local address only",
|
||||
addressIPv6: "fe80::1/64",
|
||||
expReqdAddrs: []string{"fe80::1/64"},
|
||||
},
|
||||
{
|
||||
name: "Nonstandard link local address",
|
||||
addressIPv6: "fe80:abcd::1/42",
|
||||
expReqdAddrs: []string{"fe80:abcd::1/42", "fe80::1/64"},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testcases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
config := &networkConfiguration{
|
||||
AddressIPv6: cidrToIPNet(t, tc.addressIPv6),
|
||||
}
|
||||
|
||||
expResult := map[netip.Addr]netip.Prefix{}
|
||||
for _, addr := range tc.expReqdAddrs {
|
||||
expResult[netip.MustParseAddr(strings.Split(addr, "/")[0])] = netip.MustParsePrefix(addr)
|
||||
}
|
||||
|
||||
reqd, err := getRequiredIPv6Addrs(config)
|
||||
assert.Check(t, is.Nil(err))
|
||||
assert.Check(t, is.DeepEqual(reqd, expResult,
|
||||
cmp.Comparer(func(a, b netip.Prefix) bool { return a == b })))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestProgramIPv6Addresses(t *testing.T) {
|
||||
defer netnsutils.SetupTestOSContext(t)()
|
||||
|
||||
@@ -159,11 +114,11 @@ func TestProgramIPv6Addresses(t *testing.T) {
|
||||
i := prepTestBridge(t, nc)
|
||||
|
||||
// The bridge has no addresses, ask for a regular IPv6 network and expect it to
|
||||
// be added to the bridge, with the default link local address.
|
||||
// be added to the bridge.
|
||||
nc.AddressIPv6 = cidrToIPNet(t, "2000:3000::1/64")
|
||||
err := i.programIPv6Addresses(nc)
|
||||
assert.NilError(t, err)
|
||||
checkAddrs(i, nc, []string{"2000:3000::1/64", "fe80::1/64"})
|
||||
checkAddrs(i, nc, []string{"2000:3000::1/64"})
|
||||
|
||||
// Shrink the subnet of that regular address, the prefix length of the address
|
||||
// will not be modified - but it's informational-only, the address itself has
|
||||
@@ -171,7 +126,7 @@ func TestProgramIPv6Addresses(t *testing.T) {
|
||||
nc.AddressIPv6 = cidrToIPNet(t, "2000:3000::1/80")
|
||||
err = i.programIPv6Addresses(nc)
|
||||
assert.NilError(t, err)
|
||||
checkAddrs(i, nc, []string{"2000:3000::1/64", "fe80::1/64"})
|
||||
checkAddrs(i, nc, []string{"2000:3000::1/64"})
|
||||
|
||||
// Ask for link-local only, by specifying an address with the Link Local prefix.
|
||||
// The regular address should be removed.
|
||||
@@ -180,9 +135,16 @@ func TestProgramIPv6Addresses(t *testing.T) {
|
||||
assert.NilError(t, err)
|
||||
checkAddrs(i, nc, []string{"fe80::1/64"})
|
||||
|
||||
// Swap the standard link local address for a nonstandard one.
|
||||
// Swap the standard link local address for a nonstandard one. The standard LL
|
||||
// address will not be removed.
|
||||
nc.AddressIPv6 = cidrToIPNet(t, "fe80:5555::1/55")
|
||||
err = i.programIPv6Addresses(nc)
|
||||
assert.NilError(t, err)
|
||||
checkAddrs(i, nc, []string{"fe80:5555::1/55", "fe80::1/64"})
|
||||
|
||||
// Back to the original address, expect the nonstandard LL address to be replaced.
|
||||
nc.AddressIPv6 = cidrToIPNet(t, "2000:3000::1/64")
|
||||
err = i.programIPv6Addresses(nc)
|
||||
assert.NilError(t, err)
|
||||
checkAddrs(i, nc, []string{"2000:3000::1/64", "fe80::1/64"})
|
||||
}
|
||||
|
||||
@@ -90,7 +90,7 @@ func TestLinkCreate(t *testing.T) {
|
||||
|
||||
ip6 := te.iface.addrv6.IP
|
||||
if !n.bridge.bridgeIPv6.Contains(ip6) {
|
||||
t.Fatalf("IP %s is not a valid ip in the subnet %s", ip6.String(), bridgeIPv6.String())
|
||||
t.Fatalf("IP %s is not a valid ip in the subnet %s", ip6.String(), n.bridge.bridgeIPv6.String())
|
||||
}
|
||||
|
||||
if !te.gw.Equal(n.bridge.bridgeIPv4.IP) {
|
||||
|
||||
@@ -43,14 +43,14 @@ func TestSetupIPv6(t *testing.T) {
|
||||
|
||||
var found bool
|
||||
for _, addr := range addrsv6 {
|
||||
if bridgeIPv6.String() == addr.IPNet.String() {
|
||||
if config.AddressIPv6.String() == addr.IPNet.String() {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if !found {
|
||||
t.Fatalf("Bridge device does not have requested IPv6 address %v", bridgeIPv6)
|
||||
t.Fatalf("Bridge device does not have requested IPv6 address %v", config.AddressIPv6)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user