mirror of
https://github.com/moby/moby.git
synced 2026-01-11 18:51:37 +00:00
The previous allocator was subnetting address pools eagerly when the daemon started, and would then just iterate over that list whenever RequestPool was called. This was leading to high memory usage whenever IPv6 pools were configured with a target subnet size that differed greatly from the pool's prefix size. For instance: pool = fd00::/8, target size = /64 -- 2^(64-8) subnets would be generated upfront. This would take approx. 9 * 10^18 bits -- way too much for any human computer in 2024. Another noteworthy issue: the previous implementation was allocating a subnet, and then in another layer was checking whether the allocation was conflicting with some 'reserved networks'. If so, the allocation would be retried, and so on. To make it worse, 'reserved networks' would be recomputed on every iteration. This is highly inefficient as there could be 'reserved networks' that fully overlap a given address pool (or many!). To fix this issue, a new field `Exclude` is added to `RequestPool`. It's up to each driver to take it into account. Since we don't know whether this retry loop is useful for some remote IPAM driver, it's reimplemented bug-for-bug directly in the remote driver. The new allocator uses a linear-search algorithm. It takes advantage of all lists (predefined pools, allocated subnets and reserved networks) being sorted and logically combines 'allocated' and 'reserved' through a 'double cursor' to iterate on both lists at the same time while preserving the total order. At the same time, it iterates over 'predefined' pools and looks for the first empty space that would be a good fit. Currently, the size of the allocated subnet is still dictated by each 'predefined' pool. We should consider hardcoding that size instead, and let users specify what subnet size they want. This wasn't possible before as the subnets were generated upfront. This new allocator should be able to deal with this easily. 
The method used for static allocation has been updated to make sure the ascending order of 'allocated' is preserved. It's bug-for-bug compatible with the previous implementation. One consequence of this new algorithm is that we don't keep track of where the last allocation happened; we just allocate the first free subnet we find. For example, take this sequence: allocate 10.0.1.0/24 and 10.0.2.0/24, deallocate 10.0.1.0/24, then allocate again. Before, the third allocation would yield 10.0.3.0/24. Now, the third allocation yields 10.0.1.0/24 once again. As it doesn't change the semantics of the allocator, there's no reason to worry about that. Finally, about 'reserved networks'. The heuristics we use are now properly documented. It was discovered that we don't check routes for IPv6 allocations -- this can't be changed because there's no such thing as on-link routes for IPv6. (Kudos to Rob Murray for coming up with the linear-search idea.) Signed-off-by: Albin Kerouanton <albinker@gmail.com>
127 lines
3.7 KiB
Go
127 lines
3.7 KiB
Go
package netutils
|
|
|
|
import (
|
|
"bytes"
|
|
"fmt"
|
|
"net/netip"
|
|
"slices"
|
|
"strings"
|
|
"testing"
|
|
|
|
"github.com/docker/docker/internal/testutils/netnsutils"
|
|
"github.com/docker/docker/libnetwork/internal/netiputil"
|
|
"github.com/vishvananda/netlink"
|
|
"gotest.tools/v3/assert"
|
|
is "gotest.tools/v3/assert/cmp"
|
|
)
|
|
|
|
// Test veth name generation "veth"+rand (e.g.veth0f60e2c)
|
|
func TestGenerateRandomName(t *testing.T) {
|
|
const vethPrefix = "veth"
|
|
const vethLen = len(vethPrefix) + 7
|
|
|
|
testCases := []struct {
|
|
prefix string
|
|
length int
|
|
error bool
|
|
}{
|
|
{vethPrefix, -1, true},
|
|
{vethPrefix, 0, true},
|
|
{vethPrefix, len(vethPrefix) - 1, true},
|
|
{vethPrefix, len(vethPrefix), true},
|
|
{vethPrefix, len(vethPrefix) + 1, false},
|
|
{vethPrefix, 255, false},
|
|
}
|
|
for _, tc := range testCases {
|
|
t.Run(fmt.Sprintf("prefix=%s/length=%d", tc.prefix, tc.length), func(t *testing.T) {
|
|
name, err := GenerateRandomName(tc.prefix, tc.length)
|
|
if tc.error {
|
|
assert.Check(t, is.ErrorContains(err, "invalid length"))
|
|
} else {
|
|
assert.NilError(t, err)
|
|
assert.Check(t, strings.HasPrefix(name, tc.prefix), "Expected name to start with %s", tc.prefix)
|
|
assert.Check(t, is.Equal(len(name), tc.length), "Expected %d characters, instead received %d characters", tc.length, len(name))
|
|
}
|
|
})
|
|
}
|
|
|
|
var randomNames [16]string
|
|
for i := range randomNames {
|
|
randomName, err := GenerateRandomName(vethPrefix, vethLen)
|
|
assert.NilError(t, err)
|
|
|
|
for _, oldName := range randomNames {
|
|
if randomName == oldName {
|
|
t.Fatalf("Duplicate random name generated: %s", randomName)
|
|
}
|
|
}
|
|
|
|
randomNames[i] = randomName
|
|
}
|
|
}
|
|
|
|
// Test mac generation.
|
|
func TestUtilGenerateRandomMAC(t *testing.T) {
|
|
mac1 := GenerateRandomMAC()
|
|
mac2 := GenerateRandomMAC()
|
|
// ensure bytes are unique
|
|
if bytes.Equal(mac1, mac2) {
|
|
t.Fatalf("mac1 %s should not equal mac2 %s", mac1, mac2)
|
|
}
|
|
// existing tests check string functionality so keeping the pattern
|
|
if mac1.String() == mac2.String() {
|
|
t.Fatalf("mac1 %s should not equal mac2 %s", mac1, mac2)
|
|
}
|
|
}
|
|
|
|
func TestInferReservedNetworksV4(t *testing.T) {
|
|
defer netnsutils.SetupTestOSContext(t)()
|
|
|
|
ifaceID := createInterface(t, "foobar")
|
|
addRoute(t, ifaceID, netlink.SCOPE_LINK, netip.MustParsePrefix("100.0.0.0/24"))
|
|
addRoute(t, ifaceID, netlink.SCOPE_LINK, netip.MustParsePrefix("10.0.0.0/8"))
|
|
addRoute(t, ifaceID, netlink.SCOPE_UNIVERSE, netip.MustParsePrefix("20.0.0.0/8"))
|
|
|
|
reserved := InferReservedNetworks(false)
|
|
t.Logf("reserved: %+v", reserved)
|
|
|
|
// We don't check the size of 'reserved' here because it also includes
|
|
// nameservers set in /etc/resolv.conf. This file might change from one test
|
|
// env to another, and it'd be unnecessarily complex to set up a mount
|
|
// namespace just to check that. Current implementation uses a function
|
|
// which is properly tested, so everything should be good.
|
|
assert.Check(t, slices.Contains(reserved, netip.MustParsePrefix("100.0.0.0/24")))
|
|
assert.Check(t, slices.Contains(reserved, netip.MustParsePrefix("10.0.0.0/8")))
|
|
assert.Check(t, !slices.Contains(reserved, netip.MustParsePrefix("20.0.0.0/8")))
|
|
}
|
|
|
|
func createInterface(t *testing.T, name string) int {
|
|
t.Helper()
|
|
|
|
link := &netlink.Dummy{
|
|
LinkAttrs: netlink.LinkAttrs{
|
|
Name: name,
|
|
},
|
|
}
|
|
if err := netlink.LinkAdd(link); err != nil {
|
|
t.Fatalf("failed to create interface %s: %v", name, err)
|
|
}
|
|
if err := netlink.LinkSetUp(link); err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
|
|
return link.Attrs().Index
|
|
}
|
|
|
|
func addRoute(t *testing.T, linkID int, scope netlink.Scope, prefix netip.Prefix) {
|
|
t.Helper()
|
|
|
|
if err := netlink.RouteAdd(&netlink.Route{
|
|
Scope: scope,
|
|
LinkIndex: linkID,
|
|
Dst: netiputil.ToIPNet(prefix),
|
|
}); err != nil {
|
|
t.Fatalf("failed to add on-link route %s: %v", prefix, err)
|
|
}
|
|
}
|