mirror of
https://github.com/moby/moby.git
synced 2026-01-11 18:51:37 +00:00
Merge pull request #50106 from corhere/libn/overlay-peerdb-soundness
libnetwork/d/overlay: properly model peer db
This commit is contained in:
@@ -91,8 +91,13 @@ func (s *spi) String() string {
|
||||
return fmt.Sprintf("SPI(FWD: 0x%x, REV: 0x%x)", uint32(s.forward), uint32(s.reverse))
|
||||
}
|
||||
|
||||
type encrNode struct {
|
||||
spi []spi
|
||||
count int
|
||||
}
|
||||
|
||||
type encrMap struct {
|
||||
nodes map[netip.Addr][]*spi
|
||||
nodes map[netip.Addr]encrNode
|
||||
sync.Mutex
|
||||
}
|
||||
|
||||
@@ -105,7 +110,7 @@ func (e *encrMap) String() string {
|
||||
b.WriteString(k.String())
|
||||
b.WriteString(":")
|
||||
b.WriteString("[")
|
||||
for _, s := range v {
|
||||
for _, s := range v.spi {
|
||||
b.WriteString(s.String())
|
||||
b.WriteString(",")
|
||||
}
|
||||
@@ -126,10 +131,10 @@ func (d *driver) setupEncryption(remoteIP netip.Addr) error {
|
||||
}
|
||||
log.G(context.TODO()).Debugf("Programming encryption between %s and %s", localIP, remoteIP)
|
||||
|
||||
indices := make([]*spi, 0, len(keys))
|
||||
indices := make([]spi, 0, len(keys))
|
||||
|
||||
for i, k := range keys {
|
||||
spis := &spi{buildSPI(advIP.AsSlice(), remoteIP.AsSlice(), k.tag), buildSPI(remoteIP.AsSlice(), advIP.AsSlice(), k.tag)}
|
||||
spis := spi{buildSPI(advIP.AsSlice(), remoteIP.AsSlice(), k.tag), buildSPI(remoteIP.AsSlice(), advIP.AsSlice(), k.tag)}
|
||||
dir := reverse
|
||||
if i == 0 {
|
||||
dir = bidir
|
||||
@@ -149,7 +154,10 @@ func (d *driver) setupEncryption(remoteIP netip.Addr) error {
|
||||
}
|
||||
|
||||
d.secMap.Lock()
|
||||
d.secMap.nodes[remoteIP] = indices
|
||||
node := d.secMap.nodes[remoteIP]
|
||||
node.spi = indices
|
||||
node.count++
|
||||
d.secMap.nodes[remoteIP] = node
|
||||
d.secMap.Unlock()
|
||||
|
||||
return nil
|
||||
@@ -158,13 +166,20 @@ func (d *driver) setupEncryption(remoteIP netip.Addr) error {
|
||||
func (d *driver) removeEncryption(remoteIP netip.Addr) error {
|
||||
log.G(context.TODO()).Debugf("removeEncryption(%s)", remoteIP)
|
||||
|
||||
d.secMap.Lock()
|
||||
indices, ok := d.secMap.nodes[remoteIP]
|
||||
d.secMap.Unlock()
|
||||
if !ok {
|
||||
spi := func() []spi {
|
||||
d.secMap.Lock()
|
||||
defer d.secMap.Unlock()
|
||||
node := d.secMap.nodes[remoteIP]
|
||||
if node.count == 1 {
|
||||
delete(d.secMap.nodes, remoteIP)
|
||||
return node.spi
|
||||
}
|
||||
node.count--
|
||||
d.secMap.nodes[remoteIP] = node
|
||||
return nil
|
||||
}
|
||||
for i, idxs := range indices {
|
||||
}()
|
||||
|
||||
for i, idxs := range spi {
|
||||
dir := reverse
|
||||
if i == 0 {
|
||||
dir = bidir
|
||||
@@ -263,7 +278,7 @@ func (d *driver) programInput(vni uint32, add bool) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func programSA(localIP, remoteIP net.IP, spi *spi, k *key, dir int, add bool) (fSA *netlink.XfrmState, rSA *netlink.XfrmState, lastErr error) {
|
||||
func programSA(localIP, remoteIP net.IP, spi spi, k *key, dir int, add bool) (fSA *netlink.XfrmState, rSA *netlink.XfrmState, lastErr error) {
|
||||
var (
|
||||
action = "Removing"
|
||||
xfrmProgram = ns.NlHandle().XfrmStateDel
|
||||
@@ -436,12 +451,12 @@ func buildAeadAlgo(k *key, s int) *netlink.XfrmStateAlgo {
|
||||
}
|
||||
}
|
||||
|
||||
func (d *driver) secMapWalk(f func(netip.Addr, []*spi) ([]*spi, bool)) error {
|
||||
func (d *driver) secMapWalk(f func(netip.Addr, []spi) ([]spi, bool)) error {
|
||||
d.secMap.Lock()
|
||||
for node, indices := range d.secMap.nodes {
|
||||
idxs, stop := f(node, indices)
|
||||
for rIP, node := range d.secMap.nodes {
|
||||
idxs, stop := f(rIP, node.spi)
|
||||
if idxs != nil {
|
||||
d.secMap.nodes[node] = idxs
|
||||
d.secMap.nodes[rIP] = encrNode{idxs, node.count}
|
||||
}
|
||||
if stop {
|
||||
break
|
||||
@@ -457,7 +472,7 @@ func (d *driver) setKeys(keys []*key) error {
|
||||
// Accept the encryption keys and clear any stale encryption map
|
||||
d.Lock()
|
||||
d.keys = keys
|
||||
d.secMap = &encrMap{nodes: map[netip.Addr][]*spi{}}
|
||||
d.secMap = &encrMap{nodes: map[netip.Addr]encrNode{}}
|
||||
d.Unlock()
|
||||
log.G(context.TODO()).Debugf("Initial encryption keys: %v", keys)
|
||||
return nil
|
||||
@@ -506,7 +521,7 @@ func (d *driver) updateKeys(newKey, primary, pruneKey *key) error {
|
||||
return types.InvalidParameterErrorf("attempting to both make a key (index %d) primary and delete it", priIdx)
|
||||
}
|
||||
|
||||
d.secMapWalk(func(rIP netip.Addr, spis []*spi) ([]*spi, bool) {
|
||||
d.secMapWalk(func(rIP netip.Addr, spis []spi) ([]spi, bool) {
|
||||
return updateNodeKey(lIP.AsSlice(), aIP.AsSlice(), rIP.AsSlice(), spis, d.keys, newIdx, priIdx, delIdx), false
|
||||
})
|
||||
|
||||
@@ -534,7 +549,7 @@ func (d *driver) updateKeys(newKey, primary, pruneKey *key) error {
|
||||
*********************************************************/
|
||||
|
||||
// Spis and keys are sorted in such away the one in position 0 is the primary
|
||||
func updateNodeKey(lIP, aIP, rIP net.IP, idxs []*spi, curKeys []*key, newIdx, priIdx, delIdx int) []*spi {
|
||||
func updateNodeKey(lIP, aIP, rIP net.IP, idxs []spi, curKeys []*key, newIdx, priIdx, delIdx int) []spi {
|
||||
log.G(context.TODO()).Debugf("Updating keys for node: %s (%d,%d,%d)", rIP, newIdx, priIdx, delIdx)
|
||||
|
||||
spis := idxs
|
||||
@@ -542,7 +557,7 @@ func updateNodeKey(lIP, aIP, rIP net.IP, idxs []*spi, curKeys []*key, newIdx, pr
|
||||
|
||||
// add new
|
||||
if newIdx != -1 {
|
||||
spis = append(spis, &spi{
|
||||
spis = append(spis, spi{
|
||||
forward: buildSPI(aIP, rIP, curKeys[newIdx].tag),
|
||||
reverse: buildSPI(rIP, aIP, curKeys[newIdx].tag),
|
||||
})
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
//go:build linux
|
||||
// FIXME(thaJeztah): remove once we are a module; the go:build directive prevents go from downgrading language version to go1.16:
|
||||
//go:build go1.23 && linux
|
||||
|
||||
package overlay
|
||||
|
||||
@@ -18,6 +19,7 @@ import (
|
||||
"github.com/docker/docker/internal/nlwrap"
|
||||
"github.com/docker/docker/libnetwork/driverapi"
|
||||
"github.com/docker/docker/libnetwork/drivers/overlay/overlayutils"
|
||||
"github.com/docker/docker/libnetwork/internal/countmap"
|
||||
"github.com/docker/docker/libnetwork/internal/netiputil"
|
||||
"github.com/docker/docker/libnetwork/netlabel"
|
||||
"github.com/docker/docker/libnetwork/ns"
|
||||
@@ -53,6 +55,8 @@ type network struct {
|
||||
endpoints endpointTable
|
||||
driver *driver
|
||||
joinCnt int
|
||||
// Ref count of VXLAN Forwarding Database entries programmed into the kernel
|
||||
fdbCnt countmap.Map[ipmac]
|
||||
sboxInit bool
|
||||
initEpoch int
|
||||
initErr error
|
||||
@@ -99,6 +103,7 @@ func (d *driver) CreateNetwork(ctx context.Context, id string, option map[string
|
||||
driver: d,
|
||||
endpoints: endpointTable{},
|
||||
subnets: []*subnet{},
|
||||
fdbCnt: countmap.Map[ipmac]{},
|
||||
}
|
||||
|
||||
vnis := make([]uint32, 0, len(ipV4Data))
|
||||
@@ -586,6 +591,7 @@ func (n *network) initSandbox() error {
|
||||
|
||||
// this is needed to let the peerAdd configure the sandbox
|
||||
n.sbox = sbox
|
||||
n.fdbCnt = countmap.Map[ipmac]{}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -48,7 +48,7 @@ func Register(r driverapi.Registerer, config map[string]interface{}) error {
|
||||
peerDb: peerNetworkMap{
|
||||
mp: map[string]*peerMap{},
|
||||
},
|
||||
secMap: &encrMap{nodes: map[netip.Addr][]*spi{}},
|
||||
secMap: &encrMap{nodes: map[netip.Addr]encrNode{}},
|
||||
config: config,
|
||||
}
|
||||
return r.RegisterDriver(NetworkType, d, driverapi.Capability{
|
||||
|
||||
@@ -5,6 +5,7 @@ package overlay
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net"
|
||||
"net/netip"
|
||||
@@ -19,9 +20,9 @@ import (
|
||||
const ovPeerTable = "overlay_peer_table"
|
||||
|
||||
type peerEntry struct {
|
||||
eid string
|
||||
vtep netip.Addr // Virtual Tunnel End Point for non-local peers
|
||||
prefixBits int // number of 1-bits in network mask of peerIP
|
||||
eid string
|
||||
mac macAddr
|
||||
vtep netip.Addr
|
||||
}
|
||||
|
||||
func (p *peerEntry) isLocal() bool {
|
||||
@@ -29,8 +30,7 @@ func (p *peerEntry) isLocal() bool {
|
||||
}
|
||||
|
||||
type peerMap struct {
|
||||
// set of peerEntry, note the values have to be objects and not pointers to maintain the proper equality checks
|
||||
mp setmatrix.SetMatrix[ipmac, peerEntry]
|
||||
mp setmatrix.SetMatrix[netip.Prefix, peerEntry]
|
||||
sync.Mutex
|
||||
}
|
||||
|
||||
@@ -40,16 +40,16 @@ type peerNetworkMap struct {
|
||||
sync.Mutex
|
||||
}
|
||||
|
||||
func (d *driver) peerDbNetworkWalk(nid string, f func(netip.Addr, net.HardwareAddr, *peerEntry) bool) error {
|
||||
func (d *driver) peerDbNetworkWalk(nid string, f func(netip.Prefix, peerEntry) bool) {
|
||||
d.peerDb.Lock()
|
||||
pMap, ok := d.peerDb.mp[nid]
|
||||
d.peerDb.Unlock()
|
||||
|
||||
if !ok {
|
||||
return nil
|
||||
return
|
||||
}
|
||||
|
||||
mp := map[ipmac]peerEntry{}
|
||||
mp := map[netip.Prefix]peerEntry{}
|
||||
pMap.Lock()
|
||||
for _, pKey := range pMap.mp.Keys() {
|
||||
entryDBList, ok := pMap.mp.Get(pKey)
|
||||
@@ -59,38 +59,28 @@ func (d *driver) peerDbNetworkWalk(nid string, f func(netip.Addr, net.HardwareAd
|
||||
}
|
||||
pMap.Unlock()
|
||||
|
||||
for pKey, pEntry := range mp {
|
||||
if f(pKey.ip, pKey.mac.HardwareAddr(), &pEntry) {
|
||||
return nil
|
||||
for k, v := range mp {
|
||||
if f(k, v) {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *driver) peerDbSearch(nid string, peerIP netip.Addr) (netip.Addr, net.HardwareAddr, *peerEntry, error) {
|
||||
var peerIPMatched netip.Addr
|
||||
var peerMacMatched net.HardwareAddr
|
||||
var pEntryMatched *peerEntry
|
||||
err := d.peerDbNetworkWalk(nid, func(ip netip.Addr, mac net.HardwareAddr, pEntry *peerEntry) bool {
|
||||
if ip == peerIP {
|
||||
peerIPMatched = ip
|
||||
peerMacMatched = mac
|
||||
pEntryMatched = pEntry
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
})
|
||||
if err != nil {
|
||||
return netip.Addr{}, nil, nil, fmt.Errorf("peerdb search for peer ip %q failed: %v", peerIP, err)
|
||||
func (d *driver) peerDbGet(nid string, peerIP netip.Prefix) (peerEntry, bool) {
|
||||
d.peerDb.Lock()
|
||||
pMap, ok := d.peerDb.mp[nid]
|
||||
d.peerDb.Unlock()
|
||||
if !ok {
|
||||
return peerEntry{}, false
|
||||
}
|
||||
|
||||
if !peerIPMatched.IsValid() || pEntryMatched == nil {
|
||||
return netip.Addr{}, nil, nil, fmt.Errorf("peer ip %q not found in peerdb", peerIP)
|
||||
pMap.Lock()
|
||||
defer pMap.Unlock()
|
||||
c, _ := pMap.mp.Get(peerIP)
|
||||
if len(c) == 0 {
|
||||
return peerEntry{}, false
|
||||
}
|
||||
|
||||
return peerIPMatched, peerMacMatched, pEntryMatched, nil
|
||||
return c[0], true
|
||||
}
|
||||
|
||||
func (d *driver) peerDbAdd(nid, eid string, peerIP netip.Prefix, peerMac net.HardwareAddr, vtep netip.Addr) (bool, int) {
|
||||
@@ -102,22 +92,21 @@ func (d *driver) peerDbAdd(nid, eid string, peerIP netip.Prefix, peerMac net.Har
|
||||
}
|
||||
d.peerDb.Unlock()
|
||||
|
||||
pKey := ipmacOf(peerIP.Addr(), peerMac)
|
||||
|
||||
pEntry := peerEntry{
|
||||
eid: eid,
|
||||
vtep: vtep,
|
||||
prefixBits: peerIP.Bits(),
|
||||
eid: eid,
|
||||
mac: macAddrOf(peerMac),
|
||||
vtep: vtep,
|
||||
}
|
||||
|
||||
pMap.Lock()
|
||||
defer pMap.Unlock()
|
||||
b, i := pMap.mp.Insert(pKey, pEntry)
|
||||
b, i := pMap.mp.Insert(peerIP, pEntry)
|
||||
if i != 1 {
|
||||
// Transient case, there is more than one endpoint that is using the same IP,MAC pair
|
||||
s, _ := pMap.mp.String(pKey)
|
||||
log.G(context.TODO()).Warnf("peerDbAdd transient condition - Key:%s cardinality:%d db state:%s", pKey.String(), i, s)
|
||||
// Transient case, there is more than one endpoint that is using the same IP
|
||||
s, _ := pMap.mp.String(peerIP)
|
||||
log.G(context.TODO()).Warnf("peerDbAdd transient condition - Key:%s cardinality:%d db state:%s", peerIP, i, s)
|
||||
}
|
||||
|
||||
return b, i
|
||||
}
|
||||
|
||||
@@ -130,21 +119,19 @@ func (d *driver) peerDbDelete(nid, eid string, peerIP netip.Prefix, peerMac net.
|
||||
}
|
||||
d.peerDb.Unlock()
|
||||
|
||||
pKey := ipmacOf(peerIP.Addr(), peerMac)
|
||||
|
||||
pEntry := peerEntry{
|
||||
eid: eid,
|
||||
vtep: vtep,
|
||||
prefixBits: peerIP.Bits(),
|
||||
eid: eid,
|
||||
mac: macAddrOf(peerMac),
|
||||
vtep: vtep,
|
||||
}
|
||||
|
||||
pMap.Lock()
|
||||
defer pMap.Unlock()
|
||||
b, i := pMap.mp.Remove(pKey, pEntry)
|
||||
b, i := pMap.mp.Remove(peerIP, pEntry)
|
||||
if i != 0 {
|
||||
// Transient case, there is more than one endpoint that is using the same IP,MAC pair
|
||||
s, _ := pMap.mp.String(pKey)
|
||||
log.G(context.TODO()).Warnf("peerDbDelete transient condition - Key:%s cardinality:%d db state:%s", pKey, i, s)
|
||||
// Transient case, there is more than one endpoint that is using the same IP
|
||||
s, _ := pMap.mp.String(peerIP)
|
||||
log.G(context.TODO()).Warnf("peerDbDelete transient condition - Key:%s cardinality:%d db state:%s", peerIP, i, s)
|
||||
}
|
||||
return b, i
|
||||
}
|
||||
@@ -156,28 +143,18 @@ func (d *driver) peerDbDelete(nid, eid string, peerIP netip.Prefix, peerMac net.
|
||||
// networkDB has already delivered some events of peers already available on remote nodes,
|
||||
// these peers are saved into the peerDB and this function is used to properly configure
|
||||
// the network sandbox with all those peers that got previously notified.
|
||||
// Note also that this method sends a single message on the channel and the go routine on the
|
||||
// other side, will atomically loop on the whole table of peers and will program their state
|
||||
// in one single atomic operation. This is fundamental to guarantee consistency, and avoid that
|
||||
// Note also that this method atomically loops on the whole table of peers
|
||||
// and programs their state in one single atomic operation.
|
||||
// This is fundamental to guarantee consistency, and avoid that
|
||||
// new peerAdd or peerDelete gets reordered during the sandbox init.
|
||||
func (d *driver) initSandboxPeerDB(nid string) {
|
||||
d.peerOpMu.Lock()
|
||||
defer d.peerOpMu.Unlock()
|
||||
if err := d.peerInitOp(nid); err != nil {
|
||||
log.G(context.TODO()).WithError(err).Warn("Peer init operation failed")
|
||||
}
|
||||
}
|
||||
|
||||
func (d *driver) peerInitOp(nid string) error {
|
||||
return d.peerDbNetworkWalk(nid, func(peerIP netip.Addr, peerMac net.HardwareAddr, pEntry *peerEntry) bool {
|
||||
// Local entries do not need to be added
|
||||
if pEntry.isLocal() {
|
||||
return false
|
||||
d.peerDbNetworkWalk(nid, func(peerIP netip.Prefix, pEntry peerEntry) bool {
|
||||
if !pEntry.isLocal() {
|
||||
d.addNeighbor(nid, peerIP, pEntry.mac.HardwareAddr(), pEntry.vtep)
|
||||
}
|
||||
|
||||
d.peerAddOp(nid, pEntry.eid, netip.PrefixFrom(peerIP, pEntry.prefixBits), peerMac, pEntry.vtep, false)
|
||||
// return false to loop on all entries
|
||||
return false
|
||||
return false // walk all entries
|
||||
})
|
||||
}
|
||||
|
||||
@@ -185,34 +162,34 @@ func (d *driver) peerInitOp(nid string) error {
|
||||
//
|
||||
// Local peers are signified by an invalid vtep (i.e. netip.Addr{}).
|
||||
func (d *driver) peerAdd(nid, eid string, peerIP netip.Prefix, peerMac net.HardwareAddr, vtep netip.Addr) {
|
||||
if err := validateID(nid, eid); err != nil {
|
||||
log.G(context.TODO()).WithError(err).Warn("Peer add operation failed")
|
||||
return
|
||||
}
|
||||
|
||||
d.peerOpMu.Lock()
|
||||
defer d.peerOpMu.Unlock()
|
||||
err := d.peerAddOp(nid, eid, peerIP, peerMac, vtep, true)
|
||||
if err != nil {
|
||||
log.G(context.TODO()).WithError(err).Warn("Peer add operation failed")
|
||||
|
||||
inserted, dbEntries := d.peerDbAdd(nid, eid, peerIP, peerMac, vtep)
|
||||
if !inserted {
|
||||
log.G(context.TODO()).Warnf("Entry already present in db: nid:%s eid:%s peerIP:%v peerMac:%v vtep:%v",
|
||||
nid, eid, peerIP, peerMac, vtep)
|
||||
}
|
||||
if vtep.IsValid() {
|
||||
err := d.addNeighbor(nid, peerIP, peerMac, vtep)
|
||||
if err != nil {
|
||||
if dbEntries > 1 && errors.As(err, &osl.NeighborSearchError{}) {
|
||||
// We are in the transient case so only the first configuration is programmed into the kernel.
|
||||
// Upon deletion if the active configuration is deleted the next one from the database will be restored.
|
||||
return
|
||||
}
|
||||
log.G(context.TODO()).WithFields(log.Fields{"nid": nid, "eid": eid}).WithError(err).Warn("Peer add operation failed")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (d *driver) peerAddOp(nid, eid string, peerIP netip.Prefix, peerMac net.HardwareAddr, vtep netip.Addr, updateDB bool) error {
|
||||
if err := validateID(nid, eid); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var dbEntries int
|
||||
var inserted bool
|
||||
if updateDB {
|
||||
inserted, dbEntries = d.peerDbAdd(nid, eid, peerIP, peerMac, vtep)
|
||||
if !inserted {
|
||||
log.G(context.TODO()).Warnf("Entry already present in db: nid:%s eid:%s peerIP:%v peerMac:%v vtep:%v",
|
||||
nid, eid, peerIP, peerMac, vtep)
|
||||
}
|
||||
}
|
||||
|
||||
// Local peers do not need any further configuration
|
||||
if !vtep.IsValid() {
|
||||
return nil
|
||||
}
|
||||
|
||||
// addNeighbor programs the kernel so the given peer is reachable through the VXLAN tunnel.
|
||||
func (d *driver) addNeighbor(nid string, peerIP netip.Prefix, peerMac net.HardwareAddr, vtep netip.Addr) error {
|
||||
n := d.network(nid)
|
||||
if n == nil {
|
||||
return nil
|
||||
@@ -235,7 +212,7 @@ func (d *driver) peerAddOp(nid, eid string, peerIP netip.Prefix, peerMac net.Har
|
||||
return fmt.Errorf("subnet sandbox join failed for %q: %v", s.subnetIP.String(), err)
|
||||
}
|
||||
|
||||
if n.secure && len(n.endpoints) > 0 {
|
||||
if n.secure {
|
||||
if err := d.setupEncryption(vtep); err != nil {
|
||||
log.G(context.TODO()).Warn(err)
|
||||
}
|
||||
@@ -243,18 +220,14 @@ func (d *driver) peerAddOp(nid, eid string, peerIP netip.Prefix, peerMac net.Har
|
||||
|
||||
// Add neighbor entry for the peer IP
|
||||
if err := sbox.AddNeighbor(peerIP.Addr().AsSlice(), peerMac, osl.WithLinkName(s.vxlanName)); err != nil {
|
||||
if _, ok := err.(osl.NeighborSearchError); ok && dbEntries > 1 {
|
||||
// We are in the transient case so only the first configuration is programmed into the kernel
|
||||
// Upon deletion if the active configuration is deleted the next one from the database will be restored
|
||||
// Note we are skipping also the next configuration
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("could not add neighbor entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err)
|
||||
return fmt.Errorf("could not add neighbor entry into the sandbox: %w", err)
|
||||
}
|
||||
|
||||
// Add fdb entry to the bridge for the peer mac
|
||||
if err := sbox.AddNeighbor(vtep.AsSlice(), peerMac, osl.WithLinkName(s.vxlanName), osl.WithFamily(syscall.AF_BRIDGE)); err != nil {
|
||||
return fmt.Errorf("could not add fdb entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err)
|
||||
if n.fdbCnt.Add(ipmacOf(vtep, peerMac), 1) == 1 {
|
||||
if err := sbox.AddNeighbor(vtep.AsSlice(), peerMac, osl.WithLinkName(s.vxlanName), osl.WithFamily(syscall.AF_BRIDGE)); err != nil {
|
||||
return fmt.Errorf("could not add fdb entry into the sandbox: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
@@ -264,25 +237,66 @@ func (d *driver) peerAddOp(nid, eid string, peerIP netip.Prefix, peerMac net.Har
|
||||
//
|
||||
// Local peers are signified by an invalid vtep (i.e. netip.Addr{}).
|
||||
func (d *driver) peerDelete(nid, eid string, peerIP netip.Prefix, peerMac net.HardwareAddr, vtep netip.Addr) {
|
||||
logger := log.G(context.TODO()).WithFields(log.Fields{
|
||||
"nid": nid,
|
||||
"eid": eid,
|
||||
"ip": peerIP,
|
||||
"mac": peerMac,
|
||||
"vtep": vtep,
|
||||
})
|
||||
if err := validateID(nid, eid); err != nil {
|
||||
logger.WithError(err).Warn("Peer delete operation failed")
|
||||
return
|
||||
}
|
||||
|
||||
d.peerOpMu.Lock()
|
||||
defer d.peerOpMu.Unlock()
|
||||
err := d.peerDeleteOp(nid, eid, peerIP, peerMac, vtep)
|
||||
if err != nil {
|
||||
log.G(context.TODO()).WithError(err).Warn("Peer delete operation failed")
|
||||
}
|
||||
}
|
||||
|
||||
func (d *driver) peerDeleteOp(nid, eid string, peerIP netip.Prefix, peerMac net.HardwareAddr, vtep netip.Addr) error {
|
||||
if err := validateID(nid, eid); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
deleted, dbEntries := d.peerDbDelete(nid, eid, peerIP, peerMac, vtep)
|
||||
if !deleted {
|
||||
log.G(context.TODO()).Warnf("Entry was not in db: nid:%s eid:%s peerIP:%v peerMac:%v vtep:%v",
|
||||
nid, eid, peerIP, peerMac, vtep)
|
||||
logger.Warn("Peer entry was not in db")
|
||||
}
|
||||
|
||||
if vtep.IsValid() {
|
||||
err := d.deleteNeighbor(nid, peerIP, peerMac, vtep)
|
||||
if err != nil {
|
||||
if dbEntries > 0 && errors.As(err, &osl.NeighborSearchError{}) {
|
||||
// We fall in here if there is a transient state and if the neighbor that is being deleted
|
||||
// was never been configured into the kernel (we allow only 1 configuration at the time per <ip,mac> mapping)
|
||||
return
|
||||
}
|
||||
logger.WithError(err).Warn("Peer delete operation failed")
|
||||
}
|
||||
|
||||
if dbEntries > 0 {
|
||||
// If there is still an entry into the database and the deletion went through without errors means that there is now no
|
||||
// configuration active in the kernel.
|
||||
// Restore one configuration for the ip directly from the database, note that is guaranteed that there is one
|
||||
peerEntry, ok := d.peerDbGet(nid, peerIP)
|
||||
if !ok {
|
||||
log.G(context.TODO()).WithFields(log.Fields{
|
||||
"nid": nid,
|
||||
"ip": peerIP,
|
||||
}).Error("peerDelete unable to restore a configuration: no entry found in the database")
|
||||
return
|
||||
}
|
||||
err = d.addNeighbor(nid, peerIP, peerEntry.mac.HardwareAddr(), peerEntry.vtep)
|
||||
if err != nil {
|
||||
log.G(context.TODO()).WithFields(log.Fields{
|
||||
"nid": nid,
|
||||
"eid": eid,
|
||||
"ip": peerIP,
|
||||
"mac": peerEntry.mac,
|
||||
"vtep": peerEntry.vtep,
|
||||
}).WithError(err).Error("Peer delete operation failed")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// deleteNeighbor removes programming from the kernel for the given peer to be
|
||||
// reachable through the VXLAN tunnel. It is the inverse of [driver.addNeighbor].
|
||||
func (d *driver) deleteNeighbor(nid string, peerIP netip.Prefix, peerMac net.HardwareAddr, vtep netip.Addr) error {
|
||||
n := d.network(nid)
|
||||
if n == nil {
|
||||
return nil
|
||||
@@ -293,64 +307,40 @@ func (d *driver) peerDeleteOp(nid, eid string, peerIP netip.Prefix, peerMac net.
|
||||
return nil
|
||||
}
|
||||
|
||||
if n.secure && len(n.endpoints) == 0 {
|
||||
if n.secure {
|
||||
if err := d.removeEncryption(vtep); err != nil {
|
||||
log.G(context.TODO()).Warn(err)
|
||||
}
|
||||
}
|
||||
|
||||
// Local peers do not have any local configuration to delete
|
||||
if vtep.IsValid() {
|
||||
s := n.getSubnetforIP(peerIP)
|
||||
if s == nil {
|
||||
return fmt.Errorf("could not find the subnet %q in network %q", peerIP.String(), n.id)
|
||||
}
|
||||
// Remove fdb entry to the bridge for the peer mac
|
||||
s := n.getSubnetforIP(peerIP)
|
||||
if s == nil {
|
||||
return fmt.Errorf("could not find the subnet %q in network %q", peerIP.String(), n.id)
|
||||
}
|
||||
// Remove fdb entry to the bridge for the peer mac
|
||||
if n.fdbCnt.Add(ipmacOf(vtep, peerMac), -1) == 0 {
|
||||
if err := sbox.DeleteNeighbor(vtep.AsSlice(), peerMac, osl.WithLinkName(s.vxlanName), osl.WithFamily(syscall.AF_BRIDGE)); err != nil {
|
||||
if _, ok := err.(osl.NeighborSearchError); ok && dbEntries > 0 {
|
||||
// We fall in here if there is a transient state and if the neighbor that is being deleted
|
||||
// was never been configured into the kernel (we allow only 1 configuration at the time per <ip,mac> mapping)
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("could not delete fdb entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err)
|
||||
}
|
||||
|
||||
// Delete neighbor entry for the peer IP
|
||||
if err := sbox.DeleteNeighbor(peerIP.Addr().AsSlice(), peerMac, osl.WithLinkName(s.vxlanName)); err != nil {
|
||||
return fmt.Errorf("could not delete neighbor entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err)
|
||||
return fmt.Errorf("could not delete fdb entry in the sandbox: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
if dbEntries == 0 {
|
||||
return nil
|
||||
// Delete neighbor entry for the peer IP
|
||||
if err := sbox.DeleteNeighbor(peerIP.Addr().AsSlice(), peerMac, osl.WithLinkName(s.vxlanName)); err != nil {
|
||||
return fmt.Errorf("could not delete neighbor entry in the sandbox:%v", err)
|
||||
}
|
||||
|
||||
// If there is still an entry into the database and the deletion went through without errors means that there is now no
|
||||
// configuration active in the kernel.
|
||||
// Restore one configuration for the <ip,mac> directly from the database, note that is guaranteed that there is one
|
||||
peerIPAddr, peerMac, peerEntry, err := d.peerDbSearch(nid, peerIP.Addr())
|
||||
if err != nil {
|
||||
log.G(context.TODO()).Errorf("peerDeleteOp unable to restore a configuration for nid:%s ip:%v mac:%v err:%s", nid, peerIP, peerMac, err)
|
||||
return err
|
||||
}
|
||||
return d.peerAddOp(nid, peerEntry.eid, netip.PrefixFrom(peerIPAddr, peerEntry.prefixBits), peerMac, peerEntry.vtep, false)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *driver) peerFlush(nid string) {
|
||||
d.peerOpMu.Lock()
|
||||
defer d.peerOpMu.Unlock()
|
||||
if err := d.peerFlushOp(nid); err != nil {
|
||||
log.G(context.TODO()).WithError(err).Warn("Peer flush operation failed")
|
||||
}
|
||||
}
|
||||
|
||||
func (d *driver) peerFlushOp(nid string) error {
|
||||
d.peerDb.Lock()
|
||||
defer d.peerDb.Unlock()
|
||||
_, ok := d.peerDb.mp[nid]
|
||||
if !ok {
|
||||
return fmt.Errorf("Unable to find the peerDB for nid:%s", nid)
|
||||
log.G(context.TODO()).Warnf("Peer flush operation failed: unable to find the peerDB for nid:%s", nid)
|
||||
return
|
||||
}
|
||||
delete(d.peerDb.mp, nid)
|
||||
return nil
|
||||
}
|
||||
|
||||
19
libnetwork/internal/countmap/countmap.go
Normal file
19
libnetwork/internal/countmap/countmap.go
Normal file
@@ -0,0 +1,19 @@
|
||||
// FIXME(thaJeztah): remove once we are a module; the go:build directive prevents go from downgrading language version to go1.16:
|
||||
//go:build go1.23
|
||||
|
||||
package countmap
|
||||
|
||||
// Map is a map of counters.
|
||||
type Map[T comparable] map[T]int
|
||||
|
||||
// Add adds delta to the counter for v and returns the new value.
|
||||
//
|
||||
// If the new value is 0, the entry is removed from the map.
|
||||
func (m Map[T]) Add(v T, delta int) int {
|
||||
m[v] += delta
|
||||
c := m[v]
|
||||
if c == 0 {
|
||||
delete(m, v)
|
||||
}
|
||||
return c
|
||||
}
|
||||
27
libnetwork/internal/countmap/countmap_test.go
Normal file
27
libnetwork/internal/countmap/countmap_test.go
Normal file
@@ -0,0 +1,27 @@
|
||||
package countmap_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/docker/docker/libnetwork/internal/countmap"
|
||||
"gotest.tools/v3/assert"
|
||||
is "gotest.tools/v3/assert/cmp"
|
||||
)
|
||||
|
||||
func TestMap(t *testing.T) {
|
||||
m := countmap.Map[string]{}
|
||||
m["foo"] = 7
|
||||
m["bar"] = 2
|
||||
m["zeroed"] = -2
|
||||
|
||||
m.Add("bar", -3)
|
||||
m.Add("foo", -8)
|
||||
m.Add("baz", 1)
|
||||
m.Add("zeroed", 2)
|
||||
assert.Check(t, is.DeepEqual(m, countmap.Map[string]{"foo": -1, "bar": -1, "baz": 1}))
|
||||
|
||||
m.Add("foo", 1)
|
||||
m.Add("bar", 1)
|
||||
m.Add("baz", -1)
|
||||
assert.Check(t, is.DeepEqual(m, countmap.Map[string]{}))
|
||||
}
|
||||
Reference in New Issue
Block a user