mirror of
https://github.com/moby/moby.git
synced 2026-01-11 18:51:37 +00:00
These are no longer needed as these are now part of a module. Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
246 lines
8.0 KiB
Go
246 lines
8.0 KiB
Go
//go:build linux
|
|
|
|
package overlay
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"net/netip"
|
|
"syscall"
|
|
|
|
"github.com/containerd/log"
|
|
"github.com/moby/moby/v2/daemon/libnetwork/internal/hashable"
|
|
"github.com/moby/moby/v2/daemon/libnetwork/internal/setmatrix"
|
|
"github.com/moby/moby/v2/daemon/libnetwork/osl"
|
|
)
|
|
|
|
type peerEntry struct {
|
|
eid string
|
|
mac hashable.MACAddr
|
|
vtep netip.Addr
|
|
}
|
|
|
|
func (p *peerEntry) isLocal() bool {
|
|
return !p.vtep.IsValid()
|
|
}
|
|
|
|
type peerMap struct {
|
|
mp setmatrix.SetMatrix[netip.Prefix, peerEntry]
|
|
}
|
|
|
|
func (pm *peerMap) Walk(f func(netip.Prefix, peerEntry)) {
|
|
for _, peerAddr := range pm.mp.Keys() {
|
|
entry, ok := pm.Get(peerAddr)
|
|
if ok {
|
|
f(peerAddr, entry)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (pm *peerMap) Get(peerIP netip.Prefix) (peerEntry, bool) {
|
|
c, _ := pm.mp.Get(peerIP)
|
|
if len(c) == 0 {
|
|
return peerEntry{}, false
|
|
}
|
|
return c[0], true
|
|
}
|
|
|
|
func (pm *peerMap) Add(eid string, peerIP netip.Prefix, peerMac hashable.MACAddr, vtep netip.Addr) (bool, int) {
|
|
pEntry := peerEntry{
|
|
eid: eid,
|
|
mac: peerMac,
|
|
vtep: vtep,
|
|
}
|
|
b, i := pm.mp.Insert(peerIP, pEntry)
|
|
if i != 1 {
|
|
// Transient case, there is more than one endpoint that is using the same IP
|
|
s, _ := pm.mp.String(peerIP)
|
|
log.G(context.TODO()).Warnf("peerDbAdd transient condition - Key:%s cardinality:%d db state:%s", peerIP, i, s)
|
|
}
|
|
return b, i
|
|
}
|
|
|
|
func (pm *peerMap) Delete(eid string, peerIP netip.Prefix, peerMac hashable.MACAddr, vtep netip.Addr) (bool, int) {
|
|
pEntry := peerEntry{
|
|
eid: eid,
|
|
mac: peerMac,
|
|
vtep: vtep,
|
|
}
|
|
|
|
b, i := pm.mp.Remove(peerIP, pEntry)
|
|
if i != 0 {
|
|
// Transient case, there is more than one endpoint that is using the same IP
|
|
s, _ := pm.mp.String(peerIP)
|
|
log.G(context.TODO()).Warnf("peerDbDelete transient condition - Key:%s cardinality:%d db state:%s", peerIP, i, s)
|
|
}
|
|
return b, i
|
|
}
|
|
|
|
// The overlay uses a lazy initialization approach, this means that when a network is created
|
|
// and the driver registered the overlay does not allocate resources till the moment that a
|
|
// sandbox is actually created.
|
|
// At the moment of this call, that happens when a sandbox is initialized, is possible that
|
|
// networkDB has already delivered some events of peers already available on remote nodes,
|
|
// these peers are saved into the peerDB and this function is used to properly configure
|
|
// the network sandbox with all those peers that got previously notified.
|
|
//
|
|
// The caller is responsible for ensuring that peerAdd and peerDelete are not
|
|
// called concurrently with this function to guarantee consistency.
|
|
func (n *network) initSandboxPeerDB() error {
|
|
var errs []error
|
|
n.peerdb.Walk(func(peerIP netip.Prefix, pEntry peerEntry) {
|
|
if !pEntry.isLocal() {
|
|
if err := n.addNeighbor(peerIP, pEntry.mac, pEntry.vtep); err != nil {
|
|
errs = append(errs, fmt.Errorf("failed to add neighbor entries for %s: %w", peerIP, err))
|
|
}
|
|
}
|
|
})
|
|
return errors.Join(errs...)
|
|
}
|
|
|
|
// peerAdd adds a new entry to the peer database.
|
|
//
|
|
// Local peers are signified by an invalid vtep (i.e. netip.Addr{}).
|
|
func (n *network) peerAdd(eid string, peerIP netip.Prefix, peerMac hashable.MACAddr, vtep netip.Addr) error {
|
|
if eid == "" {
|
|
return errors.New("invalid endpoint id")
|
|
}
|
|
|
|
inserted, dbEntries := n.peerdb.Add(eid, peerIP, peerMac, vtep)
|
|
if !inserted {
|
|
log.G(context.TODO()).Warnf("Entry already present in db: nid:%s eid:%s peerIP:%v peerMac:%v vtep:%v",
|
|
n.id, eid, peerIP, peerMac, vtep)
|
|
}
|
|
if vtep.IsValid() {
|
|
err := n.addNeighbor(peerIP, peerMac, vtep)
|
|
if err != nil {
|
|
if dbEntries > 1 && errors.As(err, &osl.NeighborSearchError{}) {
|
|
// Conflicting neighbor entries are already programmed into the kernel and we are in the transient case.
|
|
// Upon deletion if the active configuration is deleted the next one from the database will be restored.
|
|
return nil
|
|
}
|
|
return fmt.Errorf("peer add operation failed: %w", err)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// addNeighbor programs the kernel so the given peer is reachable through the VXLAN tunnel.
|
|
func (n *network) addNeighbor(peerIP netip.Prefix, peerMac hashable.MACAddr, vtep netip.Addr) error {
|
|
if n.sbox == nil {
|
|
// We are hitting this case for all the events that are arriving before that the sandbox
|
|
// is being created. The peer got already added into the database and the sandbox init will
|
|
// call the peerDbUpdateSandbox that will configure all these peers from the database
|
|
return nil
|
|
}
|
|
|
|
s := n.getSubnetforIP(peerIP)
|
|
if s == nil {
|
|
return fmt.Errorf("couldn't find the subnet %q in network %q", peerIP.String(), n.id)
|
|
}
|
|
|
|
if err := n.joinSandbox(s, false); err != nil {
|
|
return fmt.Errorf("subnet sandbox join failed for %q: %v", s.subnetIP.String(), err)
|
|
}
|
|
|
|
if n.secure {
|
|
if err := n.driver.setupEncryption(vtep); err != nil {
|
|
log.G(context.TODO()).Warn(err)
|
|
}
|
|
}
|
|
|
|
// Add neighbor entry for the peer IP
|
|
if err := n.sbox.AddNeighbor(peerIP.Addr().AsSlice(), peerMac.AsSlice(), osl.WithLinkName(s.vxlanName)); err != nil {
|
|
return fmt.Errorf("could not add neighbor entry into the sandbox: %w", err)
|
|
}
|
|
|
|
// Add fdb entry to the bridge for the peer mac
|
|
if n.fdbCnt.Add(hashable.IPMACFrom(vtep, peerMac), 1) == 1 {
|
|
if err := n.sbox.AddNeighbor(vtep.AsSlice(), peerMac.AsSlice(), osl.WithLinkName(s.vxlanName), osl.WithFamily(syscall.AF_BRIDGE)); err != nil {
|
|
return fmt.Errorf("could not add fdb entry into the sandbox: %w", err)
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// peerDelete removes an entry from the peer database.
|
|
//
|
|
// Local peers are signified by an invalid vtep (i.e. netip.Addr{}).
|
|
func (n *network) peerDelete(eid string, peerIP netip.Prefix, peerMac hashable.MACAddr, vtep netip.Addr) error {
|
|
if eid == "" {
|
|
return errors.New("invalid endpoint id")
|
|
}
|
|
|
|
logger := log.G(context.TODO()).WithFields(log.Fields{
|
|
"nid": n.id,
|
|
"eid": eid,
|
|
"ip": peerIP,
|
|
"mac": peerMac,
|
|
"vtep": vtep,
|
|
})
|
|
deleted, dbEntries := n.peerdb.Delete(eid, peerIP, peerMac, vtep)
|
|
if !deleted {
|
|
logger.Warn("Peer entry was not in db")
|
|
}
|
|
if vtep.IsValid() {
|
|
err := n.deleteNeighbor(peerIP, peerMac, vtep)
|
|
if err != nil {
|
|
if dbEntries > 0 && errors.As(err, &osl.NeighborSearchError{}) {
|
|
// We fall in here if there is a transient state and if the neighbor that is being deleted
|
|
// was never been configured into the kernel (we allow only 1 configuration at the time per <ip,mac> mapping)
|
|
return nil
|
|
}
|
|
logger.WithError(err).Warn("Peer delete operation failed")
|
|
}
|
|
}
|
|
|
|
if dbEntries > 0 {
|
|
// If there is still an entry into the database and the deletion went through without errors means that there is now no
|
|
// configuration active in the kernel.
|
|
// Restore one configuration for the ip directly from the database, note that is guaranteed that there is one
|
|
peerEntry, ok := n.peerdb.Get(peerIP)
|
|
if !ok {
|
|
return fmt.Errorf("peerDelete: unable to restore a configuration: no entry for %v found in the database", peerIP)
|
|
}
|
|
err := n.addNeighbor(peerIP, peerEntry.mac, peerEntry.vtep)
|
|
if err != nil {
|
|
return fmt.Errorf("peer delete operation failed: %w", err)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// deleteNeighbor removes programming from the kernel for the given peer to be
|
|
// reachable through the VXLAN tunnel. It is the inverse of [driver.addNeighbor].
|
|
func (n *network) deleteNeighbor(peerIP netip.Prefix, peerMac hashable.MACAddr, vtep netip.Addr) error {
|
|
if n.sbox == nil {
|
|
return nil
|
|
}
|
|
|
|
if n.secure {
|
|
if err := n.driver.removeEncryption(vtep); err != nil {
|
|
log.G(context.TODO()).Warn(err)
|
|
}
|
|
}
|
|
|
|
s := n.getSubnetforIP(peerIP)
|
|
if s == nil {
|
|
return fmt.Errorf("could not find the subnet %q in network %q", peerIP.String(), n.id)
|
|
}
|
|
// Remove fdb entry to the bridge for the peer mac
|
|
if n.fdbCnt.Add(hashable.IPMACFrom(vtep, peerMac), -1) == 0 {
|
|
if err := n.sbox.DeleteNeighbor(vtep.AsSlice(), peerMac.AsSlice(), osl.WithLinkName(s.vxlanName), osl.WithFamily(syscall.AF_BRIDGE)); err != nil {
|
|
return fmt.Errorf("could not delete fdb entry in the sandbox: %w", err)
|
|
}
|
|
}
|
|
|
|
// Delete neighbor entry for the peer IP
|
|
if err := n.sbox.DeleteNeighbor(peerIP.Addr().AsSlice(), peerMac.AsSlice(), osl.WithLinkName(s.vxlanName)); err != nil {
|
|
return fmt.Errorf("could not delete neighbor entry in the sandbox:%v", err)
|
|
}
|
|
|
|
return nil
|
|
}
|