From 916fa31ab5987779fa2d367fdfabde33b8a811cf Mon Sep 17 00:00:00 2001 From: Rob Murray Date: Thu, 11 Sep 2025 10:18:57 +0100 Subject: [PATCH] Acquire Sandbox.joinLeaveMu for Endpoint force-Delete If an endpoint is still attached to a Sandbox when Endpoint.Delete is called with force=true, sbLeave is called. It may change the Sandbox's gateway, which may conflict with a concurrent Join. So, acquire the Sandbox's joinLeaveMu to do that, and clarify the purpose of that mutex in struct Sandbox comments. Signed-off-by: Rob Murray --- daemon/libnetwork/endpoint.go | 21 ++++++++++------- daemon/libnetwork/sandbox.go | 43 ++++++++++++++++++++--------------- 2 files changed, 38 insertions(+), 26 deletions(-) diff --git a/daemon/libnetwork/endpoint.go b/daemon/libnetwork/endpoint.go index e342f65d1d..f32ecccbcd 100644 --- a/daemon/libnetwork/endpoint.go +++ b/daemon/libnetwork/endpoint.go @@ -914,15 +914,20 @@ func (ep *Endpoint) Delete(ctx context.Context, force bool) error { sbid := ep.sandboxID ep.mu.Unlock() - sb, _ := n.getController().SandboxByID(sbid) - if sb != nil && !force { - return &ActiveContainerError{name: name, id: epid} - } - - if sb != nil { - if e := ep.sbLeave(context.WithoutCancel(ctx), sb, n, force); e != nil { - log.G(ctx).Warnf("failed to leave sandbox for endpoint %s : %v", name, e) + if sb, _ := n.getController().SandboxByID(sbid); sb != nil { + if !force { + return &ActiveContainerError{name: name, id: epid} } + func() { + // Make sure this Delete isn't racing a Join/Leave/Delete that might also be + // updating the Sandbox's selection of gateway endpoints. + sb.joinLeaveMu.Lock() + defer sb.joinLeaveMu.Unlock() + + if e := ep.sbLeave(context.WithoutCancel(ctx), sb, n, force); e != nil { + log.G(ctx).Warnf("failed to leave sandbox for endpoint %s : %v", name, e) + } + }() } if err = n.getController().deleteStoredEndpoint(ep); err != nil { diff --git a/daemon/libnetwork/sandbox.go b/daemon/libnetwork/sandbox.go index 52916cc3c5..7d77b12f66 100644 --- a/daemon/libnetwork/sandbox.go +++ b/daemon/libnetwork/sandbox.go @@ -36,27 +36,34 @@ func (sb *Sandbox) processOptions(options ...SandboxOption) { // Sandbox provides the control over the network container entity. // It is a one to one mapping with the container. type Sandbox struct { - id string - containerID string - config containerConfig - extDNS []extDNSEntry - osSbox *osl.Namespace - controller *Controller - resolver *Resolver - resolverOnce sync.Once + id string + containerID string + config containerConfig + extDNS []extDNSEntry + osSbox *osl.Namespace + controller *Controller + resolver *Resolver + resolverOnce sync.Once + dbIndex uint64 + dbExists bool + isStub bool + inDelete bool + ingress bool + ndotsSet bool + oslTypes []osl.SandboxType // slice of properties of this sandbox + loadBalancerNID string // NID that this SB is a load balancer for + mu sync.Mutex + + // joinLeaveMu is required as well as mu to modify the following fields, + // acquire joinLeaveMu first, and keep it at-least until gateway changes + // have been applied following updates to endpoints. + // + // mu is required to access these fields. + joinLeaveMu sync.Mutex endpoints []*Endpoint epPriority map[string]int populatedEndpoints map[string]struct{} - joinLeaveMu sync.Mutex - dbIndex uint64 - dbExists bool - isStub bool - inDelete bool - ingress bool - ndotsSet bool - oslTypes []osl.SandboxType // slice of properties of this sandbox - loadBalancerNID string // NID that this SB is a load balancer for - mu sync.Mutex + // This mutex is used to serialize service related operation for an endpoint // The lock is here because the endpoint is saved into the store so is not unique service sync.Mutex