Files
moby/daemon/libnetwork/sandbox.go
Cory Snider a90adb6dc1 api/types/network: use netip types as appropriate
And generate the ServiceInfo struct from the Swagger spec.

Signed-off-by: Cory Snider <csnider@mirantis.com>
2025-10-03 21:39:14 +02:00

682 lines
18 KiB
Go

package libnetwork
import (
"context"
"encoding/json"
"fmt"
"net"
"net/netip"
"slices"
"sort"
"strings"
"sync"
"github.com/containerd/log"
"github.com/moby/moby/v2/daemon/libnetwork/etchosts"
"github.com/moby/moby/v2/daemon/libnetwork/osl"
"github.com/moby/moby/v2/daemon/libnetwork/scope"
"github.com/moby/moby/v2/daemon/libnetwork/types"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
)
// SandboxOption is a setter function that customizes a Sandbox. Options are
// applied in order by (*Sandbox).processOptions, which skips nil entries.
//
// NOTE(review): the previous comment referred to a "NewNetContainer" method
// and "ContainerOptionXXXX(...)" setters; neither name is visible in this
// file, so confirm the current constructor/setter names before citing them.
type SandboxOption func(sb *Sandbox)
// processOptions applies every non-nil option to sb, in the order given.
func (sb *Sandbox) processOptions(options ...SandboxOption) {
	for _, apply := range options {
		if apply == nil {
			continue
		}
		apply(sb)
	}
}
// Sandbox provides the control over the network container entity.
// It is a one to one mapping with the container.
type Sandbox struct {
// id is the sandbox ID returned by ID(); Key() is derived from it unless
// the default sandbox is in use.
id string
// containerID is the ID of the associated container, returned by ContainerID().
containerID string
// config holds the settings applied through SandboxOption setters. Refresh
// resets it to the zero value before re-applying options.
config containerConfig
extDNS []extDNSEntry
// osSbox is destroyed by delete unless config.useDefaultSandBox is set.
osSbox *osl.Namespace
// controller owns this sandbox; delete removes the sandbox from
// controller.sandboxes.
controller *Controller
// resolver is stopped by delete when non-nil.
resolver *Resolver
resolverOnce sync.Once
dbIndex uint64
dbExists bool
isStub bool
// inDelete is set (under mu) for the duration of a delete so that a second
// concurrent delete is rejected.
inDelete bool
// ingress marks the sandbox that delete must also clear from
// controller.ingressSandbox.
ingress bool
// ndotsSet is reported by NdotsSet().
ndotsSet bool
oslTypes []osl.SandboxType // slice of properties of this sandbox
loadBalancerNID string // NID that this SB is a load balancer for
mu sync.Mutex
// joinLeaveMu is required as well as mu to modify the following fields,
// acquire joinLeaveMu first, and keep it at least until gateway changes
// have been applied following updates to endpoints.
//
// mu is required to access these fields.
joinLeaveMu sync.Mutex
// endpoints is kept ordered by Endpoint.Less (see addEndpoint and
// updateGwPriorityOrdering).
endpoints []*Endpoint
// epPriority maps an endpoint ID to its priority as read by Endpoint.Less;
// a missing entry yields priority 0.
epPriority map[string]int
populatedEndpoints map[string]struct{}
// This mutex is used to serialize service related operation for an endpoint
// The lock is here because the endpoint is saved into the store so is not unique
service sync.Mutex
}
// hostsPathConfig holds the container configs used to customize the
// container's /etc/hosts file.
type hostsPathConfig struct {
hostName string
domainName string
// hostsPath is the path whose etchosts cache entry is dropped when the
// sandbox is deleted.
hostsPath string
originHostsPath string
// extraHosts lists additional name/IP pairs (see extraHost).
extraHosts []extraHost
}
// extraHost pairs a host name with an IP address; it is the element type of
// hostsPathConfig.extraHosts.
// NOTE(review): presumably each entry becomes an extra /etc/hosts line —
// confirm against the code that writes the hosts file.
type extraHost struct {
name string
IP netip.Addr
}
// resolvConfPathConfig holds the container configs used to customize the
// container's /etc/resolv.conf file.
type resolvConfPathConfig struct {
resolvConfPath string
originResolvConfPath string
resolvConfHashFile string
// dnsList, dnsSearchList and dnsOptionsList carry the DNS server addresses,
// search domains and options configured for the container.
// NOTE(review): inferred from the field names — confirm against the
// resolv.conf generation code.
dnsList []netip.Addr
dnsSearchList []string
dnsOptionsList []string
}
// containerConfig aggregates the per-container settings of a Sandbox. It
// embeds the /etc/hosts and /etc/resolv.conf customizations.
type containerConfig struct {
hostsPathConfig
resolvConfPathConfig
// generic is the label map copied out by Sandbox.Labels and merged into by
// Sandbox.UpdateLabels.
generic map[string]any
// useDefaultSandBox makes Sandbox.Key return the "default" key and makes
// delete leave the underlying osl namespace in place.
useDefaultSandBox bool
useExternalKey bool
exposedPorts []types.TransportPort
}
// ID returns the ID of the sandbox. The field is read without taking sb.mu.
func (sb *Sandbox) ID() string {
return sb.id
}
// ContainerID returns the ID of the container associated with this sandbox.
// The field is read without taking sb.mu.
func (sb *Sandbox) ContainerID() string {
return sb.containerID
}
// Key returns the sandbox's key as produced by osl.GenerateKey. A sandbox
// configured to use the default sandbox gets the key for "default"; any other
// sandbox gets the key for its own ID.
func (sb *Sandbox) Key() string {
	name := sb.id
	if sb.config.useDefaultSandBox {
		name = "default"
	}
	return osl.GenerateKey(name)
}
// Labels returns a shallow copy of the sandbox's labels. The returned map is
// never nil and may be modified by the caller without affecting the sandbox.
func (sb *Sandbox) Labels() map[string]any {
	sb.mu.Lock()
	defer sb.mu.Unlock()

	src := sb.config.generic
	labels := make(map[string]any, len(src))
	for key := range src {
		labels[key] = src[key]
	}
	return labels
}
// Delete destroys this sandbox after detaching it from all connected
// endpoints. It is the non-forced variant of delete: endpoints in the gateway
// network are skipped and every other endpoint is left before being deleted.
// It returns a forbidden error when another delete of the same sandbox is
// already in progress.
func (sb *Sandbox) Delete(ctx context.Context) error {
return sb.delete(ctx, false)
}
// delete tears the sandbox down: it detaches and deletes the attached
// endpoints, drops cached resolution state, destroys the OS-level namespace,
// removes the sandbox from the store and unregisters it from the controller.
//
// When force is false, endpoints in the gateway network are skipped and every
// other endpoint is left (ep.Leave) before it is deleted. When force is true,
// gateway-network endpoints are processed too, the Leave step is skipped and
// force is passed on to ep.Delete.
//
// It returns a forbidden error if another delete is already in progress. If
// an endpoint's network cannot be loaded from the store and the controller is
// not a swarm node, the sandbox is retained: inDelete is reset and an error is
// returned. Failures of individual Leave/Delete/Destroy/storeDelete steps are
// logged and do not abort the teardown.
func (sb *Sandbox) delete(ctx context.Context, force bool) error {
sb.mu.Lock()
if sb.inDelete {
sb.mu.Unlock()
return types.ForbiddenErrorf("another sandbox delete in progress")
}
// Set the inDelete flag. This will ensure that we don't
// update the store until we have completed all the endpoint
// leaves and deletes. And when endpoint leaves and deletes
// are completed then we can finally delete the sandbox object
// altogether from the data store. If the daemon exits
// ungracefully in the middle of a sandbox delete this way we
// will have all the references to the endpoints in the
// sandbox so that we can clean them up when we restart
sb.inDelete = true
sb.mu.Unlock()
c := sb.controller
// Detach from all endpoints
retain := false
for _, ep := range sb.Endpoints() {
// gw network endpoint detach and removal are automatic
if ep.endpointInGWNetwork() && !force {
continue
}
// Retain the sandbox if we can't obtain the network from store.
if _, err := c.getNetworkFromStore(ep.getNetwork().ID()); err != nil {
if !c.isSwarmNode() {
retain = true
}
log.G(ctx).Warnf("Failed getting network for ep %s during sandbox %s delete: %v", ep.ID(), sb.ID(), err)
continue
}
// Leave and Delete run with a context that ignores ctx's cancellation so
// that cleanup is not cut short part-way through.
if !force {
if err := ep.Leave(context.WithoutCancel(ctx), sb); err != nil {
log.G(ctx).Warnf("Failed detaching sandbox %s from endpoint %s: %v\n", sb.ID(), ep.ID(), err)
}
}
if err := ep.Delete(context.WithoutCancel(ctx), force); err != nil {
log.G(ctx).Warnf("Failed deleting endpoint %s: %v\n", ep.ID(), err)
}
}
if retain {
// Clear the flag so that a later delete attempt is accepted.
sb.mu.Lock()
sb.inDelete = false
sb.mu.Unlock()
return fmt.Errorf("could not cleanup all the endpoints in container %s / sandbox %s", sb.containerID, sb.id)
}
// Container is going away. Path cache in etchosts is most
// likely not required any more. Drop it.
etchosts.Drop(sb.config.hostsPath)
if sb.resolver != nil {
sb.resolver.Stop()
}
// The default sandbox's namespace is left in place.
if sb.osSbox != nil && !sb.config.useDefaultSandBox {
if err := sb.osSbox.Destroy(); err != nil {
log.G(ctx).WithError(err).Warn("error destroying network sandbox")
}
}
if err := sb.storeDelete(); err != nil {
log.G(ctx).Warnf("Failed to delete sandbox %s from store: %v", sb.ID(), err)
}
// Unregister from the controller, clearing the ingress reference if this
// sandbox was marked as the ingress sandbox.
c.mu.Lock()
if sb.ingress {
c.ingressSandbox = nil
}
delete(c.sandboxes, sb.ID())
c.mu.Unlock()
return nil
}
// Rename changes the name of every attached Endpoint, skipping endpoints in
// the gateway network. If a rename fails, the endpoints that were already
// renamed are restored to their previous names, most recently renamed first,
// and the original error is returned. Errors hit while rolling back are only
// logged.
func (sb *Sandbox) Rename(name string) error {
	type renamedEndpoint struct {
		ep      *Endpoint
		oldName string
	}
	var (
		renamed []renamedEndpoint
		err     error
	)

	for _, ep := range sb.Endpoints() {
		if ep.endpointInGWNetwork() {
			continue
		}
		previous := ep.Name()
		if err = ep.rename(name); err != nil {
			break
		}
		renamed = append(renamed, renamedEndpoint{ep: ep, oldName: previous})
	}
	if err == nil {
		return nil
	}

	// Undo in reverse order of application.
	for i := len(renamed) - 1; i >= 0; i-- {
		undo := renamed[i]
		if rollbackErr := undo.ep.rename(undo.oldName); rollbackErr != nil {
			log.G(context.TODO()).WithField("old", undo.oldName).WithField("origError", err).WithError(rollbackErr).Error("error renaming sandbox")
		}
	}
	return err
}
// Refresh leaves all the endpoints, resets the sandbox configuration and
// re-applies options, regenerates the resolution files, and then re-joins the
// same endpoints, all without destroying the osl sandbox.
//
// Leave and Join failures are logged and do not stop the refresh; only a
// failure to set up the resolution files is returned.
func (sb *Sandbox) Refresh(ctx context.Context, options ...SandboxOption) error {
	// Snapshot the attached endpoints before detaching them.
	attached := sb.Endpoints()

	// Leave/Join must not be interrupted by cancellation of ctx.
	uncancelled := context.WithoutCancel(ctx)

	for _, ep := range attached {
		err := ep.Leave(uncancelled, sb)
		if err == nil {
			continue
		}
		log.G(ctx).Warnf("Failed detaching sandbox %s from endpoint %s: %v\n", sb.ID(), ep.ID(), err)
	}

	// Start from a clean configuration and apply the new options.
	sb.config = containerConfig{}
	sb.processOptions(options...)

	// Set up discovery files.
	if err := sb.setupResolutionFiles(ctx); err != nil {
		return err
	}

	for _, ep := range attached {
		err := ep.Join(uncancelled, sb)
		if err == nil {
			continue
		}
		log.G(ctx).Warnf("Failed attach sandbox %s to endpoint %s: %v\n", sb.ID(), ep.ID(), err)
	}
	return nil
}
// UpdateLabels merges labels into the sandbox's labels, overwriting the value
// of any key that is already present. The label map is created on first use.
//
// sb.mu is held while the map is modified because Labels reads the same map
// under sb.mu; without the lock a concurrent Labels call would race with the
// map writes here.
func (sb *Sandbox) UpdateLabels(labels map[string]any) {
	sb.mu.Lock()
	defer sb.mu.Unlock()

	if sb.config.generic == nil {
		sb.config.generic = make(map[string]any, len(labels))
	}
	for k, v := range labels {
		sb.config.generic[k] = v
	}
}
// MarshalJSON encodes the sandbox as a JSON string holding only its ID. This
// could be expanded to include more of the sandbox state if there is a need.
func (sb *Sandbox) MarshalJSON() ([]byte, error) {
	sb.mu.Lock()
	id := sb.id
	sb.mu.Unlock()

	return json.Marshal(id)
}
// UnmarshalJSON decodes a JSON string into the sandbox's ID. On a decoding
// error the sandbox is left unchanged and the error is returned.
func (sb *Sandbox) UnmarshalJSON(b []byte) (err error) {
	sb.mu.Lock()
	defer sb.mu.Unlock()

	var decoded string
	if err = json.Unmarshal(b, &decoded); err == nil {
		sb.id = decoded
	}
	return err
}
// Endpoints returns a snapshot of the endpoints connected to the sandbox, in
// the sandbox's current order. The returned slice is a fresh, non-nil copy,
// so callers may iterate over it without holding sb.mu.
func (sb *Sandbox) Endpoints() []*Endpoint {
	sb.mu.Lock()
	defer sb.mu.Unlock()

	snapshot := make([]*Endpoint, 0, len(sb.endpoints))
	return append(snapshot, sb.endpoints...)
}
// addEndpoint inserts ep into sb.endpoints at the position given by
// Endpoint.Less, i.e. just before the first existing endpoint that ep sorts
// ahead of. The insertion point is found by binary search, so sb.endpoints is
// expected to already be ordered.
func (sb *Sandbox) addEndpoint(ep *Endpoint) {
	sb.mu.Lock()
	defer sb.mu.Unlock()

	sortsBefore := func(idx int) bool {
		return ep.Less(sb.endpoints[idx])
	}
	pos := sort.Search(len(sb.endpoints), sortsBefore)
	sb.endpoints = slices.Insert(sb.endpoints, pos, ep)
}
// updateGwPriorityOrdering repositions ep within sb.endpoints: every entry
// with ep's ID is removed, then ep is re-inserted at the position given by
// Endpoint.Less.
func (sb *Sandbox) updateGwPriorityOrdering(ep *Endpoint) {
	sb.mu.Lock()
	defer sb.mu.Unlock()

	sameID := func(other *Endpoint) bool {
		return other.id == ep.id
	}
	sb.endpoints = slices.DeleteFunc(sb.endpoints, sameID)

	sortsBefore := func(idx int) bool {
		return ep.Less(sb.endpoints[idx])
	}
	pos := sort.Search(len(sb.endpoints), sortsBefore)
	sb.endpoints = slices.Insert(sb.endpoints, pos, ep)
}
// populateNetworkResources sets up the sandbox-side resources for ep, inside a
// tracing span named "libnetwork.Sandbox.populateNetworkResources".
//
// In order, it:
//   - runs the OS-specific setup (populateNetworkResourcesOS);
//   - updates the network's service record for ep, unless the controller is
//     an agent or the network is a swarm-scoped multihost network on a swarm
//     node;
//   - adds the endpoint's driver info to the cluster, removing it again if a
//     later step in this function fails;
//   - for endpoints that are not load balancers, or for any endpoint of an
//     ingress sandbox, sets up or clears the default gateway and refreshes
//     the resolver's forwarding policy.
//
// It returns the first error from the OS setup, from addDriverInfoToCluster,
// or from setupDefaultGW. A failure to clear the default gateway is logged,
// not returned.
func (sb *Sandbox) populateNetworkResources(ctx context.Context, ep *Endpoint) (retErr error) {
ctx, span := otel.Tracer("").Start(ctx, "libnetwork.Sandbox.populateNetworkResources", trace.WithAttributes(
attribute.String("endpoint.Name", ep.Name())))
defer span.End()
if err := sb.populateNetworkResourcesOS(ctx, ep); err != nil {
return err
}
// Populate DNS records.
n := ep.getNetwork()
if !n.getController().isAgent() {
if !n.getController().isSwarmNode() || n.Scope() != scope.Swarm || !n.driverIsMultihost() {
n.updateSvcRecord(context.WithoutCancel(ctx), ep, true)
}
}
if err := ep.addDriverInfoToCluster(); err != nil {
return err
}
// Roll back the cluster state added above if anything below fails.
defer func() {
if retErr != nil {
if e := ep.deleteDriverInfoFromCluster(); e != nil {
log.G(ctx).WithError(e).Error("Could not delete endpoint state from cluster on join failure")
}
}
}()
// Load balancing endpoints should never have a default gateway nor
// should they alter the status of a network's default gateway
if !ep.loadBalancer || sb.ingress {
if sb.needDefaultGW() {
if sb.getEndpointInGWNetwork() == nil {
// sb.populateNetworkResources() will be called recursively for the new
// gateway endpoint. So, it'll set the resolver's forwarding policy.
return sb.setupDefaultGW()
}
} else if err := sb.clearDefaultGW(); err != nil {
log.G(ctx).WithFields(log.Fields{
"error": err,
"sid": sb.ID(),
"cid": sb.ContainerID(),
}).Warn("Failure while disconnecting sandbox from gateway network")
}
// Enable upstream forwarding if the sandbox gained external connectivity.
if sb.resolver != nil {
sb.resolver.SetForwardingPolicy(sb.hasExternalAccess())
}
}
return nil
}
// GetEndpoint returns the first endpoint attached to the sandbox whose ID
// equals id, or nil if there is none.
func (sb *Sandbox) GetEndpoint(id string) *Endpoint {
	sb.mu.Lock()
	defer sb.mu.Unlock()

	idx := slices.IndexFunc(sb.endpoints, func(ep *Endpoint) bool {
		return ep.id == id
	})
	if idx < 0 {
		return nil
	}
	return sb.endpoints[idx]
}
// HandleQueryResp passes the (name, ip) pair to HandleQueryResp on the
// network of every endpoint attached to the sandbox.
func (sb *Sandbox) HandleQueryResp(name string, ip net.IP) {
	for _, ep := range sb.Endpoints() {
		ep.getNetwork().HandleQueryResp(name, ip)
	}
}
// ResolveIP asks the network of each attached endpoint, in order, to resolve
// ip and returns the first non-empty answer. It returns "" when no network
// resolves the address.
func (sb *Sandbox) ResolveIP(ctx context.Context, ip string) string {
	log.G(ctx).Debugf("IP To resolve %v", ip)

	for _, ep := range sb.Endpoints() {
		if svc := ep.getNetwork().ResolveIP(ctx, ip); svc != "" {
			return svc
		}
	}
	return ""
}
// ResolveService returns all the backend details about the containers or hosts
// backing a service. Its purpose is to satisfy an SRV query.
//
// The networks of the attached endpoints are queried in order and the first
// non-empty set of SRV records is returned together with its IPs. Names with
// fewer than three dot-separated parts are rejected with (nil, nil).
func (sb *Sandbox) ResolveService(ctx context.Context, name string) ([]*net.SRV, []net.IP) {
	log.G(ctx).Debugf("Service name To resolve: %v", name)

	// There are DNS implementations that allow SRV queries for names not in
	// the format defined by RFC 2782, so the only validation done here is
	// that the name has at least three parts, i.e. at least two dots.
	if strings.Count(name, ".") < 2 {
		return nil, nil
	}

	for _, ep := range sb.Endpoints() {
		srv, ip := ep.getNetwork().ResolveService(ctx, name)
		if len(srv) == 0 {
			continue
		}
		return srv, ip
	}
	return nil, nil
}
// ResolveName resolves name to the IP addresses of the requested family using
// the networks the sandbox is attached to. The boolean result reports whether
// any candidate resolved.
//
// The embedded server owns the docker network domain, so resolution has to
// work for both container_name and container_name.network_name. Since '.' is
// allowed in service and network names, a name a.b.c.d is tried as:
//
//	a.b.c.d in the networks the container is connected to
//	a.b.c   in network d
//	a.b     in network c.d
//	a       in network b.c.d
//
// Each candidate is first looked up as a local container alias and then as
// the actual container name.
func (sb *Sandbox) ResolveName(ctx context.Context, name string, ipType types.IPFamily) ([]net.IP, bool) {
	log.G(ctx).Debugf("Name To resolve: %v", name)
	name = strings.TrimSuffix(name, ".")

	type candidate struct {
		name    string
		network string
	}
	// The unsplit name comes first, then one candidate per '.', splitting
	// from the rightmost dot towards the left.
	candidates := []candidate{{name: name}}
	for cut := strings.LastIndex(name, "."); cut != -1; cut = strings.LastIndex(name[:cut], ".") {
		candidates = append(candidates, candidate{name: name[:cut], network: name[cut+1:]})
	}

	eps := sb.Endpoints()

	// In swarm mode, services with exposed ports are connected to user overlay
	// network, ingress network and docker_gwbridge networks. Name resolution
	// should prioritize returning the VIP/IPs on user overlay network.
	//
	// Re-order the endpoints based on the network-type they're attached to;
	//
	//  1. dynamic networks (user overlay networks)
	//  2. ingress network(s)
	//  3. local networks ("docker_gwbridge")
	if sb.controller.isSwarmNode() {
		sort.Sort(ByNetworkType(eps))
	}

	for _, c := range candidates {
		// Alias lookup first, then the actual container name.
		for _, asAlias := range []bool{true, false} {
			if ips, ok := sb.resolveName(ctx, c.name, c.network, eps, asAlias, ipType); ok {
				return ips, true
			}
		}
	}
	return nil, false
}
// resolveName tries to resolve nameOrAlias on the networks of the endpoints in
// epList, in order, and returns the first successful answer.
//
// When networkName is non-empty, only endpoints on a network with that name
// are considered. When lookupAlias is true, nameOrAlias is treated as an
// endpoint alias: only endpoints that define it are considered and the name it
// maps to is what gets resolved. When lookupAlias is false, endpoints for
// which nameOrAlias is an alias are skipped, so that an alias is never
// resolved as a regular name.
//
// The lookup is recorded in a tracing span named "Sandbox.resolveName".
func (sb *Sandbox) resolveName(ctx context.Context, nameOrAlias string, networkName string, epList []*Endpoint, lookupAlias bool, ipType types.IPFamily) ([]net.IP, bool) {
	ctx, span := otel.Tracer("").Start(ctx, "Sandbox.resolveName", trace.WithAttributes(
		attribute.String("libnet.resolver.name-or-alias", nameOrAlias),
		attribute.String("libnet.network.name", networkName),
		attribute.Bool("libnet.resolver.alias-lookup", lookupAlias),
		attribute.Int("libnet.resolver.ip-family", int(ipType))))
	defer span.End()

	for _, ep := range epList {
		// Every read of ep.aliases happens under ep.mu. A single locked lookup
		// also covers the "endpoint has no aliases" case, which previously was
		// checked with an unlocked len(ep.aliases).
		ep.mu.Lock()
		alias, isAlias := ep.aliases[nameOrAlias]
		ep.mu.Unlock()

		// An alias lookup needs the alias to exist on this endpoint; a regular
		// lookup must not go through an endpoint that uses the name as an alias.
		if lookupAlias != isAlias {
			continue
		}

		nw := ep.getNetwork()
		if networkName != "" && networkName != nw.Name() {
			continue
		}

		name := nameOrAlias
		if lookupAlias {
			name = alias
		}
		if ip, ok := nw.ResolveName(ctx, name, ipType); ok {
			return ip, true
		}
	}
	return nil, false
}
// hasExternalAccess reports whether any of sb's Endpoints appears to have
// external network access: the endpoint's network is not internal and is not
// of type "null" or "host", and the endpoint has an IPv4 or IPv6 gateway or
// default route.
func (sb *Sandbox) hasExternalAccess() bool {
	return slices.ContainsFunc(sb.Endpoints(), func(ep *Endpoint) bool {
		nw := ep.getNetwork()
		if nw.Internal() || nw.Type() == "null" || nw.Type() == "host" {
			return false
		}
		v4, v6 := ep.hasGatewayOrDefaultRoute()
		return v4 || v6
	})
}
// EnableService makes a managed container's service available by adding each
// endpoint that is not yet service-enabled to the cluster's service info and
// marking it enabled.
//
// If any endpoint fails, the error is returned and DisableService is called to
// undo the endpoints enabled so far; an error from that cleanup is logged.
func (sb *Sandbox) EnableService() (retErr error) {
	log.G(context.TODO()).WithField("container", sb.containerID).Debug("EnableService START")

	defer func() {
		if retErr == nil {
			return
		}
		err := sb.DisableService()
		if err == nil {
			return
		}
		log.G(context.TODO()).WithFields(log.Fields{
			"error":     err,
			"origError": retErr,
			"container": sb.containerID,
		}).Error("Error while disabling service after original error")
	}()

	for _, ep := range sb.Endpoints() {
		if ep.isServiceEnabled() {
			continue
		}
		if err := ep.addServiceInfoToCluster(sb); err != nil {
			return fmt.Errorf("could not update state for endpoint %s into cluster: %v", ep.Name(), err)
		}
		ep.enableService()
	}

	log.G(context.TODO()).WithField("container", sb.containerID).Debug("EnableService DONE")
	return nil
}
// DisableService removes a managed container's endpoints from the load balancer
// and service discovery.
//
// Every service-enabled endpoint is processed and marked disabled even if
// removing its service info from the cluster fails. Such failures are logged,
// and the names of the affected endpoints are reported in the returned error.
func (sb *Sandbox) DisableService() error {
	log.G(context.TODO()).WithField("container", sb.containerID).Debug("DisableService START")

	var failed []string
	for _, ep := range sb.Endpoints() {
		if ep.isServiceEnabled() {
			err := ep.deleteServiceInfoFromCluster(sb, false, "DisableService")
			if err != nil {
				failed = append(failed, ep.Name())
				log.G(context.TODO()).WithFields(log.Fields{
					"container": sb.containerID,
					"error":     err,
					"ep":        ep.Name(),
				}).Warn("failed to update state for endpoint into cluster")
			}
			ep.disableService()
		}
	}

	log.G(context.TODO()).WithField("container", sb.containerID).Debug("DisableService DONE")

	if len(failed) == 0 {
		return nil
	}
	return fmt.Errorf("failed to disable service on sandbox:%s, for endpoints %s", sb.ID(), strings.Join(failed, ","))
}
// Less defines an ordering over endpoints, with better candidates for the default
// gateway sorted first.
//
// The criteria below are compared in turn; the first one on which the two
// endpoints differ decides the result:
//
//	priority           higher priority first (2 < 1); defaults to 0
//	gateway network    endpoints outside the gateway network first
//	internal network   endpoints on non-internal networks first
//	gateways           (gw4 and gw6) < (gw4 or gw6) < (no gw)
//	network name       lexical order of the network names (bar < foo)
func (ep *Endpoint) Less(epj *Endpoint) bool {
	sbi, _ := ep.getSandbox()
	sbj, _ := epj.getSandbox()

	// An endpoint without a sandbox, or without an entry in the sandbox's
	// priority map, has priority 0.
	priorityOf := func(sb *Sandbox, e *Endpoint) int {
		if sb == nil {
			return 0
		}
		return sb.epPriority[e.ID()]
	}
	if pi, pj := priorityOf(sbi, ep), priorityOf(sbj, epj); pi != pj {
		return pi > pj
	}

	if inGWi, inGWj := ep.endpointInGWNetwork(), epj.endpointInGWNetwork(); inGWi != inGWj {
		return inGWj
	}

	if internali, internalj := ep.getNetwork().Internal(), epj.getNetwork().Internal(); internali != internalj {
		return internalj
	}

	// gatewayFamilies counts the address families (0, 1 or 2) for which the
	// endpoint has a gateway or default route.
	gatewayFamilies := func(e *Endpoint) int {
		v4, v6 := e.hasGatewayOrDefaultRoute()
		switch {
		case v4 && v6:
			return 2
		case v4 || v6:
			return 1
		default:
			return 0
		}
	}
	if ni, nj := gatewayFamilies(ep), gatewayFamilies(epj); ni != nj {
		return ni > nj
	}

	return ep.network.Name() < epj.network.Name()
}
// NdotsSet returns the sandbox's ndotsSet flag. The field is read without
// taking sb.mu.
// NOTE(review): presumably the flag records that an "ndots" DNS option was
// configured for the container — confirm where ndotsSet is assigned.
func (sb *Sandbox) NdotsSet() bool {
return sb.ndotsSet
}