diff --git a/integration-cli/docker_cli_daemon_test.go b/integration-cli/docker_cli_daemon_test.go index 683dfcfbed..f7ddfe7d2e 100644 --- a/integration-cli/docker_cli_daemon_test.go +++ b/integration-cli/docker_cli_daemon_test.go @@ -704,7 +704,7 @@ func (s *DockerDaemonSuite) TestDaemonIP(c *testing.T) { out, err := d.Cmd("run", "-d", "-p", "8000:8000", "busybox", "top") assert.Assert(c, err != nil, "Running a container must fail with an invalid --ip option") - assert.Equal(c, strings.Contains(out, "Error starting userland proxy"), true) + assert.Check(c, is.Contains(out, "failed to bind host port for 192.170.1.1")) ifName := "dummy" createInterface(c, "dummy", ifName, ipStr) diff --git a/libnetwork/drivers/bridge/port_mapping_linux.go b/libnetwork/drivers/bridge/port_mapping_linux.go index d00ccd2343..95b3db45da 100644 --- a/libnetwork/drivers/bridge/port_mapping_linux.go +++ b/libnetwork/drivers/bridge/port_mapping_linux.go @@ -12,6 +12,8 @@ import ( "os" "slices" "strconv" + "syscall" + "unsafe" "github.com/containerd/log" "github.com/docker/docker/libnetwork/iptables" @@ -19,10 +21,15 @@ import ( "github.com/docker/docker/libnetwork/portallocator" "github.com/docker/docker/libnetwork/portmapper" "github.com/docker/docker/libnetwork/types" + "github.com/ishidawataru/sctp" ) type portBinding struct { types.PortBinding + // boundSocket is used to reserve a host port for the binding. If the + // userland proxy is in-use, it's passed to the proxy when the proxy is + // started, then it's closed and set to nil here. + boundSocket *os.File // childHostIP is the host IP address, as seen from the daemon. This // is normally the same as PortBinding.HostIP but, in rootless mode, it // will be an address in the rootless network namespace. 
RootlessKit @@ -41,21 +48,33 @@ type portBinding struct { stopProxy func() error } +// childPortBinding is pb.PortBinding, with the host address the daemon +// will see - which, in rootless mode, will be an address in the RootlessKit's +// child namespace (see portBinding.childHostIP). +func (pb portBinding) childPortBinding() types.PortBinding { + res := pb.PortBinding + res.HostIP = pb.childHostIP + return res +} + type portBindingReq struct { types.PortBinding childHostIP net.IP disableNAT bool } +// Allow unit tests to supply a dummy StartProxy. +var startProxy = portmapper.StartProxy + // addPortMappings takes cfg, the configuration for port mappings, selects host -// ports when ranges are given, starts docker-proxy or its dummy to reserve -// host ports, and sets up iptables NAT/forwarding rules as necessary. If -// anything goes wrong, it will undo any work it's done and return an error. -// Otherwise, the returned slice of portBinding has an entry per address -// family (if cfg describes a mapping for 'any' host address, it's expanded -// into mappings for IPv4 and IPv6, because that's how the mapping is presented -// in 'inspect'). HostPort and HostPortEnd in each returned portBinding are set -// to the selected and reserved port. +// ports when ranges are given, binds host ports to check they're available and +// reserve them, starts docker-proxy if required, and sets up iptables +// NAT/forwarding rules as necessary. If anything goes wrong, it will undo any +// work it's done and return an error. Otherwise, the returned slice of +// portBinding has an entry per address family (if cfg describes a mapping for +// 'any' host address, it's expanded into mappings for IPv4 and IPv6, because +// that's how the mapping is presented in 'inspect'). HostPort and HostPortEnd in +// each returned portBinding are set to the selected and reserved port. 
func (n *bridgeNetwork) addPortMappings( epAddrV4, epAddrV6 *net.IPNet, cfg []types.PortBinding, @@ -134,15 +153,14 @@ func (n *bridgeNetwork) addPortMappings( continue } - // Allocate a host port, and reserve it by starting docker-proxy for each host - // address in toBind. + // Allocate and bind a host port. newB, err := bindHostPorts(toBind, proxyPath) if err != nil { return nil, err } bindings = append(bindings, newB...) - // Reset the collection of bindings now they're bound. + // Reset toBind now the ports are bound. toBind = toBind[:0] } @@ -168,6 +186,53 @@ func (n *bridgeNetwork) addPortMappings( } } + // Now the iptables rules are set up, it's safe to start the userland proxy. + // (If it was started before the iptables rules were created, it may have + // accepted a connection, then become unreachable due to NAT rules sending + // packets directly to the container.) + // If not starting the proxy, nothing will ever accept a connection on the + // socket. But, listen anyway so that the binding shows up in "netstat -at". 
+ somaxconn := 0 + if proxyPath != "" { + somaxconn = -1 // silently capped to "/proc/sys/net/core/somaxconn" + } + for i := range bindings { + if bindings[i].boundSocket == nil { + continue + } + if bindings[i].Proto == types.TCP { + rc, err := bindings[i].boundSocket.SyscallConn() + if err != nil { + return nil, fmt.Errorf("raw conn not available on TCP socket: %w", err) + } + if errC := rc.Control(func(fd uintptr) { + err = syscall.Listen(int(fd), somaxconn) + }); errC != nil { + return nil, fmt.Errorf("failed to Control TCP socket: %w", errC) + } + if err != nil { + return nil, fmt.Errorf("failed to listen on TCP socket: %w", err) + } + } + if proxyPath != "" { + var err error + bindings[i].stopProxy, err = startProxy( + bindings[i].childPortBinding(), proxyPath, bindings[i].boundSocket, + ) + if err != nil { + return nil, fmt.Errorf("failed to start userland proxy for port mapping %s: %w", + bindings[i].PortBinding, err) + } + if err := bindings[i].boundSocket.Close(); err != nil { + log.G(context.TODO()).WithFields(log.Fields{ + "error": err, + "mapping": bindings[i].PortBinding, + }).Warnf("failed to close proxy socket") + } + bindings[i].boundSocket = nil + } + } + return bindings, nil } @@ -366,7 +431,7 @@ func setChildHostIP(pdc portDriverClient, req portBindingReq) portBindingReq { return req } -// bindHostPorts allocates ports and starts docker-proxy for the given cfg. The +// bindHostPorts allocates and binds host ports for the given cfg. The // caller is responsible for ensuring that all entries in cfg map the same proto, // container port, and host port range (their host addresses must differ). func bindHostPorts(cfg []portBindingReq, proxyPath string) ([]portBinding, error) { @@ -401,17 +466,13 @@ func bindHostPorts(cfg []portBindingReq, proxyPath string) ([]portBinding, error return nil, err } -// Allow unit tests to supply a dummy StartProxy. 
-var startProxy = portmapper.StartProxy - // attemptBindHostPorts allocates host ports for each port mapping that requires -// one, and reserves those ports by starting docker-proxy. +// one, and reserves those ports by binding them. // // If the allocator doesn't have an available port in the required range, or the -// docker-proxy process doesn't start (perhaps because another process has -// already bound the port), all resources are released and an error is returned. -// When ports are successfully reserved, a portBinding is returned for each -// mapping. +// port can't be bound (perhaps because another process has already bound it), +// all resources are released and an error is returned. When ports are +// successfully reserved, a portBinding is returned for each mapping. // // If NAT is disabled for any of the bindings, no host port reservation is // needed. These bindings are included in results, as the container port itself @@ -428,7 +489,7 @@ func attemptBindHostPorts( addrs := make([]net.IP, 0, len(cfg)) for _, c := range cfg { if !c.disableNAT { - addrs = append(addrs, c.HostIP) + addrs = append(addrs, c.childHostIP) } } @@ -448,31 +509,177 @@ func attemptBindHostPorts( } res := make([]portBinding, 0, len(cfg)) - for _, c := range cfg { - pb := portBinding{PortBinding: c.GetCopy()} - if c.disableNAT { - pb.HostPort = 0 - } else { - pb.stopProxy, err = startProxy(c.Proto.String(), c.childHostIP, port, c.IP, int(c.Port), proxyPath) - if err != nil { - return nil, fmt.Errorf("failed to bind port %s:%d/%s: %w", c.HostIP, port, c.Proto, err) - } - defer func() { - if retErr != nil { - if err := pb.stopProxy(); err != nil { - log.G(context.TODO()).Warnf("Failed to stop userland proxy for port mapping %s: %s", pb, err) + defer func() { + if retErr != nil { + for _, pb := range res { + if pb.boundSocket != nil { + if err := pb.boundSocket.Close(); err != nil { + log.G(context.TODO()).Warnf("Failed to close port binding for %s: %s", pb, err) } } - }() - 
pb.HostPort = uint16(port) + // TODO(robmry) - this is only needed because the userland proxy may have + // been started for SCTP. If a bound socket is passed to the proxy after + // iptables rules have been configured (as it is for TCP/UDP), remove this. + if pb.stopProxy != nil { + if err := pb.stopProxy(); err != nil { + log.G(context.TODO()).Warnf("Failed to stop proxy for %s: %s", pb, err) + } + } + } + } + }() + + for _, c := range cfg { + var pb portBinding + if c.disableNAT { + pb = portBinding{PortBinding: c.GetCopy()} + pb.HostPort = 0 + pb.HostPortEnd = 0 + } else { + switch proto { + case "tcp": + pb, err = bindTCPOrUDP(c, port, syscall.SOCK_STREAM, syscall.IPPROTO_TCP) + case "udp": + pb, err = bindTCPOrUDP(c, port, syscall.SOCK_DGRAM, syscall.IPPROTO_UDP) + case "sctp": + if proxyPath == "" { + pb, err = bindSCTP(c, port) + } else { + // TODO(robmry) - it's not currently possible to pass a bound SCTP port + // to the userland proxy, because the proxy is not able to convert the + // file descriptor into an sctp.SCTPListener (fd is an unexported member + // of the struct, and ListenSCTP is the only constructor). + // So, it is possible for the proxy to start listening and accept + // connections before iptables rules are created that would bypass + // the proxy for external connections. + // Remove this and pb.stopProxy() from the cleanup function above if + // this is fixed. 
+ pb, err = startSCTPProxy(c, port, proxyPath) + } + default: + return nil, fmt.Errorf("Unknown addr type: %s", proto) + } + if err != nil { + return nil, err + } } - pb.HostPortEnd = pb.HostPort - pb.childHostIP = c.childHostIP res = append(res, pb) } return res, nil } +func bindTCPOrUDP(cfg portBindingReq, port, typ, proto int) (_ portBinding, retErr error) { + pb := portBinding{PortBinding: cfg.PortBinding.GetCopy()} + pb.HostPort = uint16(port) + pb.HostPortEnd = pb.HostPort + pb.childHostIP = cfg.childHostIP + + var domain int + var sa syscall.Sockaddr + if hip := cfg.childHostIP.To4(); hip != nil { + domain = syscall.AF_INET + sa4 := syscall.SockaddrInet4{Port: port} + copy(sa4.Addr[:], hip) + sa = &sa4 + } else { + domain = syscall.AF_INET6 + sa6 := syscall.SockaddrInet6{Port: port} + copy(sa6.Addr[:], cfg.childHostIP) + sa = &sa6 + } + + sd, err := syscall.Socket(domain, typ|syscall.SOCK_CLOEXEC, proto) + if err != nil { + return portBinding{}, fmt.Errorf("failed to create socket for userland proxy for %s: %w", cfg, err) + } + defer func() { + if retErr != nil { + syscall.Close(sd) + } + }() + + if domain == syscall.AF_INET6 { + syscall.SetsockoptInt(sd, syscall.IPPROTO_IPV6, syscall.IPV6_V6ONLY, 1) + } + if err := syscall.Bind(sd, sa); err != nil { + if cfg.HostPort == cfg.HostPortEnd { + return portBinding{}, fmt.Errorf("failed to bind host port for %s: %w", cfg, err) + } + return portBinding{}, fmt.Errorf("failed to bind host port %d for %s: %w", port, cfg, err) + } + + pb.boundSocket = os.NewFile(uintptr(sd), "listener") + if pb.boundSocket == nil { + return portBinding{}, fmt.Errorf("failed to convert socket for userland proxy for %s", cfg) + } + return pb, nil +} + +// bindSCTP is based on sctp.ListenSCTP. The socket is created and bound, but +// does not start listening. 
+func bindSCTP(cfg portBindingReq, port int) (_ portBinding, retErr error) { + pb := portBinding{PortBinding: cfg.GetCopy()} + pb.HostPort = uint16(port) + pb.HostPortEnd = pb.HostPort + pb.childHostIP = cfg.childHostIP + + domain := syscall.AF_INET + if cfg.childHostIP.To4() == nil { + domain = syscall.AF_INET6 + } + + sd, err := syscall.Socket(domain, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, syscall.IPPROTO_SCTP) + if err != nil { + return portBinding{}, fmt.Errorf("failed to create socket for userland proxy for %s: %w", cfg, err) + } + defer func() { + if retErr != nil { + syscall.Close(sd) + } + }() + + if domain == syscall.AF_INET6 { + syscall.SetsockoptInt(sd, syscall.IPPROTO_IPV6, syscall.IPV6_V6ONLY, 1) + } + + options := sctp.InitMsg{NumOstreams: sctp.SCTP_MAX_STREAM} + if _, _, errno := syscall.Syscall6(syscall.SYS_SETSOCKOPT, + uintptr(sd), + sctp.SOL_SCTP, + sctp.SCTP_INITMSG, + uintptr(unsafe.Pointer(&options)), + unsafe.Sizeof(options), + 0); errno != 0 { + return portBinding{}, errno + } + + if err := sctp.SCTPBind(sd, + &sctp.SCTPAddr{IPAddrs: []net.IPAddr{{IP: cfg.childHostIP}}, Port: port}, + sctp.SCTP_BINDX_ADD_ADDR); err != nil { + return portBinding{}, fmt.Errorf("failed to bind socket for userland proxy for %s: %w", cfg, err) + } + + pb.boundSocket = os.NewFile(uintptr(sd), "listener") + if pb.boundSocket == nil { + return portBinding{}, fmt.Errorf("failed to convert socket for userland proxy for %s", cfg) + } + return pb, nil +} + +func startSCTPProxy(cfg portBindingReq, port int, proxyPath string) (_ portBinding, retErr error) { + pb := portBinding{PortBinding: cfg.GetCopy()} + pb.HostPort = uint16(port) + pb.HostPortEnd = pb.HostPort + pb.childHostIP = cfg.childHostIP + + var err error + pb.stopProxy, err = startProxy(pb.childPortBinding(), proxyPath, nil) + if err != nil { + return portBinding{}, err + } + return pb, nil +} + // releasePorts attempts to release all port bindings, does not stop on failure func (n 
*bridgeNetwork) releasePorts(ep *bridgeEndpoint) error { n.Lock() @@ -486,14 +693,20 @@ func (n *bridgeNetwork) releasePorts(ep *bridgeEndpoint) error { func (n *bridgeNetwork) releasePortBindings(pbs []portBinding) error { var errs []error for _, pb := range pbs { - var errPD, errP error + var errS, errPD, errP error + if pb.boundSocket != nil { + errS = pb.boundSocket.Close() + if errS != nil { + errS = fmt.Errorf("failed to close socket for port mapping %s: %w", pb, errS) + } + } if pb.portDriverRemove != nil { errPD = pb.portDriverRemove() } if pb.stopProxy != nil { errP = pb.stopProxy() if errP != nil { - errP = fmt.Errorf("failed to stop docker-proxy for port mapping %s: %w", pb, errP) + errP = fmt.Errorf("failed to stop userland proxy for port mapping %s: %w", pb, errP) } } errN := n.setPerPortIptables(pb, false) @@ -501,9 +714,9 @@ func (n *bridgeNetwork) releasePortBindings(pbs []portBinding) error { errN = fmt.Errorf("failed to remove iptables rules for port mapping %s: %w", pb, errN) } if pb.HostPort > 0 { - portallocator.Get().ReleasePort(pb.HostIP, pb.Proto.String(), int(pb.HostPort)) + portallocator.Get().ReleasePort(pb.childHostIP, pb.Proto.String(), int(pb.HostPort)) } - errs = append(errs, errPD, errP, errN) + errs = append(errs, errS, errPD, errP, errN) } return errors.Join(errs...) } @@ -545,8 +758,8 @@ func setPerPortNAT(b portBinding, ipv iptables.IPVersion, proxyPath string, brid // want "0.0.0.0/0". "0/0" is correctly interpreted as "any // value" by both iptables and ip6tables. 
hostIP := "0/0" - if !b.HostIP.IsUnspecified() { - hostIP = b.HostIP.String() + if !b.childHostIP.IsUnspecified() { + hostIP = b.childHostIP.String() } args := []string{ "-p", b.Proto.String(), diff --git a/libnetwork/drivers/bridge/port_mapping_linux_test.go b/libnetwork/drivers/bridge/port_mapping_linux_test.go index 23972ebd61..684bece84b 100644 --- a/libnetwork/drivers/bridge/port_mapping_linux_test.go +++ b/libnetwork/drivers/bridge/port_mapping_linux_test.go @@ -6,8 +6,10 @@ import ( "fmt" "net" "net/netip" + "os" "strconv" "strings" + "syscall" "testing" "github.com/docker/docker/internal/testutils/netnsutils" @@ -16,6 +18,7 @@ import ( "github.com/docker/docker/libnetwork/ns" "github.com/docker/docker/libnetwork/portallocator" "github.com/docker/docker/libnetwork/types" + "github.com/vishvananda/netlink" "gotest.tools/v3/assert" is "gotest.tools/v3/assert/cmp" ) @@ -423,6 +426,7 @@ func TestAddPortMappings(t *testing.T) { proxyPath string busyPortIPv4 int rootless bool + hostAddrs []string expErr string expPBs []types.PortBinding @@ -441,6 +445,7 @@ func TestAddPortMappings(t *testing.T) { {Proto: types.TCP, Port: 22}, {Proto: types.TCP, Port: 80}, }, + proxyPath: "/dummy/path/to/proxy", expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP4.IP, Port: 22, HostIP: net.IPv4zero, HostPort: firstEphemPort}, {Proto: types.TCP, IP: ctrIP6.IP, Port: 22, HostIP: net.IPv6zero, HostPort: firstEphemPort}, @@ -449,22 +454,24 @@ func TestAddPortMappings(t *testing.T) { }, }, { - name: "specific host port", - epAddrV4: ctrIP4, - epAddrV6: ctrIP6, - cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080}}, + name: "specific host port", + epAddrV4: ctrIP4, + epAddrV6: ctrIP6, + cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080}}, + proxyPath: "/dummy/path/to/proxy", expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8080, HostPortEnd: 8080}, {Proto: types.TCP, IP: ctrIP6.IP, Port: 
80, HostIP: net.IPv6zero, HostPort: 8080, HostPortEnd: 8080}, }, }, { - name: "nat explicitly enabled", - epAddrV4: ctrIP4, - epAddrV6: ctrIP6, - cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080}}, - gwMode4: gwModeNAT, - gwMode6: gwModeNAT, + name: "nat explicitly enabled", + epAddrV4: ctrIP4, + epAddrV6: ctrIP6, + cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080}}, + gwMode4: gwModeNAT, + gwMode6: gwModeNAT, + proxyPath: "/dummy/path/to/proxy", expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8080, HostPortEnd: 8080}, {Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: net.IPv6zero, HostPort: 8080, HostPortEnd: 8080}, @@ -475,24 +482,27 @@ func TestAddPortMappings(t *testing.T) { epAddrV4: ctrIP4, epAddrV6: ctrIP6, cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080}}, + proxyPath: "/dummy/path/to/proxy", busyPortIPv4: 8080, - expErr: "failed to bind port 0.0.0.0:8080/tcp: busy port", + expErr: "failed to bind host port for 0.0.0.0:8080:172.19.0.2:80/tcp: address already in use", }, { - name: "ipv4 mapped container address with specific host port", - epAddrV4: ctrIP4Mapped, - epAddrV6: ctrIP6, - cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080}}, + name: "ipv4 mapped container address with specific host port", + epAddrV4: ctrIP4Mapped, + epAddrV6: ctrIP6, + cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080}}, + proxyPath: "/dummy/path/to/proxy", expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8080, HostPortEnd: 8080}, {Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: net.IPv6zero, HostPort: 8080, HostPortEnd: 8080}, }, }, { - name: "ipv4 mapped host address with specific host port", - epAddrV4: ctrIP4, - epAddrV6: ctrIP6, - cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostIP: newIPNet(t, "::ffff:127.0.0.1/128").IP, HostPort: 8080}}, + name: "ipv4 
mapped host address with specific host port", + epAddrV4: ctrIP4, + epAddrV6: ctrIP6, + cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostIP: newIPNet(t, "::ffff:127.0.0.1/128").IP, HostPort: 8080}}, + proxyPath: "/dummy/path/to/proxy", expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: newIPNet(t, "127.0.0.1/32").IP, HostPort: 8080, HostPortEnd: 8080}, }, @@ -502,6 +512,7 @@ func TestAddPortMappings(t *testing.T) { epAddrV4: ctrIP4, epAddrV6: ctrIP6, cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080, HostPortEnd: 8081}}, + proxyPath: "/dummy/path/to/proxy", busyPortIPv4: 8080, expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8081, HostPortEnd: 8081}, @@ -516,6 +527,7 @@ func TestAddPortMappings(t *testing.T) { {Proto: types.TCP, Port: 80, HostIP: net.IPv4zero, HostPort: 8080, HostPortEnd: 8081}, {Proto: types.TCP, Port: 80, HostIP: net.IPv6zero, HostPort: 8080, HostPortEnd: 8081}, }, + proxyPath: "/dummy/path/to/proxy", busyPortIPv4: 8080, expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8081}, @@ -534,6 +546,7 @@ func TestAddPortMappings(t *testing.T) { {Proto: types.UDP, Port: 81, HostPort: 8080, HostPortEnd: 8083}, {Proto: types.UDP, Port: 82, HostPort: 8080, HostPortEnd: 8083}, }, + proxyPath: "/dummy/path/to/proxy", busyPortIPv4: 8082, expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8080, HostPortEnd: 8080}, @@ -559,8 +572,9 @@ func TestAddPortMappings(t *testing.T) { {Proto: types.TCP, Port: 81, HostPort: 8080, HostPortEnd: 8082}, {Proto: types.TCP, Port: 82, HostPort: 8080, HostPortEnd: 8082}, }, + proxyPath: "/dummy/path/to/proxy", busyPortIPv4: 8081, - expErr: "failed to bind port 0.0.0.0:8081/tcp: busy port", + expErr: "failed to bind host port 8081 for 0.0.0.0:8080-8082:172.19.0.2:82/tcp", }, { name: "map host ipv6 to ipv4 
container with proxy", @@ -591,9 +605,10 @@ func TestAddPortMappings(t *testing.T) { epAddrV4: ctrIP4, epAddrV6: ctrIP6, cfg: []types.PortBinding{{Proto: types.TCP, Port: 80}}, - defHostIP: newIPNet(t, "10.11.12.13/24").IP, + proxyPath: "/dummy/path/to/proxy", + defHostIP: newIPNet(t, "127.0.0.1/8").IP, expPBs: []types.PortBinding{ - {Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: newIPNet(t, "10.11.12.13/24").IP, HostPort: firstEphemPort}, + {Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: newIPNet(t, "127.0.0.1/8").IP, HostPort: firstEphemPort}, }, }, { @@ -601,9 +616,10 @@ func TestAddPortMappings(t *testing.T) { epAddrV4: ctrIP4, epAddrV6: ctrIP6, cfg: []types.PortBinding{{Proto: types.TCP, Port: 80}}, - defHostIP: newIPNet(t, "::ffff:10.11.12.13/120").IP, + proxyPath: "/dummy/path/to/proxy", + defHostIP: newIPNet(t, "::ffff:127.0.0.1/72").IP, expPBs: []types.PortBinding{ - {Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: newIPNet(t, "10.11.12.13/24").IP, HostPort: firstEphemPort}, + {Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: newIPNet(t, "127.0.0.1/8").IP, HostPort: firstEphemPort}, }, }, { @@ -611,6 +627,7 @@ func TestAddPortMappings(t *testing.T) { epAddrV4: ctrIP4, epAddrV6: ctrIP6, cfg: []types.PortBinding{{Proto: types.TCP, Port: 80}}, + proxyPath: "/dummy/path/to/proxy", defHostIP: net.IPv6zero, expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: net.IPv6zero, HostPort: firstEphemPort}, @@ -621,6 +638,7 @@ func TestAddPortMappings(t *testing.T) { epAddrV4: ctrIP4, epAddrV6: ctrIP6, cfg: []types.PortBinding{{Proto: types.TCP, Port: 80}}, + proxyPath: "/dummy/path/to/proxy", defHostIP: newIPNet(t, "::1/128").IP, expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: newIPNet(t, "::1/128").IP, HostPort: firstEphemPort}, @@ -634,16 +652,17 @@ func TestAddPortMappings(t *testing.T) { {Proto: types.TCP, Port: 80, HostPort: 8080}, {Proto: types.TCP, Port: 22, HostPort: 2222}, }, + 
proxyPath: "/dummy/path/to/proxy", expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP4.IP, Port: 22, HostIP: net.IPv4zero, HostPort: 2222}, {Proto: types.TCP, IP: ctrIP6.IP, Port: 22, HostIP: net.IPv6zero, HostPort: 2222}, {Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8080}, {Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: net.IPv6zero, HostPort: 8080}, }, - expReleaseErr: "failed to stop docker-proxy for port mapping 0.0.0.0:2222:172.19.0.2:22/tcp: can't stop now\n" + - "failed to stop docker-proxy for port mapping [::]:2222:[fdf8:b88e:bb5c:3483::2]:22/tcp: can't stop now\n" + - "failed to stop docker-proxy for port mapping 0.0.0.0:8080:172.19.0.2:80/tcp: can't stop now\n" + - "failed to stop docker-proxy for port mapping [::]:8080:[fdf8:b88e:bb5c:3483::2]:80/tcp: can't stop now", + expReleaseErr: "failed to stop userland proxy for port mapping 0.0.0.0:2222:172.19.0.2:22/tcp: can't stop now\n" + + "failed to stop userland proxy for port mapping [::]:2222:[fdf8:b88e:bb5c:3483::2]:22/tcp: can't stop now\n" + + "failed to stop userland proxy for port mapping 0.0.0.0:8080:172.19.0.2:80/tcp: can't stop now\n" + + "failed to stop userland proxy for port mapping [::]:8080:[fdf8:b88e:bb5c:3483::2]:80/tcp: can't stop now", }, { name: "disable nat6", @@ -653,7 +672,8 @@ func TestAddPortMappings(t *testing.T) { {Proto: types.TCP, Port: 22}, {Proto: types.TCP, Port: 80}, }, - gwMode6: gwModeRouted, + proxyPath: "/dummy/path/to/proxy", + gwMode6: gwModeRouted, expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP4.IP, Port: 22, HostIP: net.IPv4zero, HostPort: firstEphemPort}, {Proto: types.TCP, IP: ctrIP6.IP, Port: 22, HostIP: net.IPv6zero}, @@ -669,7 +689,8 @@ func TestAddPortMappings(t *testing.T) { {Proto: types.TCP, Port: 22}, {Proto: types.TCP, Port: 80}, }, - gwMode4: gwModeRouted, + proxyPath: "/dummy/path/to/proxy", + gwMode4: gwModeRouted, expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP4.IP, Port: 22, HostIP: 
net.IPv4zero}, {Proto: types.TCP, IP: ctrIP6.IP, Port: 22, HostIP: net.IPv6zero, HostPort: firstEphemPort}, @@ -685,8 +706,9 @@ func TestAddPortMappings(t *testing.T) { {Proto: types.TCP, Port: 22}, {Proto: types.TCP, Port: 80}, }, - gwMode4: gwModeRouted, - gwMode6: gwModeRouted, + proxyPath: "/dummy/path/to/proxy", + gwMode4: gwModeRouted, + gwMode6: gwModeRouted, expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP4.IP, Port: 22, HostIP: net.IPv4zero}, {Proto: types.TCP, IP: ctrIP6.IP, Port: 22, HostIP: net.IPv6zero}, @@ -695,9 +717,10 @@ func TestAddPortMappings(t *testing.T) { }, }, { - name: "same ports for matching mappings with different host addresses", - epAddrV4: ctrIP4, - epAddrV6: ctrIP6, + name: "same ports for matching mappings with different host addresses", + epAddrV4: ctrIP4, + epAddrV6: ctrIP6, + hostAddrs: []string{"192.168.1.2/24", "fd0c:9167:5b11::2/64", "fd0c:9167:5b11::3/64"}, cfg: []types.PortBinding{ // These two should both get the same host port. {Proto: types.TCP, Port: 80, HostIP: newIPNet(t, "fd0c:9167:5b11::2/64").IP}, @@ -711,6 +734,7 @@ func TestAddPortMappings(t *testing.T) { {Proto: types.TCP, Port: 12345, HostPort: 12345, HostPortEnd: 12346}, {Proto: types.TCP, Port: 12345, HostPort: 12345}, }, + proxyPath: "/dummy/path/to/proxy", expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP4.IP, Port: 12345, HostIP: net.IPv4zero, HostPort: 12345}, {Proto: types.TCP, IP: ctrIP6.IP, Port: 12345, HostIP: net.IPv6zero, HostPort: 12345}, @@ -751,15 +775,14 @@ func TestAddPortMappings(t *testing.T) { origStartProxy := startProxy defer func() { startProxy = origStartProxy }() proxies := map[proxyCall]bool{} // proxy -> is not stopped - startProxy = func(proto string, - hostIP net.IP, hostPort int, - containerIP net.IP, containerPort int, + startProxy = func(pb types.PortBinding, proxyPath string, + listenSock *os.File, ) (stop func() error, retErr error) { - if tc.busyPortIPv4 > 0 && tc.busyPortIPv4 == hostPort && hostIP.To4() != nil 
{ + if tc.busyPortIPv4 > 0 && tc.busyPortIPv4 == int(pb.HostPort) && pb.HostIP.To4() != nil { return nil, errors.New("busy port") } - c := newProxyCall(proto, hostIP, hostPort, containerIP, containerPort, proxyPath) + c := newProxyCall(pb.Proto.String(), pb.HostIP, int(pb.HostPort), pb.IP, int(pb.Port), proxyPath) if _, ok := proxies[c]; ok { return nil, fmt.Errorf("duplicate proxy: %#v", c) } @@ -781,6 +804,28 @@ func TestAddPortMappings(t *testing.T) { defer func() { newPortDriverClient = origNewPortDriverClient }() newPortDriverClient = func() (portDriverClient, error) { return newMockPortDriverClient() } + if len(tc.hostAddrs) > 0 { + dummyLink := &netlink.Bridge{LinkAttrs: netlink.LinkAttrs{Name: "br-dummy"}} + err := netlink.LinkAdd(dummyLink) + assert.NilError(t, err) + for _, addr := range tc.hostAddrs { + // Add with NODAD so that the address is available immediately. + err := netlink.AddrAdd(dummyLink, + &netlink.Addr{IPNet: newIPNet(t, addr), Flags: syscall.IFA_F_NODAD}) + assert.NilError(t, err) + } + err = netlink.LinkSetUp(dummyLink) + assert.NilError(t, err) + } + if tc.busyPortIPv4 != 0 { + tl, err := net.ListenTCP("tcp4", &net.TCPAddr{IP: net.IPv4zero, Port: tc.busyPortIPv4}) + assert.NilError(t, err) + defer tl.Close() + ul, err := net.ListenUDP("udp4", &net.UDPAddr{IP: net.IPv4zero, Port: tc.busyPortIPv4}) + assert.NilError(t, err) + defer ul.Close() + } + n := &bridgeNetwork{ config: &networkConfiguration{ BridgeName: "dummybridge", @@ -829,20 +874,21 @@ func TestAddPortMappings(t *testing.T) { var disableNAT bool var addrM, addrD, addrH string var ipv iptables.IPVersion + hip := expChildIP(expPB.HostIP) if expPB.IP.To4() == nil { disableNAT = tc.gwMode6.natDisabled() ipv = iptables.IPv6 addrM = ctrIP6.IP.String() + "/128" addrD = "[" + ctrIP6.IP.String() + "]" - addrH = expPB.HostIP.String() + "/128" + addrH = hip.String() + "/128" } else { disableNAT = tc.gwMode4.natDisabled() ipv = iptables.IPv4 addrM = ctrIP4.IP.String() + "/32" addrD = 
ctrIP4.IP.String() - addrH = expPB.HostIP.String() + "/32" + addrH = hip.String() + "/32" } - if expPB.HostIP.IsUnspecified() { + if hip.IsUnspecified() { addrH = "0/0" } @@ -887,19 +933,21 @@ func TestAddPortMappings(t *testing.T) { } // Check a docker-proxy was started and stopped for each expected port binding. - expProxies := map[proxyCall]bool{} - for _, expPB := range tc.expPBs { - hip := expChildIP(expPB.HostIP) - is4 := hip.To4() != nil - if (is4 && tc.gwMode4.natDisabled()) || (!is4 && tc.gwMode6.natDisabled()) { - continue + if tc.proxyPath != "" { + expProxies := map[proxyCall]bool{} + for _, expPB := range tc.expPBs { + hip := expChildIP(expPB.HostIP) + is4 := hip.To4() != nil + if (is4 && tc.gwMode4.natDisabled()) || (!is4 && tc.gwMode6.natDisabled()) { + continue + } + p := newProxyCall(expPB.Proto.String(), + hip, int(expPB.HostPort), + expPB.IP, int(expPB.Port), tc.proxyPath) + expProxies[p] = tc.expReleaseErr != "" } - p := newProxyCall(expPB.Proto.String(), - hip, int(expPB.HostPort), - expPB.IP, int(expPB.Port), tc.proxyPath) - expProxies[p] = tc.expReleaseErr != "" + assert.Check(t, is.DeepEqual(expProxies, proxies)) } - assert.Check(t, is.DeepEqual(expProxies, proxies)) // Check the port driver has seen the expected port mappings and no others, // and that they have all been closed. 
diff --git a/libnetwork/drivers/bridge/rlkclient/rootlesskit_client_linux.go b/libnetwork/drivers/bridge/rlkclient/rootlesskit_client_linux.go index 4b92d17345..f81380bf0c 100644 --- a/libnetwork/drivers/bridge/rlkclient/rootlesskit_client_linux.go +++ b/libnetwork/drivers/bridge/rlkclient/rootlesskit_client_linux.go @@ -99,7 +99,8 @@ func (c *PortDriverClient) AddPort( hostIP netip.Addr, childIP netip.Addr, hostPort int, -) (func() error, error) { // proto is like "tcp", but we need to convert it to "tcp4" or "tcp6" explicitly +) (func() error, error) { + // proto is like "tcp", but we need to convert it to "tcp4" or "tcp6" explicitly // for libnetwork >= 20201216 // // See https://github.com/moby/libnetwork/pull/2604/files#diff-8fa48beed55dd033bf8e4f8c40b31cf69d0b2cc5d4bb53cde8594670ea6c938aR20 diff --git a/libnetwork/portmapper/proxy_linux.go b/libnetwork/portmapper/proxy_linux.go index 9bdb0f5567..d7256f76f0 100644 --- a/libnetwork/portmapper/proxy_linux.go +++ b/libnetwork/portmapper/proxy_linux.go @@ -1,78 +1,61 @@ package portmapper import ( + "errors" "fmt" "io" - "net" "os" "os/exec" "runtime" "strconv" "syscall" "time" + + "github.com/docker/docker/libnetwork/types" ) -// StartProxy starts the proxy process at proxyPath, or instantiates a dummy proxy -// to bind the host port if proxyPath is the empty string. -func StartProxy( - proto string, - hostIP net.IP, hostPort int, - containerIP net.IP, containerPort int, - proxyPath string, -) (stop func() error, retErr error) { - if proxyPath == "" { - return newDummyProxy(proto, hostIP, hostPort) - } - return newProxyCommand(proto, hostIP, hostPort, containerIP, containerPort, proxyPath) -} - -func newProxyCommand( - proto string, - hostIP net.IP, hostPort int, - containerIP net.IP, containerPort int, +// StartProxy starts the proxy process at proxyPath. +// If listenSock is not nil, it must be a bound socket that can be passed to +// the proxy process for it to listen on. 
+func StartProxy(pb types.PortBinding, proxyPath string, + listenSock *os.File, ) (stop func() error, retErr error) { if proxyPath == "" { return nil, fmt.Errorf("no path provided for userland-proxy binary") } - - p := &proxyCommand{ - cmd: &exec.Cmd{ - Path: proxyPath, - Args: []string{ - proxyPath, - "-proto", proto, - "-host-ip", hostIP.String(), - "-host-port", strconv.Itoa(hostPort), - "-container-ip", containerIP.String(), - "-container-port", strconv.Itoa(containerPort), - }, - SysProcAttr: &syscall.SysProcAttr{ - Pdeathsig: syscall.SIGTERM, // send a sigterm to the proxy if the creating thread in the daemon process dies (https://go.dev/issue/27505) - }, - }, - wait: make(chan error, 1), - } - if err := p.start(); err != nil { - return nil, err - } - return p.stop, nil -} - -// proxyCommand wraps an exec.Cmd to run the userland TCP and UDP -// proxies as separate processes. -type proxyCommand struct { - cmd *exec.Cmd - wait chan error -} - -func (p *proxyCommand) start() error { r, w, err := os.Pipe() if err != nil { - return fmt.Errorf("proxy unable to open os.Pipe %s", err) + return nil, fmt.Errorf("proxy unable to open os.Pipe %s", err) } - defer r.Close() - p.cmd.ExtraFiles = []*os.File{w} + defer func() { + if w != nil { + w.Close() + } + r.Close() + }() + + cmd := &exec.Cmd{ + Path: proxyPath, + Args: []string{ + proxyPath, + "-proto", pb.Proto.String(), + "-host-ip", pb.HostIP.String(), + "-host-port", strconv.FormatUint(uint64(pb.HostPort), 10), + "-container-ip", pb.IP.String(), + "-container-port", strconv.FormatUint(uint64(pb.Port), 10), + }, + ExtraFiles: []*os.File{w}, + SysProcAttr: &syscall.SysProcAttr{ + Pdeathsig: syscall.SIGTERM, // send a sigterm to the proxy if the creating thread in the daemon process dies (https://go.dev/issue/27505) + }, + } + if listenSock != nil { + cmd.Args = append(cmd.Args, "-use-listen-fd") + cmd.ExtraFiles = append(cmd.ExtraFiles, listenSock) + } + + wait := make(chan error, 1) // As p.cmd.SysProcAttr.Pdeathsig 
is set, the signal will be sent to the // process when the OS thread on which p.cmd.Start() was executed dies. @@ -88,17 +71,18 @@ func (p *proxyCommand) start() error { go func() { runtime.LockOSThread() defer runtime.UnlockOSThread() - err := p.cmd.Start() + err := cmd.Start() started <- err if err != nil { return } - p.wait <- p.cmd.Wait() + wait <- cmd.Wait() }() if err := <-started; err != nil { - return err + return nil, err } w.Close() + w = nil errchan := make(chan error, 1) go func() { @@ -108,11 +92,16 @@ func (p *proxyCommand) start() error { if string(buf) != "0\n" { errStr, err := io.ReadAll(r) if err != nil { - errchan <- fmt.Errorf("Error reading exit status from userland proxy: %v", err) + errchan <- fmt.Errorf("error reading exit status from userland proxy: %v", err) return } - - errchan <- fmt.Errorf("Error starting userland proxy: %s", errStr) + // If the user has an old docker-proxy in their PATH, and we passed "-use-listen-fd" + // on the command line, it exits with no response on the pipe. 
+ if listenSock != nil && buf[0] == 0 && len(errStr) == 0 { + errchan <- errors.New("failed to start docker-proxy, check that the current version is in your $PATH") + return + } + errchan <- fmt.Errorf("error starting userland proxy: %s", errStr) return } errchan <- nil @@ -120,18 +109,21 @@ func (p *proxyCommand) start() error { select { case err := <-errchan: - return err + if err != nil { + return nil, err + } case <-time.After(16 * time.Second): - return fmt.Errorf("Timed out proxy starting the userland proxy") + return nil, fmt.Errorf("timed out starting the userland proxy") } -} -func (p *proxyCommand) stop() error { - if p.cmd.Process != nil { - if err := p.cmd.Process.Signal(os.Interrupt); err != nil { + stopFn := func() error { + if cmd.Process == nil { + return nil + } + if err := cmd.Process.Signal(os.Interrupt); err != nil { return err } - return <-p.wait + return <-wait } - return nil + return stopFn, nil } diff --git a/libnetwork/portmapper/proxy.go b/libnetwork/portmapper/proxy_windows.go similarity index 100% rename from libnetwork/portmapper/proxy.go rename to libnetwork/portmapper/proxy_windows.go