Create docker-proxy TCP/UDP listener sockets in the daemon

Before commit 4f09af6, when allocating host ports for a new
port mapping, iptables rules were set up then docker-proxy was
started. If the host port was already in-use, docker-proxy exited
with an error, and the iptables rules were removed. That could
potentially interfere with a non-docker service that was already
using the host port for something unrelated.

Commit 4f09af6 swapped that problem for a different one... in
order to check that a port was available before creating iptables
rules, it attempted to start docker-proxy first. If it failed, it
could then try a different host port, without interfering with
any other service. The problem with that is docker-proxy would
start listening before the iptables rules were in place, so it
could accept connections then become unusable because new NAT
rules diverted packets directly to the container. This would leave
the client with a broken connection, causing at least a delay
while it figured that out and reconnected.

This change creates and binds the socket in the daemon, before
creating iptables rules. If the bind fails, it may try a different
port. When or if the bind succeeds, iptables rules are created,
then the daemon calls listen on the socket. If docker-proxy is
needed, the socket is handed over to it at that point.

In rootless mode, the ports have to be bound to an address in the
rootless network namespace (where dockerd is running). DNAT rules
now use the same address.

If docker-proxy is not needed ("--userland-proxy=false"), the daemon
still listens on TCP sockets as the old dummyProxy would have done.
This makes the socket show up in "netstat" output.

The dummyProxy is no longer needed on Linux. Its job was to bind the
host ports if docker-proxy was disabled, but that's now already
handled by binding the sockets early.

This change doesn't affect SCTP, because it's not currently possible
for docker-proxy to convert the file descriptor into an SCTPListener.
So, docker-proxy is still started early, and the window for lost
connections remains.

If the user has an old docker-proxy in their path and it's given a
listener socket with '-use-listen-fd', it'll fail because of the
unknown option. In this case, the daemon's error message suggests
checking $PATH.

Signed-off-by: Rob Murray <rob.murray@docker.com>
This commit is contained in:
Rob Murray
2024-07-08 13:50:40 +01:00
parent dac7ffa340
commit b3fabedecc
6 changed files with 422 additions and 168 deletions

View File

@@ -704,7 +704,7 @@ func (s *DockerDaemonSuite) TestDaemonIP(c *testing.T) {
out, err := d.Cmd("run", "-d", "-p", "8000:8000", "busybox", "top")
assert.Assert(c, err != nil, "Running a container must fail with an invalid --ip option")
assert.Equal(c, strings.Contains(out, "Error starting userland proxy"), true)
assert.Check(c, is.Contains(out, "failed to bind host port for 192.170.1.1"))
ifName := "dummy"
createInterface(c, "dummy", ifName, ipStr)

View File

@@ -12,6 +12,8 @@ import (
"os"
"slices"
"strconv"
"syscall"
"unsafe"
"github.com/containerd/log"
"github.com/docker/docker/libnetwork/iptables"
@@ -19,10 +21,15 @@ import (
"github.com/docker/docker/libnetwork/portallocator"
"github.com/docker/docker/libnetwork/portmapper"
"github.com/docker/docker/libnetwork/types"
"github.com/ishidawataru/sctp"
)
type portBinding struct {
types.PortBinding
// boundSocket is used to reserve a host port for the binding. If the
// userland proxy is in-use, it's passed to the proxy when the proxy is
// started, then it's closed and set to nil here.
boundSocket *os.File
// childHostIP is the host IP address, as seen from the daemon. This
// is normally the same as PortBinding.HostIP but, in rootless mode, it
// will be an address in the rootless network namespace. RootlessKit
@@ -41,21 +48,33 @@ type portBinding struct {
stopProxy func() error
}
// childPortBinding returns pb.PortBinding with its HostIP replaced by
// pb.childHostIP, the host address as seen by the daemon. In rootless mode
// that is an address in RootlessKit's child namespace (see
// portBinding.childHostIP).
func (pb portBinding) childPortBinding() types.PortBinding {
	// pb is a value receiver, so this only changes the local copy.
	pb.PortBinding.HostIP = pb.childHostIP
	return pb.PortBinding
}
// portBindingReq is a port mapping request: the PortBinding to set up, plus
// the details needed to decide whether and where to bind its host port.
type portBindingReq struct {
types.PortBinding
// childHostIP is the host address used when allocating and binding the host
// port (see attemptBindHostPorts and bindTCPOrUDP).
childHostIP net.IP
// disableNAT, when set, means no host port is bound for the mapping; the
// resulting portBinding has its HostPort cleared.
disableNAT bool
}
// Allow unit tests to supply a dummy StartProxy.
var startProxy = portmapper.StartProxy
// addPortMappings takes cfg, the configuration for port mappings, selects host
// ports when ranges are given, starts docker-proxy or its dummy to reserve
// host ports, and sets up iptables NAT/forwarding rules as necessary. If
// anything goes wrong, it will undo any work it's done and return an error.
// Otherwise, the returned slice of portBinding has an entry per address
// family (if cfg describes a mapping for 'any' host address, it's expanded
// into mappings for IPv4 and IPv6, because that's how the mapping is presented
// in 'inspect'). HostPort and HostPortEnd in each returned portBinding are set
// to the selected and reserved port.
// ports when ranges are given, binds host ports to check they're available and
// reserve them, starts docker-proxy if required, and sets up iptables
// NAT/forwarding rules as necessary. If anything goes wrong, it will undo any
// work it's done and return an error. Otherwise, the returned slice of
// portBinding has an entry per address family (if cfg describes a mapping for
// 'any' host address, it's expanded into mappings for IPv4 and IPv6, because
// that's how the mapping is presented in 'inspect'). HostPort and HostPortEnd in
// each returned portBinding are set to the selected and reserved port.
func (n *bridgeNetwork) addPortMappings(
epAddrV4, epAddrV6 *net.IPNet,
cfg []types.PortBinding,
@@ -134,15 +153,14 @@ func (n *bridgeNetwork) addPortMappings(
continue
}
// Allocate a host port, and reserve it by starting docker-proxy for each host
// address in toBind.
// Allocate and bind a host port.
newB, err := bindHostPorts(toBind, proxyPath)
if err != nil {
return nil, err
}
bindings = append(bindings, newB...)
// Reset the collection of bindings now they're bound.
// Reset toBind now the ports are bound.
toBind = toBind[:0]
}
@@ -168,6 +186,53 @@ func (n *bridgeNetwork) addPortMappings(
}
}
// Now the iptables rules are set up, it's safe to start the userland proxy.
// (If it was started before the iptables rules were created, it may have
// accepted a connection, then become unreachable due to NAT rules sending
// packets directly to the container.)
// If not starting the proxy, nothing will ever accept a connection on the
// socket. But, listen anyway so that the binding shows up in "netstat -at".
somaxconn := 0
if proxyPath != "" {
somaxconn = -1 // silently capped to "/proc/sys/net/core/somaxconn"
}
for i := range bindings {
	// Bindings without a bound socket have nothing to listen on or hand over
	// to a proxy here.
	if bindings[i].boundSocket == nil {
		continue
	}
	if bindings[i].Proto == types.TCP {
		rc, err := bindings[i].boundSocket.SyscallConn()
		if err != nil {
			return nil, fmt.Errorf("raw conn not available on TCP socket: %w", err)
		}
		// Control reports its own failure through its return value, while the
		// result of listen(2) is captured from inside the callback. Keep the
		// two apart so that each error message wraps the right cause.
		var listenErr error
		if errC := rc.Control(func(fd uintptr) {
			listenErr = syscall.Listen(int(fd), somaxconn)
		}); errC != nil {
			return nil, fmt.Errorf("failed to Control TCP socket: %w", errC)
		}
		if listenErr != nil {
			return nil, fmt.Errorf("failed to listen on TCP socket: %w", listenErr)
		}
	}
	if proxyPath != "" {
		var err error
		bindings[i].stopProxy, err = startProxy(
			bindings[i].childPortBinding(), proxyPath, bindings[i].boundSocket,
		)
		if err != nil {
			return nil, fmt.Errorf("failed to start userland proxy for port mapping %s: %w",
				bindings[i].PortBinding, err)
		}
		// The socket has been passed to the proxy; the daemon's copy is closed
		// and cleared. A failure to close is only logged.
		if err := bindings[i].boundSocket.Close(); err != nil {
			log.G(context.TODO()).WithFields(log.Fields{
				"error":   err,
				"mapping": bindings[i].PortBinding,
			}).Warnf("failed to close proxy socket")
		}
		bindings[i].boundSocket = nil
	}
}
return bindings, nil
}
@@ -366,7 +431,7 @@ func setChildHostIP(pdc portDriverClient, req portBindingReq) portBindingReq {
return req
}
// bindHostPorts allocates ports and starts docker-proxy for the given cfg. The
// bindHostPorts allocates and binds host ports for the given cfg. The
// caller is responsible for ensuring that all entries in cfg map the same proto,
// container port, and host port range (their host addresses must differ).
func bindHostPorts(cfg []portBindingReq, proxyPath string) ([]portBinding, error) {
@@ -401,17 +466,13 @@ func bindHostPorts(cfg []portBindingReq, proxyPath string) ([]portBinding, error
return nil, err
}
// Allow unit tests to supply a dummy StartProxy.
var startProxy = portmapper.StartProxy
// attemptBindHostPorts allocates host ports for each port mapping that requires
// one, and reserves those ports by starting docker-proxy.
// one, and reserves those ports by binding them.
//
// If the allocator doesn't have an available port in the required range, or the
// docker-proxy process doesn't start (perhaps because another process has
// already bound the port), all resources are released and an error is returned.
// When ports are successfully reserved, a portBinding is returned for each
// mapping.
// port can't be bound (perhaps because another process has already bound it),
// all resources are released and an error is returned. When ports are
// successfully reserved, a portBinding is returned for each mapping.
//
// If NAT is disabled for any of the bindings, no host port reservation is
// needed. These bindings are included in results, as the container port itself
@@ -428,7 +489,7 @@ func attemptBindHostPorts(
addrs := make([]net.IP, 0, len(cfg))
for _, c := range cfg {
if !c.disableNAT {
addrs = append(addrs, c.HostIP)
addrs = append(addrs, c.childHostIP)
}
}
@@ -448,31 +509,177 @@ func attemptBindHostPorts(
}
res := make([]portBinding, 0, len(cfg))
for _, c := range cfg {
pb := portBinding{PortBinding: c.GetCopy()}
if c.disableNAT {
pb.HostPort = 0
} else {
pb.stopProxy, err = startProxy(c.Proto.String(), c.childHostIP, port, c.IP, int(c.Port), proxyPath)
if err != nil {
return nil, fmt.Errorf("failed to bind port %s:%d/%s: %w", c.HostIP, port, c.Proto, err)
}
defer func() {
if retErr != nil {
if err := pb.stopProxy(); err != nil {
log.G(context.TODO()).Warnf("Failed to stop userland proxy for port mapping %s: %s", pb, err)
defer func() {
if retErr != nil {
for _, pb := range res {
if pb.boundSocket != nil {
if err := pb.boundSocket.Close(); err != nil {
log.G(context.TODO()).Warnf("Failed to close port binding for %s: %s", pb, err)
}
}
}()
pb.HostPort = uint16(port)
// TODO(robmry) - this is only needed because the userland proxy may have
// been started for SCTP. If a bound socket is passed to the proxy after
// iptables rules have been configured (as it is for TCP/UDP), remove this.
if pb.stopProxy != nil {
if err := pb.stopProxy(); err != nil {
log.G(context.TODO()).Warnf("Failed to stop proxy for %s: %s", pb, err)
}
}
}
}
}()
for _, c := range cfg {
var pb portBinding
if c.disableNAT {
pb = portBinding{PortBinding: c.GetCopy()}
pb.HostPort = 0
pb.HostPortEnd = 0
} else {
switch proto {
case "tcp":
pb, err = bindTCPOrUDP(c, port, syscall.SOCK_STREAM, syscall.IPPROTO_TCP)
case "udp":
pb, err = bindTCPOrUDP(c, port, syscall.SOCK_DGRAM, syscall.IPPROTO_UDP)
case "sctp":
if proxyPath == "" {
pb, err = bindSCTP(c, port)
} else {
// TODO(robmry) - it's not currently possible to pass a bound SCTP port
// to the userland proxy, because the proxy is not able to convert the
// file descriptor into an sctp.SCTPListener (fd is an unexported member
// of the struct, and ListenSCTP is the only constructor).
// So, it is possible for the proxy to start listening and accept
// connections before iptables rules are created that would bypass
// the proxy for external connections.
// Remove this and pb.stopProxy() from the cleanup function above if
// this is fixed.
pb, err = startSCTPProxy(c, port, proxyPath)
}
default:
return nil, fmt.Errorf("Unknown addr type: %s", proto)
}
if err != nil {
return nil, err
}
}
pb.HostPortEnd = pb.HostPort
pb.childHostIP = c.childHostIP
res = append(res, pb)
}
return res, nil
}
// bindTCPOrUDP creates a socket of type typ and protocol proto, and binds it to
// cfg.childHostIP and port. The socket is not put into the listening state.
//
// On success, the returned portBinding is a copy of cfg's PortBinding with
// HostPort and HostPortEnd both set to port, childHostIP copied from cfg, and
// boundSocket wrapping the bound file descriptor. On failure, the descriptor
// is closed and an empty portBinding is returned with the error.
func bindTCPOrUDP(cfg portBindingReq, port, typ, proto int) (_ portBinding, retErr error) {
	// Choose the address family from the host address: anything that has a
	// 4-byte form is bound as IPv4, everything else as IPv6.
	domain := syscall.AF_INET6
	var sa syscall.Sockaddr
	if ip4 := cfg.childHostIP.To4(); ip4 != nil {
		domain = syscall.AF_INET
		addr4 := &syscall.SockaddrInet4{Port: port}
		copy(addr4.Addr[:], ip4)
		sa = addr4
	} else {
		addr6 := &syscall.SockaddrInet6{Port: port}
		copy(addr6.Addr[:], cfg.childHostIP)
		sa = addr6
	}

	sd, err := syscall.Socket(domain, typ|syscall.SOCK_CLOEXEC, proto)
	if err != nil {
		return portBinding{}, fmt.Errorf("failed to create socket for userland proxy for %s: %w", cfg, err)
	}
	// Don't leak the descriptor if any later step fails.
	defer func() {
		if retErr != nil {
			syscall.Close(sd)
		}
	}()

	// Request an IPv6-only socket. The result of the setsockopt call is not
	// checked.
	if domain == syscall.AF_INET6 {
		syscall.SetsockoptInt(sd, syscall.IPPROTO_IPV6, syscall.IPV6_V6ONLY, 1)
	}

	if err := syscall.Bind(sd, sa); err != nil {
		// Only name the port separately when it was picked from a range.
		if cfg.HostPort != cfg.HostPortEnd {
			return portBinding{}, fmt.Errorf("failed to bind host port %d for %s: %w", port, cfg, err)
		}
		return portBinding{}, fmt.Errorf("failed to bind host port for %s: %w", cfg, err)
	}

	sock := os.NewFile(uintptr(sd), "listener")
	if sock == nil {
		return portBinding{}, fmt.Errorf("failed to convert socket for userland proxy for %s", cfg)
	}

	bound := portBinding{
		PortBinding: cfg.PortBinding.GetCopy(),
		boundSocket: sock,
		childHostIP: cfg.childHostIP,
	}
	bound.HostPort = uint16(port)
	bound.HostPortEnd = bound.HostPort
	return bound, nil
}
// bindSCTP is based on sctp.ListenSCTP. The socket is created and bound to
// cfg.childHostIP and port, but does not start listening.
//
// On success, the returned portBinding is a copy of cfg's PortBinding with
// HostPort and HostPortEnd both set to port, childHostIP copied from cfg, and
// boundSocket wrapping the bound file descriptor. On failure, the descriptor
// is closed and an empty portBinding is returned with the error.
func bindSCTP(cfg portBindingReq, port int) (_ portBinding, retErr error) {
	pb := portBinding{PortBinding: cfg.GetCopy()}
	pb.HostPort = uint16(port)
	pb.HostPortEnd = pb.HostPort
	pb.childHostIP = cfg.childHostIP

	domain := syscall.AF_INET
	if cfg.childHostIP.To4() == nil {
		domain = syscall.AF_INET6
	}

	sd, err := syscall.Socket(domain, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, syscall.IPPROTO_SCTP)
	if err != nil {
		return portBinding{}, fmt.Errorf("failed to create socket for userland proxy for %s: %w", cfg, err)
	}
	// Don't leak the descriptor if any later step fails.
	defer func() {
		if retErr != nil {
			syscall.Close(sd)
		}
	}()

	// Request an IPv6-only socket. The result of the setsockopt call is not
	// checked.
	if domain == syscall.AF_INET6 {
		syscall.SetsockoptInt(sd, syscall.IPPROTO_IPV6, syscall.IPV6_V6ONLY, 1)
	}

	options := sctp.InitMsg{NumOstreams: sctp.SCTP_MAX_STREAM}
	if _, _, errno := syscall.Syscall6(syscall.SYS_SETSOCKOPT,
		uintptr(sd),
		sctp.SOL_SCTP,
		sctp.SCTP_INITMSG,
		uintptr(unsafe.Pointer(&options)),
		unsafe.Sizeof(options),
		0); errno != 0 {
		// Wrap the errno so the caller can tell which step failed;
		// errors.Is(err, errno) still works.
		return portBinding{}, fmt.Errorf("failed to set SCTP_INITMSG on socket for %s: %w", cfg, errno)
	}

	// Bind to the allocated port, not cfg.HostPort: the latter is only the
	// requested port (or start of the requested range) and may differ from
	// the port reported in pb.HostPort.
	if err := sctp.SCTPBind(sd,
		&sctp.SCTPAddr{IPAddrs: []net.IPAddr{{IP: cfg.childHostIP}}, Port: port},
		sctp.SCTP_BINDX_ADD_ADDR); err != nil {
		return portBinding{}, fmt.Errorf("failed to bind socket for userland proxy for %s: %w", cfg, err)
	}

	pb.boundSocket = os.NewFile(uintptr(sd), "listener")
	if pb.boundSocket == nil {
		return portBinding{}, fmt.Errorf("failed to convert socket for userland proxy for %s", cfg)
	}
	return pb, nil
}
// startSCTPProxy starts the userland proxy at proxyPath for the mapping in
// cfg, using port as the host port. No bound socket is passed to the proxy
// (startProxy is called with a nil listener).
//
// On success, the returned portBinding has HostPort and HostPortEnd set to
// port, and stopProxy set to the function returned by startProxy. On failure,
// an empty portBinding is returned with the error from startProxy.
func startSCTPProxy(cfg portBindingReq, port int, proxyPath string) (_ portBinding, retErr error) {
	binding := portBinding{
		PortBinding: cfg.GetCopy(),
		childHostIP: cfg.childHostIP,
	}
	binding.HostPort = uint16(port)
	binding.HostPortEnd = binding.HostPort

	stop, err := startProxy(binding.childPortBinding(), proxyPath, nil)
	if err != nil {
		return portBinding{}, err
	}
	binding.stopProxy = stop
	return binding, nil
}
// releasePorts attempts to release all port bindings, does not stop on failure
func (n *bridgeNetwork) releasePorts(ep *bridgeEndpoint) error {
n.Lock()
@@ -486,14 +693,20 @@ func (n *bridgeNetwork) releasePorts(ep *bridgeEndpoint) error {
func (n *bridgeNetwork) releasePortBindings(pbs []portBinding) error {
var errs []error
for _, pb := range pbs {
var errPD, errP error
var errS, errPD, errP error
if pb.boundSocket != nil {
errS = pb.boundSocket.Close()
if errS != nil {
errS = fmt.Errorf("failed to close socket for port mapping %s: %w", pb, errS)
}
}
if pb.portDriverRemove != nil {
errPD = pb.portDriverRemove()
}
if pb.stopProxy != nil {
errP = pb.stopProxy()
if errP != nil {
errP = fmt.Errorf("failed to stop docker-proxy for port mapping %s: %w", pb, errP)
errP = fmt.Errorf("failed to stop userland proxy for port mapping %s: %w", pb, errP)
}
}
errN := n.setPerPortIptables(pb, false)
@@ -501,9 +714,9 @@ func (n *bridgeNetwork) releasePortBindings(pbs []portBinding) error {
errN = fmt.Errorf("failed to remove iptables rules for port mapping %s: %w", pb, errN)
}
if pb.HostPort > 0 {
portallocator.Get().ReleasePort(pb.HostIP, pb.Proto.String(), int(pb.HostPort))
portallocator.Get().ReleasePort(pb.childHostIP, pb.Proto.String(), int(pb.HostPort))
}
errs = append(errs, errPD, errP, errN)
errs = append(errs, errS, errPD, errP, errN)
}
return errors.Join(errs...)
}
@@ -545,8 +758,8 @@ func setPerPortNAT(b portBinding, ipv iptables.IPVersion, proxyPath string, brid
// want "0.0.0.0/0". "0/0" is correctly interpreted as "any
// value" by both iptables and ip6tables.
hostIP := "0/0"
if !b.HostIP.IsUnspecified() {
hostIP = b.HostIP.String()
if !b.childHostIP.IsUnspecified() {
hostIP = b.childHostIP.String()
}
args := []string{
"-p", b.Proto.String(),

View File

@@ -6,8 +6,10 @@ import (
"fmt"
"net"
"net/netip"
"os"
"strconv"
"strings"
"syscall"
"testing"
"github.com/docker/docker/internal/testutils/netnsutils"
@@ -16,6 +18,7 @@ import (
"github.com/docker/docker/libnetwork/ns"
"github.com/docker/docker/libnetwork/portallocator"
"github.com/docker/docker/libnetwork/types"
"github.com/vishvananda/netlink"
"gotest.tools/v3/assert"
is "gotest.tools/v3/assert/cmp"
)
@@ -423,6 +426,7 @@ func TestAddPortMappings(t *testing.T) {
proxyPath string
busyPortIPv4 int
rootless bool
hostAddrs []string
expErr string
expPBs []types.PortBinding
@@ -441,6 +445,7 @@ func TestAddPortMappings(t *testing.T) {
{Proto: types.TCP, Port: 22},
{Proto: types.TCP, Port: 80},
},
proxyPath: "/dummy/path/to/proxy",
expPBs: []types.PortBinding{
{Proto: types.TCP, IP: ctrIP4.IP, Port: 22, HostIP: net.IPv4zero, HostPort: firstEphemPort},
{Proto: types.TCP, IP: ctrIP6.IP, Port: 22, HostIP: net.IPv6zero, HostPort: firstEphemPort},
@@ -449,22 +454,24 @@ func TestAddPortMappings(t *testing.T) {
},
},
{
name: "specific host port",
epAddrV4: ctrIP4,
epAddrV6: ctrIP6,
cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080}},
name: "specific host port",
epAddrV4: ctrIP4,
epAddrV6: ctrIP6,
cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080}},
proxyPath: "/dummy/path/to/proxy",
expPBs: []types.PortBinding{
{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8080, HostPortEnd: 8080},
{Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: net.IPv6zero, HostPort: 8080, HostPortEnd: 8080},
},
},
{
name: "nat explicitly enabled",
epAddrV4: ctrIP4,
epAddrV6: ctrIP6,
cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080}},
gwMode4: gwModeNAT,
gwMode6: gwModeNAT,
name: "nat explicitly enabled",
epAddrV4: ctrIP4,
epAddrV6: ctrIP6,
cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080}},
gwMode4: gwModeNAT,
gwMode6: gwModeNAT,
proxyPath: "/dummy/path/to/proxy",
expPBs: []types.PortBinding{
{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8080, HostPortEnd: 8080},
{Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: net.IPv6zero, HostPort: 8080, HostPortEnd: 8080},
@@ -475,24 +482,27 @@ func TestAddPortMappings(t *testing.T) {
epAddrV4: ctrIP4,
epAddrV6: ctrIP6,
cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080}},
proxyPath: "/dummy/path/to/proxy",
busyPortIPv4: 8080,
expErr: "failed to bind port 0.0.0.0:8080/tcp: busy port",
expErr: "failed to bind host port for 0.0.0.0:8080:172.19.0.2:80/tcp: address already in use",
},
{
name: "ipv4 mapped container address with specific host port",
epAddrV4: ctrIP4Mapped,
epAddrV6: ctrIP6,
cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080}},
name: "ipv4 mapped container address with specific host port",
epAddrV4: ctrIP4Mapped,
epAddrV6: ctrIP6,
cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080}},
proxyPath: "/dummy/path/to/proxy",
expPBs: []types.PortBinding{
{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8080, HostPortEnd: 8080},
{Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: net.IPv6zero, HostPort: 8080, HostPortEnd: 8080},
},
},
{
name: "ipv4 mapped host address with specific host port",
epAddrV4: ctrIP4,
epAddrV6: ctrIP6,
cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostIP: newIPNet(t, "::ffff:127.0.0.1/128").IP, HostPort: 8080}},
name: "ipv4 mapped host address with specific host port",
epAddrV4: ctrIP4,
epAddrV6: ctrIP6,
cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostIP: newIPNet(t, "::ffff:127.0.0.1/128").IP, HostPort: 8080}},
proxyPath: "/dummy/path/to/proxy",
expPBs: []types.PortBinding{
{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: newIPNet(t, "127.0.0.1/32").IP, HostPort: 8080, HostPortEnd: 8080},
},
@@ -502,6 +512,7 @@ func TestAddPortMappings(t *testing.T) {
epAddrV4: ctrIP4,
epAddrV6: ctrIP6,
cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080, HostPortEnd: 8081}},
proxyPath: "/dummy/path/to/proxy",
busyPortIPv4: 8080,
expPBs: []types.PortBinding{
{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8081, HostPortEnd: 8081},
@@ -516,6 +527,7 @@ func TestAddPortMappings(t *testing.T) {
{Proto: types.TCP, Port: 80, HostIP: net.IPv4zero, HostPort: 8080, HostPortEnd: 8081},
{Proto: types.TCP, Port: 80, HostIP: net.IPv6zero, HostPort: 8080, HostPortEnd: 8081},
},
proxyPath: "/dummy/path/to/proxy",
busyPortIPv4: 8080,
expPBs: []types.PortBinding{
{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8081},
@@ -534,6 +546,7 @@ func TestAddPortMappings(t *testing.T) {
{Proto: types.UDP, Port: 81, HostPort: 8080, HostPortEnd: 8083},
{Proto: types.UDP, Port: 82, HostPort: 8080, HostPortEnd: 8083},
},
proxyPath: "/dummy/path/to/proxy",
busyPortIPv4: 8082,
expPBs: []types.PortBinding{
{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8080, HostPortEnd: 8080},
@@ -559,8 +572,9 @@ func TestAddPortMappings(t *testing.T) {
{Proto: types.TCP, Port: 81, HostPort: 8080, HostPortEnd: 8082},
{Proto: types.TCP, Port: 82, HostPort: 8080, HostPortEnd: 8082},
},
proxyPath: "/dummy/path/to/proxy",
busyPortIPv4: 8081,
expErr: "failed to bind port 0.0.0.0:8081/tcp: busy port",
expErr: "failed to bind host port 8081 for 0.0.0.0:8080-8082:172.19.0.2:82/tcp",
},
{
name: "map host ipv6 to ipv4 container with proxy",
@@ -591,9 +605,10 @@ func TestAddPortMappings(t *testing.T) {
epAddrV4: ctrIP4,
epAddrV6: ctrIP6,
cfg: []types.PortBinding{{Proto: types.TCP, Port: 80}},
defHostIP: newIPNet(t, "10.11.12.13/24").IP,
proxyPath: "/dummy/path/to/proxy",
defHostIP: newIPNet(t, "127.0.0.1/8").IP,
expPBs: []types.PortBinding{
{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: newIPNet(t, "10.11.12.13/24").IP, HostPort: firstEphemPort},
{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: newIPNet(t, "127.0.0.1/8").IP, HostPort: firstEphemPort},
},
},
{
@@ -601,9 +616,10 @@ func TestAddPortMappings(t *testing.T) {
epAddrV4: ctrIP4,
epAddrV6: ctrIP6,
cfg: []types.PortBinding{{Proto: types.TCP, Port: 80}},
defHostIP: newIPNet(t, "::ffff:10.11.12.13/120").IP,
proxyPath: "/dummy/path/to/proxy",
defHostIP: newIPNet(t, "::ffff:127.0.0.1/72").IP,
expPBs: []types.PortBinding{
{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: newIPNet(t, "10.11.12.13/24").IP, HostPort: firstEphemPort},
{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: newIPNet(t, "127.0.0.1/8").IP, HostPort: firstEphemPort},
},
},
{
@@ -611,6 +627,7 @@ func TestAddPortMappings(t *testing.T) {
epAddrV4: ctrIP4,
epAddrV6: ctrIP6,
cfg: []types.PortBinding{{Proto: types.TCP, Port: 80}},
proxyPath: "/dummy/path/to/proxy",
defHostIP: net.IPv6zero,
expPBs: []types.PortBinding{
{Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: net.IPv6zero, HostPort: firstEphemPort},
@@ -621,6 +638,7 @@ func TestAddPortMappings(t *testing.T) {
epAddrV4: ctrIP4,
epAddrV6: ctrIP6,
cfg: []types.PortBinding{{Proto: types.TCP, Port: 80}},
proxyPath: "/dummy/path/to/proxy",
defHostIP: newIPNet(t, "::1/128").IP,
expPBs: []types.PortBinding{
{Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: newIPNet(t, "::1/128").IP, HostPort: firstEphemPort},
@@ -634,16 +652,17 @@ func TestAddPortMappings(t *testing.T) {
{Proto: types.TCP, Port: 80, HostPort: 8080},
{Proto: types.TCP, Port: 22, HostPort: 2222},
},
proxyPath: "/dummy/path/to/proxy",
expPBs: []types.PortBinding{
{Proto: types.TCP, IP: ctrIP4.IP, Port: 22, HostIP: net.IPv4zero, HostPort: 2222},
{Proto: types.TCP, IP: ctrIP6.IP, Port: 22, HostIP: net.IPv6zero, HostPort: 2222},
{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8080},
{Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: net.IPv6zero, HostPort: 8080},
},
expReleaseErr: "failed to stop docker-proxy for port mapping 0.0.0.0:2222:172.19.0.2:22/tcp: can't stop now\n" +
"failed to stop docker-proxy for port mapping [::]:2222:[fdf8:b88e:bb5c:3483::2]:22/tcp: can't stop now\n" +
"failed to stop docker-proxy for port mapping 0.0.0.0:8080:172.19.0.2:80/tcp: can't stop now\n" +
"failed to stop docker-proxy for port mapping [::]:8080:[fdf8:b88e:bb5c:3483::2]:80/tcp: can't stop now",
expReleaseErr: "failed to stop userland proxy for port mapping 0.0.0.0:2222:172.19.0.2:22/tcp: can't stop now\n" +
"failed to stop userland proxy for port mapping [::]:2222:[fdf8:b88e:bb5c:3483::2]:22/tcp: can't stop now\n" +
"failed to stop userland proxy for port mapping 0.0.0.0:8080:172.19.0.2:80/tcp: can't stop now\n" +
"failed to stop userland proxy for port mapping [::]:8080:[fdf8:b88e:bb5c:3483::2]:80/tcp: can't stop now",
},
{
name: "disable nat6",
@@ -653,7 +672,8 @@ func TestAddPortMappings(t *testing.T) {
{Proto: types.TCP, Port: 22},
{Proto: types.TCP, Port: 80},
},
gwMode6: gwModeRouted,
proxyPath: "/dummy/path/to/proxy",
gwMode6: gwModeRouted,
expPBs: []types.PortBinding{
{Proto: types.TCP, IP: ctrIP4.IP, Port: 22, HostIP: net.IPv4zero, HostPort: firstEphemPort},
{Proto: types.TCP, IP: ctrIP6.IP, Port: 22, HostIP: net.IPv6zero},
@@ -669,7 +689,8 @@ func TestAddPortMappings(t *testing.T) {
{Proto: types.TCP, Port: 22},
{Proto: types.TCP, Port: 80},
},
gwMode4: gwModeRouted,
proxyPath: "/dummy/path/to/proxy",
gwMode4: gwModeRouted,
expPBs: []types.PortBinding{
{Proto: types.TCP, IP: ctrIP4.IP, Port: 22, HostIP: net.IPv4zero},
{Proto: types.TCP, IP: ctrIP6.IP, Port: 22, HostIP: net.IPv6zero, HostPort: firstEphemPort},
@@ -685,8 +706,9 @@ func TestAddPortMappings(t *testing.T) {
{Proto: types.TCP, Port: 22},
{Proto: types.TCP, Port: 80},
},
gwMode4: gwModeRouted,
gwMode6: gwModeRouted,
proxyPath: "/dummy/path/to/proxy",
gwMode4: gwModeRouted,
gwMode6: gwModeRouted,
expPBs: []types.PortBinding{
{Proto: types.TCP, IP: ctrIP4.IP, Port: 22, HostIP: net.IPv4zero},
{Proto: types.TCP, IP: ctrIP6.IP, Port: 22, HostIP: net.IPv6zero},
@@ -695,9 +717,10 @@ func TestAddPortMappings(t *testing.T) {
},
},
{
name: "same ports for matching mappings with different host addresses",
epAddrV4: ctrIP4,
epAddrV6: ctrIP6,
name: "same ports for matching mappings with different host addresses",
epAddrV4: ctrIP4,
epAddrV6: ctrIP6,
hostAddrs: []string{"192.168.1.2/24", "fd0c:9167:5b11::2/64", "fd0c:9167:5b11::3/64"},
cfg: []types.PortBinding{
// These two should both get the same host port.
{Proto: types.TCP, Port: 80, HostIP: newIPNet(t, "fd0c:9167:5b11::2/64").IP},
@@ -711,6 +734,7 @@ func TestAddPortMappings(t *testing.T) {
{Proto: types.TCP, Port: 12345, HostPort: 12345, HostPortEnd: 12346},
{Proto: types.TCP, Port: 12345, HostPort: 12345},
},
proxyPath: "/dummy/path/to/proxy",
expPBs: []types.PortBinding{
{Proto: types.TCP, IP: ctrIP4.IP, Port: 12345, HostIP: net.IPv4zero, HostPort: 12345},
{Proto: types.TCP, IP: ctrIP6.IP, Port: 12345, HostIP: net.IPv6zero, HostPort: 12345},
@@ -751,15 +775,14 @@ func TestAddPortMappings(t *testing.T) {
origStartProxy := startProxy
defer func() { startProxy = origStartProxy }()
proxies := map[proxyCall]bool{} // proxy -> is not stopped
startProxy = func(proto string,
hostIP net.IP, hostPort int,
containerIP net.IP, containerPort int,
startProxy = func(pb types.PortBinding,
proxyPath string,
listenSock *os.File,
) (stop func() error, retErr error) {
if tc.busyPortIPv4 > 0 && tc.busyPortIPv4 == hostPort && hostIP.To4() != nil {
if tc.busyPortIPv4 > 0 && tc.busyPortIPv4 == int(pb.HostPort) && pb.HostIP.To4() != nil {
return nil, errors.New("busy port")
}
c := newProxyCall(proto, hostIP, hostPort, containerIP, containerPort, proxyPath)
c := newProxyCall(pb.Proto.String(), pb.HostIP, int(pb.HostPort), pb.IP, int(pb.Port), proxyPath)
if _, ok := proxies[c]; ok {
return nil, fmt.Errorf("duplicate proxy: %#v", c)
}
@@ -781,6 +804,28 @@ func TestAddPortMappings(t *testing.T) {
defer func() { newPortDriverClient = origNewPortDriverClient }()
newPortDriverClient = func() (portDriverClient, error) { return newMockPortDriverClient() }
if len(tc.hostAddrs) > 0 {
dummyLink := &netlink.Bridge{LinkAttrs: netlink.LinkAttrs{Name: "br-dummy"}}
err := netlink.LinkAdd(dummyLink)
assert.NilError(t, err)
for _, addr := range tc.hostAddrs {
// Add with NODAD so that the address is available immediately.
err := netlink.AddrAdd(dummyLink,
&netlink.Addr{IPNet: newIPNet(t, addr), Flags: syscall.IFA_F_NODAD})
assert.NilError(t, err)
}
err = netlink.LinkSetUp(dummyLink)
assert.NilError(t, err)
}
if tc.busyPortIPv4 != 0 {
tl, err := net.ListenTCP("tcp4", &net.TCPAddr{IP: net.IPv4zero, Port: tc.busyPortIPv4})
assert.NilError(t, err)
defer tl.Close()
ul, err := net.ListenUDP("udp4", &net.UDPAddr{IP: net.IPv4zero, Port: tc.busyPortIPv4})
assert.NilError(t, err)
defer ul.Close()
}
n := &bridgeNetwork{
config: &networkConfiguration{
BridgeName: "dummybridge",
@@ -829,20 +874,21 @@ func TestAddPortMappings(t *testing.T) {
var disableNAT bool
var addrM, addrD, addrH string
var ipv iptables.IPVersion
hip := expChildIP(expPB.HostIP)
if expPB.IP.To4() == nil {
disableNAT = tc.gwMode6.natDisabled()
ipv = iptables.IPv6
addrM = ctrIP6.IP.String() + "/128"
addrD = "[" + ctrIP6.IP.String() + "]"
addrH = expPB.HostIP.String() + "/128"
addrH = hip.String() + "/128"
} else {
disableNAT = tc.gwMode4.natDisabled()
ipv = iptables.IPv4
addrM = ctrIP4.IP.String() + "/32"
addrD = ctrIP4.IP.String()
addrH = expPB.HostIP.String() + "/32"
addrH = hip.String() + "/32"
}
if expPB.HostIP.IsUnspecified() {
if hip.IsUnspecified() {
addrH = "0/0"
}
@@ -887,19 +933,21 @@ func TestAddPortMappings(t *testing.T) {
}
// Check a docker-proxy was started and stopped for each expected port binding.
expProxies := map[proxyCall]bool{}
for _, expPB := range tc.expPBs {
hip := expChildIP(expPB.HostIP)
is4 := hip.To4() != nil
if (is4 && tc.gwMode4.natDisabled()) || (!is4 && tc.gwMode6.natDisabled()) {
continue
if tc.proxyPath != "" {
expProxies := map[proxyCall]bool{}
for _, expPB := range tc.expPBs {
hip := expChildIP(expPB.HostIP)
is4 := hip.To4() != nil
if (is4 && tc.gwMode4.natDisabled()) || (!is4 && tc.gwMode6.natDisabled()) {
continue
}
p := newProxyCall(expPB.Proto.String(),
hip, int(expPB.HostPort),
expPB.IP, int(expPB.Port), tc.proxyPath)
expProxies[p] = tc.expReleaseErr != ""
}
p := newProxyCall(expPB.Proto.String(),
hip, int(expPB.HostPort),
expPB.IP, int(expPB.Port), tc.proxyPath)
expProxies[p] = tc.expReleaseErr != ""
assert.Check(t, is.DeepEqual(expProxies, proxies))
}
assert.Check(t, is.DeepEqual(expProxies, proxies))
// Check the port driver has seen the expected port mappings and no others,
// and that they have all been closed.

View File

@@ -99,7 +99,8 @@ func (c *PortDriverClient) AddPort(
hostIP netip.Addr,
childIP netip.Addr,
hostPort int,
) (func() error, error) { // proto is like "tcp", but we need to convert it to "tcp4" or "tcp6" explicitly
) (func() error, error) {
// proto is like "tcp", but we need to convert it to "tcp4" or "tcp6" explicitly
// for libnetwork >= 20201216
//
// See https://github.com/moby/libnetwork/pull/2604/files#diff-8fa48beed55dd033bf8e4f8c40b31cf69d0b2cc5d4bb53cde8594670ea6c938aR20

View File

@@ -1,78 +1,61 @@
package portmapper
import (
"errors"
"fmt"
"io"
"net"
"os"
"os/exec"
"runtime"
"strconv"
"syscall"
"time"
"github.com/docker/docker/libnetwork/types"
)
// StartProxy starts the proxy process at proxyPath, or instantiates a dummy proxy
// to bind the host port if proxyPath is the empty string.
func StartProxy(
proto string,
hostIP net.IP, hostPort int,
containerIP net.IP, containerPort int,
proxyPath string,
) (stop func() error, retErr error) {
if proxyPath == "" {
return newDummyProxy(proto, hostIP, hostPort)
}
return newProxyCommand(proto, hostIP, hostPort, containerIP, containerPort, proxyPath)
}
func newProxyCommand(
proto string,
hostIP net.IP, hostPort int,
containerIP net.IP, containerPort int,
// StartProxy starts the proxy process at proxyPath.
// If listenSock is not nil, it must be a bound socket that can be passed to
// the proxy process for it to listen on.
func StartProxy(pb types.PortBinding,
proxyPath string,
listenSock *os.File,
) (stop func() error, retErr error) {
if proxyPath == "" {
return nil, fmt.Errorf("no path provided for userland-proxy binary")
}
p := &proxyCommand{
cmd: &exec.Cmd{
Path: proxyPath,
Args: []string{
proxyPath,
"-proto", proto,
"-host-ip", hostIP.String(),
"-host-port", strconv.Itoa(hostPort),
"-container-ip", containerIP.String(),
"-container-port", strconv.Itoa(containerPort),
},
SysProcAttr: &syscall.SysProcAttr{
Pdeathsig: syscall.SIGTERM, // send a sigterm to the proxy if the creating thread in the daemon process dies (https://go.dev/issue/27505)
},
},
wait: make(chan error, 1),
}
if err := p.start(); err != nil {
return nil, err
}
return p.stop, nil
}
// proxyCommand wraps an exec.Cmd to run the userland TCP and UDP
// proxies as separate processes.
type proxyCommand struct {
// cmd is the proxy process invocation; start launches it and stop sends it
// an interrupt signal.
cmd *exec.Cmd
// wait receives the result of cmd.Wait() after the process has been started;
// stop reads from it once the interrupt has been sent.
wait chan error
}
func (p *proxyCommand) start() error {
r, w, err := os.Pipe()
if err != nil {
return fmt.Errorf("proxy unable to open os.Pipe %s", err)
return nil, fmt.Errorf("proxy unable to open os.Pipe %s", err)
}
defer r.Close()
p.cmd.ExtraFiles = []*os.File{w}
defer func() {
if w != nil {
w.Close()
}
r.Close()
}()
cmd := &exec.Cmd{
Path: proxyPath,
Args: []string{
proxyPath,
"-proto", pb.Proto.String(),
"-host-ip", pb.HostIP.String(),
"-host-port", strconv.FormatUint(uint64(pb.HostPort), 10),
"-container-ip", pb.IP.String(),
"-container-port", strconv.FormatUint(uint64(pb.Port), 10),
},
ExtraFiles: []*os.File{w},
SysProcAttr: &syscall.SysProcAttr{
Pdeathsig: syscall.SIGTERM, // send a sigterm to the proxy if the creating thread in the daemon process dies (https://go.dev/issue/27505)
},
}
if listenSock != nil {
cmd.Args = append(cmd.Args, "-use-listen-fd")
cmd.ExtraFiles = append(cmd.ExtraFiles, listenSock)
}
wait := make(chan error, 1)
// As the command's SysProcAttr.Pdeathsig is set, the signal will be sent to
// the process when the OS thread on which its Start() was executed dies.
@@ -88,17 +71,18 @@ func (p *proxyCommand) start() error {
go func() {
runtime.LockOSThread()
defer runtime.UnlockOSThread()
err := p.cmd.Start()
err := cmd.Start()
started <- err
if err != nil {
return
}
p.wait <- p.cmd.Wait()
wait <- cmd.Wait()
}()
if err := <-started; err != nil {
return err
return nil, err
}
w.Close()
w = nil
errchan := make(chan error, 1)
go func() {
@@ -108,11 +92,16 @@ func (p *proxyCommand) start() error {
if string(buf) != "0\n" {
errStr, err := io.ReadAll(r)
if err != nil {
errchan <- fmt.Errorf("Error reading exit status from userland proxy: %v", err)
errchan <- fmt.Errorf("error reading exit status from userland proxy: %v", err)
return
}
errchan <- fmt.Errorf("Error starting userland proxy: %s", errStr)
// If the user has an old docker-proxy in their PATH, and we passed "-use-listen-fd"
// on the command line, it exits with no response on the pipe.
if listenSock != nil && buf[0] == 0 && len(errStr) == 0 {
errchan <- errors.New("failed to start docker-proxy, check that the current version is in your $PATH")
return
}
errchan <- fmt.Errorf("error starting userland proxy: %s", errStr)
return
}
errchan <- nil
@@ -120,18 +109,21 @@ func (p *proxyCommand) start() error {
select {
case err := <-errchan:
return err
if err != nil {
return nil, err
}
case <-time.After(16 * time.Second):
return fmt.Errorf("Timed out proxy starting the userland proxy")
return nil, fmt.Errorf("timed out starting the userland proxy")
}
}
func (p *proxyCommand) stop() error {
if p.cmd.Process != nil {
if err := p.cmd.Process.Signal(os.Interrupt); err != nil {
stopFn := func() error {
if cmd.Process == nil {
return nil
}
if err := cmd.Process.Signal(os.Interrupt); err != nil {
return err
}
return <-p.wait
return <-wait
}
return nil
return stopFn, nil
}