From e0c7a839a8e6ecf82074429fa7b1b67fc3aca121 Mon Sep 17 00:00:00 2001 From: Rob Murray Date: Mon, 5 Aug 2024 12:41:38 +0100 Subject: [PATCH 1/9] Only build docker-proxy for Linux Signed-off-by: Rob Murray --- Dockerfile | 2 +- cmd/docker-proxy/genwinres_windows.go | 5 ----- cmd/docker-proxy/{main.go => main_linux.go} | 0 .../{network_proxy_test.go => network_proxy_linux_test.go} | 0 cmd/docker-proxy/{proxy.go => proxy_linux.go} | 0 cmd/docker-proxy/{sctp_proxy.go => sctp_proxy_linux.go} | 0 cmd/docker-proxy/{tcp_proxy.go => tcp_proxy_linux.go} | 0 cmd/docker-proxy/{udp_proxy.go => udp_proxy_linux.go} | 0 hack/make/binary-proxy | 3 ++- hack/make/dynbinary-proxy | 3 ++- 10 files changed, 5 insertions(+), 8 deletions(-) delete mode 100644 cmd/docker-proxy/genwinres_windows.go rename cmd/docker-proxy/{main.go => main_linux.go} (100%) rename cmd/docker-proxy/{network_proxy_test.go => network_proxy_linux_test.go} (100%) rename cmd/docker-proxy/{proxy.go => proxy_linux.go} (100%) rename cmd/docker-proxy/{sctp_proxy.go => sctp_proxy_linux.go} (100%) rename cmd/docker-proxy/{tcp_proxy.go => tcp_proxy_linux.go} (100%) rename cmd/docker-proxy/{udp_proxy.go => udp_proxy_linux.go} (100%) diff --git a/Dockerfile b/Dockerfile index 52049c9dd5..ad277300a6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -620,7 +620,7 @@ RUN --mount=type=bind,target=.,rw \ xx-go --wrap PKG_CONFIG=$(xx-go env PKG_CONFIG) ./hack/make.sh $target xx-verify $([ "$DOCKER_STATIC" = "1" ] && echo "--static") /tmp/bundles/${target}-daemon/dockerd$([ "$(xx-info os)" = "windows" ] && echo ".exe") - xx-verify $([ "$DOCKER_STATIC" = "1" ] && echo "--static") /tmp/bundles/${target}-daemon/docker-proxy$([ "$(xx-info os)" = "windows" ] && echo ".exe") + [ "$(xx-info os)" != "linux" ] || xx-verify $([ "$DOCKER_STATIC" = "1" ] && echo "--static") /tmp/bundles/${target}-daemon/docker-proxy mkdir /build mv /tmp/bundles/${target}-daemon/* /build/ EOT diff --git a/cmd/docker-proxy/genwinres_windows.go 
b/cmd/docker-proxy/genwinres_windows.go deleted file mode 100644 index 86697d8a57..0000000000 --- a/cmd/docker-proxy/genwinres_windows.go +++ /dev/null @@ -1,5 +0,0 @@ -//go:generate go-winres make --arch=386,amd64,arm,arm64 --in=../../cli/winresources/docker-proxy/winres.json --out=../../cli/winresources/docker-proxy/resource - -package main - -import _ "github.com/docker/docker/cli/winresources/docker-proxy" diff --git a/cmd/docker-proxy/main.go b/cmd/docker-proxy/main_linux.go similarity index 100% rename from cmd/docker-proxy/main.go rename to cmd/docker-proxy/main_linux.go diff --git a/cmd/docker-proxy/network_proxy_test.go b/cmd/docker-proxy/network_proxy_linux_test.go similarity index 100% rename from cmd/docker-proxy/network_proxy_test.go rename to cmd/docker-proxy/network_proxy_linux_test.go diff --git a/cmd/docker-proxy/proxy.go b/cmd/docker-proxy/proxy_linux.go similarity index 100% rename from cmd/docker-proxy/proxy.go rename to cmd/docker-proxy/proxy_linux.go diff --git a/cmd/docker-proxy/sctp_proxy.go b/cmd/docker-proxy/sctp_proxy_linux.go similarity index 100% rename from cmd/docker-proxy/sctp_proxy.go rename to cmd/docker-proxy/sctp_proxy_linux.go diff --git a/cmd/docker-proxy/tcp_proxy.go b/cmd/docker-proxy/tcp_proxy_linux.go similarity index 100% rename from cmd/docker-proxy/tcp_proxy.go rename to cmd/docker-proxy/tcp_proxy_linux.go diff --git a/cmd/docker-proxy/udp_proxy.go b/cmd/docker-proxy/udp_proxy_linux.go similarity index 100% rename from cmd/docker-proxy/udp_proxy.go rename to cmd/docker-proxy/udp_proxy_linux.go diff --git a/hack/make/binary-proxy b/hack/make/binary-proxy index 9fa51f76cd..7285d962af 100644 --- a/hack/make/binary-proxy +++ b/hack/make/binary-proxy @@ -2,7 +2,8 @@ set -e -( +# docker-proxy is Linux only +[ "$(go env GOOS)" != 'linux' ] || ( export CGO_ENABLED=0 DOCKER_STATIC=1 diff --git a/hack/make/dynbinary-proxy b/hack/make/dynbinary-proxy index d732df13a4..436db2e188 100644 --- a/hack/make/dynbinary-proxy +++ 
b/hack/make/dynbinary-proxy @@ -2,7 +2,8 @@ set -e -( +# docker-proxy is Linux only +[ "$(go env GOOS)" != 'linux' ] || ( export LDFLAGS_STATIC='' export BUILDFLAGS=("${BUILDFLAGS[@]/netgo /}") # disable netgo, since we don't need it for a dynamic binary export BUILDFLAGS=("${BUILDFLAGS[@]/osusergo /}") # ditto for osusergo From ba2f3c0a9883e4e20cd63665b3230d9f9d3fa2c0 Mon Sep 17 00:00:00 2001 From: Rob Murray Date: Mon, 8 Jul 2024 13:50:22 +0100 Subject: [PATCH 2/9] cmd/docker-proxy: pass open listener to proxy impl In preparation for the daemon passing a listen fd, add command line option -use-listen-fd to indicate that the fd is present (as fd 4). If the new flag isn't given, open the listener as normal. Refactor the TCP and UDP proxies to be constructed with an existing TCPListener or UDPConn, respectively. Lift the responsibility of opening the listener to the entrypoint. Per the Single Responsibility Principle, this structure affords changing how the listener is created without having to touch the proxy implementations. Co-authored-by: Cory Snider Signed-off-by: Rob Murray --- cmd/docker-proxy/main_linux.go | 145 ++++++++++--- cmd/docker-proxy/network_proxy_linux_test.go | 202 ++++++++++++++----- cmd/docker-proxy/proxy_linux.go | 20 +- cmd/docker-proxy/sctp_proxy_linux.go | 13 +- cmd/docker-proxy/tcp_proxy_linux.go | 13 +- cmd/docker-proxy/udp_proxy_linux.go | 11 +- 6 files changed, 274 insertions(+), 130 deletions(-) diff --git a/cmd/docker-proxy/main_linux.go b/cmd/docker-proxy/main_linux.go index 555c27a084..2210d2f50d 100644 --- a/cmd/docker-proxy/main_linux.go +++ b/cmd/docker-proxy/main_linux.go @@ -1,9 +1,9 @@ package main import ( + "errors" "flag" "fmt" - "log" "net" "os" "os/signal" @@ -13,11 +13,27 @@ import ( "github.com/ishidawataru/sctp" ) -func main() { - f := os.NewFile(3, "signal-parent") - host, container := parseFlags() +// The caller is expected to pass-in open file descriptors ... +const ( + // Pipe for reporting status, as a string. 
"0\n" if the proxy + // started normally. "1\n" otherwise. + parentPipeFd uintptr = 3 + iota + // If -use-listen-fd=true, a listening socket ready to accept TCP + // connections or receive UDP. (Without that option on the command + // line, the listener needs to be opened by docker-proxy, for + // compatibility with older docker daemons. In this case fd 4 + // may belong to the Go runtime.) + listenSockFd +) - p, err := NewProxy(host, container) +func main() { + config := parseFlags() + p, err := newProxy(config) + if config.ListenSock != nil { + config.ListenSock.Close() + } + + f := os.NewFile(parentPipeFd, "signal-parent") if err != nil { fmt.Fprintf(f, "1\n%s", err) f.Close() @@ -31,41 +47,114 @@ func main() { p.Run() } +func newProxy(config ProxyConfig) (p Proxy, err error) { + ipv := ipv4 + if config.HostIP.To4() == nil { + ipv = ipv6 + } + + switch config.Proto { + case "tcp": + var listener *net.TCPListener + if config.ListenSock == nil { + // Fall back to HostIP:HostPort if no socket on fd 4, for compatibility with older daemons. + hostAddr := &net.TCPAddr{IP: config.HostIP, Port: config.HostPort} + listener, err = net.ListenTCP("tcp"+string(ipv), hostAddr) + if err != nil { + return nil, fmt.Errorf("failed to listen on %s: %w", hostAddr, err) + } + } else { + l, err := net.FileListener(config.ListenSock) + if err != nil { + return nil, err + } + var ok bool + listener, ok = l.(*net.TCPListener) + if !ok { + return nil, fmt.Errorf("unexpected socket type for listener fd: %s", l.Addr().Network()) + } + } + container := &net.TCPAddr{IP: config.ContainerIP, Port: config.ContainerPort} + p, err = NewTCPProxy(listener, container) + case "udp": + var listener *net.UDPConn + if config.ListenSock == nil { + // Fall back to HostIP:HostPort if no socket on fd 4, for compatibility with older daemons. 
+ hostAddr := &net.UDPAddr{IP: config.HostIP, Port: config.HostPort} + listener, err = net.ListenUDP("udp"+string(ipv), hostAddr) + if err != nil { + return nil, fmt.Errorf("failed to listen on %s: %w", hostAddr, err) + } + } else { + l, err := net.FilePacketConn(config.ListenSock) + if err != nil { + return nil, err + } + var ok bool + listener, ok = l.(*net.UDPConn) + if !ok { + return nil, fmt.Errorf("unexpected socket type for listener fd: %s", l.LocalAddr().Network()) + } + } + container := &net.UDPAddr{IP: config.ContainerIP, Port: config.ContainerPort} + p, err = NewUDPProxy(listener, container) + case "sctp": + var listener *sctp.SCTPListener + if config.ListenSock != nil { + // There's no way to construct an SCTPListener from a file descriptor at the moment. + // If a socket has been passed in, it's probably from a newer daemon using a version + // of the sctp module that does allow it. + return nil, errors.New("cannot use supplied SCTP socket, check the latest docker-proxy is in your $PATH") + } + hostAddr := &sctp.SCTPAddr{IPAddrs: []net.IPAddr{{IP: config.HostIP}}, Port: config.HostPort} + container := &sctp.SCTPAddr{IPAddrs: []net.IPAddr{{IP: config.ContainerIP}}, Port: config.ContainerPort} + listener, err = sctp.ListenSCTP("sctp"+string(ipv), hostAddr) + if err != nil { + return nil, fmt.Errorf("failed to listen on %s: %w", hostAddr, err) + } + p, err = NewSCTPProxy(listener, container) + default: + return nil, fmt.Errorf("unsupported protocol %s", config.Proto) + } + + return p, err +} + +type ProxyConfig struct { + Proto string + HostIP, ContainerIP net.IP + HostPort, ContainerPort int + ListenSock *os.File +} + // parseFlags parses the flags passed on reexec to create the TCP/UDP/SCTP // net.Addrs to map the host and container ports. 
-func parseFlags() (host net.Addr, container net.Addr) { +func parseFlags() ProxyConfig { var ( - proto = flag.String("proto", "tcp", "proxy protocol") - hostIP = flag.String("host-ip", "", "host ip") - hostPort = flag.Int("host-port", -1, "host port") - containerIP = flag.String("container-ip", "", "container ip") - containerPort = flag.Int("container-port", -1, "container port") - printVer = flag.Bool("v", false, "print version information and quit") - printVersion = flag.Bool("version", false, "print version information and quit") + config ProxyConfig + useListenFd bool + printVer bool ) - + flag.StringVar(&config.Proto, "proto", "tcp", "proxy protocol") + flag.TextVar(&config.HostIP, "host-ip", net.IPv4zero, "host ip") + flag.IntVar(&config.HostPort, "host-port", -1, "host port") + flag.TextVar(&config.ContainerIP, "container-ip", net.IPv4zero, "container ip") + flag.IntVar(&config.ContainerPort, "container-port", -1, "container port") + flag.BoolVar(&useListenFd, "use-listen-fd", false, "use a supplied listen fd") + flag.BoolVar(&printVer, "v", false, "print version information and quit") + flag.BoolVar(&printVer, "version", false, "print version information and quit") flag.Parse() - if *printVer || *printVersion { + if printVer { fmt.Printf("docker-proxy (commit %s) version %s\n", dockerversion.GitCommit, dockerversion.Version) os.Exit(0) } - switch *proto { - case "tcp": - host = &net.TCPAddr{IP: net.ParseIP(*hostIP), Port: *hostPort} - container = &net.TCPAddr{IP: net.ParseIP(*containerIP), Port: *containerPort} - case "udp": - host = &net.UDPAddr{IP: net.ParseIP(*hostIP), Port: *hostPort} - container = &net.UDPAddr{IP: net.ParseIP(*containerIP), Port: *containerPort} - case "sctp": - host = &sctp.SCTPAddr{IPAddrs: []net.IPAddr{{IP: net.ParseIP(*hostIP)}}, Port: *hostPort} - container = &sctp.SCTPAddr{IPAddrs: []net.IPAddr{{IP: net.ParseIP(*containerIP)}}, Port: *containerPort} - default: - log.Fatalf("unsupported protocol %s", *proto) + if useListenFd { + 
config.ListenSock = os.NewFile(listenSockFd, "listen-sock") } - return host, container + return config } func handleStopSignals(p Proxy) { diff --git a/cmd/docker-proxy/network_proxy_linux_test.go b/cmd/docker-proxy/network_proxy_linux_test.go index 48c7a1abf7..2457e63278 100644 --- a/cmd/docker-proxy/network_proxy_linux_test.go +++ b/cmd/docker-proxy/network_proxy_linux_test.go @@ -1,3 +1,5 @@ +//go:build !windows + package main import ( @@ -5,13 +7,13 @@ import ( "fmt" "io" "net" - "runtime" + "os" "strings" "testing" "time" "github.com/ishidawataru/sctp" - "gotest.tools/v3/skip" + "gotest.tools/v3/assert" ) var ( @@ -40,6 +42,8 @@ type UDPEchoServer struct { testCtx *testing.T } +const hopefullyFreePort = 25587 + func NewEchoServer(t *testing.T, proto, address string, opts EchoServerOptions) EchoServer { var server EchoServer if !strings.HasPrefix(proto, "tcp") && opts.TCPHalfClose { @@ -128,7 +132,31 @@ func (server *UDPEchoServer) Run() { func (server *UDPEchoServer) LocalAddr() net.Addr { return server.conn.LocalAddr() } func (server *UDPEchoServer) Close() { server.conn.Close() } +func tcpListener(t *testing.T, nw string, addr *net.TCPAddr) (*os.File, *net.TCPAddr) { + t.Helper() + l, err := net.ListenTCP(nw, addr) + assert.NilError(t, err) + osFile, err := l.File() + assert.NilError(t, err) + tcpAddr := l.Addr().(*net.TCPAddr) + err = l.Close() + assert.NilError(t, err) + return osFile, tcpAddr +} + +func udpListener(t *testing.T, nw string, addr *net.UDPAddr) (*os.File, *net.UDPAddr) { + t.Helper() + l, err := net.ListenUDP(nw, addr) + assert.NilError(t, err) + osFile, err := l.File() + assert.NilError(t, err) + err = l.Close() + assert.NilError(t, err) + return osFile, l.LocalAddr().(*net.UDPAddr) +} + func testProxyAt(t *testing.T, proto string, proxy Proxy, addr string, halfClose bool) { + t.Helper() defer proxy.Close() go proxy.Run() var client net.Conn @@ -167,98 +195,169 @@ func testProxyAt(t *testing.T, proto string, proxy Proxy, addr string, 
halfClose } } -func testProxy(t *testing.T, proto string, proxy Proxy, halfClose bool) { - testProxyAt(t, proto, proxy, proxy.FrontendAddr().String(), halfClose) -} - -func testTCP4Proxy(t *testing.T, halfClose bool) { +func testTCP4Proxy(t *testing.T, halfClose bool, hostPort int) { + t.Helper() backend := NewEchoServer(t, "tcp", "127.0.0.1:0", EchoServerOptions{TCPHalfClose: halfClose}) defer backend.Close() backend.Run() + backendAddr := backend.LocalAddr().(*net.TCPAddr) + var listener *os.File frontendAddr := &net.TCPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 0} - proxy, err := NewProxy(frontendAddr, backend.LocalAddr()) + if hostPort == 0 { + listener, frontendAddr = tcpListener(t, "tcp4", &net.TCPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 0}) + } else { + frontendAddr.Port = hostPort + } + config := ProxyConfig{ + Proto: "tcp", + HostIP: frontendAddr.IP, + HostPort: frontendAddr.Port, + ContainerIP: backendAddr.IP, + ContainerPort: backendAddr.Port, + ListenSock: listener, + } + proxy, err := newProxy(config) if err != nil { t.Fatal(err) } - testProxy(t, "tcp", proxy, halfClose) + testProxyAt(t, "tcp", proxy, frontendAddr.String(), halfClose) } func TestTCP4Proxy(t *testing.T) { - testTCP4Proxy(t, false) + testTCP4Proxy(t, false, 0) +} + +func TestTCP4ProxyNoListener(t *testing.T) { + testTCP4Proxy(t, false, hopefullyFreePort) } func TestTCP4ProxyHalfClose(t *testing.T) { - testTCP4Proxy(t, true) + testTCP4Proxy(t, true, 0) } func TestTCP6Proxy(t *testing.T) { - t.Skip("Need to start CI docker with --ipv6") backend := NewEchoServer(t, "tcp", "[::1]:0", EchoServerOptions{}) defer backend.Close() backend.Run() - frontendAddr := &net.TCPAddr{IP: net.IPv6loopback, Port: 0} - proxy, err := NewProxy(frontendAddr, backend.LocalAddr()) + backendAddr := backend.LocalAddr().(*net.TCPAddr) + listener, frontendAddr := tcpListener(t, "tcp6", &net.TCPAddr{IP: net.IPv6loopback, Port: 0}) + config := ProxyConfig{ + Proto: "tcp", + HostIP: frontendAddr.IP, + HostPort: 
frontendAddr.Port, + ContainerIP: backendAddr.IP, + ContainerPort: backendAddr.Port, + ListenSock: listener, + } + proxy, err := newProxy(config) if err != nil { t.Fatal(err) } - testProxy(t, "tcp", proxy, false) + testProxyAt(t, "tcp", proxy, frontendAddr.String(), false) } func TestTCPDualStackProxy(t *testing.T) { - // If I understand `godoc -src net favoriteAddrFamily` (used by the - // net.Listen* functions) correctly this should work, but it doesn't. - t.Skip("No support for dual stack yet") backend := NewEchoServer(t, "tcp", "[::1]:0", EchoServerOptions{}) defer backend.Close() backend.Run() - frontendAddr := &net.TCPAddr{IP: net.IPv6loopback, Port: 0} - proxy, err := NewProxy(frontendAddr, backend.LocalAddr()) + backendAddr := backend.LocalAddr().(*net.TCPAddr) + listener, frontendAddr := tcpListener(t, "tcp", &net.TCPAddr{IP: net.IPv6zero, Port: 0}) + config := ProxyConfig{ + Proto: "tcp", + HostIP: frontendAddr.IP, + HostPort: frontendAddr.Port, + ContainerIP: backendAddr.IP, + ContainerPort: backendAddr.Port, + ListenSock: listener, + } + proxy, err := newProxy(config) if err != nil { t.Fatal(err) } ipv4ProxyAddr := &net.TCPAddr{ IP: net.IPv4(127, 0, 0, 1), - Port: proxy.FrontendAddr().(*net.TCPAddr).Port, + Port: frontendAddr.Port, } testProxyAt(t, "tcp", proxy, ipv4ProxyAddr.String(), false) } -func TestUDP4Proxy(t *testing.T) { +func testUDP4Proxy(t *testing.T, hostPort int) { + t.Helper() backend := NewEchoServer(t, "udp", "127.0.0.1:0", EchoServerOptions{}) defer backend.Close() backend.Run() + var listener *os.File frontendAddr := &net.UDPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 0} - proxy, err := NewProxy(frontendAddr, backend.LocalAddr()) + if hostPort == 0 { + listener, frontendAddr = udpListener(t, "udp4", &net.UDPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 0}) + } else { + frontendAddr.Port = hostPort + } + backendAddr := backend.LocalAddr().(*net.UDPAddr) + config := ProxyConfig{ + Proto: "udp", + HostIP: frontendAddr.IP, + HostPort: 
frontendAddr.Port, + ContainerIP: backendAddr.IP, + ContainerPort: backendAddr.Port, + ListenSock: listener, + } + proxy, err := newProxy(config) if err != nil { t.Fatal(err) } - testProxy(t, "udp", proxy, false) + testProxyAt(t, "udp", proxy, frontendAddr.String(), false) +} + +func TestUDP4Proxy(t *testing.T) { + testUDP4Proxy(t, 0) +} + +func TestUDP4ProxyNoListener(t *testing.T) { + testUDP4Proxy(t, hopefullyFreePort) } func TestUDP6Proxy(t *testing.T) { - t.Skip("Need to start CI docker with --ipv6") backend := NewEchoServer(t, "udp", "[::1]:0", EchoServerOptions{}) defer backend.Close() backend.Run() - frontendAddr := &net.UDPAddr{IP: net.IPv6loopback, Port: 0} - proxy, err := NewProxy(frontendAddr, backend.LocalAddr()) + listener, frontendAddr := udpListener(t, "udp6", &net.UDPAddr{IP: net.IPv6loopback, Port: 0}) + backendAddr := backend.LocalAddr().(*net.UDPAddr) + config := ProxyConfig{ + Proto: "udp", + HostIP: frontendAddr.IP, + HostPort: frontendAddr.Port, + ContainerIP: backendAddr.IP, + ContainerPort: backendAddr.Port, + ListenSock: listener, + } + proxy, err := newProxy(config) if err != nil { t.Fatal(err) } - testProxy(t, "udp", proxy, false) + testProxyAt(t, "udp", proxy, frontendAddr.String(), false) } func TestUDPWriteError(t *testing.T) { frontendAddr := &net.UDPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 0} // Hopefully, this port will be free: */ - backendAddr := &net.UDPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 25587} - proxy, err := NewProxy(frontendAddr, backendAddr) + backendAddr := &net.UDPAddr{IP: net.IPv4(127, 0, 0, 1), Port: hopefullyFreePort} + listener, frontendAddr := udpListener(t, "udp4", frontendAddr) + config := ProxyConfig{ + Proto: "udp", + HostIP: frontendAddr.IP, + HostPort: frontendAddr.Port, + ContainerIP: backendAddr.IP, + ContainerPort: backendAddr.Port, + ListenSock: listener, + } + proxy, err := newProxy(config) if err != nil { t.Fatal(err) } defer proxy.Close() go proxy.Run() - client, err := net.Dial("udp", 
"127.0.0.1:25587") + client, err := net.Dial("udp", frontendAddr.String()) if err != nil { t.Fatalf("Can't connect to the proxy: %v", err) } @@ -266,7 +365,7 @@ func TestUDPWriteError(t *testing.T) { // Make sure the proxy doesn't stop when there is no actual backend: client.Write(testBuf) client.Write(testBuf) - backend := NewEchoServer(t, "udp", "127.0.0.1:25587", EchoServerOptions{}) + backend := NewEchoServer(t, "udp", backendAddr.String(), EchoServerOptions{}) defer backend.Close() backend.Run() client.SetDeadline(time.Now().Add(10 * time.Second)) @@ -282,31 +381,36 @@ func TestUDPWriteError(t *testing.T) { } } -func TestSCTP4Proxy(t *testing.T) { - skip.If(t, runtime.GOOS == "windows", "sctp is not supported on windows") - +func TestSCTP4ProxyNoListener(t *testing.T) { backend := NewEchoServer(t, "sctp", "127.0.0.1:0", EchoServerOptions{}) defer backend.Close() backend.Run() - frontendAddr := &sctp.SCTPAddr{IPAddrs: []net.IPAddr{{IP: net.IPv4(127, 0, 0, 1)}}, Port: 0} - proxy, err := NewProxy(frontendAddr, backend.LocalAddr()) - if err != nil { - t.Fatal(err) + backendAddr := backend.LocalAddr().(*sctp.SCTPAddr) + config := ProxyConfig{ + Proto: "sctp", + HostIP: net.IPv4(127, 0, 0, 1), + HostPort: hopefullyFreePort, + ContainerIP: backendAddr.IPAddrs[0].IP, + ContainerPort: backendAddr.Port, } - testProxy(t, "sctp", proxy, false) + proxy, err := newProxy(config) + assert.NilError(t, err) + testProxyAt(t, "sctp", proxy, fmt.Sprintf("%s:%d", config.HostIP, config.HostPort), false) } -func TestSCTP6Proxy(t *testing.T) { - t.Skip("Need to start CI docker with --ipv6") - skip.If(t, runtime.GOOS == "windows", "sctp is not supported on windows") - +func TestSCTP6ProxyNoListener(t *testing.T) { backend := NewEchoServer(t, "sctp", "[::1]:0", EchoServerOptions{}) defer backend.Close() backend.Run() - frontendAddr := &sctp.SCTPAddr{IPAddrs: []net.IPAddr{{IP: net.IPv6loopback}}, Port: 0} - proxy, err := NewProxy(frontendAddr, backend.LocalAddr()) - if err != nil { - 
t.Fatal(err) + backendAddr := backend.LocalAddr().(*sctp.SCTPAddr) + config := ProxyConfig{ + Proto: "sctp", + HostIP: net.IPv6loopback, + HostPort: hopefullyFreePort, + ContainerIP: backendAddr.IPAddrs[0].IP, + ContainerPort: backendAddr.Port, } - testProxy(t, "sctp", proxy, false) + proxy, err := newProxy(config) + assert.NilError(t, err) + testProxyAt(t, "sctp", proxy, fmt.Sprintf("[%s]:%d", config.HostIP, config.HostPort), false) } diff --git a/cmd/docker-proxy/proxy_linux.go b/cmd/docker-proxy/proxy_linux.go index 1bd8f9d8e1..b8cabc7959 100644 --- a/cmd/docker-proxy/proxy_linux.go +++ b/cmd/docker-proxy/proxy_linux.go @@ -2,11 +2,7 @@ // and UDP. package main -import ( - "net" - - "github.com/ishidawataru/sctp" -) +import "net" // ipVersion refers to IP version - v4 or v6 type ipVersion string @@ -34,17 +30,3 @@ type Proxy interface { // BackendAddr returns the proxied address. BackendAddr() net.Addr } - -// NewProxy creates a Proxy according to the specified frontendAddr and backendAddr. -func NewProxy(frontendAddr, backendAddr net.Addr) (Proxy, error) { - switch frontendAddr.(type) { - case *net.UDPAddr: - return NewUDPProxy(frontendAddr.(*net.UDPAddr), backendAddr.(*net.UDPAddr)) - case *net.TCPAddr: - return NewTCPProxy(frontendAddr.(*net.TCPAddr), backendAddr.(*net.TCPAddr)) - case *sctp.SCTPAddr: - return NewSCTPProxy(frontendAddr.(*sctp.SCTPAddr), backendAddr.(*sctp.SCTPAddr)) - default: - panic("Unsupported protocol") - } -} diff --git a/cmd/docker-proxy/sctp_proxy_linux.go b/cmd/docker-proxy/sctp_proxy_linux.go index 29ee5a6562..793dfb84f2 100644 --- a/cmd/docker-proxy/sctp_proxy_linux.go +++ b/cmd/docker-proxy/sctp_proxy_linux.go @@ -18,18 +18,7 @@ type SCTPProxy struct { } // NewSCTPProxy creates a new SCTPProxy. 
-func NewSCTPProxy(frontendAddr, backendAddr *sctp.SCTPAddr) (*SCTPProxy, error) { - // detect version of hostIP to bind only to correct version - ipVersion := ipv4 - if frontendAddr.IPAddrs[0].IP.To4() == nil { - ipVersion = ipv6 - } - listener, err := sctp.ListenSCTP("sctp"+string(ipVersion), frontendAddr) - if err != nil { - return nil, err - } - // If the port in frontendAddr was 0 then ListenSCTP will have a picked - // a port to listen on, hence the call to Addr to get that actual port: +func NewSCTPProxy(listener *sctp.SCTPListener, backendAddr *sctp.SCTPAddr) (*SCTPProxy, error) { return &SCTPProxy{ listener: listener, frontendAddr: listener.Addr().(*sctp.SCTPAddr), diff --git a/cmd/docker-proxy/tcp_proxy_linux.go b/cmd/docker-proxy/tcp_proxy_linux.go index aa7711c1ea..0f3360a271 100644 --- a/cmd/docker-proxy/tcp_proxy_linux.go +++ b/cmd/docker-proxy/tcp_proxy_linux.go @@ -16,18 +16,7 @@ type TCPProxy struct { } // NewTCPProxy creates a new TCPProxy. -func NewTCPProxy(frontendAddr, backendAddr *net.TCPAddr) (*TCPProxy, error) { - // detect version of hostIP to bind only to correct version - ipVersion := ipv4 - if frontendAddr.IP.To4() == nil { - ipVersion = ipv6 - } - listener, err := net.ListenTCP("tcp"+string(ipVersion), frontendAddr) - if err != nil { - return nil, err - } - // If the port in frontendAddr was 0 then ListenTCP will have a picked - // a port to listen on, hence the call to Addr to get that actual port: +func NewTCPProxy(listener *net.TCPListener, backendAddr *net.TCPAddr) (*TCPProxy, error) { return &TCPProxy{ listener: listener, frontendAddr: listener.Addr().(*net.TCPAddr), diff --git a/cmd/docker-proxy/udp_proxy_linux.go b/cmd/docker-proxy/udp_proxy_linux.go index 66bacafa4f..51b9bae119 100644 --- a/cmd/docker-proxy/udp_proxy_linux.go +++ b/cmd/docker-proxy/udp_proxy_linux.go @@ -54,16 +54,7 @@ type UDPProxy struct { } // NewUDPProxy creates a new UDPProxy. 
-func NewUDPProxy(frontendAddr, backendAddr *net.UDPAddr) (*UDPProxy, error) { - // detect version of hostIP to bind only to correct version - ipVersion := ipv4 - if frontendAddr.IP.To4() == nil { - ipVersion = ipv6 - } - listener, err := net.ListenUDP("udp"+string(ipVersion), frontendAddr) - if err != nil { - return nil, err - } +func NewUDPProxy(listener *net.UDPConn, backendAddr *net.UDPAddr) (*UDPProxy, error) { return &UDPProxy{ listener: listener, frontendAddr: listener.LocalAddr().(*net.UDPAddr), From 88fb0c9fec6d4604c58870bad868f7e8ef07689e Mon Sep 17 00:00:00 2001 From: Cory Snider Date: Fri, 5 Jul 2024 19:47:41 -0400 Subject: [PATCH 3/9] cmd/docker-proxy: drop unused BackendAddr() method Signed-off-by: Cory Snider --- cmd/docker-proxy/proxy_linux.go | 2 -- cmd/docker-proxy/sctp_proxy_linux.go | 3 --- cmd/docker-proxy/tcp_proxy_linux.go | 3 --- cmd/docker-proxy/udp_proxy_linux.go | 3 --- 4 files changed, 11 deletions(-) diff --git a/cmd/docker-proxy/proxy_linux.go b/cmd/docker-proxy/proxy_linux.go index b8cabc7959..a704dec73a 100644 --- a/cmd/docker-proxy/proxy_linux.go +++ b/cmd/docker-proxy/proxy_linux.go @@ -27,6 +27,4 @@ type Proxy interface { Close() // FrontendAddr returns the address on which the proxy is listening. FrontendAddr() net.Addr - // BackendAddr returns the proxied address. - BackendAddr() net.Addr } diff --git a/cmd/docker-proxy/sctp_proxy_linux.go b/cmd/docker-proxy/sctp_proxy_linux.go index 793dfb84f2..5ed3f4d2ed 100644 --- a/cmd/docker-proxy/sctp_proxy_linux.go +++ b/cmd/docker-proxy/sctp_proxy_linux.go @@ -82,6 +82,3 @@ func (proxy *SCTPProxy) Close() { proxy.listener.Close() } // FrontendAddr returns the SCTP address on which the proxy is listening. func (proxy *SCTPProxy) FrontendAddr() net.Addr { return proxy.frontendAddr } - -// BackendAddr returns the SCTP proxied address. 
-func (proxy *SCTPProxy) BackendAddr() net.Addr { return proxy.backendAddr } diff --git a/cmd/docker-proxy/tcp_proxy_linux.go b/cmd/docker-proxy/tcp_proxy_linux.go index 0f3360a271..8d91866bcb 100644 --- a/cmd/docker-proxy/tcp_proxy_linux.go +++ b/cmd/docker-proxy/tcp_proxy_linux.go @@ -78,6 +78,3 @@ func (proxy *TCPProxy) Close() { proxy.listener.Close() } // FrontendAddr returns the TCP address on which the proxy is listening. func (proxy *TCPProxy) FrontendAddr() net.Addr { return proxy.frontendAddr } - -// BackendAddr returns the TCP proxied address. -func (proxy *TCPProxy) BackendAddr() net.Addr { return proxy.backendAddr } diff --git a/cmd/docker-proxy/udp_proxy_linux.go b/cmd/docker-proxy/udp_proxy_linux.go index 51b9bae119..015b345856 100644 --- a/cmd/docker-proxy/udp_proxy_linux.go +++ b/cmd/docker-proxy/udp_proxy_linux.go @@ -150,9 +150,6 @@ func (proxy *UDPProxy) Close() { // FrontendAddr returns the UDP address on which the proxy is listening. func (proxy *UDPProxy) FrontendAddr() net.Addr { return proxy.frontendAddr } -// BackendAddr returns the proxied UDP address. -func (proxy *UDPProxy) BackendAddr() net.Addr { return proxy.backendAddr } - func isClosedError(err error) bool { /* This comparison is ugly, but unfortunately, net.go doesn't export errClosing. * See: From 7a8663c9ea6fb9817d6b58c8b19fe8a84467ef4e Mon Sep 17 00:00:00 2001 From: Cory Snider Date: Fri, 5 Jul 2024 20:10:23 -0400 Subject: [PATCH 4/9] cmd/docker-proxy: drop FrontendAddr() method It was only used in tests. Signed-off-by: Cory Snider --- cmd/docker-proxy/proxy_linux.go | 4 ---- cmd/docker-proxy/sctp_proxy_linux.go | 3 --- cmd/docker-proxy/tcp_proxy_linux.go | 3 --- cmd/docker-proxy/udp_proxy_linux.go | 3 --- 4 files changed, 13 deletions(-) diff --git a/cmd/docker-proxy/proxy_linux.go b/cmd/docker-proxy/proxy_linux.go index a704dec73a..b3e8050c50 100644 --- a/cmd/docker-proxy/proxy_linux.go +++ b/cmd/docker-proxy/proxy_linux.go @@ -2,8 +2,6 @@ // and UDP. 
package main -import "net" - // ipVersion refers to IP version - v4 or v6 type ipVersion string @@ -25,6 +23,4 @@ type Proxy interface { Run() // Close stops forwarding traffic and close both ends of the Proxy. Close() - // FrontendAddr returns the address on which the proxy is listening. - FrontendAddr() net.Addr } diff --git a/cmd/docker-proxy/sctp_proxy_linux.go b/cmd/docker-proxy/sctp_proxy_linux.go index 5ed3f4d2ed..097e78e326 100644 --- a/cmd/docker-proxy/sctp_proxy_linux.go +++ b/cmd/docker-proxy/sctp_proxy_linux.go @@ -79,6 +79,3 @@ func (proxy *SCTPProxy) Run() { // Close stops forwarding the traffic. func (proxy *SCTPProxy) Close() { proxy.listener.Close() } - -// FrontendAddr returns the SCTP address on which the proxy is listening. -func (proxy *SCTPProxy) FrontendAddr() net.Addr { return proxy.frontendAddr } diff --git a/cmd/docker-proxy/tcp_proxy_linux.go b/cmd/docker-proxy/tcp_proxy_linux.go index 8d91866bcb..ac4fc99340 100644 --- a/cmd/docker-proxy/tcp_proxy_linux.go +++ b/cmd/docker-proxy/tcp_proxy_linux.go @@ -75,6 +75,3 @@ func (proxy *TCPProxy) Run() { // Close stops forwarding the traffic. func (proxy *TCPProxy) Close() { proxy.listener.Close() } - -// FrontendAddr returns the TCP address on which the proxy is listening. -func (proxy *TCPProxy) FrontendAddr() net.Addr { return proxy.frontendAddr } diff --git a/cmd/docker-proxy/udp_proxy_linux.go b/cmd/docker-proxy/udp_proxy_linux.go index 015b345856..585cc3616e 100644 --- a/cmd/docker-proxy/udp_proxy_linux.go +++ b/cmd/docker-proxy/udp_proxy_linux.go @@ -147,9 +147,6 @@ func (proxy *UDPProxy) Close() { } } -// FrontendAddr returns the UDP address on which the proxy is listening. -func (proxy *UDPProxy) FrontendAddr() net.Addr { return proxy.frontendAddr } - func isClosedError(err error) bool { /* This comparison is ugly, but unfortunately, net.go doesn't export errClosing. 
* See: From 384ca56d9056b5d226a7266bf0200d0a45062c20 Mon Sep 17 00:00:00 2001 From: Rob Murray Date: Thu, 4 Jul 2024 09:11:23 +0100 Subject: [PATCH 5/9] Fix error type used for ProgramExternalConnectivity Get rid of "FIXME: Got an API for which error does not match any expected type!!! error="driver failed programming external connectivity on endpoint..." from debug logs. Signed-off-by: Rob Murray --- libnetwork/endpoint.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libnetwork/endpoint.go b/libnetwork/endpoint.go index bfba8948a1..60fa8aa767 100644 --- a/libnetwork/endpoint.go +++ b/libnetwork/endpoint.go @@ -624,10 +624,10 @@ func (ep *Endpoint) sbJoin(ctx context.Context, sb *Sandbox, options ...Endpoint } if !n.internal { log.G(ctx).Debugf("Programming external connectivity on endpoint %s (%s)", ep.Name(), ep.ID()) - if err := d.ProgramExternalConnectivity(ctx, n.ID(), ep.ID(), sb.Labels()); err != nil { - return types.InternalErrorf( + if err = d.ProgramExternalConnectivity(ctx, n.ID(), ep.ID(), sb.Labels()); err != nil { + return errdefs.System(fmt.Errorf( "driver failed programming external connectivity on endpoint %s (%s): %v", - ep.Name(), ep.ID(), err) + ep.Name(), ep.ID(), err)) } } } From f1e0746c08bf4daa3d9dba6dc6bf6ae928db7eaf Mon Sep 17 00:00:00 2001 From: Rob Murray Date: Wed, 10 Jul 2024 16:16:03 +0100 Subject: [PATCH 6/9] Tell RootlessKit about docker-proxy port mappings Before this change, when running rootless, instead of running docker-proxy the daemon would run rootlesskit-docker-proxy. The job of rootlesskit-docker-proxy was to tell RootlessKit about mapped host ports before starting docker-proxy, and then to remove the mapping when it was stopped. 
So, rootlesskit-docker-proxy would need to be kept in step with changes to docker-proxy (particularly the upcoming change to bind TCP/UDP ports in the daemon and pass them to the proxy, but also possible-future changes like running proxy per-container rather than per-port-mapping). This change runs the docker-proxy in rootless mode, instead of rootlesskit-docker-proxy, and the daemon itself tells RootlessKit about changes in host port mappings. Signed-off-by: Rob Murray --- daemon/config/config_linux.go | 39 ++--- daemon/daemon_unix.go | 1 + libnetwork/drivers/bridge/bridge_linux.go | 36 +++++ .../drivers/bridge/port_mapping_linux.go | 75 +++++++-- .../drivers/bridge/port_mapping_linux_test.go | 107 ++++++++++++- .../rlkclient/rootlesskit_client_linux.go | 149 ++++++++++++++++++ 6 files changed, 368 insertions(+), 39 deletions(-) create mode 100644 libnetwork/drivers/bridge/rlkclient/rootlesskit_client_linux.go diff --git a/daemon/config/config_linux.go b/daemon/config/config_linux.go index 07322aa09a..35c463794e 100644 --- a/daemon/config/config_linux.go +++ b/daemon/config/config_linux.go @@ -34,7 +34,6 @@ const ( StockRuntimeName = "runc" // userlandProxyBinary is the name of the userland-proxy binary. - // In rootless-mode, [rootless.RootlessKitDockerProxyBinary] is used instead. userlandProxyBinary = "docker-proxy" ) @@ -234,16 +233,25 @@ func setPlatformDefaults(cfg *Config) error { cfg.CgroupNamespaceMode = string(DefaultCgroupNamespaceMode) } + var err error + cfg.BridgeConfig.UserlandProxyPath, err = lookupBinPath(userlandProxyBinary) + if err != nil { + // Log, but don't error here. This allows running a daemon with + // userland-proxy disabled (which does not require the binary + // to be present). + // + // An error is still produced by [Config.ValidatePlatformConfig] if + // userland-proxy is enabled in the configuration.
+ // + // We log this at "debug" level, as this code is also executed + // when running "--version", and we don't want to print logs in + // that case.. + log.G(context.TODO()).WithError(err).Debug("failed to lookup default userland-proxy binary") + } + if rootless.RunningWithRootlessKit() { cfg.Rootless = true - var err error - // use rootlesskit-docker-proxy for exposing the ports in RootlessKit netns to the initial namespace. - cfg.BridgeConfig.UserlandProxyPath, err = lookupBinPath(rootless.RootlessKitDockerProxyBinary) - if err != nil { - return errors.Wrapf(err, "running with RootlessKit, but %s not installed", rootless.RootlessKitDockerProxyBinary) - } - dataHome, err := homedir.GetDataHome() if err != nil { return err @@ -257,21 +265,6 @@ func setPlatformDefaults(cfg *Config) error { cfg.ExecRoot = filepath.Join(runtimeDir, "docker") cfg.Pidfile = filepath.Join(runtimeDir, "docker.pid") } else { - var err error - cfg.BridgeConfig.UserlandProxyPath, err = lookupBinPath(userlandProxyBinary) - if err != nil { - // Log, but don't error here. This allows running a daemon with - // userland-proxy disabled (which does not require the binary - // to be present). - // - // An error is still produced by [Config.ValidatePlatformConfig] if - // userland-proxy is enabled in the configuration. - // - // We log this at "debug" level, as this code is also executed - // when running "--version", and we don't want to print logs in - // that case.. 
- log.G(context.TODO()).WithError(err).Debug("failed to lookup default userland-proxy binary") - } cfg.Root = "/var/lib/docker" cfg.ExecRoot = "/var/run/docker" cfg.Pidfile = "/var/run/docker.pid" diff --git a/daemon/daemon_unix.go b/daemon/daemon_unix.go index fc53019d86..10ab139245 100644 --- a/daemon/daemon_unix.go +++ b/daemon/daemon_unix.go @@ -915,6 +915,7 @@ func driverOptions(config *config.Config) nwconfig.Option { "EnableIP6Tables": config.BridgeConfig.EnableIP6Tables, "EnableUserlandProxy": config.BridgeConfig.EnableUserlandProxy, "UserlandProxyPath": config.BridgeConfig.UserlandProxyPath, + "Rootless": config.Rootless, }, }) } diff --git a/libnetwork/drivers/bridge/bridge_linux.go b/libnetwork/drivers/bridge/bridge_linux.go index d0ffa1a809..5c7ec6f2aa 100644 --- a/libnetwork/drivers/bridge/bridge_linux.go +++ b/libnetwork/drivers/bridge/bridge_linux.go @@ -14,6 +14,7 @@ import ( "github.com/docker/docker/errdefs" "github.com/docker/docker/libnetwork/datastore" "github.com/docker/docker/libnetwork/driverapi" + "github.com/docker/docker/libnetwork/drivers/bridge/rlkclient" "github.com/docker/docker/libnetwork/internal/netiputil" "github.com/docker/docker/libnetwork/iptables" "github.com/docker/docker/libnetwork/netlabel" @@ -56,6 +57,7 @@ type configuration struct { EnableIP6Tables bool EnableUserlandProxy bool UserlandProxyPath string + Rootless bool } // networkConfiguration for network specific configuration @@ -131,6 +133,14 @@ type bridgeNetwork struct { sync.Mutex } +type portDriverClient interface { + ChildHostIP(hostIP netip.Addr) netip.Addr + AddPort(ctx context.Context, proto string, hostIP, childIP netip.Addr, hostPort int) (func() error, error) +} + +// Allow unit tests to supply a dummy RootlessKit port driver client. 
+var newPortDriverClient = func() (portDriverClient, error) { return rlkclient.NewPortDriverClient() } + type driver struct { config configuration natChain *iptables.ChainInfo @@ -144,6 +154,7 @@ type driver struct { networks map[string]*bridgeNetwork store *datastore.Store nlh *netlink.Handle + portDriverClient portDriverClient configNetwork sync.Mutex sync.Mutex } @@ -414,6 +425,15 @@ func (n *bridgeNetwork) userlandProxyPath() string { return n.driver.userlandProxyPath() } +func (n *bridgeNetwork) getPortDriverClient() portDriverClient { + n.Lock() + defer n.Unlock() + if n.driver == nil { + return nil + } + return n.driver.getPortDriverClient() +} + func (n *bridgeNetwork) getEndpoint(eid string) (*bridgeEndpoint, error) { if eid == "" { return nil, InvalidEndpointIDError(eid) @@ -465,6 +485,7 @@ func (d *driver) configure(option map[string]interface{}) error { filterChainV6 *iptables.ChainInfo isolationChain1V6 *iptables.ChainInfo isolationChain2V6 *iptables.ChainInfo + pdc portDriverClient ) switch opt := option[netlabel.GenericData].(type) { @@ -537,6 +558,14 @@ func (d *driver) configure(option map[string]interface{}) error { } } + if config.EnableUserlandProxy && config.Rootless { + var err error + pdc, err = newPortDriverClient() + if err != nil { + return err + } + } + d.Lock() d.natChain = natChain d.filterChain = filterChain @@ -546,6 +575,7 @@ func (d *driver) configure(option map[string]interface{}) error { d.filterChainV6 = filterChainV6 d.isolationChain1V6 = isolationChain1V6 d.isolationChain2V6 = isolationChain2V6 + d.portDriverClient = pdc d.config = config d.Unlock() @@ -577,6 +607,12 @@ func (d *driver) userlandProxyPath() string { return "" } +func (d *driver) getPortDriverClient() portDriverClient { + d.Lock() + defer d.Unlock() + return d.portDriverClient +} + func parseNetworkGenericOptions(data interface{}) (*networkConfiguration, error) { var ( err error diff --git a/libnetwork/drivers/bridge/port_mapping_linux.go 
b/libnetwork/drivers/bridge/port_mapping_linux.go index e54ba233c8..d00ccd2343 100644 --- a/libnetwork/drivers/bridge/port_mapping_linux.go +++ b/libnetwork/drivers/bridge/port_mapping_linux.go @@ -23,12 +23,28 @@ import ( type portBinding struct { types.PortBinding + // childHostIP is the host IP address, as seen from the daemon. This + // is normally the same as PortBinding.HostIP but, in rootless mode, it + // will be an address in the rootless network namespace. RootlessKit + // binds the port on the real (parent) host address and maps it to the + // same port number on the address dockerd sees in the child namespace. + // So, for example, docker-proxy and DNAT rules need to use the child + // namespace's host address. (PortBinding.HostIP isn't replaced by the + // child address, because it's stored as user-config and the child + // address may change if RootlessKit is configured differently.) + childHostIP net.IP + // portDriverRemove is a function that will inform the RootlessKit + // port driver about removal of a port binding, or nil. + portDriverRemove func() error + // stopProxy is a function to stop the userland proxy for this binding, + // if a proxy has been started - else nil. stopProxy func() error } type portBindingReq struct { types.PortBinding - disableNAT bool + childHostIP net.IP + disableNAT bool } // addPortMappings takes cfg, the configuration for port mappings, selects host @@ -79,6 +95,7 @@ func (n *bridgeNetwork) addPortMappings( sortAndNormPBs(sortedCfg) proxyPath := n.userlandProxyPath() + pdc := n.getPortDriverClient() // toBind accumulates port bindings that should be allocated the same host port // (if required by NAT config). If the host address is unspecified, and defHostIP @@ -91,7 +108,7 @@ func (n *bridgeNetwork) addPortMappings( // bindings to collect, they're applied and toBind is reset. 
var toBind []portBindingReq for i, c := range sortedCfg { - if bindingIPv4, ok := configurePortBindingIPv4(disableNAT4, c, containerIPv4, defHostIP); ok { + if bindingIPv4, ok := configurePortBindingIPv4(pdc, disableNAT4, c, containerIPv4, defHostIP); ok { toBind = append(toBind, bindingIPv4) } @@ -107,7 +124,7 @@ func (n *bridgeNetwork) addPortMappings( if proxyPath != "" && (containerIPv6 == nil) { containerIP = containerIPv4 } - if bindingIPv6, ok := configurePortBindingIPv6(disableNAT6, c, containerIP, defHostIP); ok { + if bindingIPv6, ok := configurePortBindingIPv6(pdc, disableNAT6, c, containerIP, defHostIP); ok { toBind = append(toBind, bindingIPv6) } @@ -129,8 +146,24 @@ func (n *bridgeNetwork) addPortMappings( toBind = toBind[:0] } - for _, b := range bindings { - if err := n.setPerPortIptables(b, true); err != nil { + for i := range bindings { + if pdc != nil && bindings[i].HostPort != 0 { + var err error + b := &bindings[i] + hip, ok := netip.AddrFromSlice(b.HostIP) + if !ok { + return nil, fmt.Errorf("invalid host IP address in %s", b) + } + chip, ok := netip.AddrFromSlice(b.childHostIP) + if !ok { + return nil, fmt.Errorf("invalid child host IP address %s in %s", b.childHostIP, b) + } + b.portDriverRemove, err = pdc.AddPort(context.TODO(), b.Proto.String(), hip, chip, int(b.HostPort)) + if err != nil { + return nil, err + } + } + if err := n.setPerPortIptables(bindings[i], true); err != nil { return nil, err } } @@ -263,7 +296,7 @@ func needSamePort(a, b types.PortBinding) bool { // configurePortBindingIPv4 returns a new port binding with the HostIP field populated // if a binding is required, else nil. 
-func configurePortBindingIPv4(disableNAT bool, bnd types.PortBinding, containerIPv4, defHostIP net.IP) (portBindingReq, bool) { +func configurePortBindingIPv4(pdc portDriverClient, disableNAT bool, bnd types.PortBinding, containerIPv4, defHostIP net.IP) (portBindingReq, bool) { if len(containerIPv4) == 0 { return portBindingReq{}, false } @@ -282,15 +315,15 @@ func configurePortBindingIPv4(disableNAT bool, bnd types.PortBinding, containerI // Unmap the addresses if they're IPv4-mapped IPv6. bnd.HostIP = bnd.HostIP.To4() bnd.IP = containerIPv4.To4() - return portBindingReq{ + return setChildHostIP(pdc, portBindingReq{ PortBinding: bnd, disableNAT: disableNAT, - }, true + }), true } // configurePortBindingIPv6 returns a new port binding with the HostIP field populated // if a binding is required, else nil. -func configurePortBindingIPv6(disableNAT bool, bnd types.PortBinding, containerIP, defHostIP net.IP) (portBindingReq, bool) { +func configurePortBindingIPv6(pdc portDriverClient, disableNAT bool, bnd types.PortBinding, containerIP, defHostIP net.IP) (portBindingReq, bool) { if containerIP == nil { return portBindingReq{}, false } @@ -317,10 +350,20 @@ func configurePortBindingIPv6(disableNAT bool, bnd types.PortBinding, containerI } } bnd.IP = containerIP - return portBindingReq{ + return setChildHostIP(pdc, portBindingReq{ PortBinding: bnd, disableNAT: disableNAT, - }, true + }), true +} + +func setChildHostIP(pdc portDriverClient, req portBindingReq) portBindingReq { + if pdc == nil { + req.childHostIP = req.HostIP + return req + } + hip, _ := netip.AddrFromSlice(req.HostIP) + req.childHostIP = pdc.ChildHostIP(hip).AsSlice() + return req } // bindHostPorts allocates ports and starts docker-proxy for the given cfg. 
The @@ -410,7 +453,7 @@ func attemptBindHostPorts( if c.disableNAT { pb.HostPort = 0 } else { - pb.stopProxy, err = startProxy(c.Proto.String(), c.HostIP, port, c.IP, int(c.Port), proxyPath) + pb.stopProxy, err = startProxy(c.Proto.String(), c.childHostIP, port, c.IP, int(c.Port), proxyPath) if err != nil { return nil, fmt.Errorf("failed to bind port %s:%d/%s: %w", c.HostIP, port, c.Proto, err) } @@ -424,6 +467,7 @@ func attemptBindHostPorts( pb.HostPort = uint16(port) } pb.HostPortEnd = pb.HostPort + pb.childHostIP = c.childHostIP res = append(res, pb) } return res, nil @@ -442,7 +486,10 @@ func (n *bridgeNetwork) releasePorts(ep *bridgeEndpoint) error { func (n *bridgeNetwork) releasePortBindings(pbs []portBinding) error { var errs []error for _, pb := range pbs { - var errP error + var errPD, errP error + if pb.portDriverRemove != nil { + errPD = pb.portDriverRemove() + } if pb.stopProxy != nil { errP = pb.stopProxy() if errP != nil { @@ -456,7 +503,7 @@ func (n *bridgeNetwork) releasePortBindings(pbs []portBinding) error { if pb.HostPort > 0 { portallocator.Get().ReleasePort(pb.HostIP, pb.Proto.String(), int(pb.HostPort)) } - errs = append(errs, errP, errN) + errs = append(errs, errPD, errP, errN) } return errors.Join(errs...) 
} diff --git a/libnetwork/drivers/bridge/port_mapping_linux_test.go b/libnetwork/drivers/bridge/port_mapping_linux_test.go index e12e7d5c41..23972ebd61 100644 --- a/libnetwork/drivers/bridge/port_mapping_linux_test.go +++ b/libnetwork/drivers/bridge/port_mapping_linux_test.go @@ -5,6 +5,8 @@ import ( "errors" "fmt" "net" + "net/netip" + "strconv" "strings" "testing" @@ -420,6 +422,7 @@ func TestAddPortMappings(t *testing.T) { defHostIP net.IP proxyPath string busyPortIPv4 int + rootless bool expErr string expPBs []types.PortBinding @@ -720,6 +723,23 @@ func TestAddPortMappings(t *testing.T) { {Proto: types.TCP, IP: ctrIP6.IP, Port: 12345, HostIP: net.IPv6zero, HostPort: 12346}, }, }, + { + name: "rootless", + epAddrV4: ctrIP4, + epAddrV6: ctrIP6, + cfg: []types.PortBinding{ + {Proto: types.TCP, Port: 22}, + {Proto: types.TCP, Port: 80}, + }, + proxyPath: "/dummy/path/to/proxy", + rootless: true, + expPBs: []types.PortBinding{ + {Proto: types.TCP, IP: ctrIP4.IP, Port: 22, HostIP: net.IPv4zero, HostPort: firstEphemPort}, + {Proto: types.TCP, IP: ctrIP6.IP, Port: 22, HostIP: net.IPv6zero, HostPort: firstEphemPort}, + {Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: firstEphemPort + 1}, + {Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: net.IPv6zero, HostPort: firstEphemPort + 1}, + }, + }, } for _, tc := range testcases { @@ -756,6 +776,11 @@ func TestAddPortMappings(t *testing.T) { }, nil } + // Mock the RootlessKit port driver. 
+ origNewPortDriverClient := newPortDriverClient + defer func() { newPortDriverClient = origNewPortDriverClient }() + newPortDriverClient = func() (portDriverClient, error) { return newMockPortDriverClient() } + n := &bridgeNetwork{ config: &networkConfiguration{ BridgeName: "dummybridge", @@ -771,11 +796,23 @@ func TestAddPortMappings(t *testing.T) { EnableIP6Tables: true, EnableUserlandProxy: tc.proxyPath != "", UserlandProxyPath: tc.proxyPath, + Rootless: tc.rootless, }, } err := n.driver.configure(genericOption) assert.NilError(t, err) + assert.Check(t, is.Equal(n.driver.portDriverClient == nil, !tc.rootless)) + expChildIP := func(hostIP net.IP) net.IP { + if !tc.rootless { + return hostIP + } + if hostIP.To4() == nil { + return net.ParseIP("::1") + } + return net.ParseIP("127.0.0.1") + } + err = portallocator.Get().ReleaseAll() assert.NilError(t, err) @@ -852,16 +889,37 @@ func TestAddPortMappings(t *testing.T) { // Check a docker-proxy was started and stopped for each expected port binding. expProxies := map[proxyCall]bool{} for _, expPB := range tc.expPBs { - is4 := expPB.HostIP.To4() != nil + hip := expChildIP(expPB.HostIP) + is4 := hip.To4() != nil if (is4 && tc.gwMode4.natDisabled()) || (!is4 && tc.gwMode6.natDisabled()) { continue } p := newProxyCall(expPB.Proto.String(), - expPB.HostIP, int(expPB.HostPort), + hip, int(expPB.HostPort), expPB.IP, int(expPB.Port), tc.proxyPath) expProxies[p] = tc.expReleaseErr != "" } assert.Check(t, is.DeepEqual(expProxies, proxies)) + + // Check the port driver has seen the expected port mappings and no others, + // and that they have all been closed. 
+ if n.driver.portDriverClient != nil { + pdc := n.driver.portDriverClient.(*mockPortDriverClient) + expPorts := map[mockPortDriverPort]bool{} + for _, expPB := range tc.expPBs { + if expPB.HostPort == 0 { + continue + } + pdp := mockPortDriverPort{ + proto: expPB.Proto.String(), + hostIP: expPB.HostIP.String(), + childIP: expChildIP(expPB.HostIP).String(), + hostPort: int(expPB.HostPort), + } + expPorts[pdp] = false + } + assert.Check(t, is.DeepEqual(pdc.openPorts, expPorts)) + } }) } } @@ -881,3 +939,48 @@ func newProxyCall(proto string, proxyPath: proxyPath, } } + +// Types for tracking calls to the port driver client (mock for RootlessKit client). + +type mockPortDriverPort struct { + proto string + hostIP string + childIP string + hostPort int +} + +func (p mockPortDriverPort) String() string { + return p.hostIP + ":" + strconv.Itoa(p.hostPort) + "/" + p.proto +} + +type mockPortDriverClient struct { + openPorts map[mockPortDriverPort]bool +} + +func newMockPortDriverClient() (*mockPortDriverClient, error) { + return &mockPortDriverClient{ + openPorts: map[mockPortDriverPort]bool{}, + }, nil +} + +func (c *mockPortDriverClient) ChildHostIP(hostIP netip.Addr) netip.Addr { + if hostIP.Is6() { + return netip.IPv6Loopback() + } + return netip.MustParseAddr("127.0.0.1") +} + +func (c *mockPortDriverClient) AddPort(_ context.Context, proto string, hostIP, childIP netip.Addr, hostPort int) (func() error, error) { + key := mockPortDriverPort{proto: proto, hostIP: hostIP.String(), childIP: childIP.String(), hostPort: hostPort} + if _, exists := c.openPorts[key]; exists { + return nil, fmt.Errorf("mockPortDriverClient: port %s is already open", key) + } + c.openPorts[key] = true + return func() error { + if !c.openPorts[key] { + return fmt.Errorf("mockPortDriverClient: port %s is not open", key) + } + c.openPorts[key] = false + return nil + }, nil +} diff --git a/libnetwork/drivers/bridge/rlkclient/rootlesskit_client_linux.go 
b/libnetwork/drivers/bridge/rlkclient/rootlesskit_client_linux.go new file mode 100644 index 0000000000..4b92d17345 --- /dev/null +++ b/libnetwork/drivers/bridge/rlkclient/rootlesskit_client_linux.go @@ -0,0 +1,149 @@ +// RootlessKit integration - if required by RootlessKit's port driver, let it know +// about port mappings as they're added and removed. +// +// This is based on / copied from rootlesskit-docker-proxy, which was previously +// installed as a proxy for docker-proxy: +// https://github.com/rootless-containers/rootlesskit/blob/4fb2e2cb80bf13eb28b7f2a4317b63406b89ad32/cmd/rootlesskit-docker-proxy/main.go + +package rlkclient + +import ( + "context" + "fmt" + "net" + "net/netip" + "os" + "path/filepath" + "strconv" + "strings" + + "github.com/pkg/errors" + "github.com/rootless-containers/rootlesskit/v2/pkg/api/client" + "github.com/rootless-containers/rootlesskit/v2/pkg/port" +) + +type PortDriverClient struct { + client client.Client + portDriverName string + protos map[string]struct{} + childIP netip.Addr +} + +func NewPortDriverClient() (*PortDriverClient, error) { + stateDir := os.Getenv("ROOTLESSKIT_STATE_DIR") + if stateDir == "" { + return nil, errors.New("$ROOTLESSKIT_STATE_DIR needs to be set") + } + socketPath := filepath.Join(stateDir, "api.sock") + c, err := client.New(socketPath) + if err != nil { + return nil, fmt.Errorf("error while connecting to RootlessKit API socket: %w", err) + } + + info, err := c.Info(context.Background()) + if err != nil { + return nil, fmt.Errorf("failed to call info API, probably RootlessKit binary is too old (needs to be v0.14.0 or later): %w", err) + } + + // info.PortDriver is currently nil for "none" and "implicit", but this may change in future + if info.PortDriver == nil || info.PortDriver.Driver == "none" || info.PortDriver.Driver == "implicit" { + return nil, nil + } + + pdc := &PortDriverClient{ + client: c, + portDriverName: info.PortDriver.Driver, + } + + if info.PortDriver.DisallowLoopbackChildIP { + // 
i.e., port-driver="slirp4netns" + if info.NetworkDriver.ChildIP == nil { + return nil, fmt.Errorf("RootlessKit port driver (%q) does not allow loopback child IP, but network driver (%q) has no non-loopback IP", + info.PortDriver.Driver, info.NetworkDriver.Driver) + } + childIP, ok := netip.AddrFromSlice(info.NetworkDriver.ChildIP) + if !ok { + return nil, fmt.Errorf("unable to use child IP %s from network driver (%q)", + info.NetworkDriver.ChildIP, info.NetworkDriver.Driver) + } + pdc.childIP = childIP + } + + pdc.protos = make(map[string]struct{}, len(info.PortDriver.Protos)) + for _, p := range info.PortDriver.Protos { + pdc.protos[p] = struct{}{} + } + + return pdc, nil +} + +// ChildHostIP returns the address that must be used in the child network +// namespace in place of hostIP, a host IP address. In particular, port +// mappings from host IP addresses, and DNAT rules, must use this child +// address in place of the real host address. +func (c *PortDriverClient) ChildHostIP(hostIP netip.Addr) netip.Addr { + if c.childIP.IsValid() { + return c.childIP + } + if hostIP.Is6() { + return netip.IPv6Loopback() + } + return netip.MustParseAddr("127.0.0.1") +} + +// AddPort makes a request to RootlessKit asking it to set up a port +// mapping between a host IP address and a child host IP address. 
+func (c *PortDriverClient) AddPort( + ctx context.Context, + proto string, + hostIP netip.Addr, + childIP netip.Addr, + hostPort int, +) (func() error, error) { // proto is like "tcp", but we need to convert it to "tcp4" or "tcp6" explicitly + // for libnetwork >= 20201216 + // + // See https://github.com/moby/libnetwork/pull/2604/files#diff-8fa48beed55dd033bf8e4f8c40b31cf69d0b2cc5d4bb53cde8594670ea6c938aR20 + // See also https://github.com/rootless-containers/rootlesskit/issues/231 + apiProto := proto + if !strings.HasSuffix(apiProto, "4") && !strings.HasSuffix(apiProto, "6") { + if hostIP.Is6() { + apiProto += "6" + } else { + apiProto += "4" + } + } + + if _, ok := c.protos[apiProto]; !ok { + // This happens when apiProto="tcp6", portDriverName="slirp4netns", + // because "slirp4netns" port driver does not support listening on IPv6 yet. + // + // Note that "slirp4netns" port driver is not used by default, + // even when network driver is set to "slirp4netns". + // + // Most users are using "builtin" port driver and will not see this warning.
+ return nil, fmt.Errorf("protocol %q is not supported by the RootlessKit port driver %q, discarding request for %q", + proto, + c.portDriverName, + net.JoinHostPort(hostIP.String(), strconv.Itoa(hostPort))) + } + + pm := c.client.PortManager() + p := port.Spec{ + Proto: apiProto, + ParentIP: hostIP.String(), + ParentPort: hostPort, + ChildIP: childIP.String(), + ChildPort: hostPort, + } + st, err := pm.AddPort(ctx, p) + if err != nil { + return nil, fmt.Errorf("error while calling RootlessKit PortManager.AddPort(): %w", err) + } + deferFunc := func() error { + if dErr := pm.RemovePort(ctx, st.ID); dErr != nil { + return fmt.Errorf("error while calling RootlessKit PortManager.RemovePort(): %w", dErr) + } + return nil + } + return deferFunc, nil +} From dac7ffa3404138a4f291c16586e5a2c68dad4151 Mon Sep 17 00:00:00 2001 From: Rob Murray Date: Wed, 10 Jul 2024 17:42:38 +0100 Subject: [PATCH 7/9] Remove rootlesskit-docker-proxy It's not needed, now the daemon tells RootlessKit about port mappings directly.
Signed-off-by: Rob Murray --- Dockerfile | 2 -- hack/dockerfile/install/rootlesskit.installer | 4 +--- hack/make/binary-daemon | 2 +- hack/make/install-binary | 1 - pkg/rootless/rootless.go | 3 --- 5 files changed, 2 insertions(+), 10 deletions(-) diff --git a/Dockerfile b/Dockerfile index ad277300a6..b9abb945ab 100644 --- a/Dockerfile +++ b/Dockerfile @@ -377,8 +377,6 @@ RUN --mount=from=rootlesskit-src,src=/usr/src/rootlesskit,rw \ export CGO_ENABLED=$([ "$DOCKER_STATIC" = "1" ] && echo "0" || echo "1") xx-go build -o /build/rootlesskit -ldflags="$([ "$DOCKER_STATIC" != "1" ] && echo "-linkmode=external")" ./cmd/rootlesskit xx-verify $([ "$DOCKER_STATIC" = "1" ] && echo "--static") /build/rootlesskit - xx-go build -o /build/rootlesskit-docker-proxy -ldflags="$([ "$DOCKER_STATIC" != "1" ] && echo "-linkmode=external")" ./cmd/rootlesskit-docker-proxy - xx-verify $([ "$DOCKER_STATIC" = "1" ] && echo "--static") /build/rootlesskit-docker-proxy EOT COPY --link ./contrib/dockerd-rootless.sh /build/ COPY --link ./contrib/dockerd-rootless-setuptool.sh /build/ diff --git a/hack/dockerfile/install/rootlesskit.installer b/hack/dockerfile/install/rootlesskit.installer index de6846ae17..27a0cf662f 100755 --- a/hack/dockerfile/install/rootlesskit.installer +++ b/hack/dockerfile/install/rootlesskit.installer @@ -27,7 +27,5 @@ install_rootlesskit_dynamic() { _install_rootlesskit() ( echo "Install rootlesskit version ${ROOTLESSKIT_VERSION}" - for f in rootlesskit rootlesskit-docker-proxy; do - GOBIN="${PREFIX}" GO111MODULE=on go install ${BUILD_MODE} -ldflags="$ROOTLESSKIT_LDFLAGS" "github.com/rootless-containers/rootlesskit/v2/cmd/${f}@${ROOTLESSKIT_VERSION}" - done + GOBIN="${PREFIX}" GO111MODULE=on go install ${BUILD_MODE} -ldflags="$ROOTLESSKIT_LDFLAGS" "github.com/rootless-containers/rootlesskit/v2/cmd/rootlesskit@${ROOTLESSKIT_VERSION}" ) diff --git a/hack/make/binary-daemon b/hack/make/binary-daemon index 3d775fee41..45f4a76544 100644 --- a/hack/make/binary-daemon +++ 
b/hack/make/binary-daemon @@ -14,7 +14,7 @@ copy_binaries() { return fi echo "Copying nested executables into $dir" - for file in containerd containerd-shim-runc-v2 ctr runc docker-init rootlesskit rootlesskit-docker-proxy dockerd-rootless.sh dockerd-rootless-setuptool.sh; do + for file in containerd containerd-shim-runc-v2 ctr runc docker-init rootlesskit dockerd-rootless.sh dockerd-rootless-setuptool.sh; do cp -f "$(command -v "$file")" "$dir/" done # vpnkit might not be available for the target platform, see vpnkit stage in diff --git a/hack/make/install-binary b/hack/make/install-binary index 96e7cc0a39..1b20ac46d6 100644 --- a/hack/make/install-binary +++ b/hack/make/install-binary @@ -15,7 +15,6 @@ source "${MAKEDIR}/.install" install_binary "${DEST}/docker-proxy" install_binary "${DEST}/docker-init" install_binary "${DEST}/rootlesskit" - install_binary "${DEST}/rootlesskit-docker-proxy" install_binary "${DEST}/dockerd-rootless.sh" install_binary "${DEST}/dockerd-rootless-setuptool.sh" if [ -f "${DEST}/vpnkit" ]; then diff --git a/pkg/rootless/rootless.go b/pkg/rootless/rootless.go index b52f8eee71..1251b48817 100644 --- a/pkg/rootless/rootless.go +++ b/pkg/rootless/rootless.go @@ -2,9 +2,6 @@ package rootless // import "github.com/docker/docker/pkg/rootless" import "os" -// RootlessKitDockerProxyBinary is the binary name of rootlesskit-docker-proxy -const RootlessKitDockerProxyBinary = "rootlesskit-docker-proxy" - // RunningWithRootlessKit returns true if running under RootlessKit namespaces. func RunningWithRootlessKit() bool { return os.Getenv("ROOTLESSKIT_STATE_DIR") != "" From b3fabedecc0f408f674972d87803f41faedf4de8 Mon Sep 17 00:00:00 2001 From: Rob Murray Date: Mon, 8 Jul 2024 13:50:40 +0100 Subject: [PATCH 8/9] Create docker-proxy TCP/UDP listener sockets in the daemon Before commit 4f09af6, when allocating host ports for a new port mapping, iptables rules were set up then docker-proxy was started. 
If the host port was already in use, docker-proxy exited with an error, and the iptables rules were removed. That could potentially interfere with a non-docker service that was already using the host port for something unrelated. Commit 4f09af6 swapped that problem for a different one... in order to check that a port was available before creating iptables rules, it attempted to start docker-proxy first. If it failed, it could then try a different host port, without interfering with any other service. The problem with that is docker-proxy would start listening before the iptables rules were in place, so it could accept connections then become unusable because new NAT rules diverted packets directly to the container. This would leave the client with a broken connection, causing at least a delay while it figured that out and reconnected. This change creates and binds the socket in the daemon, before creating iptables rules. If the bind fails, it may try a different port. When or if the bind succeeds, iptables rules are created, then the daemon calls listen on the socket. If docker-proxy is needed, the socket is handed over to it at that point. In rootless mode, the ports have to be bound to an address in the rootless network namespace (where dockerd is running). DNAT rules now use the same address. If docker-proxy is not needed ("--userland-proxy=false"), the daemon still listens on TCP sockets as the old dummyProxy would have done. This makes the socket show up in "netstat" output. The dummyProxy is no longer needed on Linux. Its job was to bind the host ports if docker-proxy was disabled, but that's now already handled by binding the sockets early. This change doesn't affect SCTP, because it's not currently possible for docker-proxy to convert the file descriptor into an SCTPListener. So, docker-proxy is still started early, and the window for lost connections remains.
If the user has an old docker-proxy in their path and it's given a listener socket with '-use-listen-fd', it'll fail because of the unknown option. In this case, the daemon's error message suggests checking $PATH. Signed-off-by: Rob Murray --- integration-cli/docker_cli_daemon_test.go | 2 +- .../drivers/bridge/port_mapping_linux.go | 301 +++++++++++++++--- .../drivers/bridge/port_mapping_linux_test.go | 156 +++++---- .../rlkclient/rootlesskit_client_linux.go | 3 +- libnetwork/portmapper/proxy_linux.go | 128 ++++---- .../portmapper/{proxy.go => proxy_windows.go} | 0 6 files changed, 422 insertions(+), 168 deletions(-) rename libnetwork/portmapper/{proxy.go => proxy_windows.go} (100%) diff --git a/integration-cli/docker_cli_daemon_test.go b/integration-cli/docker_cli_daemon_test.go index 683dfcfbed..f7ddfe7d2e 100644 --- a/integration-cli/docker_cli_daemon_test.go +++ b/integration-cli/docker_cli_daemon_test.go @@ -704,7 +704,7 @@ func (s *DockerDaemonSuite) TestDaemonIP(c *testing.T) { out, err := d.Cmd("run", "-d", "-p", "8000:8000", "busybox", "top") assert.Assert(c, err != nil, "Running a container must fail with an invalid --ip option") - assert.Equal(c, strings.Contains(out, "Error starting userland proxy"), true) + assert.Check(c, is.Contains(out, "failed to bind host port for 192.170.1.1")) ifName := "dummy" createInterface(c, "dummy", ifName, ipStr) diff --git a/libnetwork/drivers/bridge/port_mapping_linux.go b/libnetwork/drivers/bridge/port_mapping_linux.go index d00ccd2343..95b3db45da 100644 --- a/libnetwork/drivers/bridge/port_mapping_linux.go +++ b/libnetwork/drivers/bridge/port_mapping_linux.go @@ -12,6 +12,8 @@ import ( "os" "slices" "strconv" + "syscall" + "unsafe" "github.com/containerd/log" "github.com/docker/docker/libnetwork/iptables" @@ -19,10 +21,15 @@ import ( "github.com/docker/docker/libnetwork/portallocator" "github.com/docker/docker/libnetwork/portmapper" "github.com/docker/docker/libnetwork/types" + "github.com/ishidawataru/sctp" ) type
portBinding struct { types.PortBinding + // boundSocket is used to reserve a host port for the binding. If the + // userland proxy is in-use, it's passed to the proxy when the proxy is + // started, then it's closed and set to nil here. + boundSocket *os.File // childHostIP is the host IP address, as seen from the daemon. This // is normally the same as PortBinding.HostIP but, in rootless mode, it // will be an address in the rootless network namespace. RootlessKit @@ -41,21 +48,33 @@ type portBinding struct { stopProxy func() error } +// childPortBinding is pb.PortBinding, with the host address the daemon +// will see - which, in rootless mode, will be an address in the RootlessKit's +// child namespace (see portBinding.childHostIP). +func (pb portBinding) childPortBinding() types.PortBinding { + res := pb.PortBinding + res.HostIP = pb.childHostIP + return res +} + type portBindingReq struct { types.PortBinding childHostIP net.IP disableNAT bool } +// Allow unit tests to supply a dummy StartProxy. +var startProxy = portmapper.StartProxy + // addPortMappings takes cfg, the configuration for port mappings, selects host -// ports when ranges are given, starts docker-proxy or its dummy to reserve -// host ports, and sets up iptables NAT/forwarding rules as necessary. If -// anything goes wrong, it will undo any work it's done and return an error. -// Otherwise, the returned slice of portBinding has an entry per address -// family (if cfg describes a mapping for 'any' host address, it's expanded -// into mappings for IPv4 and IPv6, because that's how the mapping is presented -// in 'inspect'). HostPort and HostPortEnd in each returned portBinding are set -// to the selected and reserved port. +// ports when ranges are given, binds host ports to check they're available and +// reserve them, starts docker-proxy if required, and sets up iptables +// NAT/forwarding rules as necessary. If anything goes wrong, it will undo any +// work it's done and return an error. 
Otherwise, the returned slice of +// portBinding has an entry per address family (if cfg describes a mapping for +// 'any' host address, it's expanded into mappings for IPv4 and IPv6, because +// that's how the mapping is presented in 'inspect'). HostPort and HostPortEnd in +// each returned portBinding are set to the selected and reserved port. func (n *bridgeNetwork) addPortMappings( epAddrV4, epAddrV6 *net.IPNet, cfg []types.PortBinding, @@ -134,15 +153,14 @@ func (n *bridgeNetwork) addPortMappings( continue } - // Allocate a host port, and reserve it by starting docker-proxy for each host - // address in toBind. + // Allocate and bind a host port. newB, err := bindHostPorts(toBind, proxyPath) if err != nil { return nil, err } bindings = append(bindings, newB...) - // Reset the collection of bindings now they're bound. + // Reset toBind now the ports are bound. toBind = toBind[:0] } @@ -168,6 +186,53 @@ func (n *bridgeNetwork) addPortMappings( } } + // Now the iptables rules are set up, it's safe to start the userland proxy. + // (If it was started before the iptables rules were created, it may have + // accepted a connection, then become unreachable due to NAT rules sending + // packets directly to the container.) + // If not starting the proxy, nothing will ever accept a connection on the + // socket. But, listen anyway so that the binding shows up in "netstat -at". 
+ somaxconn := 0 + if proxyPath != "" { + somaxconn = -1 // silently capped to "/proc/sys/net/core/somaxconn" + } + for i := range bindings { + if bindings[i].boundSocket == nil { + continue + } + if bindings[i].Proto == types.TCP { + rc, err := bindings[i].boundSocket.SyscallConn() + if err != nil { + return nil, fmt.Errorf("raw conn not available on TCP socket: %w", err) + } + if errC := rc.Control(func(fd uintptr) { + err = syscall.Listen(int(fd), somaxconn) + }); errC != nil { + return nil, fmt.Errorf("failed to Control TCP socket: %w", err) + } + if err != nil { + return nil, fmt.Errorf("failed to listen on TCP socket: %w", err) + } + } + if proxyPath != "" { + var err error + bindings[i].stopProxy, err = startProxy( + bindings[i].childPortBinding(), proxyPath, bindings[i].boundSocket, + ) + if err != nil { + return nil, fmt.Errorf("failed to start userland proxy for port mapping %s: %w", + bindings[i].PortBinding, err) + } + if err := bindings[i].boundSocket.Close(); err != nil { + log.G(context.TODO()).WithFields(log.Fields{ + "error": err, + "mapping": bindings[i].PortBinding, + }).Warnf("failed to close proxy socket") + } + bindings[i].boundSocket = nil + } + } + return bindings, nil } @@ -366,7 +431,7 @@ func setChildHostIP(pdc portDriverClient, req portBindingReq) portBindingReq { return req } -// bindHostPorts allocates ports and starts docker-proxy for the given cfg. The +// bindHostPorts allocates and binds host ports for the given cfg. The // caller is responsible for ensuring that all entries in cfg map the same proto, // container port, and host port range (their host addresses must differ). func bindHostPorts(cfg []portBindingReq, proxyPath string) ([]portBinding, error) { @@ -401,17 +466,13 @@ func bindHostPorts(cfg []portBindingReq, proxyPath string) ([]portBinding, error return nil, err } -// Allow unit tests to supply a dummy StartProxy. 
-var startProxy = portmapper.StartProxy - // attemptBindHostPorts allocates host ports for each port mapping that requires -// one, and reserves those ports by starting docker-proxy. +// one, and reserves those ports by binding them. // // If the allocator doesn't have an available port in the required range, or the -// docker-proxy process doesn't start (perhaps because another process has -// already bound the port), all resources are released and an error is returned. -// When ports are successfully reserved, a portBinding is returned for each -// mapping. +// port can't be bound (perhaps because another process has already bound it), +// all resources are released and an error is returned. When ports are +// successfully reserved, a portBinding is returned for each mapping. // // If NAT is disabled for any of the bindings, no host port reservation is // needed. These bindings are included in results, as the container port itself @@ -428,7 +489,7 @@ func attemptBindHostPorts( addrs := make([]net.IP, 0, len(cfg)) for _, c := range cfg { if !c.disableNAT { - addrs = append(addrs, c.HostIP) + addrs = append(addrs, c.childHostIP) } } @@ -448,31 +509,177 @@ func attemptBindHostPorts( } res := make([]portBinding, 0, len(cfg)) - for _, c := range cfg { - pb := portBinding{PortBinding: c.GetCopy()} - if c.disableNAT { - pb.HostPort = 0 - } else { - pb.stopProxy, err = startProxy(c.Proto.String(), c.childHostIP, port, c.IP, int(c.Port), proxyPath) - if err != nil { - return nil, fmt.Errorf("failed to bind port %s:%d/%s: %w", c.HostIP, port, c.Proto, err) - } - defer func() { - if retErr != nil { - if err := pb.stopProxy(); err != nil { - log.G(context.TODO()).Warnf("Failed to stop userland proxy for port mapping %s: %s", pb, err) + defer func() { + if retErr != nil { + for _, pb := range res { + if pb.boundSocket != nil { + if err := pb.boundSocket.Close(); err != nil { + log.G(context.TODO()).Warnf("Failed to close port binding for %s: %s", pb, err) } } - }() - 
pb.HostPort = uint16(port) + // TODO(robmry) - this is only needed because the userland proxy may have + // been started for SCTP. If a bound socket is passed to the proxy after + // iptables rules have been configured (as it is for TCP/UDP), remove this. + if pb.stopProxy != nil { + if err := pb.stopProxy(); err != nil { + log.G(context.TODO()).Warnf("Failed to stop proxy for %s: %s", pb, err) + } + } + } + } + }() + + for _, c := range cfg { + var pb portBinding + if c.disableNAT { + pb = portBinding{PortBinding: c.GetCopy()} + pb.HostPort = 0 + pb.HostPortEnd = 0 + } else { + switch proto { + case "tcp": + pb, err = bindTCPOrUDP(c, port, syscall.SOCK_STREAM, syscall.IPPROTO_TCP) + case "udp": + pb, err = bindTCPOrUDP(c, port, syscall.SOCK_DGRAM, syscall.IPPROTO_UDP) + case "sctp": + if proxyPath == "" { + pb, err = bindSCTP(c, port) + } else { + // TODO(robmry) - it's not currently possible to pass a bound SCTP port + // to the userland proxy, because the proxy is not able to convert the + // file descriptor into an sctp.SCTPListener (fd is an unexported member + // of the struct, and ListenSCTP is the only constructor). + // So, it is possible for the proxy to start listening and accept + // connections before iptables rules are created that would bypass + // the proxy for external connections. + // Remove this and pb.stopProxy() from the cleanup function above if + // this is fixed. 
+ pb, err = startSCTPProxy(c, port, proxyPath) + } + default: + return nil, fmt.Errorf("Unknown addr type: %s", proto) + } + if err != nil { + return nil, err + } } - pb.HostPortEnd = pb.HostPort - pb.childHostIP = c.childHostIP res = append(res, pb) } return res, nil } +func bindTCPOrUDP(cfg portBindingReq, port, typ, proto int) (_ portBinding, retErr error) { + pb := portBinding{PortBinding: cfg.PortBinding.GetCopy()} + pb.HostPort = uint16(port) + pb.HostPortEnd = pb.HostPort + pb.childHostIP = cfg.childHostIP + + var domain int + var sa syscall.Sockaddr + if hip := cfg.childHostIP.To4(); hip != nil { + domain = syscall.AF_INET + sa4 := syscall.SockaddrInet4{Port: port} + copy(sa4.Addr[:], hip) + sa = &sa4 + } else { + domain = syscall.AF_INET6 + sa6 := syscall.SockaddrInet6{Port: port} + copy(sa6.Addr[:], cfg.childHostIP) + sa = &sa6 + } + + sd, err := syscall.Socket(domain, typ|syscall.SOCK_CLOEXEC, proto) + if err != nil { + return portBinding{}, fmt.Errorf("failed to create socket for userland proxy for %s: %w", cfg, err) + } + defer func() { + if retErr != nil { + syscall.Close(sd) + } + }() + + if domain == syscall.AF_INET6 { + syscall.SetsockoptInt(sd, syscall.IPPROTO_IPV6, syscall.IPV6_V6ONLY, 1) + } + if err := syscall.Bind(sd, sa); err != nil { + if cfg.HostPort == cfg.HostPortEnd { + return portBinding{}, fmt.Errorf("failed to bind host port for %s: %w", cfg, err) + } + return portBinding{}, fmt.Errorf("failed to bind host port %d for %s: %w", port, cfg, err) + } + + pb.boundSocket = os.NewFile(uintptr(sd), "listener") + if pb.boundSocket == nil { + return portBinding{}, fmt.Errorf("failed to convert socket for userland proxy for %s", cfg) + } + return pb, nil +} + +// bindSCTP is based on sctp.ListenSCTP. The socket is created and bound, but +// does not start listening. 
+func bindSCTP(cfg portBindingReq, port int) (_ portBinding, retErr error) { + pb := portBinding{PortBinding: cfg.GetCopy()} + pb.HostPort = uint16(port) + pb.HostPortEnd = pb.HostPort + pb.childHostIP = cfg.childHostIP + + domain := syscall.AF_INET + if cfg.childHostIP.To4() == nil { + domain = syscall.AF_INET6 + } + + sd, err := syscall.Socket(domain, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, syscall.IPPROTO_SCTP) + if err != nil { + return portBinding{}, fmt.Errorf("failed to create socket for userland proxy for %s: %w", cfg, err) + } + defer func() { + if retErr != nil { + syscall.Close(sd) + } + }() + + if domain == syscall.AF_INET6 { + syscall.SetsockoptInt(sd, syscall.IPPROTO_IPV6, syscall.IPV6_V6ONLY, 1) + } + + options := sctp.InitMsg{NumOstreams: sctp.SCTP_MAX_STREAM} + if _, _, errno := syscall.Syscall6(syscall.SYS_SETSOCKOPT, + uintptr(sd), + sctp.SOL_SCTP, + sctp.SCTP_INITMSG, + uintptr(unsafe.Pointer(&options)), + unsafe.Sizeof(options), + 0); errno != 0 { + return portBinding{}, errno + } + + if err := sctp.SCTPBind(sd, + &sctp.SCTPAddr{IPAddrs: []net.IPAddr{{IP: cfg.childHostIP}}, Port: int(cfg.HostPort)}, + sctp.SCTP_BINDX_ADD_ADDR); err != nil { + return portBinding{}, fmt.Errorf("failed to bind socket for userland proxy for %s: %w", cfg, err) + } + + pb.boundSocket = os.NewFile(uintptr(sd), "listener") + if pb.boundSocket == nil { + return portBinding{}, fmt.Errorf("failed to convert socket for userland proxy for %s", cfg) + } + return pb, nil +} + +func startSCTPProxy(cfg portBindingReq, port int, proxyPath string) (_ portBinding, retErr error) { + pb := portBinding{PortBinding: cfg.GetCopy()} + pb.HostPort = uint16(port) + pb.HostPortEnd = pb.HostPort + pb.childHostIP = cfg.childHostIP + + var err error + pb.stopProxy, err = startProxy(pb.childPortBinding(), proxyPath, nil) + if err != nil { + return portBinding{}, err + } + return pb, nil +} + // releasePorts attempts to release all port bindings, does not stop on failure func (n 
*bridgeNetwork) releasePorts(ep *bridgeEndpoint) error { n.Lock() @@ -486,14 +693,20 @@ func (n *bridgeNetwork) releasePorts(ep *bridgeEndpoint) error { func (n *bridgeNetwork) releasePortBindings(pbs []portBinding) error { var errs []error for _, pb := range pbs { - var errPD, errP error + var errS, errPD, errP error + if pb.boundSocket != nil { + errS = pb.boundSocket.Close() + if errS != nil { + errS = fmt.Errorf("failed to close socket for port mapping %s: %w", pb, errS) + } + } if pb.portDriverRemove != nil { errPD = pb.portDriverRemove() } if pb.stopProxy != nil { errP = pb.stopProxy() if errP != nil { - errP = fmt.Errorf("failed to stop docker-proxy for port mapping %s: %w", pb, errP) + errP = fmt.Errorf("failed to stop userland proxy for port mapping %s: %w", pb, errP) } } errN := n.setPerPortIptables(pb, false) @@ -501,9 +714,9 @@ func (n *bridgeNetwork) releasePortBindings(pbs []portBinding) error { errN = fmt.Errorf("failed to remove iptables rules for port mapping %s: %w", pb, errN) } if pb.HostPort > 0 { - portallocator.Get().ReleasePort(pb.HostIP, pb.Proto.String(), int(pb.HostPort)) + portallocator.Get().ReleasePort(pb.childHostIP, pb.Proto.String(), int(pb.HostPort)) } - errs = append(errs, errPD, errP, errN) + errs = append(errs, errS, errPD, errP, errN) } return errors.Join(errs...) } @@ -545,8 +758,8 @@ func setPerPortNAT(b portBinding, ipv iptables.IPVersion, proxyPath string, brid // want "0.0.0.0/0". "0/0" is correctly interpreted as "any // value" by both iptables and ip6tables. 
hostIP := "0/0" - if !b.HostIP.IsUnspecified() { - hostIP = b.HostIP.String() + if !b.childHostIP.IsUnspecified() { + hostIP = b.childHostIP.String() } args := []string{ "-p", b.Proto.String(), diff --git a/libnetwork/drivers/bridge/port_mapping_linux_test.go b/libnetwork/drivers/bridge/port_mapping_linux_test.go index 23972ebd61..684bece84b 100644 --- a/libnetwork/drivers/bridge/port_mapping_linux_test.go +++ b/libnetwork/drivers/bridge/port_mapping_linux_test.go @@ -6,8 +6,10 @@ import ( "fmt" "net" "net/netip" + "os" "strconv" "strings" + "syscall" "testing" "github.com/docker/docker/internal/testutils/netnsutils" @@ -16,6 +18,7 @@ import ( "github.com/docker/docker/libnetwork/ns" "github.com/docker/docker/libnetwork/portallocator" "github.com/docker/docker/libnetwork/types" + "github.com/vishvananda/netlink" "gotest.tools/v3/assert" is "gotest.tools/v3/assert/cmp" ) @@ -423,6 +426,7 @@ func TestAddPortMappings(t *testing.T) { proxyPath string busyPortIPv4 int rootless bool + hostAddrs []string expErr string expPBs []types.PortBinding @@ -441,6 +445,7 @@ func TestAddPortMappings(t *testing.T) { {Proto: types.TCP, Port: 22}, {Proto: types.TCP, Port: 80}, }, + proxyPath: "/dummy/path/to/proxy", expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP4.IP, Port: 22, HostIP: net.IPv4zero, HostPort: firstEphemPort}, {Proto: types.TCP, IP: ctrIP6.IP, Port: 22, HostIP: net.IPv6zero, HostPort: firstEphemPort}, @@ -449,22 +454,24 @@ func TestAddPortMappings(t *testing.T) { }, }, { - name: "specific host port", - epAddrV4: ctrIP4, - epAddrV6: ctrIP6, - cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080}}, + name: "specific host port", + epAddrV4: ctrIP4, + epAddrV6: ctrIP6, + cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080}}, + proxyPath: "/dummy/path/to/proxy", expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8080, HostPortEnd: 8080}, {Proto: types.TCP, IP: ctrIP6.IP, Port: 
80, HostIP: net.IPv6zero, HostPort: 8080, HostPortEnd: 8080}, }, }, { - name: "nat explicitly enabled", - epAddrV4: ctrIP4, - epAddrV6: ctrIP6, - cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080}}, - gwMode4: gwModeNAT, - gwMode6: gwModeNAT, + name: "nat explicitly enabled", + epAddrV4: ctrIP4, + epAddrV6: ctrIP6, + cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080}}, + gwMode4: gwModeNAT, + gwMode6: gwModeNAT, + proxyPath: "/dummy/path/to/proxy", expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8080, HostPortEnd: 8080}, {Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: net.IPv6zero, HostPort: 8080, HostPortEnd: 8080}, @@ -475,24 +482,27 @@ func TestAddPortMappings(t *testing.T) { epAddrV4: ctrIP4, epAddrV6: ctrIP6, cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080}}, + proxyPath: "/dummy/path/to/proxy", busyPortIPv4: 8080, - expErr: "failed to bind port 0.0.0.0:8080/tcp: busy port", + expErr: "failed to bind host port for 0.0.0.0:8080:172.19.0.2:80/tcp: address already in use", }, { - name: "ipv4 mapped container address with specific host port", - epAddrV4: ctrIP4Mapped, - epAddrV6: ctrIP6, - cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080}}, + name: "ipv4 mapped container address with specific host port", + epAddrV4: ctrIP4Mapped, + epAddrV6: ctrIP6, + cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080}}, + proxyPath: "/dummy/path/to/proxy", expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8080, HostPortEnd: 8080}, {Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: net.IPv6zero, HostPort: 8080, HostPortEnd: 8080}, }, }, { - name: "ipv4 mapped host address with specific host port", - epAddrV4: ctrIP4, - epAddrV6: ctrIP6, - cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostIP: newIPNet(t, "::ffff:127.0.0.1/128").IP, HostPort: 8080}}, + name: "ipv4 
mapped host address with specific host port", + epAddrV4: ctrIP4, + epAddrV6: ctrIP6, + cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostIP: newIPNet(t, "::ffff:127.0.0.1/128").IP, HostPort: 8080}}, + proxyPath: "/dummy/path/to/proxy", expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: newIPNet(t, "127.0.0.1/32").IP, HostPort: 8080, HostPortEnd: 8080}, }, @@ -502,6 +512,7 @@ func TestAddPortMappings(t *testing.T) { epAddrV4: ctrIP4, epAddrV6: ctrIP6, cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080, HostPortEnd: 8081}}, + proxyPath: "/dummy/path/to/proxy", busyPortIPv4: 8080, expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8081, HostPortEnd: 8081}, @@ -516,6 +527,7 @@ func TestAddPortMappings(t *testing.T) { {Proto: types.TCP, Port: 80, HostIP: net.IPv4zero, HostPort: 8080, HostPortEnd: 8081}, {Proto: types.TCP, Port: 80, HostIP: net.IPv6zero, HostPort: 8080, HostPortEnd: 8081}, }, + proxyPath: "/dummy/path/to/proxy", busyPortIPv4: 8080, expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8081}, @@ -534,6 +546,7 @@ func TestAddPortMappings(t *testing.T) { {Proto: types.UDP, Port: 81, HostPort: 8080, HostPortEnd: 8083}, {Proto: types.UDP, Port: 82, HostPort: 8080, HostPortEnd: 8083}, }, + proxyPath: "/dummy/path/to/proxy", busyPortIPv4: 8082, expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8080, HostPortEnd: 8080}, @@ -559,8 +572,9 @@ func TestAddPortMappings(t *testing.T) { {Proto: types.TCP, Port: 81, HostPort: 8080, HostPortEnd: 8082}, {Proto: types.TCP, Port: 82, HostPort: 8080, HostPortEnd: 8082}, }, + proxyPath: "/dummy/path/to/proxy", busyPortIPv4: 8081, - expErr: "failed to bind port 0.0.0.0:8081/tcp: busy port", + expErr: "failed to bind host port 8081 for 0.0.0.0:8080-8082:172.19.0.2:82/tcp", }, { name: "map host ipv6 to ipv4 
container with proxy", @@ -591,9 +605,10 @@ func TestAddPortMappings(t *testing.T) { epAddrV4: ctrIP4, epAddrV6: ctrIP6, cfg: []types.PortBinding{{Proto: types.TCP, Port: 80}}, - defHostIP: newIPNet(t, "10.11.12.13/24").IP, + proxyPath: "/dummy/path/to/proxy", + defHostIP: newIPNet(t, "127.0.0.1/8").IP, expPBs: []types.PortBinding{ - {Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: newIPNet(t, "10.11.12.13/24").IP, HostPort: firstEphemPort}, + {Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: newIPNet(t, "127.0.0.1/8").IP, HostPort: firstEphemPort}, }, }, { @@ -601,9 +616,10 @@ func TestAddPortMappings(t *testing.T) { epAddrV4: ctrIP4, epAddrV6: ctrIP6, cfg: []types.PortBinding{{Proto: types.TCP, Port: 80}}, - defHostIP: newIPNet(t, "::ffff:10.11.12.13/120").IP, + proxyPath: "/dummy/path/to/proxy", + defHostIP: newIPNet(t, "::ffff:127.0.0.1/72").IP, expPBs: []types.PortBinding{ - {Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: newIPNet(t, "10.11.12.13/24").IP, HostPort: firstEphemPort}, + {Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: newIPNet(t, "127.0.0.1/8").IP, HostPort: firstEphemPort}, }, }, { @@ -611,6 +627,7 @@ func TestAddPortMappings(t *testing.T) { epAddrV4: ctrIP4, epAddrV6: ctrIP6, cfg: []types.PortBinding{{Proto: types.TCP, Port: 80}}, + proxyPath: "/dummy/path/to/proxy", defHostIP: net.IPv6zero, expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: net.IPv6zero, HostPort: firstEphemPort}, @@ -621,6 +638,7 @@ func TestAddPortMappings(t *testing.T) { epAddrV4: ctrIP4, epAddrV6: ctrIP6, cfg: []types.PortBinding{{Proto: types.TCP, Port: 80}}, + proxyPath: "/dummy/path/to/proxy", defHostIP: newIPNet(t, "::1/128").IP, expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: newIPNet(t, "::1/128").IP, HostPort: firstEphemPort}, @@ -634,16 +652,17 @@ func TestAddPortMappings(t *testing.T) { {Proto: types.TCP, Port: 80, HostPort: 8080}, {Proto: types.TCP, Port: 22, HostPort: 2222}, }, + 
proxyPath: "/dummy/path/to/proxy", expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP4.IP, Port: 22, HostIP: net.IPv4zero, HostPort: 2222}, {Proto: types.TCP, IP: ctrIP6.IP, Port: 22, HostIP: net.IPv6zero, HostPort: 2222}, {Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8080}, {Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: net.IPv6zero, HostPort: 8080}, }, - expReleaseErr: "failed to stop docker-proxy for port mapping 0.0.0.0:2222:172.19.0.2:22/tcp: can't stop now\n" + - "failed to stop docker-proxy for port mapping [::]:2222:[fdf8:b88e:bb5c:3483::2]:22/tcp: can't stop now\n" + - "failed to stop docker-proxy for port mapping 0.0.0.0:8080:172.19.0.2:80/tcp: can't stop now\n" + - "failed to stop docker-proxy for port mapping [::]:8080:[fdf8:b88e:bb5c:3483::2]:80/tcp: can't stop now", + expReleaseErr: "failed to stop userland proxy for port mapping 0.0.0.0:2222:172.19.0.2:22/tcp: can't stop now\n" + + "failed to stop userland proxy for port mapping [::]:2222:[fdf8:b88e:bb5c:3483::2]:22/tcp: can't stop now\n" + + "failed to stop userland proxy for port mapping 0.0.0.0:8080:172.19.0.2:80/tcp: can't stop now\n" + + "failed to stop userland proxy for port mapping [::]:8080:[fdf8:b88e:bb5c:3483::2]:80/tcp: can't stop now", }, { name: "disable nat6", @@ -653,7 +672,8 @@ func TestAddPortMappings(t *testing.T) { {Proto: types.TCP, Port: 22}, {Proto: types.TCP, Port: 80}, }, - gwMode6: gwModeRouted, + proxyPath: "/dummy/path/to/proxy", + gwMode6: gwModeRouted, expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP4.IP, Port: 22, HostIP: net.IPv4zero, HostPort: firstEphemPort}, {Proto: types.TCP, IP: ctrIP6.IP, Port: 22, HostIP: net.IPv6zero}, @@ -669,7 +689,8 @@ func TestAddPortMappings(t *testing.T) { {Proto: types.TCP, Port: 22}, {Proto: types.TCP, Port: 80}, }, - gwMode4: gwModeRouted, + proxyPath: "/dummy/path/to/proxy", + gwMode4: gwModeRouted, expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP4.IP, Port: 22, HostIP: 
net.IPv4zero}, {Proto: types.TCP, IP: ctrIP6.IP, Port: 22, HostIP: net.IPv6zero, HostPort: firstEphemPort}, @@ -685,8 +706,9 @@ func TestAddPortMappings(t *testing.T) { {Proto: types.TCP, Port: 22}, {Proto: types.TCP, Port: 80}, }, - gwMode4: gwModeRouted, - gwMode6: gwModeRouted, + proxyPath: "/dummy/path/to/proxy", + gwMode4: gwModeRouted, + gwMode6: gwModeRouted, expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP4.IP, Port: 22, HostIP: net.IPv4zero}, {Proto: types.TCP, IP: ctrIP6.IP, Port: 22, HostIP: net.IPv6zero}, @@ -695,9 +717,10 @@ func TestAddPortMappings(t *testing.T) { }, }, { - name: "same ports for matching mappings with different host addresses", - epAddrV4: ctrIP4, - epAddrV6: ctrIP6, + name: "same ports for matching mappings with different host addresses", + epAddrV4: ctrIP4, + epAddrV6: ctrIP6, + hostAddrs: []string{"192.168.1.2/24", "fd0c:9167:5b11::2/64", "fd0c:9167:5b11::3/64"}, cfg: []types.PortBinding{ // These two should both get the same host port. {Proto: types.TCP, Port: 80, HostIP: newIPNet(t, "fd0c:9167:5b11::2/64").IP}, @@ -711,6 +734,7 @@ func TestAddPortMappings(t *testing.T) { {Proto: types.TCP, Port: 12345, HostPort: 12345, HostPortEnd: 12346}, {Proto: types.TCP, Port: 12345, HostPort: 12345}, }, + proxyPath: "/dummy/path/to/proxy", expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP4.IP, Port: 12345, HostIP: net.IPv4zero, HostPort: 12345}, {Proto: types.TCP, IP: ctrIP6.IP, Port: 12345, HostIP: net.IPv6zero, HostPort: 12345}, @@ -751,15 +775,14 @@ func TestAddPortMappings(t *testing.T) { origStartProxy := startProxy defer func() { startProxy = origStartProxy }() proxies := map[proxyCall]bool{} // proxy -> is not stopped - startProxy = func(proto string, - hostIP net.IP, hostPort int, - containerIP net.IP, containerPort int, + startProxy = func(pb types.PortBinding, proxyPath string, + listenSock *os.File, ) (stop func() error, retErr error) { - if tc.busyPortIPv4 > 0 && tc.busyPortIPv4 == hostPort && hostIP.To4() != nil 
{ + if tc.busyPortIPv4 > 0 && tc.busyPortIPv4 == int(pb.HostPort) && pb.HostIP.To4() != nil { return nil, errors.New("busy port") } - c := newProxyCall(proto, hostIP, hostPort, containerIP, containerPort, proxyPath) + c := newProxyCall(pb.Proto.String(), pb.HostIP, int(pb.HostPort), pb.IP, int(pb.Port), proxyPath) if _, ok := proxies[c]; ok { return nil, fmt.Errorf("duplicate proxy: %#v", c) } @@ -781,6 +804,28 @@ func TestAddPortMappings(t *testing.T) { defer func() { newPortDriverClient = origNewPortDriverClient }() newPortDriverClient = func() (portDriverClient, error) { return newMockPortDriverClient() } + if len(tc.hostAddrs) > 0 { + dummyLink := &netlink.Bridge{LinkAttrs: netlink.LinkAttrs{Name: "br-dummy"}} + err := netlink.LinkAdd(dummyLink) + assert.NilError(t, err) + for _, addr := range tc.hostAddrs { + // Add with NODAD so that the address is available immediately. + err := netlink.AddrAdd(dummyLink, + &netlink.Addr{IPNet: newIPNet(t, addr), Flags: syscall.IFA_F_NODAD}) + assert.NilError(t, err) + } + err = netlink.LinkSetUp(dummyLink) + assert.NilError(t, err) + } + if tc.busyPortIPv4 != 0 { + tl, err := net.ListenTCP("tcp4", &net.TCPAddr{IP: net.IPv4zero, Port: tc.busyPortIPv4}) + assert.NilError(t, err) + defer tl.Close() + ul, err := net.ListenUDP("udp4", &net.UDPAddr{IP: net.IPv4zero, Port: tc.busyPortIPv4}) + assert.NilError(t, err) + defer ul.Close() + } + n := &bridgeNetwork{ config: &networkConfiguration{ BridgeName: "dummybridge", @@ -829,20 +874,21 @@ func TestAddPortMappings(t *testing.T) { var disableNAT bool var addrM, addrD, addrH string var ipv iptables.IPVersion + hip := expChildIP(expPB.HostIP) if expPB.IP.To4() == nil { disableNAT = tc.gwMode6.natDisabled() ipv = iptables.IPv6 addrM = ctrIP6.IP.String() + "/128" addrD = "[" + ctrIP6.IP.String() + "]" - addrH = expPB.HostIP.String() + "/128" + addrH = hip.String() + "/128" } else { disableNAT = tc.gwMode4.natDisabled() ipv = iptables.IPv4 addrM = ctrIP4.IP.String() + "/32" addrD = 
ctrIP4.IP.String() - addrH = expPB.HostIP.String() + "/32" + addrH = hip.String() + "/32" } - if expPB.HostIP.IsUnspecified() { + if hip.IsUnspecified() { addrH = "0/0" } @@ -887,19 +933,21 @@ func TestAddPortMappings(t *testing.T) { } // Check a docker-proxy was started and stopped for each expected port binding. - expProxies := map[proxyCall]bool{} - for _, expPB := range tc.expPBs { - hip := expChildIP(expPB.HostIP) - is4 := hip.To4() != nil - if (is4 && tc.gwMode4.natDisabled()) || (!is4 && tc.gwMode6.natDisabled()) { - continue + if tc.proxyPath != "" { + expProxies := map[proxyCall]bool{} + for _, expPB := range tc.expPBs { + hip := expChildIP(expPB.HostIP) + is4 := hip.To4() != nil + if (is4 && tc.gwMode4.natDisabled()) || (!is4 && tc.gwMode6.natDisabled()) { + continue + } + p := newProxyCall(expPB.Proto.String(), + hip, int(expPB.HostPort), + expPB.IP, int(expPB.Port), tc.proxyPath) + expProxies[p] = tc.expReleaseErr != "" } - p := newProxyCall(expPB.Proto.String(), - hip, int(expPB.HostPort), - expPB.IP, int(expPB.Port), tc.proxyPath) - expProxies[p] = tc.expReleaseErr != "" + assert.Check(t, is.DeepEqual(expProxies, proxies)) } - assert.Check(t, is.DeepEqual(expProxies, proxies)) // Check the port driver has seen the expected port mappings and no others, // and that they have all been closed. 
diff --git a/libnetwork/drivers/bridge/rlkclient/rootlesskit_client_linux.go b/libnetwork/drivers/bridge/rlkclient/rootlesskit_client_linux.go index 4b92d17345..f81380bf0c 100644 --- a/libnetwork/drivers/bridge/rlkclient/rootlesskit_client_linux.go +++ b/libnetwork/drivers/bridge/rlkclient/rootlesskit_client_linux.go @@ -99,7 +99,8 @@ func (c *PortDriverClient) AddPort( hostIP netip.Addr, childIP netip.Addr, hostPort int, -) (func() error, error) { // proto is like "tcp", but we need to convert it to "tcp4" or "tcp6" explicitly +) (func() error, error) { + // proto is like "tcp", but we need to convert it to "tcp4" or "tcp6" explicitly // for libnetwork >= 20201216 // // See https://github.com/moby/libnetwork/pull/2604/files#diff-8fa48beed55dd033bf8e4f8c40b31cf69d0b2cc5d4bb53cde8594670ea6c938aR20 diff --git a/libnetwork/portmapper/proxy_linux.go b/libnetwork/portmapper/proxy_linux.go index 9bdb0f5567..d7256f76f0 100644 --- a/libnetwork/portmapper/proxy_linux.go +++ b/libnetwork/portmapper/proxy_linux.go @@ -1,78 +1,61 @@ package portmapper import ( + "errors" "fmt" "io" - "net" "os" "os/exec" "runtime" "strconv" "syscall" "time" + + "github.com/docker/docker/libnetwork/types" ) -// StartProxy starts the proxy process at proxyPath, or instantiates a dummy proxy -// to bind the host port if proxyPath is the empty string. -func StartProxy( - proto string, - hostIP net.IP, hostPort int, - containerIP net.IP, containerPort int, - proxyPath string, -) (stop func() error, retErr error) { - if proxyPath == "" { - return newDummyProxy(proto, hostIP, hostPort) - } - return newProxyCommand(proto, hostIP, hostPort, containerIP, containerPort, proxyPath) -} - -func newProxyCommand( - proto string, - hostIP net.IP, hostPort int, - containerIP net.IP, containerPort int, +// StartProxy starts the proxy process at proxyPath. +// If listenSock is not nil, it must be a bound socket that can be passed to +// the proxy process for it to listen on. 
+func StartProxy(pb types.PortBinding, proxyPath string, + listenSock *os.File, ) (stop func() error, retErr error) { if proxyPath == "" { return nil, fmt.Errorf("no path provided for userland-proxy binary") } - - p := &proxyCommand{ - cmd: &exec.Cmd{ - Path: proxyPath, - Args: []string{ - proxyPath, - "-proto", proto, - "-host-ip", hostIP.String(), - "-host-port", strconv.Itoa(hostPort), - "-container-ip", containerIP.String(), - "-container-port", strconv.Itoa(containerPort), - }, - SysProcAttr: &syscall.SysProcAttr{ - Pdeathsig: syscall.SIGTERM, // send a sigterm to the proxy if the creating thread in the daemon process dies (https://go.dev/issue/27505) - }, - }, - wait: make(chan error, 1), - } - if err := p.start(); err != nil { - return nil, err - } - return p.stop, nil -} - -// proxyCommand wraps an exec.Cmd to run the userland TCP and UDP -// proxies as separate processes. -type proxyCommand struct { - cmd *exec.Cmd - wait chan error -} - -func (p *proxyCommand) start() error { r, w, err := os.Pipe() if err != nil { - return fmt.Errorf("proxy unable to open os.Pipe %s", err) + return nil, fmt.Errorf("proxy unable to open os.Pipe %s", err) } - defer r.Close() - p.cmd.ExtraFiles = []*os.File{w} + defer func() { + if w != nil { + w.Close() + } + r.Close() + }() + + cmd := &exec.Cmd{ + Path: proxyPath, + Args: []string{ + proxyPath, + "-proto", pb.Proto.String(), + "-host-ip", pb.HostIP.String(), + "-host-port", strconv.FormatUint(uint64(pb.HostPort), 10), + "-container-ip", pb.IP.String(), + "-container-port", strconv.FormatUint(uint64(pb.Port), 10), + }, + ExtraFiles: []*os.File{w}, + SysProcAttr: &syscall.SysProcAttr{ + Pdeathsig: syscall.SIGTERM, // send a sigterm to the proxy if the creating thread in the daemon process dies (https://go.dev/issue/27505) + }, + } + if listenSock != nil { + cmd.Args = append(cmd.Args, "-use-listen-fd") + cmd.ExtraFiles = append(cmd.ExtraFiles, listenSock) + } + + wait := make(chan error, 1) // As p.cmd.SysProcAttr.Pdeathsig 
is set, the signal will be sent to the // process when the OS thread on which p.cmd.Start() was executed dies. @@ -88,17 +71,18 @@ func (p *proxyCommand) start() error { go func() { runtime.LockOSThread() defer runtime.UnlockOSThread() - err := p.cmd.Start() + err := cmd.Start() started <- err if err != nil { return } - p.wait <- p.cmd.Wait() + wait <- cmd.Wait() }() if err := <-started; err != nil { - return err + return nil, err } w.Close() + w = nil errchan := make(chan error, 1) go func() { @@ -108,11 +92,16 @@ func (p *proxyCommand) start() error { if string(buf) != "0\n" { errStr, err := io.ReadAll(r) if err != nil { - errchan <- fmt.Errorf("Error reading exit status from userland proxy: %v", err) + errchan <- fmt.Errorf("error reading exit status from userland proxy: %v", err) return } - - errchan <- fmt.Errorf("Error starting userland proxy: %s", errStr) + // If the user has an old docker-proxy in their PATH, and we passed "-use-listen-fd" + // on the command line, it exits with no response on the pipe. 
+ if listenSock != nil && buf[0] == 0 && len(errStr) == 0 { + errchan <- errors.New("failed to start docker-proxy, check that the current version is in your $PATH") + return + } + errchan <- fmt.Errorf("error starting userland proxy: %s", errStr) return } errchan <- nil @@ -120,18 +109,21 @@ func (p *proxyCommand) start() error { select { case err := <-errchan: - return err + if err != nil { + return nil, err + } case <-time.After(16 * time.Second): - return fmt.Errorf("Timed out proxy starting the userland proxy") + return nil, fmt.Errorf("timed out starting the userland proxy") } -} -func (p *proxyCommand) stop() error { - if p.cmd.Process != nil { - if err := p.cmd.Process.Signal(os.Interrupt); err != nil { + stopFn := func() error { + if cmd.Process == nil { + return nil + } + if err := cmd.Process.Signal(os.Interrupt); err != nil { return err } - return <-p.wait + return <-wait } - return nil + return stopFn, nil } diff --git a/libnetwork/portmapper/proxy.go b/libnetwork/portmapper/proxy_windows.go similarity index 100% rename from libnetwork/portmapper/proxy.go rename to libnetwork/portmapper/proxy_windows.go From cdea7509661d6e0737c437c4ea6888acd770278c Mon Sep 17 00:00:00 2001 From: Rob Murray Date: Wed, 10 Jul 2024 16:52:34 +0100 Subject: [PATCH 9/9] Tell RootlessKit about port mappings when --userland-proxy=false The daemon was modified to tell RootlessKit about host port mappings directly, rather than by running rootlesskit-docker-proxy to make those updates. DNAT rules created in rootless mode referred to the host IP address, rather than the address seen as host address in the rootless network namespace. With these changes, port mappings work in rootless mode when --userland-proxy=false - so, don't gate the RootlessKit API calls on starting docker-proxy. 
Signed-off-by: Rob Murray --- cmd/docker-proxy/main_linux.go | 12 ++++++++++ libnetwork/drivers/bridge/bridge_linux.go | 10 +++++---- .../rlkclient/rootlesskit_client_linux.go | 12 +++++++--- .../drivers/bridge/port_mapping_linux_test.go | 22 +++++++++++++++++-- 4 files changed, 47 insertions(+), 9 deletions(-) rename libnetwork/drivers/bridge/{ => internal}/rlkclient/rootlesskit_client_linux.go (95%) diff --git a/cmd/docker-proxy/main_linux.go b/cmd/docker-proxy/main_linux.go index 2210d2f50d..2c1cd4e1a5 100644 --- a/cmd/docker-proxy/main_linux.go +++ b/cmd/docker-proxy/main_linux.go @@ -27,12 +27,23 @@ const ( ) func main() { + // Mark any files we expect to inherit as close-on-exec + // so that they are not unexpectedly inherited by any child processes + // if we ever need docker-proxy to exec something. + // This is safe to do even if the fd belongs to the Go runtime + // as it would be a no-op: + // the Go runtime marks all file descriptors it opens as close-on-exec. + // See the godoc for syscall.ForkLock for more information. 
+ syscall.CloseOnExec(int(parentPipeFd)) + syscall.CloseOnExec(int(listenSockFd)) + config := parseFlags() p, err := newProxy(config) if config.ListenSock != nil { config.ListenSock.Close() } + _ = syscall.SetNonblock(int(parentPipeFd), true) f := os.NewFile(parentPipeFd, "signal-parent") if err != nil { fmt.Fprintf(f, "1\n%s", err) @@ -151,6 +162,7 @@ func parseFlags() ProxyConfig { } if useListenFd { + _ = syscall.SetNonblock(int(listenSockFd), true) config.ListenSock = os.NewFile(listenSockFd, "listen-sock") } diff --git a/libnetwork/drivers/bridge/bridge_linux.go b/libnetwork/drivers/bridge/bridge_linux.go index 5c7ec6f2aa..2e76b4d9e9 100644 --- a/libnetwork/drivers/bridge/bridge_linux.go +++ b/libnetwork/drivers/bridge/bridge_linux.go @@ -14,7 +14,7 @@ import ( "github.com/docker/docker/errdefs" "github.com/docker/docker/libnetwork/datastore" "github.com/docker/docker/libnetwork/driverapi" - "github.com/docker/docker/libnetwork/drivers/bridge/rlkclient" + "github.com/docker/docker/libnetwork/drivers/bridge/internal/rlkclient" "github.com/docker/docker/libnetwork/internal/netiputil" "github.com/docker/docker/libnetwork/iptables" "github.com/docker/docker/libnetwork/netlabel" @@ -139,7 +139,9 @@ type portDriverClient interface { } // Allow unit tests to supply a dummy RootlessKit port driver client. 
-var newPortDriverClient = func() (portDriverClient, error) { return rlkclient.NewPortDriverClient() } +var newPortDriverClient = func(ctx context.Context) (portDriverClient, error) { + return rlkclient.NewPortDriverClient(ctx) +} type driver struct { config configuration @@ -558,9 +560,9 @@ func (d *driver) configure(option map[string]interface{}) error { } } - if config.EnableUserlandProxy && config.Rootless { + if config.Rootless { var err error - pdc, err = newPortDriverClient() + pdc, err = newPortDriverClient(context.TODO()) if err != nil { return err } diff --git a/libnetwork/drivers/bridge/rlkclient/rootlesskit_client_linux.go b/libnetwork/drivers/bridge/internal/rlkclient/rootlesskit_client_linux.go similarity index 95% rename from libnetwork/drivers/bridge/rlkclient/rootlesskit_client_linux.go rename to libnetwork/drivers/bridge/internal/rlkclient/rootlesskit_client_linux.go index f81380bf0c..0c8e4b8268 100644 --- a/libnetwork/drivers/bridge/rlkclient/rootlesskit_client_linux.go +++ b/libnetwork/drivers/bridge/internal/rlkclient/rootlesskit_client_linux.go @@ -9,6 +9,7 @@ package rlkclient import ( "context" + "errors" "fmt" "net" "net/netip" @@ -17,7 +18,6 @@ import ( "strconv" "strings" - "github.com/pkg/errors" "github.com/rootless-containers/rootlesskit/v2/pkg/api/client" "github.com/rootless-containers/rootlesskit/v2/pkg/port" ) @@ -29,7 +29,7 @@ type PortDriverClient struct { childIP netip.Addr } -func NewPortDriverClient() (*PortDriverClient, error) { +func NewPortDriverClient(ctx context.Context) (*PortDriverClient, error) { stateDir := os.Getenv("ROOTLESSKIT_STATE_DIR") if stateDir == "" { return nil, errors.New("$ROOTLESSKIT_STATE_DIR needs to be set") @@ -40,7 +40,7 @@ func NewPortDriverClient() (*PortDriverClient, error) { return nil, fmt.Errorf("error while connecting to RootlessKit API socket: %w", err) } - info, err := c.Info(context.Background()) + info, err := c.Info(ctx) if err != nil { return nil, fmt.Errorf("failed to call info API, 
probably RootlessKit binary is too old (needs to be v0.14.0 or later): %w", err) } @@ -82,6 +82,9 @@ func NewPortDriverClient() (*PortDriverClient, error) { // mappings from host IP addresses, and DNAT rules, must use this child // address in place of the real host address. func (c *PortDriverClient) ChildHostIP(hostIP netip.Addr) netip.Addr { + if c == nil { + return hostIP + } if c.childIP.IsValid() { return c.childIP } @@ -100,6 +103,9 @@ func (c *PortDriverClient) AddPort( childIP netip.Addr, hostPort int, ) (func() error, error) { + if c == nil { + return func() error { return nil }, nil + } // proto is like "tcp", but we need to convert it to "tcp4" or "tcp6" explicitly // for libnetwork >= 20201216 // diff --git a/libnetwork/drivers/bridge/port_mapping_linux_test.go b/libnetwork/drivers/bridge/port_mapping_linux_test.go index 684bece84b..2bc21e7eb0 100644 --- a/libnetwork/drivers/bridge/port_mapping_linux_test.go +++ b/libnetwork/drivers/bridge/port_mapping_linux_test.go @@ -764,6 +764,22 @@ func TestAddPortMappings(t *testing.T) { {Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: net.IPv6zero, HostPort: firstEphemPort + 1}, }, }, + { + name: "rootless without proxy", + epAddrV4: ctrIP4, + epAddrV6: ctrIP6, + cfg: []types.PortBinding{ + {Proto: types.TCP, Port: 22}, + {Proto: types.TCP, Port: 80}, + }, + rootless: true, + expPBs: []types.PortBinding{ + {Proto: types.TCP, IP: ctrIP4.IP, Port: 22, HostIP: net.IPv4zero, HostPort: firstEphemPort}, + {Proto: types.TCP, IP: ctrIP6.IP, Port: 22, HostIP: net.IPv6zero, HostPort: firstEphemPort}, + {Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: firstEphemPort + 1}, + {Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: net.IPv6zero, HostPort: firstEphemPort + 1}, + }, + }, } for _, tc := range testcases { @@ -802,7 +818,9 @@ func TestAddPortMappings(t *testing.T) { // Mock the RootlessKit port driver. 
origNewPortDriverClient := newPortDriverClient defer func() { newPortDriverClient = origNewPortDriverClient }() - newPortDriverClient = func() (portDriverClient, error) { return newMockPortDriverClient() } + newPortDriverClient = func(ctx context.Context) (portDriverClient, error) { + return newMockPortDriverClient(ctx) + } if len(tc.hostAddrs) > 0 { dummyLink := &netlink.Bridge{LinkAttrs: netlink.LinkAttrs{Name: "br-dummy"}} @@ -1005,7 +1023,7 @@ type mockPortDriverClient struct { openPorts map[mockPortDriverPort]bool } -func newMockPortDriverClient() (*mockPortDriverClient, error) { +func newMockPortDriverClient(_ context.Context) (*mockPortDriverClient, error) { return &mockPortDriverClient{ openPorts: map[mockPortDriverPort]bool{}, }, nil