diff --git a/Dockerfile b/Dockerfile index da129fabda..a2e004597b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -377,8 +377,6 @@ RUN --mount=from=rootlesskit-src,src=/usr/src/rootlesskit,rw \ export CGO_ENABLED=$([ "$DOCKER_STATIC" = "1" ] && echo "0" || echo "1") xx-go build -o /build/rootlesskit -ldflags="$([ "$DOCKER_STATIC" != "1" ] && echo "-linkmode=external")" ./cmd/rootlesskit xx-verify $([ "$DOCKER_STATIC" = "1" ] && echo "--static") /build/rootlesskit - xx-go build -o /build/rootlesskit-docker-proxy -ldflags="$([ "$DOCKER_STATIC" != "1" ] && echo "-linkmode=external")" ./cmd/rootlesskit-docker-proxy - xx-verify $([ "$DOCKER_STATIC" = "1" ] && echo "--static") /build/rootlesskit-docker-proxy EOT COPY --link ./contrib/dockerd-rootless.sh /build/ COPY --link ./contrib/dockerd-rootless-setuptool.sh /build/ @@ -620,7 +618,7 @@ RUN --mount=type=bind,target=.,rw \ xx-go --wrap PKG_CONFIG=$(xx-go env PKG_CONFIG) ./hack/make.sh $target xx-verify $([ "$DOCKER_STATIC" = "1" ] && echo "--static") /tmp/bundles/${target}-daemon/dockerd$([ "$(xx-info os)" = "windows" ] && echo ".exe") - xx-verify $([ "$DOCKER_STATIC" = "1" ] && echo "--static") /tmp/bundles/${target}-daemon/docker-proxy$([ "$(xx-info os)" = "windows" ] && echo ".exe") + [ "$(xx-info os)" != "linux" ] || xx-verify $([ "$DOCKER_STATIC" = "1" ] && echo "--static") /tmp/bundles/${target}-daemon/docker-proxy mkdir /build mv /tmp/bundles/${target}-daemon/* /build/ EOT diff --git a/cmd/docker-proxy/genwinres_windows.go b/cmd/docker-proxy/genwinres_windows.go deleted file mode 100644 index 86697d8a57..0000000000 --- a/cmd/docker-proxy/genwinres_windows.go +++ /dev/null @@ -1,5 +0,0 @@ -//go:generate go-winres make --arch=386,amd64,arm,arm64 --in=../../cli/winresources/docker-proxy/winres.json --out=../../cli/winresources/docker-proxy/resource - -package main - -import _ "github.com/docker/docker/cli/winresources/docker-proxy" diff --git a/cmd/docker-proxy/main.go b/cmd/docker-proxy/main.go deleted file mode 
100644 index 555c27a084..0000000000 --- a/cmd/docker-proxy/main.go +++ /dev/null @@ -1,80 +0,0 @@ -package main - -import ( - "flag" - "fmt" - "log" - "net" - "os" - "os/signal" - "syscall" - - "github.com/docker/docker/dockerversion" - "github.com/ishidawataru/sctp" -) - -func main() { - f := os.NewFile(3, "signal-parent") - host, container := parseFlags() - - p, err := NewProxy(host, container) - if err != nil { - fmt.Fprintf(f, "1\n%s", err) - f.Close() - os.Exit(1) - } - go handleStopSignals(p) - fmt.Fprint(f, "0\n") - f.Close() - - // Run will block until the proxy stops - p.Run() -} - -// parseFlags parses the flags passed on reexec to create the TCP/UDP/SCTP -// net.Addrs to map the host and container ports. -func parseFlags() (host net.Addr, container net.Addr) { - var ( - proto = flag.String("proto", "tcp", "proxy protocol") - hostIP = flag.String("host-ip", "", "host ip") - hostPort = flag.Int("host-port", -1, "host port") - containerIP = flag.String("container-ip", "", "container ip") - containerPort = flag.Int("container-port", -1, "container port") - printVer = flag.Bool("v", false, "print version information and quit") - printVersion = flag.Bool("version", false, "print version information and quit") - ) - - flag.Parse() - - if *printVer || *printVersion { - fmt.Printf("docker-proxy (commit %s) version %s\n", dockerversion.GitCommit, dockerversion.Version) - os.Exit(0) - } - - switch *proto { - case "tcp": - host = &net.TCPAddr{IP: net.ParseIP(*hostIP), Port: *hostPort} - container = &net.TCPAddr{IP: net.ParseIP(*containerIP), Port: *containerPort} - case "udp": - host = &net.UDPAddr{IP: net.ParseIP(*hostIP), Port: *hostPort} - container = &net.UDPAddr{IP: net.ParseIP(*containerIP), Port: *containerPort} - case "sctp": - host = &sctp.SCTPAddr{IPAddrs: []net.IPAddr{{IP: net.ParseIP(*hostIP)}}, Port: *hostPort} - container = &sctp.SCTPAddr{IPAddrs: []net.IPAddr{{IP: net.ParseIP(*containerIP)}}, Port: *containerPort} - default: - log.Fatalf("unsupported 
protocol %s", *proto) - } - - return host, container -} - -func handleStopSignals(p Proxy) { - s := make(chan os.Signal, 10) - signal.Notify(s, os.Interrupt, syscall.SIGTERM) - - for range s { - p.Close() - - os.Exit(0) - } -} diff --git a/cmd/docker-proxy/main_linux.go b/cmd/docker-proxy/main_linux.go new file mode 100644 index 0000000000..2c1cd4e1a5 --- /dev/null +++ b/cmd/docker-proxy/main_linux.go @@ -0,0 +1,181 @@ +package main + +import ( + "errors" + "flag" + "fmt" + "net" + "os" + "os/signal" + "syscall" + + "github.com/docker/docker/dockerversion" + "github.com/ishidawataru/sctp" +) + +// The caller is expected to pass-in open file descriptors ... +const ( + // Pipe for reporting status, as a string. "0\n" if the proxy + // started normally. "1\n" otherwise. + parentPipeFd uintptr = 3 + iota + // If -use-listen-fd=true, a listening socket ready to accept TCP + // connections or receive UDP. (Without that option on the command + // line, the listener needs to be opened by docker-proxy, for + // compatibility with older docker daemons. In this case fd 4 + // may belong to the Go runtime.) + listenSockFd +) + +func main() { + // Mark any files we expect to inherit as close-on-exec + // so that they are not unexpectedly inherited by any child processes + // if we ever need docker-proxy to exec something. + // This is safe to do even if the fd belongs to the Go runtime + // as it would be a no-op: + // the Go runtime marks all file descriptors it opens as close-on-exec. + // See the godoc for syscall.ForkLock for more information. 
+ syscall.CloseOnExec(int(parentPipeFd)) + syscall.CloseOnExec(int(listenSockFd)) + + config := parseFlags() + p, err := newProxy(config) + if config.ListenSock != nil { + config.ListenSock.Close() + } + + _ = syscall.SetNonblock(int(parentPipeFd), true) + f := os.NewFile(parentPipeFd, "signal-parent") + if err != nil { + fmt.Fprintf(f, "1\n%s", err) + f.Close() + os.Exit(1) + } + go handleStopSignals(p) + fmt.Fprint(f, "0\n") + f.Close() + + // Run will block until the proxy stops + p.Run() +} + +func newProxy(config ProxyConfig) (p Proxy, err error) { + ipv := ipv4 + if config.HostIP.To4() == nil { + ipv = ipv6 + } + + switch config.Proto { + case "tcp": + var listener *net.TCPListener + if config.ListenSock == nil { + // Fall back to HostIP:HostPort if no socket on fd 4, for compatibility with older daemons. + hostAddr := &net.TCPAddr{IP: config.HostIP, Port: config.HostPort} + listener, err = net.ListenTCP("tcp"+string(ipv), hostAddr) + if err != nil { + return nil, fmt.Errorf("failed to listen on %s: %w", hostAddr, err) + } + } else { + l, err := net.FileListener(config.ListenSock) + if err != nil { + return nil, err + } + var ok bool + listener, ok = l.(*net.TCPListener) + if !ok { + return nil, fmt.Errorf("unexpected socket type for listener fd: %s", l.Addr().Network()) + } + } + container := &net.TCPAddr{IP: config.ContainerIP, Port: config.ContainerPort} + p, err = NewTCPProxy(listener, container) + case "udp": + var listener *net.UDPConn + if config.ListenSock == nil { + // Fall back to HostIP:HostPort if no socket on fd 4, for compatibility with older daemons. 
+ hostAddr := &net.UDPAddr{IP: config.HostIP, Port: config.HostPort} + listener, err = net.ListenUDP("udp"+string(ipv), hostAddr) + if err != nil { + return nil, fmt.Errorf("failed to listen on %s: %w", hostAddr, err) + } + } else { + l, err := net.FilePacketConn(config.ListenSock) + if err != nil { + return nil, err + } + var ok bool + listener, ok = l.(*net.UDPConn) + if !ok { + return nil, fmt.Errorf("unexpected socket type for listener fd: %s", l.LocalAddr().Network()) + } + } + container := &net.UDPAddr{IP: config.ContainerIP, Port: config.ContainerPort} + p, err = NewUDPProxy(listener, container) + case "sctp": + var listener *sctp.SCTPListener + if config.ListenSock != nil { + // There's no way to construct an SCTPListener from a file descriptor at the moment. + // If a socket has been passed in, it's probably from a newer daemon using a version + // of the sctp module that does allow it. + return nil, errors.New("cannot use supplied SCTP socket, check the latest docker-proxy is in your $PATH") + } + hostAddr := &sctp.SCTPAddr{IPAddrs: []net.IPAddr{{IP: config.HostIP}}, Port: config.HostPort} + container := &sctp.SCTPAddr{IPAddrs: []net.IPAddr{{IP: config.ContainerIP}}, Port: config.ContainerPort} + listener, err = sctp.ListenSCTP("sctp"+string(ipv), hostAddr) + if err != nil { + return nil, fmt.Errorf("failed to listen on %s: %w", hostAddr, err) + } + p, err = NewSCTPProxy(listener, container) + default: + return nil, fmt.Errorf("unsupported protocol %s", config.Proto) + } + + return p, err +} + +type ProxyConfig struct { + Proto string + HostIP, ContainerIP net.IP + HostPort, ContainerPort int + ListenSock *os.File +} + +// parseFlags parses the flags passed on reexec into a ProxyConfig holding the +// protocol, the host and container IPs/ports and, with -use-listen-fd, the inherited listen socket. 
+func parseFlags() ProxyConfig { + var ( + config ProxyConfig + useListenFd bool + printVer bool + ) + flag.StringVar(&config.Proto, "proto", "tcp", "proxy protocol") + flag.TextVar(&config.HostIP, "host-ip", net.IPv4zero, "host ip") + flag.IntVar(&config.HostPort, "host-port", -1, "host port") + flag.TextVar(&config.ContainerIP, "container-ip", net.IPv4zero, "container ip") + flag.IntVar(&config.ContainerPort, "container-port", -1, "container port") + flag.BoolVar(&useListenFd, "use-listen-fd", false, "use a supplied listen fd") + flag.BoolVar(&printVer, "v", false, "print version information and quit") + flag.BoolVar(&printVer, "version", false, "print version information and quit") + flag.Parse() + + if printVer { + fmt.Printf("docker-proxy (commit %s) version %s\n", dockerversion.GitCommit, dockerversion.Version) + os.Exit(0) + } + + if useListenFd { + _ = syscall.SetNonblock(int(listenSockFd), true) + config.ListenSock = os.NewFile(listenSockFd, "listen-sock") + } + + return config +} + +func handleStopSignals(p Proxy) { + s := make(chan os.Signal, 10) + signal.Notify(s, os.Interrupt, syscall.SIGTERM) + + for range s { + p.Close() + + os.Exit(0) + } +} diff --git a/cmd/docker-proxy/network_proxy_test.go b/cmd/docker-proxy/network_proxy_linux_test.go similarity index 55% rename from cmd/docker-proxy/network_proxy_test.go rename to cmd/docker-proxy/network_proxy_linux_test.go index 48c7a1abf7..2457e63278 100644 --- a/cmd/docker-proxy/network_proxy_test.go +++ b/cmd/docker-proxy/network_proxy_linux_test.go @@ -1,3 +1,5 @@ +//go:build !windows + package main import ( @@ -5,13 +7,13 @@ import ( "fmt" "io" "net" - "runtime" + "os" "strings" "testing" "time" "github.com/ishidawataru/sctp" - "gotest.tools/v3/skip" + "gotest.tools/v3/assert" ) var ( @@ -40,6 +42,8 @@ type UDPEchoServer struct { testCtx *testing.T } +const hopefullyFreePort = 25587 + func NewEchoServer(t *testing.T, proto, address string, opts EchoServerOptions) EchoServer { var server EchoServer if 
!strings.HasPrefix(proto, "tcp") && opts.TCPHalfClose { @@ -128,7 +132,31 @@ func (server *UDPEchoServer) Run() { func (server *UDPEchoServer) LocalAddr() net.Addr { return server.conn.LocalAddr() } func (server *UDPEchoServer) Close() { server.conn.Close() } +func tcpListener(t *testing.T, nw string, addr *net.TCPAddr) (*os.File, *net.TCPAddr) { + t.Helper() + l, err := net.ListenTCP(nw, addr) + assert.NilError(t, err) + osFile, err := l.File() + assert.NilError(t, err) + tcpAddr := l.Addr().(*net.TCPAddr) + err = l.Close() + assert.NilError(t, err) + return osFile, tcpAddr +} + +func udpListener(t *testing.T, nw string, addr *net.UDPAddr) (*os.File, *net.UDPAddr) { + t.Helper() + l, err := net.ListenUDP(nw, addr) + assert.NilError(t, err) + osFile, err := l.File() + assert.NilError(t, err) + err = l.Close() + assert.NilError(t, err) + return osFile, l.LocalAddr().(*net.UDPAddr) +} + func testProxyAt(t *testing.T, proto string, proxy Proxy, addr string, halfClose bool) { + t.Helper() defer proxy.Close() go proxy.Run() var client net.Conn @@ -167,98 +195,169 @@ func testProxyAt(t *testing.T, proto string, proxy Proxy, addr string, halfClose } } -func testProxy(t *testing.T, proto string, proxy Proxy, halfClose bool) { - testProxyAt(t, proto, proxy, proxy.FrontendAddr().String(), halfClose) -} - -func testTCP4Proxy(t *testing.T, halfClose bool) { +func testTCP4Proxy(t *testing.T, halfClose bool, hostPort int) { + t.Helper() backend := NewEchoServer(t, "tcp", "127.0.0.1:0", EchoServerOptions{TCPHalfClose: halfClose}) defer backend.Close() backend.Run() + backendAddr := backend.LocalAddr().(*net.TCPAddr) + var listener *os.File frontendAddr := &net.TCPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 0} - proxy, err := NewProxy(frontendAddr, backend.LocalAddr()) + if hostPort == 0 { + listener, frontendAddr = tcpListener(t, "tcp4", &net.TCPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 0}) + } else { + frontendAddr.Port = hostPort + } + config := ProxyConfig{ + Proto: "tcp", + HostIP: 
frontendAddr.IP, + HostPort: frontendAddr.Port, + ContainerIP: backendAddr.IP, + ContainerPort: backendAddr.Port, + ListenSock: listener, + } + proxy, err := newProxy(config) if err != nil { t.Fatal(err) } - testProxy(t, "tcp", proxy, halfClose) + testProxyAt(t, "tcp", proxy, frontendAddr.String(), halfClose) } func TestTCP4Proxy(t *testing.T) { - testTCP4Proxy(t, false) + testTCP4Proxy(t, false, 0) +} + +func TestTCP4ProxyNoListener(t *testing.T) { + testTCP4Proxy(t, false, hopefullyFreePort) } func TestTCP4ProxyHalfClose(t *testing.T) { - testTCP4Proxy(t, true) + testTCP4Proxy(t, true, 0) } func TestTCP6Proxy(t *testing.T) { - t.Skip("Need to start CI docker with --ipv6") backend := NewEchoServer(t, "tcp", "[::1]:0", EchoServerOptions{}) defer backend.Close() backend.Run() - frontendAddr := &net.TCPAddr{IP: net.IPv6loopback, Port: 0} - proxy, err := NewProxy(frontendAddr, backend.LocalAddr()) + backendAddr := backend.LocalAddr().(*net.TCPAddr) + listener, frontendAddr := tcpListener(t, "tcp6", &net.TCPAddr{IP: net.IPv6loopback, Port: 0}) + config := ProxyConfig{ + Proto: "tcp", + HostIP: frontendAddr.IP, + HostPort: frontendAddr.Port, + ContainerIP: backendAddr.IP, + ContainerPort: backendAddr.Port, + ListenSock: listener, + } + proxy, err := newProxy(config) if err != nil { t.Fatal(err) } - testProxy(t, "tcp", proxy, false) + testProxyAt(t, "tcp", proxy, frontendAddr.String(), false) } func TestTCPDualStackProxy(t *testing.T) { - // If I understand `godoc -src net favoriteAddrFamily` (used by the - // net.Listen* functions) correctly this should work, but it doesn't. 
- t.Skip("No support for dual stack yet") backend := NewEchoServer(t, "tcp", "[::1]:0", EchoServerOptions{}) defer backend.Close() backend.Run() - frontendAddr := &net.TCPAddr{IP: net.IPv6loopback, Port: 0} - proxy, err := NewProxy(frontendAddr, backend.LocalAddr()) + backendAddr := backend.LocalAddr().(*net.TCPAddr) + listener, frontendAddr := tcpListener(t, "tcp", &net.TCPAddr{IP: net.IPv6zero, Port: 0}) + config := ProxyConfig{ + Proto: "tcp", + HostIP: frontendAddr.IP, + HostPort: frontendAddr.Port, + ContainerIP: backendAddr.IP, + ContainerPort: backendAddr.Port, + ListenSock: listener, + } + proxy, err := newProxy(config) if err != nil { t.Fatal(err) } ipv4ProxyAddr := &net.TCPAddr{ IP: net.IPv4(127, 0, 0, 1), - Port: proxy.FrontendAddr().(*net.TCPAddr).Port, + Port: frontendAddr.Port, } testProxyAt(t, "tcp", proxy, ipv4ProxyAddr.String(), false) } -func TestUDP4Proxy(t *testing.T) { +func testUDP4Proxy(t *testing.T, hostPort int) { + t.Helper() backend := NewEchoServer(t, "udp", "127.0.0.1:0", EchoServerOptions{}) defer backend.Close() backend.Run() + var listener *os.File frontendAddr := &net.UDPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 0} - proxy, err := NewProxy(frontendAddr, backend.LocalAddr()) + if hostPort == 0 { + listener, frontendAddr = udpListener(t, "udp4", &net.UDPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 0}) + } else { + frontendAddr.Port = hostPort + } + backendAddr := backend.LocalAddr().(*net.UDPAddr) + config := ProxyConfig{ + Proto: "udp", + HostIP: frontendAddr.IP, + HostPort: frontendAddr.Port, + ContainerIP: backendAddr.IP, + ContainerPort: backendAddr.Port, + ListenSock: listener, + } + proxy, err := newProxy(config) if err != nil { t.Fatal(err) } - testProxy(t, "udp", proxy, false) + testProxyAt(t, "udp", proxy, frontendAddr.String(), false) +} + +func TestUDP4Proxy(t *testing.T) { + testUDP4Proxy(t, 0) +} + +func TestUDP4ProxyNoListener(t *testing.T) { + testUDP4Proxy(t, hopefullyFreePort) } func TestUDP6Proxy(t *testing.T) { - 
t.Skip("Need to start CI docker with --ipv6") backend := NewEchoServer(t, "udp", "[::1]:0", EchoServerOptions{}) defer backend.Close() backend.Run() - frontendAddr := &net.UDPAddr{IP: net.IPv6loopback, Port: 0} - proxy, err := NewProxy(frontendAddr, backend.LocalAddr()) + listener, frontendAddr := udpListener(t, "udp6", &net.UDPAddr{IP: net.IPv6loopback, Port: 0}) + backendAddr := backend.LocalAddr().(*net.UDPAddr) + config := ProxyConfig{ + Proto: "udp", + HostIP: frontendAddr.IP, + HostPort: frontendAddr.Port, + ContainerIP: backendAddr.IP, + ContainerPort: backendAddr.Port, + ListenSock: listener, + } + proxy, err := newProxy(config) if err != nil { t.Fatal(err) } - testProxy(t, "udp", proxy, false) + testProxyAt(t, "udp", proxy, frontendAddr.String(), false) } func TestUDPWriteError(t *testing.T) { frontendAddr := &net.UDPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 0} // Hopefully, this port will be free: */ - backendAddr := &net.UDPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 25587} - proxy, err := NewProxy(frontendAddr, backendAddr) + backendAddr := &net.UDPAddr{IP: net.IPv4(127, 0, 0, 1), Port: hopefullyFreePort} + listener, frontendAddr := udpListener(t, "udp4", frontendAddr) + config := ProxyConfig{ + Proto: "udp", + HostIP: frontendAddr.IP, + HostPort: frontendAddr.Port, + ContainerIP: backendAddr.IP, + ContainerPort: backendAddr.Port, + ListenSock: listener, + } + proxy, err := newProxy(config) if err != nil { t.Fatal(err) } defer proxy.Close() go proxy.Run() - client, err := net.Dial("udp", "127.0.0.1:25587") + client, err := net.Dial("udp", frontendAddr.String()) if err != nil { t.Fatalf("Can't connect to the proxy: %v", err) } @@ -266,7 +365,7 @@ func TestUDPWriteError(t *testing.T) { // Make sure the proxy doesn't stop when there is no actual backend: client.Write(testBuf) client.Write(testBuf) - backend := NewEchoServer(t, "udp", "127.0.0.1:25587", EchoServerOptions{}) + backend := NewEchoServer(t, "udp", backendAddr.String(), EchoServerOptions{}) defer 
backend.Close() backend.Run() client.SetDeadline(time.Now().Add(10 * time.Second)) @@ -282,31 +381,36 @@ func TestUDPWriteError(t *testing.T) { } } -func TestSCTP4Proxy(t *testing.T) { - skip.If(t, runtime.GOOS == "windows", "sctp is not supported on windows") - +func TestSCTP4ProxyNoListener(t *testing.T) { backend := NewEchoServer(t, "sctp", "127.0.0.1:0", EchoServerOptions{}) defer backend.Close() backend.Run() - frontendAddr := &sctp.SCTPAddr{IPAddrs: []net.IPAddr{{IP: net.IPv4(127, 0, 0, 1)}}, Port: 0} - proxy, err := NewProxy(frontendAddr, backend.LocalAddr()) - if err != nil { - t.Fatal(err) + backendAddr := backend.LocalAddr().(*sctp.SCTPAddr) + config := ProxyConfig{ + Proto: "sctp", + HostIP: net.IPv4(127, 0, 0, 1), + HostPort: hopefullyFreePort, + ContainerIP: backendAddr.IPAddrs[0].IP, + ContainerPort: backendAddr.Port, } - testProxy(t, "sctp", proxy, false) + proxy, err := newProxy(config) + assert.NilError(t, err) + testProxyAt(t, "sctp", proxy, fmt.Sprintf("%s:%d", config.HostIP, config.HostPort), false) } -func TestSCTP6Proxy(t *testing.T) { - t.Skip("Need to start CI docker with --ipv6") - skip.If(t, runtime.GOOS == "windows", "sctp is not supported on windows") - +func TestSCTP6ProxyNoListener(t *testing.T) { backend := NewEchoServer(t, "sctp", "[::1]:0", EchoServerOptions{}) defer backend.Close() backend.Run() - frontendAddr := &sctp.SCTPAddr{IPAddrs: []net.IPAddr{{IP: net.IPv6loopback}}, Port: 0} - proxy, err := NewProxy(frontendAddr, backend.LocalAddr()) - if err != nil { - t.Fatal(err) + backendAddr := backend.LocalAddr().(*sctp.SCTPAddr) + config := ProxyConfig{ + Proto: "sctp", + HostIP: net.IPv6loopback, + HostPort: hopefullyFreePort, + ContainerIP: backendAddr.IPAddrs[0].IP, + ContainerPort: backendAddr.Port, } - testProxy(t, "sctp", proxy, false) + proxy, err := newProxy(config) + assert.NilError(t, err) + testProxyAt(t, "sctp", proxy, fmt.Sprintf("[%s]:%d", config.HostIP, config.HostPort), false) } diff --git a/cmd/docker-proxy/proxy.go 
b/cmd/docker-proxy/proxy_linux.go similarity index 51% rename from cmd/docker-proxy/proxy.go rename to cmd/docker-proxy/proxy_linux.go index 1bd8f9d8e1..b3e8050c50 100644 --- a/cmd/docker-proxy/proxy.go +++ b/cmd/docker-proxy/proxy_linux.go @@ -2,12 +2,6 @@ // and UDP. package main -import ( - "net" - - "github.com/ishidawataru/sctp" -) - // ipVersion refers to IP version - v4 or v6 type ipVersion string @@ -29,22 +23,4 @@ type Proxy interface { Run() // Close stops forwarding traffic and close both ends of the Proxy. Close() - // FrontendAddr returns the address on which the proxy is listening. - FrontendAddr() net.Addr - // BackendAddr returns the proxied address. - BackendAddr() net.Addr -} - -// NewProxy creates a Proxy according to the specified frontendAddr and backendAddr. -func NewProxy(frontendAddr, backendAddr net.Addr) (Proxy, error) { - switch frontendAddr.(type) { - case *net.UDPAddr: - return NewUDPProxy(frontendAddr.(*net.UDPAddr), backendAddr.(*net.UDPAddr)) - case *net.TCPAddr: - return NewTCPProxy(frontendAddr.(*net.TCPAddr), backendAddr.(*net.TCPAddr)) - case *sctp.SCTPAddr: - return NewSCTPProxy(frontendAddr.(*sctp.SCTPAddr), backendAddr.(*sctp.SCTPAddr)) - default: - panic("Unsupported protocol") - } } diff --git a/cmd/docker-proxy/sctp_proxy.go b/cmd/docker-proxy/sctp_proxy_linux.go similarity index 69% rename from cmd/docker-proxy/sctp_proxy.go rename to cmd/docker-proxy/sctp_proxy_linux.go index 29ee5a6562..097e78e326 100644 --- a/cmd/docker-proxy/sctp_proxy.go +++ b/cmd/docker-proxy/sctp_proxy_linux.go @@ -18,18 +18,7 @@ type SCTPProxy struct { } // NewSCTPProxy creates a new SCTPProxy. 
-func NewSCTPProxy(frontendAddr, backendAddr *sctp.SCTPAddr) (*SCTPProxy, error) { - // detect version of hostIP to bind only to correct version - ipVersion := ipv4 - if frontendAddr.IPAddrs[0].IP.To4() == nil { - ipVersion = ipv6 - } - listener, err := sctp.ListenSCTP("sctp"+string(ipVersion), frontendAddr) - if err != nil { - return nil, err - } - // If the port in frontendAddr was 0 then ListenSCTP will have a picked - // a port to listen on, hence the call to Addr to get that actual port: +func NewSCTPProxy(listener *sctp.SCTPListener, backendAddr *sctp.SCTPAddr) (*SCTPProxy, error) { return &SCTPProxy{ listener: listener, frontendAddr: listener.Addr().(*sctp.SCTPAddr), @@ -90,9 +79,3 @@ func (proxy *SCTPProxy) Run() { // Close stops forwarding the traffic. func (proxy *SCTPProxy) Close() { proxy.listener.Close() } - -// FrontendAddr returns the SCTP address on which the proxy is listening. -func (proxy *SCTPProxy) FrontendAddr() net.Addr { return proxy.frontendAddr } - -// BackendAddr returns the SCTP proxied address. -func (proxy *SCTPProxy) BackendAddr() net.Addr { return proxy.backendAddr } diff --git a/cmd/docker-proxy/tcp_proxy.go b/cmd/docker-proxy/tcp_proxy_linux.go similarity index 68% rename from cmd/docker-proxy/tcp_proxy.go rename to cmd/docker-proxy/tcp_proxy_linux.go index aa7711c1ea..ac4fc99340 100644 --- a/cmd/docker-proxy/tcp_proxy.go +++ b/cmd/docker-proxy/tcp_proxy_linux.go @@ -16,18 +16,7 @@ type TCPProxy struct { } // NewTCPProxy creates a new TCPProxy. 
-func NewTCPProxy(frontendAddr, backendAddr *net.TCPAddr) (*TCPProxy, error) { - // detect version of hostIP to bind only to correct version - ipVersion := ipv4 - if frontendAddr.IP.To4() == nil { - ipVersion = ipv6 - } - listener, err := net.ListenTCP("tcp"+string(ipVersion), frontendAddr) - if err != nil { - return nil, err - } - // If the port in frontendAddr was 0 then ListenTCP will have a picked - // a port to listen on, hence the call to Addr to get that actual port: +func NewTCPProxy(listener *net.TCPListener, backendAddr *net.TCPAddr) (*TCPProxy, error) { return &TCPProxy{ listener: listener, frontendAddr: listener.Addr().(*net.TCPAddr), @@ -86,9 +75,3 @@ func (proxy *TCPProxy) Run() { // Close stops forwarding the traffic. func (proxy *TCPProxy) Close() { proxy.listener.Close() } - -// FrontendAddr returns the TCP address on which the proxy is listening. -func (proxy *TCPProxy) FrontendAddr() net.Addr { return proxy.frontendAddr } - -// BackendAddr returns the TCP proxied address. -func (proxy *TCPProxy) BackendAddr() net.Addr { return proxy.backendAddr } diff --git a/cmd/docker-proxy/udp_proxy.go b/cmd/docker-proxy/udp_proxy_linux.go similarity index 87% rename from cmd/docker-proxy/udp_proxy.go rename to cmd/docker-proxy/udp_proxy_linux.go index 66bacafa4f..585cc3616e 100644 --- a/cmd/docker-proxy/udp_proxy.go +++ b/cmd/docker-proxy/udp_proxy_linux.go @@ -54,16 +54,7 @@ type UDPProxy struct { } // NewUDPProxy creates a new UDPProxy. 
-func NewUDPProxy(frontendAddr, backendAddr *net.UDPAddr) (*UDPProxy, error) { - // detect version of hostIP to bind only to correct version - ipVersion := ipv4 - if frontendAddr.IP.To4() == nil { - ipVersion = ipv6 - } - listener, err := net.ListenUDP("udp"+string(ipVersion), frontendAddr) - if err != nil { - return nil, err - } +func NewUDPProxy(listener *net.UDPConn, backendAddr *net.UDPAddr) (*UDPProxy, error) { return &UDPProxy{ listener: listener, frontendAddr: listener.LocalAddr().(*net.UDPAddr), @@ -156,12 +147,6 @@ func (proxy *UDPProxy) Close() { } } -// FrontendAddr returns the UDP address on which the proxy is listening. -func (proxy *UDPProxy) FrontendAddr() net.Addr { return proxy.frontendAddr } - -// BackendAddr returns the proxied UDP address. -func (proxy *UDPProxy) BackendAddr() net.Addr { return proxy.backendAddr } - func isClosedError(err error) bool { /* This comparison is ugly, but unfortunately, net.go doesn't export errClosing. * See: diff --git a/daemon/config/config_linux.go b/daemon/config/config_linux.go index 07322aa09a..35c463794e 100644 --- a/daemon/config/config_linux.go +++ b/daemon/config/config_linux.go @@ -34,7 +34,6 @@ const ( StockRuntimeName = "runc" // userlandProxyBinary is the name of the userland-proxy binary. - // In rootless-mode, [rootless.RootlessKitDockerProxyBinary] is used instead. userlandProxyBinary = "docker-proxy" ) @@ -234,16 +233,25 @@ func setPlatformDefaults(cfg *Config) error { cfg.CgroupNamespaceMode = string(DefaultCgroupNamespaceMode) } + var err error + cfg.BridgeConfig.UserlandProxyPath, err = lookupBinPath(userlandProxyBinary) + if err != nil { + // Log, but don't error here. This allows running a daemon with + // userland-proxy disabled (which does not require the binary + // to be present). + // + // An error is still produced by [Config.ValidatePlatformConfig] if + // userland-proxy is enabled in the configuration. 
+ // + // We log this at "debug" level, as this code is also executed + // when running "--version", and we don't want to print logs in + // that case. + log.G(context.TODO()).WithError(err).Debug("failed to lookup default userland-proxy binary") + } + if rootless.RunningWithRootlessKit() { cfg.Rootless = true - var err error - // use rootlesskit-docker-proxy for exposing the ports in RootlessKit netns to the initial namespace. - cfg.BridgeConfig.UserlandProxyPath, err = lookupBinPath(rootless.RootlessKitDockerProxyBinary) - if err != nil { - return errors.Wrapf(err, "running with RootlessKit, but %s not installed", rootless.RootlessKitDockerProxyBinary) - } - dataHome, err := homedir.GetDataHome() if err != nil { return err @@ -257,21 +265,6 @@ func setPlatformDefaults(cfg *Config) error { cfg.ExecRoot = filepath.Join(runtimeDir, "docker") cfg.Pidfile = filepath.Join(runtimeDir, "docker.pid") } else { - var err error - cfg.BridgeConfig.UserlandProxyPath, err = lookupBinPath(userlandProxyBinary) - if err != nil { - // Log, but don't error here. This allows running a daemon with - // userland-proxy disabled (which does not require the binary - // to be present). - // - // An error is still produced by [Config.ValidatePlatformConfig] if - // userland-proxy is enabled in the configuration. - // - // We log this at "debug" level, as this code is also executed - // when running "--version", and we don't want to print logs in - // that case.. 
- log.G(context.TODO()).WithError(err).Debug("failed to lookup default userland-proxy binary") - } cfg.Root = "/var/lib/docker" cfg.ExecRoot = "/var/run/docker" cfg.Pidfile = "/var/run/docker.pid" diff --git a/daemon/daemon_unix.go b/daemon/daemon_unix.go index fc53019d86..10ab139245 100644 --- a/daemon/daemon_unix.go +++ b/daemon/daemon_unix.go @@ -915,6 +915,7 @@ func driverOptions(config *config.Config) nwconfig.Option { "EnableIP6Tables": config.BridgeConfig.EnableIP6Tables, "EnableUserlandProxy": config.BridgeConfig.EnableUserlandProxy, "UserlandProxyPath": config.BridgeConfig.UserlandProxyPath, + "Rootless": config.Rootless, }, }) } diff --git a/hack/dockerfile/install/rootlesskit.installer b/hack/dockerfile/install/rootlesskit.installer index de6846ae17..27a0cf662f 100755 --- a/hack/dockerfile/install/rootlesskit.installer +++ b/hack/dockerfile/install/rootlesskit.installer @@ -27,7 +27,5 @@ install_rootlesskit_dynamic() { _install_rootlesskit() ( echo "Install rootlesskit version ${ROOTLESSKIT_VERSION}" - for f in rootlesskit rootlesskit-docker-proxy; do - GOBIN="${PREFIX}" GO111MODULE=on go install ${BUILD_MODE} -ldflags="$ROOTLESSKIT_LDFLAGS" "github.com/rootless-containers/rootlesskit/v2/cmd/${f}@${ROOTLESSKIT_VERSION}" - done + GOBIN="${PREFIX}" GO111MODULE=on go install ${BUILD_MODE} -ldflags="$ROOTLESSKIT_LDFLAGS" "github.com/rootless-containers/rootlesskit/v2/cmd/rootlesskit@${ROOTLESSKIT_VERSION}" ) diff --git a/hack/make/binary-daemon b/hack/make/binary-daemon index 3d775fee41..45f4a76544 100644 --- a/hack/make/binary-daemon +++ b/hack/make/binary-daemon @@ -14,7 +14,7 @@ copy_binaries() { return fi echo "Copying nested executables into $dir" - for file in containerd containerd-shim-runc-v2 ctr runc docker-init rootlesskit rootlesskit-docker-proxy dockerd-rootless.sh dockerd-rootless-setuptool.sh; do + for file in containerd containerd-shim-runc-v2 ctr runc docker-init rootlesskit dockerd-rootless.sh dockerd-rootless-setuptool.sh; do cp -f 
"$(command -v "$file")" "$dir/" done # vpnkit might not be available for the target platform, see vpnkit stage in diff --git a/hack/make/binary-proxy b/hack/make/binary-proxy index 9fa51f76cd..7285d962af 100644 --- a/hack/make/binary-proxy +++ b/hack/make/binary-proxy @@ -2,7 +2,8 @@ set -e -( +# docker-proxy is Linux only +[ "$(go env GOOS)" != 'linux' ] || ( export CGO_ENABLED=0 DOCKER_STATIC=1 diff --git a/hack/make/dynbinary-proxy b/hack/make/dynbinary-proxy index d732df13a4..436db2e188 100644 --- a/hack/make/dynbinary-proxy +++ b/hack/make/dynbinary-proxy @@ -2,7 +2,8 @@ set -e -( +# docker-proxy is Linux only +[ "$(go env GOOS)" != 'linux' ] || ( export LDFLAGS_STATIC='' export BUILDFLAGS=("${BUILDFLAGS[@]/netgo /}") # disable netgo, since we don't need it for a dynamic binary export BUILDFLAGS=("${BUILDFLAGS[@]/osusergo /}") # ditto for osusergo diff --git a/hack/make/install-binary b/hack/make/install-binary index 96e7cc0a39..1b20ac46d6 100644 --- a/hack/make/install-binary +++ b/hack/make/install-binary @@ -15,7 +15,6 @@ source "${MAKEDIR}/.install" install_binary "${DEST}/docker-proxy" install_binary "${DEST}/docker-init" install_binary "${DEST}/rootlesskit" - install_binary "${DEST}/rootlesskit-docker-proxy" install_binary "${DEST}/dockerd-rootless.sh" install_binary "${DEST}/dockerd-rootless-setuptool.sh" if [ -f "${DEST}/vpnkit" ]; then diff --git a/integration-cli/docker_cli_daemon_test.go b/integration-cli/docker_cli_daemon_test.go index 683dfcfbed..f7ddfe7d2e 100644 --- a/integration-cli/docker_cli_daemon_test.go +++ b/integration-cli/docker_cli_daemon_test.go @@ -704,7 +704,7 @@ func (s *DockerDaemonSuite) TestDaemonIP(c *testing.T) { out, err := d.Cmd("run", "-d", "-p", "8000:8000", "busybox", "top") assert.Assert(c, err != nil, "Running a container must fail with an invalid --ip option") - assert.Equal(c, strings.Contains(out, "Error starting userland proxy"), true) + assert.Check(c, is.Contains(out, "failed to bind host port for 192.170.1.1")) 
ifName := "dummy" createInterface(c, "dummy", ifName, ipStr) diff --git a/libnetwork/drivers/bridge/bridge_linux.go b/libnetwork/drivers/bridge/bridge_linux.go index d0ffa1a809..2e76b4d9e9 100644 --- a/libnetwork/drivers/bridge/bridge_linux.go +++ b/libnetwork/drivers/bridge/bridge_linux.go @@ -14,6 +14,7 @@ import ( "github.com/docker/docker/errdefs" "github.com/docker/docker/libnetwork/datastore" "github.com/docker/docker/libnetwork/driverapi" + "github.com/docker/docker/libnetwork/drivers/bridge/internal/rlkclient" "github.com/docker/docker/libnetwork/internal/netiputil" "github.com/docker/docker/libnetwork/iptables" "github.com/docker/docker/libnetwork/netlabel" @@ -56,6 +57,7 @@ type configuration struct { EnableIP6Tables bool EnableUserlandProxy bool UserlandProxyPath string + Rootless bool } // networkConfiguration for network specific configuration @@ -131,6 +133,16 @@ type bridgeNetwork struct { sync.Mutex } +type portDriverClient interface { + ChildHostIP(hostIP netip.Addr) netip.Addr + AddPort(ctx context.Context, proto string, hostIP, childIP netip.Addr, hostPort int) (func() error, error) +} + +// Allow unit tests to supply a dummy RootlessKit port driver client. 
+var newPortDriverClient = func(ctx context.Context) (portDriverClient, error) { + return rlkclient.NewPortDriverClient(ctx) +} + type driver struct { config configuration natChain *iptables.ChainInfo @@ -144,6 +156,7 @@ type driver struct { networks map[string]*bridgeNetwork store *datastore.Store nlh *netlink.Handle + portDriverClient portDriverClient configNetwork sync.Mutex sync.Mutex } @@ -414,6 +427,15 @@ func (n *bridgeNetwork) userlandProxyPath() string { return n.driver.userlandProxyPath() } +func (n *bridgeNetwork) getPortDriverClient() portDriverClient { + n.Lock() + defer n.Unlock() + if n.driver == nil { + return nil + } + return n.driver.getPortDriverClient() +} + func (n *bridgeNetwork) getEndpoint(eid string) (*bridgeEndpoint, error) { if eid == "" { return nil, InvalidEndpointIDError(eid) @@ -465,6 +487,7 @@ func (d *driver) configure(option map[string]interface{}) error { filterChainV6 *iptables.ChainInfo isolationChain1V6 *iptables.ChainInfo isolationChain2V6 *iptables.ChainInfo + pdc portDriverClient ) switch opt := option[netlabel.GenericData].(type) { @@ -537,6 +560,14 @@ func (d *driver) configure(option map[string]interface{}) error { } } + if config.Rootless { + var err error + pdc, err = newPortDriverClient(context.TODO()) + if err != nil { + return err + } + } + d.Lock() d.natChain = natChain d.filterChain = filterChain @@ -546,6 +577,7 @@ func (d *driver) configure(option map[string]interface{}) error { d.filterChainV6 = filterChainV6 d.isolationChain1V6 = isolationChain1V6 d.isolationChain2V6 = isolationChain2V6 + d.portDriverClient = pdc d.config = config d.Unlock() @@ -577,6 +609,12 @@ func (d *driver) userlandProxyPath() string { return "" } +func (d *driver) getPortDriverClient() portDriverClient { + d.Lock() + defer d.Unlock() + return d.portDriverClient +} + func parseNetworkGenericOptions(data interface{}) (*networkConfiguration, error) { var ( err error diff --git 
a/libnetwork/drivers/bridge/internal/rlkclient/rootlesskit_client_linux.go b/libnetwork/drivers/bridge/internal/rlkclient/rootlesskit_client_linux.go new file mode 100644 index 0000000000..0c8e4b8268 --- /dev/null +++ b/libnetwork/drivers/bridge/internal/rlkclient/rootlesskit_client_linux.go @@ -0,0 +1,156 @@ +// RootlessKit integration - if required by RootlessKit's port driver, let it know +// about port mappings as they're added and removed. +// +// This is based on / copied from rootlesskit-docker-proxy, which was previously +// installed as a proxy for docker-proxy: +// https://github.com/rootless-containers/rootlesskit/blob/4fb2e2cb80bf13eb28b7f2a4317b63406b89ad32/cmd/rootlesskit-docker-proxy/main.go + +package rlkclient + +import ( + "context" + "errors" + "fmt" + "net" + "net/netip" + "os" + "path/filepath" + "strconv" + "strings" + + "github.com/rootless-containers/rootlesskit/v2/pkg/api/client" + "github.com/rootless-containers/rootlesskit/v2/pkg/port" +) + +type PortDriverClient struct { + client client.Client + portDriverName string + protos map[string]struct{} + childIP netip.Addr +} + +func NewPortDriverClient(ctx context.Context) (*PortDriverClient, error) { + stateDir := os.Getenv("ROOTLESSKIT_STATE_DIR") + if stateDir == "" { + return nil, errors.New("$ROOTLESSKIT_STATE_DIR needs to be set") + } + socketPath := filepath.Join(stateDir, "api.sock") + c, err := client.New(socketPath) + if err != nil { + return nil, fmt.Errorf("error while connecting to RootlessKit API socket: %w", err) + } + + info, err := c.Info(ctx) + if err != nil { + return nil, fmt.Errorf("failed to call info API, probably RootlessKit binary is too old (needs to be v0.14.0 or later): %w", err) + } + + // info.PortDriver is currently nil for "none" and "implicit", but this may change in future + if info.PortDriver == nil || info.PortDriver.Driver == "none" || info.PortDriver.Driver == "implicit" { + return nil, nil + } + + pdc := &PortDriverClient{ + client: c, + portDriverName: 
info.PortDriver.Driver, + } + + if info.PortDriver.DisallowLoopbackChildIP { + // i.e., port-driver="slirp4netns" + if info.NetworkDriver.ChildIP == nil { + return nil, fmt.Errorf("RootlessKit port driver (%q) does not allow loopback child IP, but network driver (%q) has no non-loopback IP", + info.PortDriver.Driver, info.NetworkDriver.Driver) + } + childIP, ok := netip.AddrFromSlice(info.NetworkDriver.ChildIP) + if !ok { + return nil, fmt.Errorf("unable to use child IP %s from network driver (%q)", + info.NetworkDriver.ChildIP, info.NetworkDriver.Driver) + } + pdc.childIP = childIP + } + + pdc.protos = make(map[string]struct{}, len(info.PortDriver.Protos)) + for _, p := range info.PortDriver.Protos { + pdc.protos[p] = struct{}{} + } + + return pdc, nil +} + +// ChildHostIP returns the address that must be used in the child network +// namespace in place of hostIP, a host IP address. In particular, port +// mappings from host IP addresses, and DNAT rules, must use this child +// address in place of the real host address. +func (c *PortDriverClient) ChildHostIP(hostIP netip.Addr) netip.Addr { + if c == nil { + return hostIP + } + if c.childIP.IsValid() { + return c.childIP + } + if hostIP.Is6() { + return netip.IPv6Loopback() + } + return netip.MustParseAddr("127.0.0.1") +} + +// AddPort makes a request to RootlessKit asking it to set up a port +// mapping between a host IP address and a child host IP address. 
+func (c *PortDriverClient) AddPort( + ctx context.Context, + proto string, + hostIP netip.Addr, + childIP netip.Addr, + hostPort int, +) (func() error, error) { + if c == nil { + return func() error { return nil }, nil + } + // proto is like "tcp", but we need to convert it to "tcp4" or "tcp6" explicitly + // for libnetwork >= 20201216 + // + // See https://github.com/moby/libnetwork/pull/2604/files#diff-8fa48beed55dd033bf8e4f8c40b31cf69d0b2cc5d4bb53cde8594670ea6c938aR20 + // See also https://github.com/rootless-containers/rootlesskit/issues/231 + apiProto := proto + if !strings.HasSuffix(apiProto, "4") && !strings.HasSuffix(apiProto, "6") { + if hostIP.Is6() { + apiProto += "6" + } else { + apiProto += "4" + } + } + + if _, ok := c.protos[apiProto]; !ok { + // This happens when apiProto="tcp6", portDriverName="slirp4netns", + // because "slirp4netns" port driver does not support listening on IPv6 yet. + // + // Note that "slirp4netns" port driver is not used by default, + // even when network driver is set to "slirp4netns". + // + // Most users are using "builtin" port driver and will not see this warning. 
+ return nil, fmt.Errorf("protocol %q is not supported by the RootlessKit port driver %q, discarding request for %q", + proto, + c.portDriverName, + net.JoinHostPort(hostIP.String(), strconv.Itoa(hostPort))) + } + + pm := c.client.PortManager() + p := port.Spec{ + Proto: apiProto, + ParentIP: hostIP.String(), + ParentPort: hostPort, + ChildIP: childIP.String(), + ChildPort: hostPort, + } + st, err := pm.AddPort(ctx, p) + if err != nil { + return nil, fmt.Errorf("error while calling RootlessKit PortManager.AddPort(): %w", err) + } + deferFunc := func() error { + if dErr := pm.RemovePort(ctx, st.ID); dErr != nil { + return fmt.Errorf("error while calling RootlessKit PortManager.RemovePort(): %w", dErr) + } + return nil + } + return deferFunc, nil +} diff --git a/libnetwork/drivers/bridge/port_mapping_linux.go b/libnetwork/drivers/bridge/port_mapping_linux.go index e54ba233c8..95b3db45da 100644 --- a/libnetwork/drivers/bridge/port_mapping_linux.go +++ b/libnetwork/drivers/bridge/port_mapping_linux.go @@ -12,6 +12,8 @@ import ( "os" "slices" "strconv" + "syscall" + "unsafe" "github.com/containerd/log" "github.com/docker/docker/libnetwork/iptables" @@ -19,27 +21,60 @@ import ( "github.com/docker/docker/libnetwork/portallocator" "github.com/docker/docker/libnetwork/portmapper" "github.com/docker/docker/libnetwork/types" + "github.com/ishidawataru/sctp" ) type portBinding struct { types.PortBinding + // boundSocket is used to reserve a host port for the binding. If the + // userland proxy is in-use, it's passed to the proxy when the proxy is + // started, then it's closed and set to nil here. + boundSocket *os.File + // childHostIP is the host IP address, as seen from the daemon. This + // is normally the same as PortBinding.HostIP but, in rootless mode, it + // will be an address in the rootless network namespace. RootlessKit + // binds the port on the real (parent) host address and maps it to the + // same port number on the address dockerd sees in the child namespace.
+ // So, for example, docker-proxy and DNAT rules need to use the child + // namespace's host address. (PortBinding.HostIP isn't replaced by the + // child address, because it's stored as user-config and the child + // address may change if RootlessKit is configured differently.) + childHostIP net.IP + // portDriverRemove is a function that will inform the RootlessKit + // port driver about removal of a port binding, or nil. + portDriverRemove func() error + // stopProxy is a function to stop the userland proxy for this binding, + // if a proxy has been started - else nil. stopProxy func() error } +// childPortBinding is pb.PortBinding, with the host address the daemon +// will see - which, in rootless mode, will be an address in the RootlessKit's +// child namespace (see portBinding.childHostIP). +func (pb portBinding) childPortBinding() types.PortBinding { + res := pb.PortBinding + res.HostIP = pb.childHostIP + return res +} + type portBindingReq struct { types.PortBinding - disableNAT bool + childHostIP net.IP + disableNAT bool } +// Allow unit tests to supply a dummy StartProxy. +var startProxy = portmapper.StartProxy + // addPortMappings takes cfg, the configuration for port mappings, selects host -// ports when ranges are given, starts docker-proxy or its dummy to reserve -// host ports, and sets up iptables NAT/forwarding rules as necessary. If -// anything goes wrong, it will undo any work it's done and return an error. -// Otherwise, the returned slice of portBinding has an entry per address -// family (if cfg describes a mapping for 'any' host address, it's expanded -// into mappings for IPv4 and IPv6, because that's how the mapping is presented -// in 'inspect'). HostPort and HostPortEnd in each returned portBinding are set -// to the selected and reserved port. +// ports when ranges are given, binds host ports to check they're available and +// reserve them, starts docker-proxy if required, and sets up iptables +// NAT/forwarding rules as necessary. 
If anything goes wrong, it will undo any +// work it's done and return an error. Otherwise, the returned slice of +// portBinding has an entry per address family (if cfg describes a mapping for +// 'any' host address, it's expanded into mappings for IPv4 and IPv6, because +// that's how the mapping is presented in 'inspect'). HostPort and HostPortEnd in +// each returned portBinding are set to the selected and reserved port. func (n *bridgeNetwork) addPortMappings( epAddrV4, epAddrV6 *net.IPNet, cfg []types.PortBinding, @@ -79,6 +114,7 @@ func (n *bridgeNetwork) addPortMappings( sortAndNormPBs(sortedCfg) proxyPath := n.userlandProxyPath() + pdc := n.getPortDriverClient() // toBind accumulates port bindings that should be allocated the same host port // (if required by NAT config). If the host address is unspecified, and defHostIP @@ -91,7 +127,7 @@ func (n *bridgeNetwork) addPortMappings( // bindings to collect, they're applied and toBind is reset. var toBind []portBindingReq for i, c := range sortedCfg { - if bindingIPv4, ok := configurePortBindingIPv4(disableNAT4, c, containerIPv4, defHostIP); ok { + if bindingIPv4, ok := configurePortBindingIPv4(pdc, disableNAT4, c, containerIPv4, defHostIP); ok { toBind = append(toBind, bindingIPv4) } @@ -107,7 +143,7 @@ func (n *bridgeNetwork) addPortMappings( if proxyPath != "" && (containerIPv6 == nil) { containerIP = containerIPv4 } - if bindingIPv6, ok := configurePortBindingIPv6(disableNAT6, c, containerIP, defHostIP); ok { + if bindingIPv6, ok := configurePortBindingIPv6(pdc, disableNAT6, c, containerIP, defHostIP); ok { toBind = append(toBind, bindingIPv6) } @@ -117,24 +153,86 @@ func (n *bridgeNetwork) addPortMappings( continue } - // Allocate a host port, and reserve it by starting docker-proxy for each host - // address in toBind. + // Allocate and bind a host port. newB, err := bindHostPorts(toBind, proxyPath) if err != nil { return nil, err } bindings = append(bindings, newB...) 
- // Reset the collection of bindings now they're bound. + // Reset toBind now the ports are bound. toBind = toBind[:0] } - for _, b := range bindings { - if err := n.setPerPortIptables(b, true); err != nil { + for i := range bindings { + if pdc != nil && bindings[i].HostPort != 0 { + var err error + b := &bindings[i] + hip, ok := netip.AddrFromSlice(b.HostIP) + if !ok { + return nil, fmt.Errorf("invalid host IP address in %s", b) + } + chip, ok := netip.AddrFromSlice(b.childHostIP) + if !ok { + return nil, fmt.Errorf("invalid child host IP address %s in %s", b.childHostIP, b) + } + b.portDriverRemove, err = pdc.AddPort(context.TODO(), b.Proto.String(), hip, chip, int(b.HostPort)) + if err != nil { + return nil, err + } + } + if err := n.setPerPortIptables(bindings[i], true); err != nil { return nil, err } } + // Now the iptables rules are set up, it's safe to start the userland proxy. + // (If it was started before the iptables rules were created, it may have + // accepted a connection, then become unreachable due to NAT rules sending + // packets directly to the container.) + // If not starting the proxy, nothing will ever accept a connection on the + // socket. But, listen anyway so that the binding shows up in "netstat -at". 
+ somaxconn := 0 + if proxyPath != "" { + somaxconn = -1 // silently capped to "/proc/sys/net/core/somaxconn" + } + for i := range bindings { + if bindings[i].boundSocket == nil { + continue + } + if bindings[i].Proto == types.TCP { + rc, err := bindings[i].boundSocket.SyscallConn() + if err != nil { + return nil, fmt.Errorf("raw conn not available on TCP socket: %w", err) + } + if errC := rc.Control(func(fd uintptr) { + err = syscall.Listen(int(fd), somaxconn) + }); errC != nil { + return nil, fmt.Errorf("failed to Control TCP socket: %w", errC) + } + if err != nil { + return nil, fmt.Errorf("failed to listen on TCP socket: %w", err) + } + } + if proxyPath != "" { + var err error + bindings[i].stopProxy, err = startProxy( + bindings[i].childPortBinding(), proxyPath, bindings[i].boundSocket, + ) + if err != nil { + return nil, fmt.Errorf("failed to start userland proxy for port mapping %s: %w", + bindings[i].PortBinding, err) + } + if err := bindings[i].boundSocket.Close(); err != nil { + log.G(context.TODO()).WithFields(log.Fields{ + "error": err, + "mapping": bindings[i].PortBinding, + }).Warnf("failed to close proxy socket") + } + bindings[i].boundSocket = nil + } + } + return bindings, nil } @@ -263,7 +361,7 @@ func needSamePort(a, b types.PortBinding) bool { // configurePortBindingIPv4 returns a new port binding with the HostIP field populated // if a binding is required, else nil. -func configurePortBindingIPv4(disableNAT bool, bnd types.PortBinding, containerIPv4, defHostIP net.IP) (portBindingReq, bool) { +func configurePortBindingIPv4(pdc portDriverClient, disableNAT bool, bnd types.PortBinding, containerIPv4, defHostIP net.IP) (portBindingReq, bool) { if len(containerIPv4) == 0 { return portBindingReq{}, false } @@ -282,15 +380,15 @@ func configurePortBindingIPv4(disableNAT bool, bnd types.PortBinding, containerI // Unmap the addresses if they're IPv4-mapped IPv6.
bnd.HostIP = bnd.HostIP.To4() bnd.IP = containerIPv4.To4() - return portBindingReq{ + return setChildHostIP(pdc, portBindingReq{ PortBinding: bnd, disableNAT: disableNAT, - }, true + }), true } // configurePortBindingIPv6 returns a new port binding with the HostIP field populated // if a binding is required, else nil. -func configurePortBindingIPv6(disableNAT bool, bnd types.PortBinding, containerIP, defHostIP net.IP) (portBindingReq, bool) { +func configurePortBindingIPv6(pdc portDriverClient, disableNAT bool, bnd types.PortBinding, containerIP, defHostIP net.IP) (portBindingReq, bool) { if containerIP == nil { return portBindingReq{}, false } @@ -317,13 +415,23 @@ func configurePortBindingIPv6(disableNAT bool, bnd types.PortBinding, containerI } } bnd.IP = containerIP - return portBindingReq{ + return setChildHostIP(pdc, portBindingReq{ PortBinding: bnd, disableNAT: disableNAT, - }, true + }), true } -// bindHostPorts allocates ports and starts docker-proxy for the given cfg. The +func setChildHostIP(pdc portDriverClient, req portBindingReq) portBindingReq { + if pdc == nil { + req.childHostIP = req.HostIP + return req + } + hip, _ := netip.AddrFromSlice(req.HostIP) + req.childHostIP = pdc.ChildHostIP(hip).AsSlice() + return req +} + +// bindHostPorts allocates and binds host ports for the given cfg. The // caller is responsible for ensuring that all entries in cfg map the same proto, // container port, and host port range (their host addresses must differ). func bindHostPorts(cfg []portBindingReq, proxyPath string) ([]portBinding, error) { @@ -358,17 +466,13 @@ func bindHostPorts(cfg []portBindingReq, proxyPath string) ([]portBinding, error return nil, err } -// Allow unit tests to supply a dummy StartProxy. -var startProxy = portmapper.StartProxy - // attemptBindHostPorts allocates host ports for each port mapping that requires -// one, and reserves those ports by starting docker-proxy. +// one, and reserves those ports by binding them. 
// // If the allocator doesn't have an available port in the required range, or the -// docker-proxy process doesn't start (perhaps because another process has -// already bound the port), all resources are released and an error is returned. -// When ports are successfully reserved, a portBinding is returned for each -// mapping. +// port can't be bound (perhaps because another process has already bound it), +// all resources are released and an error is returned. When ports are +// successfully reserved, a portBinding is returned for each mapping. // // If NAT is disabled for any of the bindings, no host port reservation is // needed. These bindings are included in results, as the container port itself @@ -385,7 +489,7 @@ func attemptBindHostPorts( addrs := make([]net.IP, 0, len(cfg)) for _, c := range cfg { if !c.disableNAT { - addrs = append(addrs, c.HostIP) + addrs = append(addrs, c.childHostIP) } } @@ -405,30 +509,177 @@ func attemptBindHostPorts( } res := make([]portBinding, 0, len(cfg)) - for _, c := range cfg { - pb := portBinding{PortBinding: c.GetCopy()} - if c.disableNAT { - pb.HostPort = 0 - } else { - pb.stopProxy, err = startProxy(c.Proto.String(), c.HostIP, port, c.IP, int(c.Port), proxyPath) - if err != nil { - return nil, fmt.Errorf("failed to bind port %s:%d/%s: %w", c.HostIP, port, c.Proto, err) - } - defer func() { - if retErr != nil { - if err := pb.stopProxy(); err != nil { - log.G(context.TODO()).Warnf("Failed to stop userland proxy for port mapping %s: %s", pb, err) + defer func() { + if retErr != nil { + for _, pb := range res { + if pb.boundSocket != nil { + if err := pb.boundSocket.Close(); err != nil { + log.G(context.TODO()).Warnf("Failed to close port binding for %s: %s", pb, err) } } - }() - pb.HostPort = uint16(port) + // TODO(robmry) - this is only needed because the userland proxy may have + // been started for SCTP. 
If a bound socket is passed to the proxy after + // iptables rules have been configured (as it is for TCP/UDP), remove this. + if pb.stopProxy != nil { + if err := pb.stopProxy(); err != nil { + log.G(context.TODO()).Warnf("Failed to stop proxy for %s: %s", pb, err) + } + } + } + } + }() + + for _, c := range cfg { + var pb portBinding + if c.disableNAT { + pb = portBinding{PortBinding: c.GetCopy()} + pb.HostPort = 0 + pb.HostPortEnd = 0 + } else { + switch proto { + case "tcp": + pb, err = bindTCPOrUDP(c, port, syscall.SOCK_STREAM, syscall.IPPROTO_TCP) + case "udp": + pb, err = bindTCPOrUDP(c, port, syscall.SOCK_DGRAM, syscall.IPPROTO_UDP) + case "sctp": + if proxyPath == "" { + pb, err = bindSCTP(c, port) + } else { + // TODO(robmry) - it's not currently possible to pass a bound SCTP port + // to the userland proxy, because the proxy is not able to convert the + // file descriptor into an sctp.SCTPListener (fd is an unexported member + // of the struct, and ListenSCTP is the only constructor). + // So, it is possible for the proxy to start listening and accept + // connections before iptables rules are created that would bypass + // the proxy for external connections. + // Remove this and pb.stopProxy() from the cleanup function above if + // this is fixed. 
+ pb, err = startSCTPProxy(c, port, proxyPath) + } + default: + return nil, fmt.Errorf("Unknown addr type: %s", proto) + } + if err != nil { + return nil, err + } } - pb.HostPortEnd = pb.HostPort res = append(res, pb) } return res, nil } +func bindTCPOrUDP(cfg portBindingReq, port, typ, proto int) (_ portBinding, retErr error) { + pb := portBinding{PortBinding: cfg.PortBinding.GetCopy()} + pb.HostPort = uint16(port) + pb.HostPortEnd = pb.HostPort + pb.childHostIP = cfg.childHostIP + + var domain int + var sa syscall.Sockaddr + if hip := cfg.childHostIP.To4(); hip != nil { + domain = syscall.AF_INET + sa4 := syscall.SockaddrInet4{Port: port} + copy(sa4.Addr[:], hip) + sa = &sa4 + } else { + domain = syscall.AF_INET6 + sa6 := syscall.SockaddrInet6{Port: port} + copy(sa6.Addr[:], cfg.childHostIP) + sa = &sa6 + } + + sd, err := syscall.Socket(domain, typ|syscall.SOCK_CLOEXEC, proto) + if err != nil { + return portBinding{}, fmt.Errorf("failed to create socket for userland proxy for %s: %w", cfg, err) + } + defer func() { + if retErr != nil { + syscall.Close(sd) + } + }() + + if domain == syscall.AF_INET6 { + syscall.SetsockoptInt(sd, syscall.IPPROTO_IPV6, syscall.IPV6_V6ONLY, 1) + } + if err := syscall.Bind(sd, sa); err != nil { + if cfg.HostPort == cfg.HostPortEnd { + return portBinding{}, fmt.Errorf("failed to bind host port for %s: %w", cfg, err) + } + return portBinding{}, fmt.Errorf("failed to bind host port %d for %s: %w", port, cfg, err) + } + + pb.boundSocket = os.NewFile(uintptr(sd), "listener") + if pb.boundSocket == nil { + return portBinding{}, fmt.Errorf("failed to convert socket for userland proxy for %s", cfg) + } + return pb, nil +} + +// bindSCTP is based on sctp.ListenSCTP. The socket is created and bound, but +// does not start listening. 
+func bindSCTP(cfg portBindingReq, port int) (_ portBinding, retErr error) { + pb := portBinding{PortBinding: cfg.GetCopy()} + pb.HostPort = uint16(port) + pb.HostPortEnd = pb.HostPort + pb.childHostIP = cfg.childHostIP + + domain := syscall.AF_INET + if cfg.childHostIP.To4() == nil { + domain = syscall.AF_INET6 + } + + sd, err := syscall.Socket(domain, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, syscall.IPPROTO_SCTP) + if err != nil { + return portBinding{}, fmt.Errorf("failed to create socket for userland proxy for %s: %w", cfg, err) + } + defer func() { + if retErr != nil { + syscall.Close(sd) + } + }() + + if domain == syscall.AF_INET6 { + syscall.SetsockoptInt(sd, syscall.IPPROTO_IPV6, syscall.IPV6_V6ONLY, 1) + } + + options := sctp.InitMsg{NumOstreams: sctp.SCTP_MAX_STREAM} + if _, _, errno := syscall.Syscall6(syscall.SYS_SETSOCKOPT, + uintptr(sd), + sctp.SOL_SCTP, + sctp.SCTP_INITMSG, + uintptr(unsafe.Pointer(&options)), + unsafe.Sizeof(options), + 0); errno != 0 { + return portBinding{}, errno + } + + if err := sctp.SCTPBind(sd, + &sctp.SCTPAddr{IPAddrs: []net.IPAddr{{IP: cfg.childHostIP}}, Port: port}, + sctp.SCTP_BINDX_ADD_ADDR); err != nil { + return portBinding{}, fmt.Errorf("failed to bind socket for userland proxy for %s: %w", cfg, err) + } + + pb.boundSocket = os.NewFile(uintptr(sd), "listener") + if pb.boundSocket == nil { + return portBinding{}, fmt.Errorf("failed to convert socket for userland proxy for %s", cfg) + } + return pb, nil +} + +func startSCTPProxy(cfg portBindingReq, port int, proxyPath string) (_ portBinding, retErr error) { + pb := portBinding{PortBinding: cfg.GetCopy()} + pb.HostPort = uint16(port) + pb.HostPortEnd = pb.HostPort + pb.childHostIP = cfg.childHostIP + + var err error + pb.stopProxy, err = startProxy(pb.childPortBinding(), proxyPath, nil) + if err != nil { + return portBinding{}, err + } + return pb, nil +} + // releasePorts attempts to release all port bindings, does not stop on failure func (n
*bridgeNetwork) releasePorts(ep *bridgeEndpoint) error { n.Lock() @@ -442,11 +693,20 @@ func (n *bridgeNetwork) releasePorts(ep *bridgeEndpoint) error { func (n *bridgeNetwork) releasePortBindings(pbs []portBinding) error { var errs []error for _, pb := range pbs { - var errP error + var errS, errPD, errP error + if pb.boundSocket != nil { + errS = pb.boundSocket.Close() + if errS != nil { + errS = fmt.Errorf("failed to close socket for port mapping %s: %w", pb, errS) + } + } + if pb.portDriverRemove != nil { + errPD = pb.portDriverRemove() + } if pb.stopProxy != nil { errP = pb.stopProxy() if errP != nil { - errP = fmt.Errorf("failed to stop docker-proxy for port mapping %s: %w", pb, errP) + errP = fmt.Errorf("failed to stop userland proxy for port mapping %s: %w", pb, errP) } } errN := n.setPerPortIptables(pb, false) @@ -454,9 +714,9 @@ func (n *bridgeNetwork) releasePortBindings(pbs []portBinding) error { errN = fmt.Errorf("failed to remove iptables rules for port mapping %s: %w", pb, errN) } if pb.HostPort > 0 { - portallocator.Get().ReleasePort(pb.HostIP, pb.Proto.String(), int(pb.HostPort)) + portallocator.Get().ReleasePort(pb.childHostIP, pb.Proto.String(), int(pb.HostPort)) } - errs = append(errs, errP, errN) + errs = append(errs, errS, errPD, errP, errN) } return errors.Join(errs...) } @@ -498,8 +758,8 @@ func setPerPortNAT(b portBinding, ipv iptables.IPVersion, proxyPath string, brid // want "0.0.0.0/0". "0/0" is correctly interpreted as "any // value" by both iptables and ip6tables. 
hostIP := "0/0" - if !b.HostIP.IsUnspecified() { - hostIP = b.HostIP.String() + if !b.childHostIP.IsUnspecified() { + hostIP = b.childHostIP.String() } args := []string{ "-p", b.Proto.String(), diff --git a/libnetwork/drivers/bridge/port_mapping_linux_test.go b/libnetwork/drivers/bridge/port_mapping_linux_test.go index e12e7d5c41..2bc21e7eb0 100644 --- a/libnetwork/drivers/bridge/port_mapping_linux_test.go +++ b/libnetwork/drivers/bridge/port_mapping_linux_test.go @@ -5,7 +5,11 @@ import ( "errors" "fmt" "net" + "net/netip" + "os" + "strconv" "strings" + "syscall" "testing" "github.com/docker/docker/internal/testutils/netnsutils" @@ -14,6 +18,7 @@ import ( "github.com/docker/docker/libnetwork/ns" "github.com/docker/docker/libnetwork/portallocator" "github.com/docker/docker/libnetwork/types" + "github.com/vishvananda/netlink" "gotest.tools/v3/assert" is "gotest.tools/v3/assert/cmp" ) @@ -420,6 +425,8 @@ func TestAddPortMappings(t *testing.T) { defHostIP net.IP proxyPath string busyPortIPv4 int + rootless bool + hostAddrs []string expErr string expPBs []types.PortBinding @@ -438,6 +445,7 @@ func TestAddPortMappings(t *testing.T) { {Proto: types.TCP, Port: 22}, {Proto: types.TCP, Port: 80}, }, + proxyPath: "/dummy/path/to/proxy", expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP4.IP, Port: 22, HostIP: net.IPv4zero, HostPort: firstEphemPort}, {Proto: types.TCP, IP: ctrIP6.IP, Port: 22, HostIP: net.IPv6zero, HostPort: firstEphemPort}, @@ -446,22 +454,24 @@ func TestAddPortMappings(t *testing.T) { }, }, { - name: "specific host port", - epAddrV4: ctrIP4, - epAddrV6: ctrIP6, - cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080}}, + name: "specific host port", + epAddrV4: ctrIP4, + epAddrV6: ctrIP6, + cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080}}, + proxyPath: "/dummy/path/to/proxy", expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8080, HostPortEnd: 8080}, {Proto: 
types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: net.IPv6zero, HostPort: 8080, HostPortEnd: 8080}, }, }, { - name: "nat explicitly enabled", - epAddrV4: ctrIP4, - epAddrV6: ctrIP6, - cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080}}, - gwMode4: gwModeNAT, - gwMode6: gwModeNAT, + name: "nat explicitly enabled", + epAddrV4: ctrIP4, + epAddrV6: ctrIP6, + cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080}}, + gwMode4: gwModeNAT, + gwMode6: gwModeNAT, + proxyPath: "/dummy/path/to/proxy", expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8080, HostPortEnd: 8080}, {Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: net.IPv6zero, HostPort: 8080, HostPortEnd: 8080}, @@ -472,24 +482,27 @@ func TestAddPortMappings(t *testing.T) { epAddrV4: ctrIP4, epAddrV6: ctrIP6, cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080}}, + proxyPath: "/dummy/path/to/proxy", busyPortIPv4: 8080, - expErr: "failed to bind port 0.0.0.0:8080/tcp: busy port", + expErr: "failed to bind host port for 0.0.0.0:8080:172.19.0.2:80/tcp: address already in use", }, { - name: "ipv4 mapped container address with specific host port", - epAddrV4: ctrIP4Mapped, - epAddrV6: ctrIP6, - cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080}}, + name: "ipv4 mapped container address with specific host port", + epAddrV4: ctrIP4Mapped, + epAddrV6: ctrIP6, + cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080}}, + proxyPath: "/dummy/path/to/proxy", expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8080, HostPortEnd: 8080}, {Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: net.IPv6zero, HostPort: 8080, HostPortEnd: 8080}, }, }, { - name: "ipv4 mapped host address with specific host port", - epAddrV4: ctrIP4, - epAddrV6: ctrIP6, - cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostIP: newIPNet(t, "::ffff:127.0.0.1/128").IP, 
HostPort: 8080}}, + name: "ipv4 mapped host address with specific host port", + epAddrV4: ctrIP4, + epAddrV6: ctrIP6, + cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostIP: newIPNet(t, "::ffff:127.0.0.1/128").IP, HostPort: 8080}}, + proxyPath: "/dummy/path/to/proxy", expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: newIPNet(t, "127.0.0.1/32").IP, HostPort: 8080, HostPortEnd: 8080}, }, @@ -499,6 +512,7 @@ func TestAddPortMappings(t *testing.T) { epAddrV4: ctrIP4, epAddrV6: ctrIP6, cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080, HostPortEnd: 8081}}, + proxyPath: "/dummy/path/to/proxy", busyPortIPv4: 8080, expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8081, HostPortEnd: 8081}, @@ -513,6 +527,7 @@ func TestAddPortMappings(t *testing.T) { {Proto: types.TCP, Port: 80, HostIP: net.IPv4zero, HostPort: 8080, HostPortEnd: 8081}, {Proto: types.TCP, Port: 80, HostIP: net.IPv6zero, HostPort: 8080, HostPortEnd: 8081}, }, + proxyPath: "/dummy/path/to/proxy", busyPortIPv4: 8080, expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8081}, @@ -531,6 +546,7 @@ func TestAddPortMappings(t *testing.T) { {Proto: types.UDP, Port: 81, HostPort: 8080, HostPortEnd: 8083}, {Proto: types.UDP, Port: 82, HostPort: 8080, HostPortEnd: 8083}, }, + proxyPath: "/dummy/path/to/proxy", busyPortIPv4: 8082, expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8080, HostPortEnd: 8080}, @@ -556,8 +572,9 @@ func TestAddPortMappings(t *testing.T) { {Proto: types.TCP, Port: 81, HostPort: 8080, HostPortEnd: 8082}, {Proto: types.TCP, Port: 82, HostPort: 8080, HostPortEnd: 8082}, }, + proxyPath: "/dummy/path/to/proxy", busyPortIPv4: 8081, - expErr: "failed to bind port 0.0.0.0:8081/tcp: busy port", + expErr: "failed to bind host port 8081 for 0.0.0.0:8080-8082:172.19.0.2:82/tcp", }, { name: 
"map host ipv6 to ipv4 container with proxy", @@ -588,9 +605,10 @@ func TestAddPortMappings(t *testing.T) { epAddrV4: ctrIP4, epAddrV6: ctrIP6, cfg: []types.PortBinding{{Proto: types.TCP, Port: 80}}, - defHostIP: newIPNet(t, "10.11.12.13/24").IP, + proxyPath: "/dummy/path/to/proxy", + defHostIP: newIPNet(t, "127.0.0.1/8").IP, expPBs: []types.PortBinding{ - {Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: newIPNet(t, "10.11.12.13/24").IP, HostPort: firstEphemPort}, + {Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: newIPNet(t, "127.0.0.1/8").IP, HostPort: firstEphemPort}, }, }, { @@ -598,9 +616,10 @@ func TestAddPortMappings(t *testing.T) { epAddrV4: ctrIP4, epAddrV6: ctrIP6, cfg: []types.PortBinding{{Proto: types.TCP, Port: 80}}, - defHostIP: newIPNet(t, "::ffff:10.11.12.13/120").IP, + proxyPath: "/dummy/path/to/proxy", + defHostIP: newIPNet(t, "::ffff:127.0.0.1/72").IP, expPBs: []types.PortBinding{ - {Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: newIPNet(t, "10.11.12.13/24").IP, HostPort: firstEphemPort}, + {Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: newIPNet(t, "127.0.0.1/8").IP, HostPort: firstEphemPort}, }, }, { @@ -608,6 +627,7 @@ func TestAddPortMappings(t *testing.T) { epAddrV4: ctrIP4, epAddrV6: ctrIP6, cfg: []types.PortBinding{{Proto: types.TCP, Port: 80}}, + proxyPath: "/dummy/path/to/proxy", defHostIP: net.IPv6zero, expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: net.IPv6zero, HostPort: firstEphemPort}, @@ -618,6 +638,7 @@ func TestAddPortMappings(t *testing.T) { epAddrV4: ctrIP4, epAddrV6: ctrIP6, cfg: []types.PortBinding{{Proto: types.TCP, Port: 80}}, + proxyPath: "/dummy/path/to/proxy", defHostIP: newIPNet(t, "::1/128").IP, expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: newIPNet(t, "::1/128").IP, HostPort: firstEphemPort}, @@ -631,16 +652,17 @@ func TestAddPortMappings(t *testing.T) { {Proto: types.TCP, Port: 80, HostPort: 8080}, {Proto: types.TCP, Port: 22, 
HostPort: 2222}, }, + proxyPath: "/dummy/path/to/proxy", expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP4.IP, Port: 22, HostIP: net.IPv4zero, HostPort: 2222}, {Proto: types.TCP, IP: ctrIP6.IP, Port: 22, HostIP: net.IPv6zero, HostPort: 2222}, {Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8080}, {Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: net.IPv6zero, HostPort: 8080}, }, - expReleaseErr: "failed to stop docker-proxy for port mapping 0.0.0.0:2222:172.19.0.2:22/tcp: can't stop now\n" + - "failed to stop docker-proxy for port mapping [::]:2222:[fdf8:b88e:bb5c:3483::2]:22/tcp: can't stop now\n" + - "failed to stop docker-proxy for port mapping 0.0.0.0:8080:172.19.0.2:80/tcp: can't stop now\n" + - "failed to stop docker-proxy for port mapping [::]:8080:[fdf8:b88e:bb5c:3483::2]:80/tcp: can't stop now", + expReleaseErr: "failed to stop userland proxy for port mapping 0.0.0.0:2222:172.19.0.2:22/tcp: can't stop now\n" + + "failed to stop userland proxy for port mapping [::]:2222:[fdf8:b88e:bb5c:3483::2]:22/tcp: can't stop now\n" + + "failed to stop userland proxy for port mapping 0.0.0.0:8080:172.19.0.2:80/tcp: can't stop now\n" + + "failed to stop userland proxy for port mapping [::]:8080:[fdf8:b88e:bb5c:3483::2]:80/tcp: can't stop now", }, { name: "disable nat6", @@ -650,7 +672,8 @@ func TestAddPortMappings(t *testing.T) { {Proto: types.TCP, Port: 22}, {Proto: types.TCP, Port: 80}, }, - gwMode6: gwModeRouted, + proxyPath: "/dummy/path/to/proxy", + gwMode6: gwModeRouted, expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP4.IP, Port: 22, HostIP: net.IPv4zero, HostPort: firstEphemPort}, {Proto: types.TCP, IP: ctrIP6.IP, Port: 22, HostIP: net.IPv6zero}, @@ -666,7 +689,8 @@ func TestAddPortMappings(t *testing.T) { {Proto: types.TCP, Port: 22}, {Proto: types.TCP, Port: 80}, }, - gwMode4: gwModeRouted, + proxyPath: "/dummy/path/to/proxy", + gwMode4: gwModeRouted, expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP4.IP, 
Port: 22, HostIP: net.IPv4zero}, {Proto: types.TCP, IP: ctrIP6.IP, Port: 22, HostIP: net.IPv6zero, HostPort: firstEphemPort}, @@ -682,8 +706,9 @@ func TestAddPortMappings(t *testing.T) { {Proto: types.TCP, Port: 22}, {Proto: types.TCP, Port: 80}, }, - gwMode4: gwModeRouted, - gwMode6: gwModeRouted, + proxyPath: "/dummy/path/to/proxy", + gwMode4: gwModeRouted, + gwMode6: gwModeRouted, expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP4.IP, Port: 22, HostIP: net.IPv4zero}, {Proto: types.TCP, IP: ctrIP6.IP, Port: 22, HostIP: net.IPv6zero}, @@ -692,9 +717,10 @@ func TestAddPortMappings(t *testing.T) { }, }, { - name: "same ports for matching mappings with different host addresses", - epAddrV4: ctrIP4, - epAddrV6: ctrIP6, + name: "same ports for matching mappings with different host addresses", + epAddrV4: ctrIP4, + epAddrV6: ctrIP6, + hostAddrs: []string{"192.168.1.2/24", "fd0c:9167:5b11::2/64", "fd0c:9167:5b11::3/64"}, cfg: []types.PortBinding{ // These two should both get the same host port. 
{Proto: types.TCP, Port: 80, HostIP: newIPNet(t, "fd0c:9167:5b11::2/64").IP}, @@ -708,6 +734,7 @@ func TestAddPortMappings(t *testing.T) { {Proto: types.TCP, Port: 12345, HostPort: 12345, HostPortEnd: 12346}, {Proto: types.TCP, Port: 12345, HostPort: 12345}, }, + proxyPath: "/dummy/path/to/proxy", expPBs: []types.PortBinding{ {Proto: types.TCP, IP: ctrIP4.IP, Port: 12345, HostIP: net.IPv4zero, HostPort: 12345}, {Proto: types.TCP, IP: ctrIP6.IP, Port: 12345, HostIP: net.IPv6zero, HostPort: 12345}, @@ -720,6 +747,39 @@ func TestAddPortMappings(t *testing.T) { {Proto: types.TCP, IP: ctrIP6.IP, Port: 12345, HostIP: net.IPv6zero, HostPort: 12346}, }, }, + { + name: "rootless", + epAddrV4: ctrIP4, + epAddrV6: ctrIP6, + cfg: []types.PortBinding{ + {Proto: types.TCP, Port: 22}, + {Proto: types.TCP, Port: 80}, + }, + proxyPath: "/dummy/path/to/proxy", + rootless: true, + expPBs: []types.PortBinding{ + {Proto: types.TCP, IP: ctrIP4.IP, Port: 22, HostIP: net.IPv4zero, HostPort: firstEphemPort}, + {Proto: types.TCP, IP: ctrIP6.IP, Port: 22, HostIP: net.IPv6zero, HostPort: firstEphemPort}, + {Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: firstEphemPort + 1}, + {Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: net.IPv6zero, HostPort: firstEphemPort + 1}, + }, + }, + { + name: "rootless without proxy", + epAddrV4: ctrIP4, + epAddrV6: ctrIP6, + cfg: []types.PortBinding{ + {Proto: types.TCP, Port: 22}, + {Proto: types.TCP, Port: 80}, + }, + rootless: true, + expPBs: []types.PortBinding{ + {Proto: types.TCP, IP: ctrIP4.IP, Port: 22, HostIP: net.IPv4zero, HostPort: firstEphemPort}, + {Proto: types.TCP, IP: ctrIP6.IP, Port: 22, HostIP: net.IPv6zero, HostPort: firstEphemPort}, + {Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: firstEphemPort + 1}, + {Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: net.IPv6zero, HostPort: firstEphemPort + 1}, + }, + }, } for _, tc := range testcases { @@ -731,15 +791,14 @@ func 
TestAddPortMappings(t *testing.T) { origStartProxy := startProxy defer func() { startProxy = origStartProxy }() proxies := map[proxyCall]bool{} // proxy -> is not stopped - startProxy = func(proto string, - hostIP net.IP, hostPort int, - containerIP net.IP, containerPort int, + startProxy = func(pb types.PortBinding, proxyPath string, + listenSock *os.File, ) (stop func() error, retErr error) { - if tc.busyPortIPv4 > 0 && tc.busyPortIPv4 == hostPort && hostIP.To4() != nil { + if tc.busyPortIPv4 > 0 && tc.busyPortIPv4 == int(pb.HostPort) && pb.HostIP.To4() != nil { return nil, errors.New("busy port") } - c := newProxyCall(proto, hostIP, hostPort, containerIP, containerPort, proxyPath) + c := newProxyCall(pb.Proto.String(), pb.HostIP, int(pb.HostPort), pb.IP, int(pb.Port), proxyPath) if _, ok := proxies[c]; ok { return nil, fmt.Errorf("duplicate proxy: %#v", c) } @@ -756,6 +815,35 @@ func TestAddPortMappings(t *testing.T) { }, nil } + // Mock the RootlessKit port driver. + origNewPortDriverClient := newPortDriverClient + defer func() { newPortDriverClient = origNewPortDriverClient }() + newPortDriverClient = func(ctx context.Context) (portDriverClient, error) { + return newMockPortDriverClient(ctx) + } + + if len(tc.hostAddrs) > 0 { + dummyLink := &netlink.Bridge{LinkAttrs: netlink.LinkAttrs{Name: "br-dummy"}} + err := netlink.LinkAdd(dummyLink) + assert.NilError(t, err) + for _, addr := range tc.hostAddrs { + // Add with NODAD so that the address is available immediately. 
+ err := netlink.AddrAdd(dummyLink, + &netlink.Addr{IPNet: newIPNet(t, addr), Flags: syscall.IFA_F_NODAD}) + assert.NilError(t, err) + } + err = netlink.LinkSetUp(dummyLink) + assert.NilError(t, err) + } + if tc.busyPortIPv4 != 0 { + tl, err := net.ListenTCP("tcp4", &net.TCPAddr{IP: net.IPv4zero, Port: tc.busyPortIPv4}) + assert.NilError(t, err) + defer tl.Close() + ul, err := net.ListenUDP("udp4", &net.UDPAddr{IP: net.IPv4zero, Port: tc.busyPortIPv4}) + assert.NilError(t, err) + defer ul.Close() + } + n := &bridgeNetwork{ config: &networkConfiguration{ BridgeName: "dummybridge", @@ -771,11 +859,23 @@ func TestAddPortMappings(t *testing.T) { EnableIP6Tables: true, EnableUserlandProxy: tc.proxyPath != "", UserlandProxyPath: tc.proxyPath, + Rootless: tc.rootless, }, } err := n.driver.configure(genericOption) assert.NilError(t, err) + assert.Check(t, is.Equal(n.driver.portDriverClient == nil, !tc.rootless)) + expChildIP := func(hostIP net.IP) net.IP { + if !tc.rootless { + return hostIP + } + if hostIP.To4() == nil { + return net.ParseIP("::1") + } + return net.ParseIP("127.0.0.1") + } + err = portallocator.Get().ReleaseAll() assert.NilError(t, err) @@ -792,20 +892,21 @@ func TestAddPortMappings(t *testing.T) { var disableNAT bool var addrM, addrD, addrH string var ipv iptables.IPVersion + hip := expChildIP(expPB.HostIP) if expPB.IP.To4() == nil { disableNAT = tc.gwMode6.natDisabled() ipv = iptables.IPv6 addrM = ctrIP6.IP.String() + "/128" addrD = "[" + ctrIP6.IP.String() + "]" - addrH = expPB.HostIP.String() + "/128" + addrH = hip.String() + "/128" } else { disableNAT = tc.gwMode4.natDisabled() ipv = iptables.IPv4 addrM = ctrIP4.IP.String() + "/32" addrD = ctrIP4.IP.String() - addrH = expPB.HostIP.String() + "/32" + addrH = hip.String() + "/32" } - if expPB.HostIP.IsUnspecified() { + if hip.IsUnspecified() { addrH = "0/0" } @@ -850,18 +951,41 @@ func TestAddPortMappings(t *testing.T) { } // Check a docker-proxy was started and stopped for each expected port binding. 
- expProxies := map[proxyCall]bool{} - for _, expPB := range tc.expPBs { - is4 := expPB.HostIP.To4() != nil - if (is4 && tc.gwMode4.natDisabled()) || (!is4 && tc.gwMode6.natDisabled()) { - continue + if tc.proxyPath != "" { + expProxies := map[proxyCall]bool{} + for _, expPB := range tc.expPBs { + hip := expChildIP(expPB.HostIP) + is4 := hip.To4() != nil + if (is4 && tc.gwMode4.natDisabled()) || (!is4 && tc.gwMode6.natDisabled()) { + continue + } + p := newProxyCall(expPB.Proto.String(), + hip, int(expPB.HostPort), + expPB.IP, int(expPB.Port), tc.proxyPath) + expProxies[p] = tc.expReleaseErr != "" } - p := newProxyCall(expPB.Proto.String(), - expPB.HostIP, int(expPB.HostPort), - expPB.IP, int(expPB.Port), tc.proxyPath) - expProxies[p] = tc.expReleaseErr != "" + assert.Check(t, is.DeepEqual(expProxies, proxies)) + } + + // Check the port driver has seen the expected port mappings and no others, + // and that they have all been closed. + if n.driver.portDriverClient != nil { + pdc := n.driver.portDriverClient.(*mockPortDriverClient) + expPorts := map[mockPortDriverPort]bool{} + for _, expPB := range tc.expPBs { + if expPB.HostPort == 0 { + continue + } + pdp := mockPortDriverPort{ + proto: expPB.Proto.String(), + hostIP: expPB.HostIP.String(), + childIP: expChildIP(expPB.HostIP).String(), + hostPort: int(expPB.HostPort), + } + expPorts[pdp] = false + } + assert.Check(t, is.DeepEqual(pdc.openPorts, expPorts)) } - assert.Check(t, is.DeepEqual(expProxies, proxies)) }) } } @@ -881,3 +1005,48 @@ func newProxyCall(proto string, proxyPath: proxyPath, } } + +// Types for tracking calls to the port driver client (mock for RootlessKit client). 
+ +type mockPortDriverPort struct { + proto string + hostIP string + childIP string + hostPort int +} + +func (p mockPortDriverPort) String() string { + return p.hostIP + ":" + strconv.Itoa(p.hostPort) + "/" + p.proto +} + +type mockPortDriverClient struct { + openPorts map[mockPortDriverPort]bool +} + +func newMockPortDriverClient(_ context.Context) (*mockPortDriverClient, error) { + return &mockPortDriverClient{ + openPorts: map[mockPortDriverPort]bool{}, + }, nil +} + +func (c *mockPortDriverClient) ChildHostIP(hostIP netip.Addr) netip.Addr { + if hostIP.Is6() { + return netip.IPv6Loopback() + } + return netip.MustParseAddr("127.0.0.1") +} + +func (c *mockPortDriverClient) AddPort(_ context.Context, proto string, hostIP, childIP netip.Addr, hostPort int) (func() error, error) { + key := mockPortDriverPort{proto: proto, hostIP: hostIP.String(), childIP: childIP.String(), hostPort: hostPort} + if _, exists := c.openPorts[key]; exists { + return nil, fmt.Errorf("mockPortDriverClient: port %s is already open", key) + } + c.openPorts[key] = true + return func() error { + if !c.openPorts[key] { + return fmt.Errorf("mockPortDriverClient: port %s is not open", key) + } + c.openPorts[key] = false + return nil + }, nil +} diff --git a/libnetwork/endpoint.go b/libnetwork/endpoint.go index 57524007dc..12b6b64b1c 100644 --- a/libnetwork/endpoint.go +++ b/libnetwork/endpoint.go @@ -624,10 +624,10 @@ func (ep *Endpoint) sbJoin(ctx context.Context, sb *Sandbox, options ...Endpoint } if !n.internal { log.G(ctx).Debugf("Programming external connectivity on endpoint %s (%s)", ep.Name(), ep.ID()) - if err := d.ProgramExternalConnectivity(ctx, n.ID(), ep.ID(), sb.Labels()); err != nil { - return types.InternalErrorf( + if err = d.ProgramExternalConnectivity(ctx, n.ID(), ep.ID(), sb.Labels()); err != nil { + return errdefs.System(fmt.Errorf( "driver failed programming external connectivity on endpoint %s (%s): %v", - ep.Name(), ep.ID(), err) + ep.Name(), ep.ID(), err)) } } } diff 
--git a/libnetwork/portmapper/proxy_linux.go b/libnetwork/portmapper/proxy_linux.go index 9bdb0f5567..d7256f76f0 100644 --- a/libnetwork/portmapper/proxy_linux.go +++ b/libnetwork/portmapper/proxy_linux.go @@ -1,78 +1,61 @@ package portmapper import ( + "errors" "fmt" "io" - "net" "os" "os/exec" "runtime" "strconv" "syscall" "time" + + "github.com/docker/docker/libnetwork/types" ) -// StartProxy starts the proxy process at proxyPath, or instantiates a dummy proxy -// to bind the host port if proxyPath is the empty string. -func StartProxy( - proto string, - hostIP net.IP, hostPort int, - containerIP net.IP, containerPort int, - proxyPath string, -) (stop func() error, retErr error) { - if proxyPath == "" { - return newDummyProxy(proto, hostIP, hostPort) - } - return newProxyCommand(proto, hostIP, hostPort, containerIP, containerPort, proxyPath) -} - -func newProxyCommand( - proto string, - hostIP net.IP, hostPort int, - containerIP net.IP, containerPort int, +// StartProxy starts the proxy process at proxyPath. +// If listenSock is not nil, it must be a bound socket that can be passed to +// the proxy process for it to listen on. 
+func StartProxy(pb types.PortBinding, proxyPath string, + listenSock *os.File, ) (stop func() error, retErr error) { if proxyPath == "" { return nil, fmt.Errorf("no path provided for userland-proxy binary") } - - p := &proxyCommand{ - cmd: &exec.Cmd{ - Path: proxyPath, - Args: []string{ - proxyPath, - "-proto", proto, - "-host-ip", hostIP.String(), - "-host-port", strconv.Itoa(hostPort), - "-container-ip", containerIP.String(), - "-container-port", strconv.Itoa(containerPort), - }, - SysProcAttr: &syscall.SysProcAttr{ - Pdeathsig: syscall.SIGTERM, // send a sigterm to the proxy if the creating thread in the daemon process dies (https://go.dev/issue/27505) - }, - }, - wait: make(chan error, 1), - } - if err := p.start(); err != nil { - return nil, err - } - return p.stop, nil -} - -// proxyCommand wraps an exec.Cmd to run the userland TCP and UDP -// proxies as separate processes. -type proxyCommand struct { - cmd *exec.Cmd - wait chan error -} - -func (p *proxyCommand) start() error { r, w, err := os.Pipe() if err != nil { - return fmt.Errorf("proxy unable to open os.Pipe %s", err) + return nil, fmt.Errorf("proxy unable to open os.Pipe %s", err) } - defer r.Close() - p.cmd.ExtraFiles = []*os.File{w} + defer func() { + if w != nil { + w.Close() + } + r.Close() + }() + + cmd := &exec.Cmd{ + Path: proxyPath, + Args: []string{ + proxyPath, + "-proto", pb.Proto.String(), + "-host-ip", pb.HostIP.String(), + "-host-port", strconv.FormatUint(uint64(pb.HostPort), 10), + "-container-ip", pb.IP.String(), + "-container-port", strconv.FormatUint(uint64(pb.Port), 10), + }, + ExtraFiles: []*os.File{w}, + SysProcAttr: &syscall.SysProcAttr{ + Pdeathsig: syscall.SIGTERM, // send a sigterm to the proxy if the creating thread in the daemon process dies (https://go.dev/issue/27505) + }, + } + if listenSock != nil { + cmd.Args = append(cmd.Args, "-use-listen-fd") + cmd.ExtraFiles = append(cmd.ExtraFiles, listenSock) + } + + wait := make(chan error, 1) // As p.cmd.SysProcAttr.Pdeathsig 
is set, the signal will be sent to the // process when the OS thread on which p.cmd.Start() was executed dies. @@ -88,17 +71,18 @@ func (p *proxyCommand) start() error { go func() { runtime.LockOSThread() defer runtime.UnlockOSThread() - err := p.cmd.Start() + err := cmd.Start() started <- err if err != nil { return } - p.wait <- p.cmd.Wait() + wait <- cmd.Wait() }() if err := <-started; err != nil { - return err + return nil, err } w.Close() + w = nil errchan := make(chan error, 1) go func() { @@ -108,11 +92,16 @@ func (p *proxyCommand) start() error { if string(buf) != "0\n" { errStr, err := io.ReadAll(r) if err != nil { - errchan <- fmt.Errorf("Error reading exit status from userland proxy: %v", err) + errchan <- fmt.Errorf("error reading exit status from userland proxy: %v", err) return } - - errchan <- fmt.Errorf("Error starting userland proxy: %s", errStr) + // If the user has an old docker-proxy in their PATH, and we passed "-use-listen-fd" + // on the command line, it exits with no response on the pipe. 
+ if listenSock != nil && buf[0] == 0 && len(errStr) == 0 { + errchan <- errors.New("failed to start docker-proxy, check that the current version is in your $PATH") + return + } + errchan <- fmt.Errorf("error starting userland proxy: %s", errStr) return } errchan <- nil @@ -120,18 +109,21 @@ func (p *proxyCommand) start() error { select { case err := <-errchan: - return err + if err != nil { + return nil, err + } case <-time.After(16 * time.Second): - return fmt.Errorf("Timed out proxy starting the userland proxy") + return nil, fmt.Errorf("timed out starting the userland proxy") } -} -func (p *proxyCommand) stop() error { - if p.cmd.Process != nil { - if err := p.cmd.Process.Signal(os.Interrupt); err != nil { + stopFn := func() error { + if cmd.Process == nil { + return nil + } + if err := cmd.Process.Signal(os.Interrupt); err != nil { return err } - return <-p.wait + return <-wait } - return nil + return stopFn, nil } diff --git a/libnetwork/portmapper/proxy.go b/libnetwork/portmapper/proxy_windows.go similarity index 100% rename from libnetwork/portmapper/proxy.go rename to libnetwork/portmapper/proxy_windows.go diff --git a/pkg/rootless/rootless.go b/pkg/rootless/rootless.go index b52f8eee71..1251b48817 100644 --- a/pkg/rootless/rootless.go +++ b/pkg/rootless/rootless.go @@ -2,9 +2,6 @@ package rootless // import "github.com/docker/docker/pkg/rootless" import "os" -// RootlessKitDockerProxyBinary is the binary name of rootlesskit-docker-proxy -const RootlessKitDockerProxyBinary = "rootlesskit-docker-proxy" - // RunningWithRootlessKit returns true if running under RootlessKit namespaces. func RunningWithRootlessKit() bool { return os.Getenv("ROOTLESSKIT_STATE_DIR") != ""