Merge pull request #48132 from robmry/bind_socket_for_docker_proxy

Create docker-proxy TCP/UDP listener sockets in the daemon
This commit is contained in:
Sebastiaan van Stijn
2024-08-08 20:30:58 +02:00
committed by GitHub
25 changed files with 1155 additions and 425 deletions

View File

@@ -377,8 +377,6 @@ RUN --mount=from=rootlesskit-src,src=/usr/src/rootlesskit,rw \
export CGO_ENABLED=$([ "$DOCKER_STATIC" = "1" ] && echo "0" || echo "1")
xx-go build -o /build/rootlesskit -ldflags="$([ "$DOCKER_STATIC" != "1" ] && echo "-linkmode=external")" ./cmd/rootlesskit
xx-verify $([ "$DOCKER_STATIC" = "1" ] && echo "--static") /build/rootlesskit
xx-go build -o /build/rootlesskit-docker-proxy -ldflags="$([ "$DOCKER_STATIC" != "1" ] && echo "-linkmode=external")" ./cmd/rootlesskit-docker-proxy
xx-verify $([ "$DOCKER_STATIC" = "1" ] && echo "--static") /build/rootlesskit-docker-proxy
EOT
COPY --link ./contrib/dockerd-rootless.sh /build/
COPY --link ./contrib/dockerd-rootless-setuptool.sh /build/
@@ -620,7 +618,7 @@ RUN --mount=type=bind,target=.,rw \
xx-go --wrap
PKG_CONFIG=$(xx-go env PKG_CONFIG) ./hack/make.sh $target
xx-verify $([ "$DOCKER_STATIC" = "1" ] && echo "--static") /tmp/bundles/${target}-daemon/dockerd$([ "$(xx-info os)" = "windows" ] && echo ".exe")
xx-verify $([ "$DOCKER_STATIC" = "1" ] && echo "--static") /tmp/bundles/${target}-daemon/docker-proxy$([ "$(xx-info os)" = "windows" ] && echo ".exe")
[ "$(xx-info os)" != "linux" ] || xx-verify $([ "$DOCKER_STATIC" = "1" ] && echo "--static") /tmp/bundles/${target}-daemon/docker-proxy
mkdir /build
mv /tmp/bundles/${target}-daemon/* /build/
EOT

View File

@@ -1,5 +0,0 @@
//go:generate go-winres make --arch=386,amd64,arm,arm64 --in=../../cli/winresources/docker-proxy/winres.json --out=../../cli/winresources/docker-proxy/resource
package main
import _ "github.com/docker/docker/cli/winresources/docker-proxy"

View File

@@ -1,80 +0,0 @@
package main
import (
"flag"
"fmt"
"log"
"net"
"os"
"os/signal"
"syscall"
"github.com/docker/docker/dockerversion"
"github.com/ishidawataru/sctp"
)
// main starts the proxy described by the command-line flags and reports the
// outcome to the parent process over file descriptor 3: "0\n" once the proxy
// has been created, or "1\n" followed by the error text if it could not be.
func main() {
	statusPipe := os.NewFile(3, "signal-parent")
	hostAddr, containerAddr := parseFlags()

	proxy, err := NewProxy(hostAddr, containerAddr)
	if err != nil {
		fmt.Fprintf(statusPipe, "1\n%s", err)
		statusPipe.Close()
		os.Exit(1)
	}

	// Install the signal handler before telling the parent we are ready.
	go handleStopSignals(proxy)
	fmt.Fprint(statusPipe, "0\n")
	statusPipe.Close()

	// Blocks for as long as the proxy is running.
	proxy.Run()
}
// parseFlags reads the flags passed on reexec and turns them into the host
// and container addresses to proxy between. The concrete address type (TCP,
// UDP or SCTP) is selected by -proto. With -v or -version it prints version
// information and exits; an unknown protocol terminates the process.
func parseFlags() (host net.Addr, container net.Addr) {
	var (
		proto         string
		hostIP        string
		hostPort      int
		containerIP   string
		containerPort int
		printVer      bool
		printVersion  bool
	)
	flag.StringVar(&proto, "proto", "tcp", "proxy protocol")
	flag.StringVar(&hostIP, "host-ip", "", "host ip")
	flag.IntVar(&hostPort, "host-port", -1, "host port")
	flag.StringVar(&containerIP, "container-ip", "", "container ip")
	flag.IntVar(&containerPort, "container-port", -1, "container port")
	flag.BoolVar(&printVer, "v", false, "print version information and quit")
	flag.BoolVar(&printVersion, "version", false, "print version information and quit")
	flag.Parse()

	if printVer || printVersion {
		fmt.Printf("docker-proxy (commit %s) version %s\n", dockerversion.GitCommit, dockerversion.Version)
		os.Exit(0)
	}

	// net.ParseIP yields nil for an empty or malformed address.
	hostAddrIP, containerAddrIP := net.ParseIP(hostIP), net.ParseIP(containerIP)

	switch proto {
	case "tcp":
		return &net.TCPAddr{IP: hostAddrIP, Port: hostPort},
			&net.TCPAddr{IP: containerAddrIP, Port: containerPort}
	case "udp":
		return &net.UDPAddr{IP: hostAddrIP, Port: hostPort},
			&net.UDPAddr{IP: containerAddrIP, Port: containerPort}
	case "sctp":
		return &sctp.SCTPAddr{IPAddrs: []net.IPAddr{{IP: hostAddrIP}}, Port: hostPort},
			&sctp.SCTPAddr{IPAddrs: []net.IPAddr{{IP: containerAddrIP}}, Port: containerPort}
	default:
		log.Fatalf("unsupported protocol %s", proto)
	}
	return host, container
}
// handleStopSignals waits for an interrupt or SIGTERM, then closes the proxy
// and exits the process with status 0.
func handleStopSignals(p Proxy) {
	sigCh := make(chan os.Signal, 10)
	signal.Notify(sigCh, os.Interrupt, syscall.SIGTERM)

	// The channel is never closed, so this receive only completes once a
	// stop signal has been delivered.
	<-sigCh
	p.Close()
	os.Exit(0)
}

View File

@@ -0,0 +1,181 @@
package main
import (
"errors"
"flag"
"fmt"
"net"
"os"
"os/signal"
"syscall"
"github.com/docker/docker/dockerversion"
"github.com/ishidawataru/sctp"
)
// The caller is expected to pass in open file descriptors, numbered from 3
// in the order declared below (parentPipeFd is fd 3, listenSockFd is fd 4).
const (
// Pipe for reporting status, as a string. "0\n" if the proxy
// started normally. "1\n<error message>" otherwise.
parentPipeFd uintptr = 3 + iota
// If -use-listen-fd=true, a listening socket ready to accept TCP
// connections or receive UDP. (Without that option on the command
// line, the listener needs to be opened by docker-proxy, for
// compatibility with older docker daemons. In this case fd 4
// may belong to the Go runtime.)
listenSockFd
)
// main creates the proxy described by the command-line flags, reports the
// result to the parent over parentPipeFd ("0\n" on success, "1\n" followed by
// the error text on failure) and then runs the proxy until it stops.
func main() {
	// Flag every descriptor we expect to inherit as close-on-exec, so that
	// none of them leaks into a child process should docker-proxy ever need
	// to exec something. Doing this to a descriptor that actually belongs
	// to the Go runtime is a harmless no-op: the runtime already marks all
	// descriptors it opens as close-on-exec. See the godoc for
	// syscall.ForkLock for more information.
	for _, fd := range [...]uintptr{parentPipeFd, listenSockFd} {
		syscall.CloseOnExec(int(fd))
	}

	cfg := parseFlags()
	proxy, startErr := newProxy(cfg)
	if sock := cfg.ListenSock; sock != nil {
		// The inherited file is no longer needed once newProxy has returned.
		sock.Close()
	}

	_ = syscall.SetNonblock(int(parentPipeFd), true)
	statusPipe := os.NewFile(parentPipeFd, "signal-parent")
	if startErr != nil {
		fmt.Fprintf(statusPipe, "1\n%s", startErr)
		statusPipe.Close()
		os.Exit(1)
	}

	// Install the signal handler before telling the parent we are ready.
	go handleStopSignals(proxy)
	fmt.Fprint(statusPipe, "0\n")
	statusPipe.Close()

	// Blocks for as long as the proxy is running.
	proxy.Run()
}
// newProxy builds the Proxy described by config.
//
// For "tcp" and "udp" the listening socket is taken from config.ListenSock
// when one was supplied; otherwise newProxy binds HostIP:HostPort itself, for
// compatibility with older daemons that do not pass a socket. For "sctp" a
// supplied socket cannot be used, so the listener is always opened here. Any
// other protocol is rejected with an error.
//
// On error the returned Proxy is nil and any listener created by newProxy has
// been closed. config.ListenSock itself is never closed here; that remains
// the caller's responsibility.
func newProxy(config ProxyConfig) (p Proxy, err error) {
	// Bind only to the address family matching the host IP.
	ipv := ipv4
	if config.HostIP.To4() == nil {
		ipv = ipv6
	}

	switch config.Proto {
	case "tcp":
		var listener *net.TCPListener
		if config.ListenSock == nil {
			// Fall back to HostIP:HostPort if no socket on fd 4, for compatibility with older daemons.
			hostAddr := &net.TCPAddr{IP: config.HostIP, Port: config.HostPort}
			listener, err = net.ListenTCP("tcp"+string(ipv), hostAddr)
			if err != nil {
				return nil, fmt.Errorf("failed to listen on %s: %w", hostAddr, err)
			}
		} else {
			l, err := net.FileListener(config.ListenSock)
			if err != nil {
				return nil, err
			}
			var ok bool
			listener, ok = l.(*net.TCPListener)
			if !ok {
				// l owns its own copy of the descriptor; release it rather
				// than leaking it along with the error.
				network := l.Addr().Network()
				l.Close()
				return nil, fmt.Errorf("unexpected socket type for listener fd: %s", network)
			}
		}
		container := &net.TCPAddr{IP: config.ContainerIP, Port: config.ContainerPort}
		tcpProxy, err := NewTCPProxy(listener, container)
		if err != nil {
			listener.Close()
			// Return an untyped nil so callers never see a non-nil Proxy
			// interface wrapping a nil pointer.
			return nil, err
		}
		return tcpProxy, nil

	case "udp":
		var listener *net.UDPConn
		if config.ListenSock == nil {
			// Fall back to HostIP:HostPort if no socket on fd 4, for compatibility with older daemons.
			hostAddr := &net.UDPAddr{IP: config.HostIP, Port: config.HostPort}
			listener, err = net.ListenUDP("udp"+string(ipv), hostAddr)
			if err != nil {
				return nil, fmt.Errorf("failed to listen on %s: %w", hostAddr, err)
			}
		} else {
			l, err := net.FilePacketConn(config.ListenSock)
			if err != nil {
				return nil, err
			}
			var ok bool
			listener, ok = l.(*net.UDPConn)
			if !ok {
				// As for TCP: don't leak the duplicated descriptor.
				network := l.LocalAddr().Network()
				l.Close()
				return nil, fmt.Errorf("unexpected socket type for listener fd: %s", network)
			}
		}
		container := &net.UDPAddr{IP: config.ContainerIP, Port: config.ContainerPort}
		udpProxy, err := NewUDPProxy(listener, container)
		if err != nil {
			listener.Close()
			return nil, err
		}
		return udpProxy, nil

	case "sctp":
		if config.ListenSock != nil {
			// There's no way to construct an SCTPListener from a file descriptor at the moment.
			// If a socket has been passed in, it's probably from a newer daemon using a version
			// of the sctp module that does allow it.
			return nil, errors.New("cannot use supplied SCTP socket, check the latest docker-proxy is in your $PATH")
		}
		hostAddr := &sctp.SCTPAddr{IPAddrs: []net.IPAddr{{IP: config.HostIP}}, Port: config.HostPort}
		container := &sctp.SCTPAddr{IPAddrs: []net.IPAddr{{IP: config.ContainerIP}}, Port: config.ContainerPort}
		listener, err := sctp.ListenSCTP("sctp"+string(ipv), hostAddr)
		if err != nil {
			return nil, fmt.Errorf("failed to listen on %s: %w", hostAddr, err)
		}
		sctpProxy, err := NewSCTPProxy(listener, container)
		if err != nil {
			listener.Close()
			return nil, err
		}
		return sctpProxy, nil

	default:
		return nil, fmt.Errorf("unsupported protocol %s", config.Proto)
	}
}
// ProxyConfig describes the proxy to start: the protocol, the host and
// container endpoints, and optionally an already-open listening socket.
type ProxyConfig struct {
// Proto selects the proxy implementation; newProxy accepts "tcp", "udp"
// and "sctp".
Proto string
// HostIP is the address the proxy listens on (its IP version also selects
// the network family used when newProxy opens the listener itself);
// ContainerIP is the address traffic is forwarded to.
HostIP, ContainerIP net.IP
// HostPort and ContainerPort are the ports paired with HostIP and
// ContainerIP respectively.
HostPort, ContainerPort int
// ListenSock, when non-nil, is a socket already bound to the host address
// that newProxy uses instead of opening its own listener.
ListenSock *os.File
}
// parseFlags builds a ProxyConfig from the flags passed on reexec. With -v or
// -version it prints version information and exits. When -use-listen-fd is
// set, the inherited listenSockFd descriptor is switched to non-blocking mode
// and wrapped as the config's ListenSock; otherwise ListenSock stays nil.
func parseFlags() ProxyConfig {
	var (
		cfg          ProxyConfig
		showVersion  bool
		haveListenFd bool
	)

	flag.StringVar(&cfg.Proto, "proto", "tcp", "proxy protocol")
	flag.TextVar(&cfg.HostIP, "host-ip", net.IPv4zero, "host ip")
	flag.IntVar(&cfg.HostPort, "host-port", -1, "host port")
	flag.TextVar(&cfg.ContainerIP, "container-ip", net.IPv4zero, "container ip")
	flag.IntVar(&cfg.ContainerPort, "container-port", -1, "container port")
	flag.BoolVar(&haveListenFd, "use-listen-fd", false, "use a supplied listen fd")
	// -v and -version are aliases writing to the same variable.
	for _, name := range []string{"v", "version"} {
		flag.BoolVar(&showVersion, name, false, "print version information and quit")
	}
	flag.Parse()

	if showVersion {
		fmt.Printf("docker-proxy (commit %s) version %s\n", dockerversion.GitCommit, dockerversion.Version)
		os.Exit(0)
	}

	if !haveListenFd {
		return cfg
	}
	_ = syscall.SetNonblock(int(listenSockFd), true)
	cfg.ListenSock = os.NewFile(listenSockFd, "listen-sock")
	return cfg
}
// handleStopSignals waits for an interrupt or SIGTERM, then closes the proxy
// and exits the process with status 0.
func handleStopSignals(p Proxy) {
	sigCh := make(chan os.Signal, 10)
	signal.Notify(sigCh, os.Interrupt, syscall.SIGTERM)

	// The channel is never closed, so this receive only completes once a
	// stop signal has been delivered.
	<-sigCh
	p.Close()
	os.Exit(0)
}

View File

@@ -1,3 +1,5 @@
//go:build !windows
package main
import (
@@ -5,13 +7,13 @@ import (
"fmt"
"io"
"net"
"runtime"
"os"
"strings"
"testing"
"time"
"github.com/ishidawataru/sctp"
"gotest.tools/v3/skip"
"gotest.tools/v3/assert"
)
var (
@@ -40,6 +42,8 @@ type UDPEchoServer struct {
testCtx *testing.T
}
const hopefullyFreePort = 25587
func NewEchoServer(t *testing.T, proto, address string, opts EchoServerOptions) EchoServer {
var server EchoServer
if !strings.HasPrefix(proto, "tcp") && opts.TCPHalfClose {
@@ -128,7 +132,31 @@ func (server *UDPEchoServer) Run() {
func (server *UDPEchoServer) LocalAddr() net.Addr { return server.conn.LocalAddr() }
func (server *UDPEchoServer) Close() { server.conn.Close() }
// tcpListener opens a TCP listener on addr (network nw) and returns a copy of
// its socket as an *os.File, together with the address that was actually
// bound. The net.TCPListener is closed before returning, so only the returned
// file keeps the socket open. Any failure aborts the test.
func tcpListener(t *testing.T, nw string, addr *net.TCPAddr) (*os.File, *net.TCPAddr) {
	t.Helper()

	ln, err := net.ListenTCP(nw, addr)
	assert.NilError(t, err)
	boundAddr := ln.Addr().(*net.TCPAddr)

	sockFile, err := ln.File()
	assert.NilError(t, err)
	assert.NilError(t, ln.Close())

	return sockFile, boundAddr
}
// udpListener opens a UDP socket on addr (network nw) and returns a copy of
// it as an *os.File, together with the address that was actually bound. The
// net.UDPConn is closed before returning, so only the returned file keeps the
// socket open. Any failure aborts the test.
func udpListener(t *testing.T, nw string, addr *net.UDPAddr) (*os.File, *net.UDPAddr) {
	t.Helper()
	l, err := net.ListenUDP(nw, addr)
	assert.NilError(t, err)
	osFile, err := l.File()
	assert.NilError(t, err)
	// Read the bound address while the conn is still open, as tcpListener
	// does, instead of querying a closed connection.
	udpAddr := l.LocalAddr().(*net.UDPAddr)
	err = l.Close()
	assert.NilError(t, err)
	return osFile, udpAddr
}
func testProxyAt(t *testing.T, proto string, proxy Proxy, addr string, halfClose bool) {
t.Helper()
defer proxy.Close()
go proxy.Run()
var client net.Conn
@@ -167,98 +195,169 @@ func testProxyAt(t *testing.T, proto string, proxy Proxy, addr string, halfClose
}
}
// testProxy runs testProxyAt against the address the proxy itself reports
// through FrontendAddr.
func testProxy(t *testing.T, proto string, proxy Proxy, halfClose bool) {
testProxyAt(t, proto, proxy, proxy.FrontendAddr().String(), halfClose)
}
func testTCP4Proxy(t *testing.T, halfClose bool) {
func testTCP4Proxy(t *testing.T, halfClose bool, hostPort int) {
t.Helper()
backend := NewEchoServer(t, "tcp", "127.0.0.1:0", EchoServerOptions{TCPHalfClose: halfClose})
defer backend.Close()
backend.Run()
backendAddr := backend.LocalAddr().(*net.TCPAddr)
var listener *os.File
frontendAddr := &net.TCPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 0}
proxy, err := NewProxy(frontendAddr, backend.LocalAddr())
if hostPort == 0 {
listener, frontendAddr = tcpListener(t, "tcp4", &net.TCPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 0})
} else {
frontendAddr.Port = hostPort
}
config := ProxyConfig{
Proto: "tcp",
HostIP: frontendAddr.IP,
HostPort: frontendAddr.Port,
ContainerIP: backendAddr.IP,
ContainerPort: backendAddr.Port,
ListenSock: listener,
}
proxy, err := newProxy(config)
if err != nil {
t.Fatal(err)
}
testProxy(t, "tcp", proxy, halfClose)
testProxyAt(t, "tcp", proxy, frontendAddr.String(), halfClose)
}
func TestTCP4Proxy(t *testing.T) {
testTCP4Proxy(t, false)
testTCP4Proxy(t, false, 0)
}
// TestTCP4ProxyNoListener runs the TCP proxy test with a fixed host port
// (hopefullyFreePort) instead of port 0, so that testTCP4Proxy passes no
// pre-opened listener and newProxy has to bind the host address itself.
func TestTCP4ProxyNoListener(t *testing.T) {
testTCP4Proxy(t, false, hopefullyFreePort)
}
func TestTCP4ProxyHalfClose(t *testing.T) {
testTCP4Proxy(t, true)
testTCP4Proxy(t, true, 0)
}
func TestTCP6Proxy(t *testing.T) {
t.Skip("Need to start CI docker with --ipv6")
backend := NewEchoServer(t, "tcp", "[::1]:0", EchoServerOptions{})
defer backend.Close()
backend.Run()
frontendAddr := &net.TCPAddr{IP: net.IPv6loopback, Port: 0}
proxy, err := NewProxy(frontendAddr, backend.LocalAddr())
backendAddr := backend.LocalAddr().(*net.TCPAddr)
listener, frontendAddr := tcpListener(t, "tcp6", &net.TCPAddr{IP: net.IPv6loopback, Port: 0})
config := ProxyConfig{
Proto: "tcp",
HostIP: frontendAddr.IP,
HostPort: frontendAddr.Port,
ContainerIP: backendAddr.IP,
ContainerPort: backendAddr.Port,
ListenSock: listener,
}
proxy, err := newProxy(config)
if err != nil {
t.Fatal(err)
}
testProxy(t, "tcp", proxy, false)
testProxyAt(t, "tcp", proxy, frontendAddr.String(), false)
}
func TestTCPDualStackProxy(t *testing.T) {
// If I understand `godoc -src net favoriteAddrFamily` (used by the
// net.Listen* functions) correctly this should work, but it doesn't.
t.Skip("No support for dual stack yet")
backend := NewEchoServer(t, "tcp", "[::1]:0", EchoServerOptions{})
defer backend.Close()
backend.Run()
frontendAddr := &net.TCPAddr{IP: net.IPv6loopback, Port: 0}
proxy, err := NewProxy(frontendAddr, backend.LocalAddr())
backendAddr := backend.LocalAddr().(*net.TCPAddr)
listener, frontendAddr := tcpListener(t, "tcp", &net.TCPAddr{IP: net.IPv6zero, Port: 0})
config := ProxyConfig{
Proto: "tcp",
HostIP: frontendAddr.IP,
HostPort: frontendAddr.Port,
ContainerIP: backendAddr.IP,
ContainerPort: backendAddr.Port,
ListenSock: listener,
}
proxy, err := newProxy(config)
if err != nil {
t.Fatal(err)
}
ipv4ProxyAddr := &net.TCPAddr{
IP: net.IPv4(127, 0, 0, 1),
Port: proxy.FrontendAddr().(*net.TCPAddr).Port,
Port: frontendAddr.Port,
}
testProxyAt(t, "tcp", proxy, ipv4ProxyAddr.String(), false)
}
func TestUDP4Proxy(t *testing.T) {
func testUDP4Proxy(t *testing.T, hostPort int) {
t.Helper()
backend := NewEchoServer(t, "udp", "127.0.0.1:0", EchoServerOptions{})
defer backend.Close()
backend.Run()
var listener *os.File
frontendAddr := &net.UDPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 0}
proxy, err := NewProxy(frontendAddr, backend.LocalAddr())
if hostPort == 0 {
listener, frontendAddr = udpListener(t, "udp4", &net.UDPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 0})
} else {
frontendAddr.Port = hostPort
}
backendAddr := backend.LocalAddr().(*net.UDPAddr)
config := ProxyConfig{
Proto: "udp",
HostIP: frontendAddr.IP,
HostPort: frontendAddr.Port,
ContainerIP: backendAddr.IP,
ContainerPort: backendAddr.Port,
ListenSock: listener,
}
proxy, err := newProxy(config)
if err != nil {
t.Fatal(err)
}
testProxy(t, "udp", proxy, false)
testProxyAt(t, "udp", proxy, frontendAddr.String(), false)
}
// TestUDP4Proxy runs the UDP proxy test with host port 0, in which case
// testUDP4Proxy opens the listening socket itself and hands it to newProxy.
func TestUDP4Proxy(t *testing.T) {
testUDP4Proxy(t, 0)
}
// TestUDP4ProxyNoListener runs the UDP proxy test with a fixed host port
// (hopefullyFreePort) instead of port 0, so that testUDP4Proxy passes no
// pre-opened socket and newProxy has to bind the host address itself.
func TestUDP4ProxyNoListener(t *testing.T) {
testUDP4Proxy(t, hopefullyFreePort)
}
func TestUDP6Proxy(t *testing.T) {
t.Skip("Need to start CI docker with --ipv6")
backend := NewEchoServer(t, "udp", "[::1]:0", EchoServerOptions{})
defer backend.Close()
backend.Run()
frontendAddr := &net.UDPAddr{IP: net.IPv6loopback, Port: 0}
proxy, err := NewProxy(frontendAddr, backend.LocalAddr())
listener, frontendAddr := udpListener(t, "udp6", &net.UDPAddr{IP: net.IPv6loopback, Port: 0})
backendAddr := backend.LocalAddr().(*net.UDPAddr)
config := ProxyConfig{
Proto: "udp",
HostIP: frontendAddr.IP,
HostPort: frontendAddr.Port,
ContainerIP: backendAddr.IP,
ContainerPort: backendAddr.Port,
ListenSock: listener,
}
proxy, err := newProxy(config)
if err != nil {
t.Fatal(err)
}
testProxy(t, "udp", proxy, false)
testProxyAt(t, "udp", proxy, frontendAddr.String(), false)
}
func TestUDPWriteError(t *testing.T) {
frontendAddr := &net.UDPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 0}
// Hopefully, this port will be free:
backendAddr := &net.UDPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 25587}
proxy, err := NewProxy(frontendAddr, backendAddr)
backendAddr := &net.UDPAddr{IP: net.IPv4(127, 0, 0, 1), Port: hopefullyFreePort}
listener, frontendAddr := udpListener(t, "udp4", frontendAddr)
config := ProxyConfig{
Proto: "udp",
HostIP: frontendAddr.IP,
HostPort: frontendAddr.Port,
ContainerIP: backendAddr.IP,
ContainerPort: backendAddr.Port,
ListenSock: listener,
}
proxy, err := newProxy(config)
if err != nil {
t.Fatal(err)
}
defer proxy.Close()
go proxy.Run()
client, err := net.Dial("udp", "127.0.0.1:25587")
client, err := net.Dial("udp", frontendAddr.String())
if err != nil {
t.Fatalf("Can't connect to the proxy: %v", err)
}
@@ -266,7 +365,7 @@ func TestUDPWriteError(t *testing.T) {
// Make sure the proxy doesn't stop when there is no actual backend:
client.Write(testBuf)
client.Write(testBuf)
backend := NewEchoServer(t, "udp", "127.0.0.1:25587", EchoServerOptions{})
backend := NewEchoServer(t, "udp", backendAddr.String(), EchoServerOptions{})
defer backend.Close()
backend.Run()
client.SetDeadline(time.Now().Add(10 * time.Second))
@@ -282,31 +381,36 @@ func TestUDPWriteError(t *testing.T) {
}
}
func TestSCTP4Proxy(t *testing.T) {
skip.If(t, runtime.GOOS == "windows", "sctp is not supported on windows")
func TestSCTP4ProxyNoListener(t *testing.T) {
backend := NewEchoServer(t, "sctp", "127.0.0.1:0", EchoServerOptions{})
defer backend.Close()
backend.Run()
frontendAddr := &sctp.SCTPAddr{IPAddrs: []net.IPAddr{{IP: net.IPv4(127, 0, 0, 1)}}, Port: 0}
proxy, err := NewProxy(frontendAddr, backend.LocalAddr())
if err != nil {
t.Fatal(err)
backendAddr := backend.LocalAddr().(*sctp.SCTPAddr)
config := ProxyConfig{
Proto: "sctp",
HostIP: net.IPv4(127, 0, 0, 1),
HostPort: hopefullyFreePort,
ContainerIP: backendAddr.IPAddrs[0].IP,
ContainerPort: backendAddr.Port,
}
testProxy(t, "sctp", proxy, false)
proxy, err := newProxy(config)
assert.NilError(t, err)
testProxyAt(t, "sctp", proxy, fmt.Sprintf("%s:%d", config.HostIP, config.HostPort), false)
}
func TestSCTP6Proxy(t *testing.T) {
t.Skip("Need to start CI docker with --ipv6")
skip.If(t, runtime.GOOS == "windows", "sctp is not supported on windows")
func TestSCTP6ProxyNoListener(t *testing.T) {
backend := NewEchoServer(t, "sctp", "[::1]:0", EchoServerOptions{})
defer backend.Close()
backend.Run()
frontendAddr := &sctp.SCTPAddr{IPAddrs: []net.IPAddr{{IP: net.IPv6loopback}}, Port: 0}
proxy, err := NewProxy(frontendAddr, backend.LocalAddr())
if err != nil {
t.Fatal(err)
backendAddr := backend.LocalAddr().(*sctp.SCTPAddr)
config := ProxyConfig{
Proto: "sctp",
HostIP: net.IPv6loopback,
HostPort: hopefullyFreePort,
ContainerIP: backendAddr.IPAddrs[0].IP,
ContainerPort: backendAddr.Port,
}
testProxy(t, "sctp", proxy, false)
proxy, err := newProxy(config)
assert.NilError(t, err)
testProxyAt(t, "sctp", proxy, fmt.Sprintf("[%s]:%d", config.HostIP, config.HostPort), false)
}

View File

@@ -2,12 +2,6 @@
// and UDP.
package main
import (
"net"
"github.com/ishidawataru/sctp"
)
// ipVersion refers to IP version - v4 or v6
type ipVersion string
@@ -29,22 +23,4 @@ type Proxy interface {
Run()
// Close stops forwarding traffic and close both ends of the Proxy.
Close()
// FrontendAddr returns the address on which the proxy is listening.
FrontendAddr() net.Addr
// BackendAddr returns the proxied address.
BackendAddr() net.Addr
}
// NewProxy returns the Proxy implementation matching the concrete type of
// frontendAddr (UDP, TCP or SCTP). backendAddr must have the same concrete
// type as frontendAddr, otherwise the type assertion panics. NewProxy also
// panics when frontendAddr is of any other address type.
func NewProxy(frontendAddr, backendAddr net.Addr) (Proxy, error) {
	switch front := frontendAddr.(type) {
	case *net.UDPAddr:
		return NewUDPProxy(front, backendAddr.(*net.UDPAddr))
	case *net.TCPAddr:
		return NewTCPProxy(front, backendAddr.(*net.TCPAddr))
	case *sctp.SCTPAddr:
		return NewSCTPProxy(front, backendAddr.(*sctp.SCTPAddr))
	default:
		panic("Unsupported protocol")
	}
}

View File

@@ -18,18 +18,7 @@ type SCTPProxy struct {
}
// NewSCTPProxy creates a new SCTPProxy.
func NewSCTPProxy(frontendAddr, backendAddr *sctp.SCTPAddr) (*SCTPProxy, error) {
// detect version of hostIP to bind only to correct version
ipVersion := ipv4
if frontendAddr.IPAddrs[0].IP.To4() == nil {
ipVersion = ipv6
}
listener, err := sctp.ListenSCTP("sctp"+string(ipVersion), frontendAddr)
if err != nil {
return nil, err
}
// If the port in frontendAddr was 0 then ListenSCTP will have picked
// a port to listen on, hence the call to Addr to get that actual port:
func NewSCTPProxy(listener *sctp.SCTPListener, backendAddr *sctp.SCTPAddr) (*SCTPProxy, error) {
return &SCTPProxy{
listener: listener,
frontendAddr: listener.Addr().(*sctp.SCTPAddr),
@@ -90,9 +79,3 @@ func (proxy *SCTPProxy) Run() {
// Close stops forwarding the traffic.
func (proxy *SCTPProxy) Close() { proxy.listener.Close() }
// FrontendAddr returns the SCTP address on which the proxy is listening.
func (proxy *SCTPProxy) FrontendAddr() net.Addr { return proxy.frontendAddr }
// BackendAddr returns the SCTP proxied address.
func (proxy *SCTPProxy) BackendAddr() net.Addr { return proxy.backendAddr }

View File

@@ -16,18 +16,7 @@ type TCPProxy struct {
}
// NewTCPProxy creates a new TCPProxy.
func NewTCPProxy(frontendAddr, backendAddr *net.TCPAddr) (*TCPProxy, error) {
// detect version of hostIP to bind only to correct version
ipVersion := ipv4
if frontendAddr.IP.To4() == nil {
ipVersion = ipv6
}
listener, err := net.ListenTCP("tcp"+string(ipVersion), frontendAddr)
if err != nil {
return nil, err
}
// If the port in frontendAddr was 0 then ListenTCP will have picked
// a port to listen on, hence the call to Addr to get that actual port:
func NewTCPProxy(listener *net.TCPListener, backendAddr *net.TCPAddr) (*TCPProxy, error) {
return &TCPProxy{
listener: listener,
frontendAddr: listener.Addr().(*net.TCPAddr),
@@ -86,9 +75,3 @@ func (proxy *TCPProxy) Run() {
// Close stops forwarding the traffic.
func (proxy *TCPProxy) Close() { proxy.listener.Close() }
// FrontendAddr returns the TCP address on which the proxy is listening.
func (proxy *TCPProxy) FrontendAddr() net.Addr { return proxy.frontendAddr }
// BackendAddr returns the TCP proxied address.
func (proxy *TCPProxy) BackendAddr() net.Addr { return proxy.backendAddr }

View File

@@ -54,16 +54,7 @@ type UDPProxy struct {
}
// NewUDPProxy creates a new UDPProxy.
func NewUDPProxy(frontendAddr, backendAddr *net.UDPAddr) (*UDPProxy, error) {
// detect version of hostIP to bind only to correct version
ipVersion := ipv4
if frontendAddr.IP.To4() == nil {
ipVersion = ipv6
}
listener, err := net.ListenUDP("udp"+string(ipVersion), frontendAddr)
if err != nil {
return nil, err
}
func NewUDPProxy(listener *net.UDPConn, backendAddr *net.UDPAddr) (*UDPProxy, error) {
return &UDPProxy{
listener: listener,
frontendAddr: listener.LocalAddr().(*net.UDPAddr),
@@ -156,12 +147,6 @@ func (proxy *UDPProxy) Close() {
}
}
// FrontendAddr returns the UDP address on which the proxy is listening.
func (proxy *UDPProxy) FrontendAddr() net.Addr { return proxy.frontendAddr }
// BackendAddr returns the proxied UDP address.
func (proxy *UDPProxy) BackendAddr() net.Addr { return proxy.backendAddr }
func isClosedError(err error) bool {
/* This comparison is ugly, but unfortunately, net.go doesn't export errClosing.
* See:

View File

@@ -34,7 +34,6 @@ const (
StockRuntimeName = "runc"
// userlandProxyBinary is the name of the userland-proxy binary.
// In rootless-mode, [rootless.RootlessKitDockerProxyBinary] is used instead.
userlandProxyBinary = "docker-proxy"
)
@@ -234,16 +233,25 @@ func setPlatformDefaults(cfg *Config) error {
cfg.CgroupNamespaceMode = string(DefaultCgroupNamespaceMode)
}
var err error
cfg.BridgeConfig.UserlandProxyPath, err = lookupBinPath(userlandProxyBinary)
if err != nil {
// Log, but don't error here. This allows running a daemon with
// userland-proxy disabled (which does not require the binary
// to be present).
//
// An error is still produced by [Config.ValidatePlatformConfig] if
// userland-proxy is enabled in the configuration.
//
// We log this at "debug" level, as this code is also executed
// when running "--version", and we don't want to print logs in
// that case..
log.G(context.TODO()).WithError(err).Debug("failed to lookup default userland-proxy binary")
}
if rootless.RunningWithRootlessKit() {
cfg.Rootless = true
var err error
// use rootlesskit-docker-proxy for exposing the ports in RootlessKit netns to the initial namespace.
cfg.BridgeConfig.UserlandProxyPath, err = lookupBinPath(rootless.RootlessKitDockerProxyBinary)
if err != nil {
return errors.Wrapf(err, "running with RootlessKit, but %s not installed", rootless.RootlessKitDockerProxyBinary)
}
dataHome, err := homedir.GetDataHome()
if err != nil {
return err
@@ -257,21 +265,6 @@ func setPlatformDefaults(cfg *Config) error {
cfg.ExecRoot = filepath.Join(runtimeDir, "docker")
cfg.Pidfile = filepath.Join(runtimeDir, "docker.pid")
} else {
var err error
cfg.BridgeConfig.UserlandProxyPath, err = lookupBinPath(userlandProxyBinary)
if err != nil {
// Log, but don't error here. This allows running a daemon with
// userland-proxy disabled (which does not require the binary
// to be present).
//
// An error is still produced by [Config.ValidatePlatformConfig] if
// userland-proxy is enabled in the configuration.
//
// We log this at "debug" level, as this code is also executed
// when running "--version", and we don't want to print logs in
// that case..
log.G(context.TODO()).WithError(err).Debug("failed to lookup default userland-proxy binary")
}
cfg.Root = "/var/lib/docker"
cfg.ExecRoot = "/var/run/docker"
cfg.Pidfile = "/var/run/docker.pid"

View File

@@ -915,6 +915,7 @@ func driverOptions(config *config.Config) nwconfig.Option {
"EnableIP6Tables": config.BridgeConfig.EnableIP6Tables,
"EnableUserlandProxy": config.BridgeConfig.EnableUserlandProxy,
"UserlandProxyPath": config.BridgeConfig.UserlandProxyPath,
"Rootless": config.Rootless,
},
})
}

View File

@@ -27,7 +27,5 @@ install_rootlesskit_dynamic() {
_install_rootlesskit() (
echo "Install rootlesskit version ${ROOTLESSKIT_VERSION}"
for f in rootlesskit rootlesskit-docker-proxy; do
GOBIN="${PREFIX}" GO111MODULE=on go install ${BUILD_MODE} -ldflags="$ROOTLESSKIT_LDFLAGS" "github.com/rootless-containers/rootlesskit/v2/cmd/${f}@${ROOTLESSKIT_VERSION}"
done
GOBIN="${PREFIX}" GO111MODULE=on go install ${BUILD_MODE} -ldflags="$ROOTLESSKIT_LDFLAGS" "github.com/rootless-containers/rootlesskit/v2/cmd/rootlesskit@${ROOTLESSKIT_VERSION}"
)

View File

@@ -14,7 +14,7 @@ copy_binaries() {
return
fi
echo "Copying nested executables into $dir"
for file in containerd containerd-shim-runc-v2 ctr runc docker-init rootlesskit rootlesskit-docker-proxy dockerd-rootless.sh dockerd-rootless-setuptool.sh; do
for file in containerd containerd-shim-runc-v2 ctr runc docker-init rootlesskit dockerd-rootless.sh dockerd-rootless-setuptool.sh; do
cp -f "$(command -v "$file")" "$dir/"
done
# vpnkit might not be available for the target platform, see vpnkit stage in

View File

@@ -2,7 +2,8 @@
set -e
(
# docker-proxy is Linux only
[ "$(go env GOOS)" != 'linux' ] || (
export CGO_ENABLED=0
DOCKER_STATIC=1

View File

@@ -2,7 +2,8 @@
set -e
(
# docker-proxy is Linux only
[ "$(go env GOOS)" != 'linux' ] || (
export LDFLAGS_STATIC=''
export BUILDFLAGS=("${BUILDFLAGS[@]/netgo /}") # disable netgo, since we don't need it for a dynamic binary
export BUILDFLAGS=("${BUILDFLAGS[@]/osusergo /}") # ditto for osusergo

View File

@@ -15,7 +15,6 @@ source "${MAKEDIR}/.install"
install_binary "${DEST}/docker-proxy"
install_binary "${DEST}/docker-init"
install_binary "${DEST}/rootlesskit"
install_binary "${DEST}/rootlesskit-docker-proxy"
install_binary "${DEST}/dockerd-rootless.sh"
install_binary "${DEST}/dockerd-rootless-setuptool.sh"
if [ -f "${DEST}/vpnkit" ]; then

View File

@@ -704,7 +704,7 @@ func (s *DockerDaemonSuite) TestDaemonIP(c *testing.T) {
out, err := d.Cmd("run", "-d", "-p", "8000:8000", "busybox", "top")
assert.Assert(c, err != nil, "Running a container must fail with an invalid --ip option")
assert.Equal(c, strings.Contains(out, "Error starting userland proxy"), true)
assert.Check(c, is.Contains(out, "failed to bind host port for 192.170.1.1"))
ifName := "dummy"
createInterface(c, "dummy", ifName, ipStr)

View File

@@ -14,6 +14,7 @@ import (
"github.com/docker/docker/errdefs"
"github.com/docker/docker/libnetwork/datastore"
"github.com/docker/docker/libnetwork/driverapi"
"github.com/docker/docker/libnetwork/drivers/bridge/internal/rlkclient"
"github.com/docker/docker/libnetwork/internal/netiputil"
"github.com/docker/docker/libnetwork/iptables"
"github.com/docker/docker/libnetwork/netlabel"
@@ -56,6 +57,7 @@ type configuration struct {
EnableIP6Tables bool
EnableUserlandProxy bool
UserlandProxyPath string
Rootless bool
}
// networkConfiguration for network specific configuration
@@ -131,6 +133,16 @@ type bridgeNetwork struct {
sync.Mutex
}
// portDriverClient abstracts the RootlessKit port driver client, so that
// unit tests can substitute a dummy implementation.
type portDriverClient interface {
// ChildHostIP maps a host IP address to the address to use for it inside
// the child network namespace.
ChildHostIP(hostIP netip.Addr) netip.Addr
// AddPort tells the port driver about a mapping for proto from hostIP and
// hostPort to childIP.
// NOTE(review): the returned func presumably removes the mapping again;
// confirm against the rlkclient implementation.
AddPort(ctx context.Context, proto string, hostIP, childIP netip.Addr, hostPort int) (func() error, error)
}
// Allow unit tests to supply a dummy RootlessKit port driver client.
//
// By default this wraps rlkclient.NewPortDriverClient.
// NOTE(review): that constructor can return a nil *PortDriverClient together
// with a nil error; wrapped in the portDriverClient interface this is a
// non-nil interface value holding a nil pointer, so a "!= nil" check on the
// result will not detect it. Confirm callers rely on the client's
// nil-receiver handling instead.
var newPortDriverClient = func(ctx context.Context) (portDriverClient, error) {
return rlkclient.NewPortDriverClient(ctx)
}
type driver struct {
config configuration
natChain *iptables.ChainInfo
@@ -144,6 +156,7 @@ type driver struct {
networks map[string]*bridgeNetwork
store *datastore.Store
nlh *netlink.Handle
portDriverClient portDriverClient
configNetwork sync.Mutex
sync.Mutex
}
@@ -414,6 +427,15 @@ func (n *bridgeNetwork) userlandProxyPath() string {
return n.driver.userlandProxyPath()
}
// getPortDriverClient returns the port driver client of the driver that owns
// this network, or nil when the network has no driver attached. The
// network's lock is held for the duration of the call.
func (n *bridgeNetwork) getPortDriverClient() portDriverClient {
	n.Lock()
	defer n.Unlock()

	if drv := n.driver; drv != nil {
		return drv.getPortDriverClient()
	}
	return nil
}
func (n *bridgeNetwork) getEndpoint(eid string) (*bridgeEndpoint, error) {
if eid == "" {
return nil, InvalidEndpointIDError(eid)
@@ -465,6 +487,7 @@ func (d *driver) configure(option map[string]interface{}) error {
filterChainV6 *iptables.ChainInfo
isolationChain1V6 *iptables.ChainInfo
isolationChain2V6 *iptables.ChainInfo
pdc portDriverClient
)
switch opt := option[netlabel.GenericData].(type) {
@@ -537,6 +560,14 @@ func (d *driver) configure(option map[string]interface{}) error {
}
}
if config.Rootless {
var err error
pdc, err = newPortDriverClient(context.TODO())
if err != nil {
return err
}
}
d.Lock()
d.natChain = natChain
d.filterChain = filterChain
@@ -546,6 +577,7 @@ func (d *driver) configure(option map[string]interface{}) error {
d.filterChainV6 = filterChainV6
d.isolationChain1V6 = isolationChain1V6
d.isolationChain2V6 = isolationChain2V6
d.portDriverClient = pdc
d.config = config
d.Unlock()
@@ -577,6 +609,12 @@ func (d *driver) userlandProxyPath() string {
return ""
}
// getPortDriverClient returns the driver's port driver client, reading the
// field under the driver's lock.
func (d *driver) getPortDriverClient() portDriverClient {
	d.Lock()
	pdc := d.portDriverClient
	d.Unlock()
	return pdc
}
func parseNetworkGenericOptions(data interface{}) (*networkConfiguration, error) {
var (
err error

View File

@@ -0,0 +1,156 @@
// RootlessKit integration - if required by RootlessKit's port driver, let it know
// about port mappings as they're added and removed.
//
// This is based on / copied from rootlesskit-docker-proxy, which was previously
// installed as a proxy for docker-proxy:
// https://github.com/rootless-containers/rootlesskit/blob/4fb2e2cb80bf13eb28b7f2a4317b63406b89ad32/cmd/rootlesskit-docker-proxy/main.go
package rlkclient
import (
"context"
"errors"
"fmt"
"net"
"net/netip"
"os"
"path/filepath"
"strconv"
"strings"
"github.com/rootless-containers/rootlesskit/v2/pkg/api/client"
"github.com/rootless-containers/rootlesskit/v2/pkg/port"
)
// PortDriverClient talks to RootlessKit's API on behalf of the bridge driver.
// Use NewPortDriverClient to create one.
type PortDriverClient struct {
// client is the RootlessKit API client connected to the API socket.
client client.Client
// portDriverName is the name of RootlessKit's port driver, as reported by
// its info API.
portDriverName string
// protos is the set of protocol names reported by the port driver.
protos map[string]struct{}
// childIP is only set when the port driver disallows a loopback child
// IP; it is then the network driver's child IP. Otherwise it is the zero
// netip.Addr.
childIP netip.Addr
}
// NewPortDriverClient connects to the RootlessKit API socket ("api.sock" in
// the directory named by $ROOTLESSKIT_STATE_DIR) and asks RootlessKit which
// port driver it is using.
//
// It returns a nil client and a nil error if RootlessKit reports no port
// driver, or the "none" or "implicit" port driver. Otherwise the returned
// client records the port driver's name, the protocols it supports and, if
// the port driver does not allow a loopback child IP, the child IP address
// reported by the network driver.
//
// An error is returned if $ROOTLESSKIT_STATE_DIR is unset, if the API cannot
// be reached or queried, or if a non-loopback child IP is required but
// unavailable.
func NewPortDriverClient(ctx context.Context) (*PortDriverClient, error) {
	stateDir := os.Getenv("ROOTLESSKIT_STATE_DIR")
	if stateDir == "" {
		return nil, errors.New("$ROOTLESSKIT_STATE_DIR needs to be set")
	}

	apiClient, err := client.New(filepath.Join(stateDir, "api.sock"))
	if err != nil {
		return nil, fmt.Errorf("error while connecting to RootlessKit API socket: %w", err)
	}
	info, err := apiClient.Info(ctx)
	if err != nil {
		return nil, fmt.Errorf("failed to call info API, probably RootlessKit binary is too old (needs to be v0.14.0 or later): %w", err)
	}

	// info.PortDriver is currently nil for "none" and "implicit", but this may change in future
	if info.PortDriver == nil {
		return nil, nil
	}
	switch info.PortDriver.Driver {
	case "none", "implicit":
		return nil, nil
	}

	// The zero netip.Addr (invalid) means "no specific child IP".
	var childIP netip.Addr
	if info.PortDriver.DisallowLoopbackChildIP {
		// i.e., port-driver="slirp4netns"
		if info.NetworkDriver.ChildIP == nil {
			return nil, fmt.Errorf("RootlessKit port driver (%q) does not allow loopback child IP, but network driver (%q) has no non-loopback IP",
				info.PortDriver.Driver, info.NetworkDriver.Driver)
		}
		addr, ok := netip.AddrFromSlice(info.NetworkDriver.ChildIP)
		if !ok {
			return nil, fmt.Errorf("unable to use child IP %s from network driver (%q)",
				info.NetworkDriver.ChildIP, info.NetworkDriver.Driver)
		}
		childIP = addr
	}

	// Build the set of protocols supported by the port driver.
	supported := make(map[string]struct{}, len(info.PortDriver.Protos))
	for _, name := range info.PortDriver.Protos {
		supported[name] = struct{}{}
	}

	return &PortDriverClient{
		client:         apiClient,
		portDriverName: info.PortDriver.Driver,
		protos:         supported,
		childIP:        childIP,
	}, nil
}
// ChildHostIP maps hostIP, an address on the host, to the address that has
// to be used for it inside the child network namespace. Port mappings from
// host IP addresses, and DNAT rules, must use the returned child address
// rather than the real host address.
//
// With a nil client, hostIP is returned unchanged. If the client has a
// specific child IP, that is returned. Otherwise the result is the loopback
// address: IPv6 if hostIP.Is6() reports true, else IPv4.
func (c *PortDriverClient) ChildHostIP(hostIP netip.Addr) netip.Addr {
	switch {
	case c == nil:
		return hostIP
	case c.childIP.IsValid():
		return c.childIP
	case hostIP.Is6():
		return netip.IPv6Loopback()
	default:
		return netip.MustParseAddr("127.0.0.1")
	}
}
// AddPort makes a request to RootlessKit asking it to set up a port
// mapping between a host IP address and a child host IP address. The same
// port number, hostPort, is used on both sides of the mapping.
//
// proto is a protocol name like "tcp"; unless it already ends in "4" or "6",
// the address family of hostIP is appended before it is passed to RootlessKit.
//
// On success, the returned function asks RootlessKit to remove the mapping
// again; it uses the ctx that was passed to AddPort. With a nil client,
// nothing is done and the returned function is a no-op.
//
// An error is returned if the protocol is not supported by the RootlessKit
// port driver, or if RootlessKit rejects the request.
func (c *PortDriverClient) AddPort(
	ctx context.Context,
	proto string,
	hostIP netip.Addr,
	childIP netip.Addr,
	hostPort int,
) (func() error, error) {
	if c == nil {
		return func() error { return nil }, nil
	}
	// proto is like "tcp", but we need to convert it to "tcp4" or "tcp6" explicitly
	// for libnetwork >= 20201216
	//
	// See https://github.com/moby/libnetwork/pull/2604/files#diff-8fa48beed55dd033bf8e4f8c40b31cf69d0b2cc5d4bb53cde8594670ea6c938aR20
	// See also https://github.com/rootless-containers/rootlesskit/issues/231
	apiProto := proto
	if !strings.HasSuffix(apiProto, "4") && !strings.HasSuffix(apiProto, "6") {
		if hostIP.Is6() {
			apiProto += "6"
		} else {
			apiProto += "4"
		}
	}

	if _, ok := c.protos[apiProto]; !ok {
		// This happens when apiProto="tcp6", portDriverName="slirp4netns",
		// because "slirp4netns" port driver does not support listening on IPv6 yet.
		//
		// Note that "slirp4netns" port driver is not used by default,
		// even when network driver is set to "slirp4netns".
		//
		// Most users are using "builtin" port driver and will not see this warning.
		return nil, fmt.Errorf("protocol %q is not supported by the RootlessKit port driver %q, discarding request for %q",
			proto,
			c.portDriverName,
			net.JoinHostPort(hostIP.String(), strconv.Itoa(hostPort)))
	}

	pm := c.client.PortManager()
	p := port.Spec{
		Proto:      apiProto,
		ParentIP:   hostIP.String(),
		ParentPort: hostPort,
		ChildIP:    childIP.String(),
		ChildPort:  hostPort,
	}
	st, err := pm.AddPort(ctx, p)
	if err != nil {
		return nil, fmt.Errorf("error while calling RootlessKit PortManager.AddPort(): %w", err)
	}
	deferFunc := func() error {
		if dErr := pm.RemovePort(ctx, st.ID); dErr != nil {
			// Wrap the error from RemovePort (dErr). The outer err is always
			// nil here, so wrapping it would discard the real cause.
			return fmt.Errorf("error while calling RootlessKit PortManager.RemovePort(): %w", dErr)
		}
		return nil
	}
	return deferFunc, nil
}

View File

@@ -12,6 +12,8 @@ import (
"os"
"slices"
"strconv"
"syscall"
"unsafe"
"github.com/containerd/log"
"github.com/docker/docker/libnetwork/iptables"
@@ -19,27 +21,60 @@ import (
"github.com/docker/docker/libnetwork/portallocator"
"github.com/docker/docker/libnetwork/portmapper"
"github.com/docker/docker/libnetwork/types"
"github.com/ishidawataru/sctp"
)
// portBinding is a types.PortBinding together with the resources associated
// with it: the socket used to reserve the host port, the host address as
// seen by the daemon, and the functions needed to undo the RootlessKit port
// driver registration and to stop the userland proxy.
type portBinding struct {
types.PortBinding
// boundSocket is used to reserve a host port for the binding. If the
// userland proxy is in-use, it's passed to the proxy when the proxy is
// started, then it's closed and set to nil here.
boundSocket *os.File
// childHostIP is the host IP address, as seen from the daemon. This
// is normally the same as PortBinding.HostIP but, in rootless mode, it
// will be an address in the rootless network namespace. RootlessKit
// binds the port on the real (parent) host address and maps it to the
// same port number on the address dockerd sees in the child namespace.
// So, for example, docker-proxy and DNAT rules need to use the child
// namespace's host address. (PortBinding.HostIP isn't replaced by the
// child address, because it's stored as user-config and the child
// address may change if RootlessKit is configured differently.)
childHostIP net.IP
// portDriverRemove is a function that will inform the RootlessKit
// port driver about removal of a port binding, or nil.
portDriverRemove func() error
// stopProxy is a function to stop the userland proxy for this binding,
// if a proxy has been started - else nil.
stopProxy func() error
}
// childPortBinding returns a copy of pb.PortBinding in which HostIP has been
// replaced by pb.childHostIP - the host address as the daemon sees it. In
// rootless mode, that is an address in RootlessKit's child namespace (see
// portBinding.childHostIP).
func (pb portBinding) childPortBinding() types.PortBinding {
	// pb is a value receiver, so this only modifies the local copy.
	pb.PortBinding.HostIP = pb.childHostIP
	return pb.PortBinding
}
type portBindingReq struct {
types.PortBinding
disableNAT bool
childHostIP net.IP
disableNAT bool
}
// Allow unit tests to supply a dummy StartProxy.
var startProxy = portmapper.StartProxy
// addPortMappings takes cfg, the configuration for port mappings, selects host
// ports when ranges are given, starts docker-proxy or its dummy to reserve
// host ports, and sets up iptables NAT/forwarding rules as necessary. If
// anything goes wrong, it will undo any work it's done and return an error.
// Otherwise, the returned slice of portBinding has an entry per address
// family (if cfg describes a mapping for 'any' host address, it's expanded
// into mappings for IPv4 and IPv6, because that's how the mapping is presented
// in 'inspect'). HostPort and HostPortEnd in each returned portBinding are set
// to the selected and reserved port.
// ports when ranges are given, binds host ports to check they're available and
// reserve them, starts docker-proxy if required, and sets up iptables
// NAT/forwarding rules as necessary. If anything goes wrong, it will undo any
// work it's done and return an error. Otherwise, the returned slice of
// portBinding has an entry per address family (if cfg describes a mapping for
// 'any' host address, it's expanded into mappings for IPv4 and IPv6, because
// that's how the mapping is presented in 'inspect'). HostPort and HostPortEnd in
// each returned portBinding are set to the selected and reserved port.
func (n *bridgeNetwork) addPortMappings(
epAddrV4, epAddrV6 *net.IPNet,
cfg []types.PortBinding,
@@ -79,6 +114,7 @@ func (n *bridgeNetwork) addPortMappings(
sortAndNormPBs(sortedCfg)
proxyPath := n.userlandProxyPath()
pdc := n.getPortDriverClient()
// toBind accumulates port bindings that should be allocated the same host port
// (if required by NAT config). If the host address is unspecified, and defHostIP
@@ -91,7 +127,7 @@ func (n *bridgeNetwork) addPortMappings(
// bindings to collect, they're applied and toBind is reset.
var toBind []portBindingReq
for i, c := range sortedCfg {
if bindingIPv4, ok := configurePortBindingIPv4(disableNAT4, c, containerIPv4, defHostIP); ok {
if bindingIPv4, ok := configurePortBindingIPv4(pdc, disableNAT4, c, containerIPv4, defHostIP); ok {
toBind = append(toBind, bindingIPv4)
}
@@ -107,7 +143,7 @@ func (n *bridgeNetwork) addPortMappings(
if proxyPath != "" && (containerIPv6 == nil) {
containerIP = containerIPv4
}
if bindingIPv6, ok := configurePortBindingIPv6(disableNAT6, c, containerIP, defHostIP); ok {
if bindingIPv6, ok := configurePortBindingIPv6(pdc, disableNAT6, c, containerIP, defHostIP); ok {
toBind = append(toBind, bindingIPv6)
}
@@ -117,24 +153,86 @@ func (n *bridgeNetwork) addPortMappings(
continue
}
// Allocate a host port, and reserve it by starting docker-proxy for each host
// address in toBind.
// Allocate and bind a host port.
newB, err := bindHostPorts(toBind, proxyPath)
if err != nil {
return nil, err
}
bindings = append(bindings, newB...)
// Reset the collection of bindings now they're bound.
// Reset toBind now the ports are bound.
toBind = toBind[:0]
}
for _, b := range bindings {
if err := n.setPerPortIptables(b, true); err != nil {
for i := range bindings {
if pdc != nil && bindings[i].HostPort != 0 {
var err error
b := &bindings[i]
hip, ok := netip.AddrFromSlice(b.HostIP)
if !ok {
return nil, fmt.Errorf("invalid host IP address in %s", b)
}
chip, ok := netip.AddrFromSlice(b.childHostIP)
if !ok {
return nil, fmt.Errorf("invalid child host IP address %s in %s", b.childHostIP, b)
}
b.portDriverRemove, err = pdc.AddPort(context.TODO(), b.Proto.String(), hip, chip, int(b.HostPort))
if err != nil {
return nil, err
}
}
if err := n.setPerPortIptables(bindings[i], true); err != nil {
return nil, err
}
}
// Now the iptables rules are set up, it's safe to start the userland proxy.
// (If it was started before the iptables rules were created, it may have
// accepted a connection, then become unreachable due to NAT rules sending
// packets directly to the container.)
// If not starting the proxy, nothing will ever accept a connection on the
// socket. But, listen anyway so that the binding shows up in "netstat -at".
somaxconn := 0
if proxyPath != "" {
somaxconn = -1 // silently capped to "/proc/sys/net/core/somaxconn"
}
for i := range bindings {
	// Bindings without a bound socket have nothing to listen on or to hand
	// to the proxy.
	if bindings[i].boundSocket == nil {
		continue
	}
	if bindings[i].Proto == types.TCP {
		rc, err := bindings[i].boundSocket.SyscallConn()
		if err != nil {
			return nil, fmt.Errorf("raw conn not available on TCP socket: %w", err)
		}
		// Control reports its own failure as errC; the result of listen(2)
		// is captured separately, in err, by the callback.
		if errC := rc.Control(func(fd uintptr) {
			err = syscall.Listen(int(fd), somaxconn)
		}); errC != nil {
			// Wrap errC, not err: err is nil at this point unless the
			// callback ran and listen failed.
			return nil, fmt.Errorf("failed to Control TCP socket: %w", errC)
		}
		if err != nil {
			return nil, fmt.Errorf("failed to listen on TCP socket: %w", err)
		}
	}
	if proxyPath != "" {
		var err error
		bindings[i].stopProxy, err = startProxy(
			bindings[i].childPortBinding(), proxyPath, bindings[i].boundSocket,
		)
		if err != nil {
			return nil, fmt.Errorf("failed to start userland proxy for port mapping %s: %w",
				bindings[i].PortBinding, err)
		}
		// The socket has been passed to the proxy, the daemon's copy is no
		// longer needed. A failure to close it is only logged.
		if err := bindings[i].boundSocket.Close(); err != nil {
			log.G(context.TODO()).WithFields(log.Fields{
				"error":   err,
				"mapping": bindings[i].PortBinding,
			}).Warnf("failed to close proxy socket")
		}
		bindings[i].boundSocket = nil
	}
}
return bindings, nil
}
@@ -263,7 +361,7 @@ func needSamePort(a, b types.PortBinding) bool {
// configurePortBindingIPv4 returns a new port binding with the HostIP field populated
// if a binding is required, else nil.
func configurePortBindingIPv4(disableNAT bool, bnd types.PortBinding, containerIPv4, defHostIP net.IP) (portBindingReq, bool) {
func configurePortBindingIPv4(pdc portDriverClient, disableNAT bool, bnd types.PortBinding, containerIPv4, defHostIP net.IP) (portBindingReq, bool) {
if len(containerIPv4) == 0 {
return portBindingReq{}, false
}
@@ -282,15 +380,15 @@ func configurePortBindingIPv4(disableNAT bool, bnd types.PortBinding, containerI
// Unmap the addresses if they're IPv4-mapped IPv6.
bnd.HostIP = bnd.HostIP.To4()
bnd.IP = containerIPv4.To4()
return portBindingReq{
return setChildHostIP(pdc, portBindingReq{
PortBinding: bnd,
disableNAT: disableNAT,
}, true
}), true
}
// configurePortBindingIPv6 returns a new port binding with the HostIP field populated
// if a binding is required, else nil.
func configurePortBindingIPv6(disableNAT bool, bnd types.PortBinding, containerIP, defHostIP net.IP) (portBindingReq, bool) {
func configurePortBindingIPv6(pdc portDriverClient, disableNAT bool, bnd types.PortBinding, containerIP, defHostIP net.IP) (portBindingReq, bool) {
if containerIP == nil {
return portBindingReq{}, false
}
@@ -317,13 +415,23 @@ func configurePortBindingIPv6(disableNAT bool, bnd types.PortBinding, containerI
}
}
bnd.IP = containerIP
return portBindingReq{
return setChildHostIP(pdc, portBindingReq{
PortBinding: bnd,
disableNAT: disableNAT,
}, true
}), true
}
// bindHostPorts allocates ports and starts docker-proxy for the given cfg. The
// setChildHostIP returns req with its childHostIP field filled in. Without a
// port driver client (pdc is nil), the child host address is req.HostIP
// itself. Otherwise it is the address pdc.ChildHostIP returns for req.HostIP.
func setChildHostIP(pdc portDriverClient, req portBindingReq) portBindingReq {
	childIP := req.HostIP
	if pdc != nil {
		// The "ok" result is not checked: if req.HostIP can't be converted,
		// the zero netip.Addr is passed to ChildHostIP.
		hostAddr, _ := netip.AddrFromSlice(req.HostIP)
		childIP = pdc.ChildHostIP(hostAddr).AsSlice()
	}
	req.childHostIP = childIP
	return req
}
// bindHostPorts allocates and binds host ports for the given cfg. The
// caller is responsible for ensuring that all entries in cfg map the same proto,
// container port, and host port range (their host addresses must differ).
func bindHostPorts(cfg []portBindingReq, proxyPath string) ([]portBinding, error) {
@@ -358,17 +466,13 @@ func bindHostPorts(cfg []portBindingReq, proxyPath string) ([]portBinding, error
return nil, err
}
// Allow unit tests to supply a dummy StartProxy.
var startProxy = portmapper.StartProxy
// attemptBindHostPorts allocates host ports for each port mapping that requires
// one, and reserves those ports by starting docker-proxy.
// one, and reserves those ports by binding them.
//
// If the allocator doesn't have an available port in the required range, or the
// docker-proxy process doesn't start (perhaps because another process has
// already bound the port), all resources are released and an error is returned.
// When ports are successfully reserved, a portBinding is returned for each
// mapping.
// port can't be bound (perhaps because another process has already bound it),
// all resources are released and an error is returned. When ports are
// successfully reserved, a portBinding is returned for each mapping.
//
// If NAT is disabled for any of the bindings, no host port reservation is
// needed. These bindings are included in results, as the container port itself
@@ -385,7 +489,7 @@ func attemptBindHostPorts(
addrs := make([]net.IP, 0, len(cfg))
for _, c := range cfg {
if !c.disableNAT {
addrs = append(addrs, c.HostIP)
addrs = append(addrs, c.childHostIP)
}
}
@@ -405,30 +509,177 @@ func attemptBindHostPorts(
}
res := make([]portBinding, 0, len(cfg))
for _, c := range cfg {
pb := portBinding{PortBinding: c.GetCopy()}
if c.disableNAT {
pb.HostPort = 0
} else {
pb.stopProxy, err = startProxy(c.Proto.String(), c.HostIP, port, c.IP, int(c.Port), proxyPath)
if err != nil {
return nil, fmt.Errorf("failed to bind port %s:%d/%s: %w", c.HostIP, port, c.Proto, err)
}
defer func() {
if retErr != nil {
if err := pb.stopProxy(); err != nil {
log.G(context.TODO()).Warnf("Failed to stop userland proxy for port mapping %s: %s", pb, err)
defer func() {
if retErr != nil {
for _, pb := range res {
if pb.boundSocket != nil {
if err := pb.boundSocket.Close(); err != nil {
log.G(context.TODO()).Warnf("Failed to close port binding for %s: %s", pb, err)
}
}
}()
pb.HostPort = uint16(port)
// TODO(robmry) - this is only needed because the userland proxy may have
// been started for SCTP. If a bound socket is passed to the proxy after
// iptables rules have been configured (as it is for TCP/UDP), remove this.
if pb.stopProxy != nil {
if err := pb.stopProxy(); err != nil {
log.G(context.TODO()).Warnf("Failed to stop proxy for %s: %s", pb, err)
}
}
}
}
}()
for _, c := range cfg {
var pb portBinding
if c.disableNAT {
pb = portBinding{PortBinding: c.GetCopy()}
pb.HostPort = 0
pb.HostPortEnd = 0
} else {
switch proto {
case "tcp":
pb, err = bindTCPOrUDP(c, port, syscall.SOCK_STREAM, syscall.IPPROTO_TCP)
case "udp":
pb, err = bindTCPOrUDP(c, port, syscall.SOCK_DGRAM, syscall.IPPROTO_UDP)
case "sctp":
if proxyPath == "" {
pb, err = bindSCTP(c, port)
} else {
// TODO(robmry) - it's not currently possible to pass a bound SCTP port
// to the userland proxy, because the proxy is not able to convert the
// file descriptor into an sctp.SCTPListener (fd is an unexported member
// of the struct, and ListenSCTP is the only constructor).
// So, it is possible for the proxy to start listening and accept
// connections before iptables rules are created that would bypass
// the proxy for external connections.
// Remove this and pb.stopProxy() from the cleanup function above if
// this is fixed.
pb, err = startSCTPProxy(c, port, proxyPath)
}
default:
return nil, fmt.Errorf("Unknown addr type: %s", proto)
}
if err != nil {
return nil, err
}
}
pb.HostPortEnd = pb.HostPort
res = append(res, pb)
}
return res, nil
}
// bindTCPOrUDP creates a socket of type typ for protocol proto, and binds it
// to the given port on cfg.childHostIP. The socket is not put into the
// listening state here.
//
// The address family is AF_INET if cfg.childHostIP has a 4-byte
// representation, else AF_INET6 (with IPV6_V6ONLY requested).
//
// On success, the returned portBinding is a copy of cfg's PortBinding with
// HostPort and HostPortEnd set to port, childHostIP copied from cfg, and
// boundSocket wrapping the bound socket. On failure the socket is closed and
// an error is returned.
func bindTCPOrUDP(cfg portBindingReq, port, typ, proto int) (_ portBinding, retErr error) {
	// Select the address family and build the matching socket address.
	var (
		family   = syscall.AF_INET6
		bindAddr syscall.Sockaddr
	)
	if ip4 := cfg.childHostIP.To4(); ip4 == nil {
		addr6 := &syscall.SockaddrInet6{Port: port}
		copy(addr6.Addr[:], cfg.childHostIP)
		bindAddr = addr6
	} else {
		family = syscall.AF_INET
		addr4 := &syscall.SockaddrInet4{Port: port}
		copy(addr4.Addr[:], ip4)
		bindAddr = addr4
	}

	fd, err := syscall.Socket(family, typ|syscall.SOCK_CLOEXEC, proto)
	if err != nil {
		return portBinding{}, fmt.Errorf("failed to create socket for userland proxy for %s: %w", cfg, err)
	}
	// Don't leak the descriptor if anything below fails.
	defer func() {
		if retErr != nil {
			syscall.Close(fd)
		}
	}()

	if family == syscall.AF_INET6 {
		// NOTE(review): the result of this setsockopt is not checked.
		syscall.SetsockoptInt(fd, syscall.IPPROTO_IPV6, syscall.IPV6_V6ONLY, 1)
	}

	if err := syscall.Bind(fd, bindAddr); err != nil {
		// Only name the port in the message when a range was requested.
		if cfg.HostPort != cfg.HostPortEnd {
			return portBinding{}, fmt.Errorf("failed to bind host port %d for %s: %w", port, cfg, err)
		}
		return portBinding{}, fmt.Errorf("failed to bind host port for %s: %w", cfg, err)
	}

	sock := os.NewFile(uintptr(fd), "listener")
	if sock == nil {
		return portBinding{}, fmt.Errorf("failed to convert socket for userland proxy for %s", cfg)
	}

	hostPort := uint16(port)
	res := portBinding{
		PortBinding: cfg.PortBinding.GetCopy(),
		boundSocket: sock,
		childHostIP: cfg.childHostIP,
	}
	res.HostPort, res.HostPortEnd = hostPort, hostPort
	return res, nil
}
// bindSCTP is based on sctp.ListenSCTP. The socket is created and bound, but
// does not start listening.
//
// port is the host port to bind, on address cfg.childHostIP. It is also the
// port reported in the returned portBinding's HostPort and HostPortEnd, so
// the socket must be bound to port rather than to cfg.HostPort (which is the
// port or start of the port range that was requested).
//
// On success, the returned portBinding's boundSocket wraps the bound socket.
// On failure the socket is closed and an error is returned.
func bindSCTP(cfg portBindingReq, port int) (_ portBinding, retErr error) {
	pb := portBinding{PortBinding: cfg.GetCopy()}
	pb.HostPort = uint16(port)
	pb.HostPortEnd = pb.HostPort
	pb.childHostIP = cfg.childHostIP

	domain := syscall.AF_INET
	if cfg.childHostIP.To4() == nil {
		domain = syscall.AF_INET6
	}

	sd, err := syscall.Socket(domain, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, syscall.IPPROTO_SCTP)
	if err != nil {
		return portBinding{}, fmt.Errorf("failed to create socket for userland proxy for %s: %w", cfg, err)
	}
	// Don't leak the descriptor if anything below fails.
	defer func() {
		if retErr != nil {
			syscall.Close(sd)
		}
	}()

	if domain == syscall.AF_INET6 {
		// NOTE(review): the result of this setsockopt is not checked.
		syscall.SetsockoptInt(sd, syscall.IPPROTO_IPV6, syscall.IPV6_V6ONLY, 1)
	}

	options := sctp.InitMsg{NumOstreams: sctp.SCTP_MAX_STREAM}
	if _, _, errno := syscall.Syscall6(syscall.SYS_SETSOCKOPT,
		uintptr(sd),
		sctp.SOL_SCTP,
		sctp.SCTP_INITMSG,
		uintptr(unsafe.Pointer(&options)),
		unsafe.Sizeof(options),
		0); errno != 0 {
		return portBinding{}, fmt.Errorf("failed to set SCTP_INITMSG on socket for %s: %w", cfg, errno)
	}

	// Bind the allocated port, the one recorded in pb.HostPort.
	if err := sctp.SCTPBind(sd,
		&sctp.SCTPAddr{IPAddrs: []net.IPAddr{{IP: cfg.childHostIP}}, Port: port},
		sctp.SCTP_BINDX_ADD_ADDR); err != nil {
		return portBinding{}, fmt.Errorf("failed to bind socket for userland proxy for %s: %w", cfg, err)
	}

	pb.boundSocket = os.NewFile(uintptr(sd), "listener")
	if pb.boundSocket == nil {
		return portBinding{}, fmt.Errorf("failed to convert socket for userland proxy for %s", cfg)
	}
	return pb, nil
}
// startSCTPProxy starts the userland proxy for an SCTP port mapping, without
// passing it a socket (the socket argument to startProxy is nil).
//
// The returned portBinding is a copy of cfg's PortBinding with HostPort and
// HostPortEnd set to port, childHostIP copied from cfg, and stopProxy set to
// the function returned by startProxy. If the proxy can't be started, that
// error is returned with an empty portBinding.
func startSCTPProxy(cfg portBindingReq, port int, proxyPath string) (_ portBinding, retErr error) {
	hostPort := uint16(port)
	pb := portBinding{
		PortBinding: cfg.GetCopy(),
		childHostIP: cfg.childHostIP,
	}
	pb.HostPort, pb.HostPortEnd = hostPort, hostPort

	stop, err := startProxy(pb.childPortBinding(), proxyPath, nil)
	if err != nil {
		return portBinding{}, err
	}
	pb.stopProxy = stop
	return pb, nil
}
// releasePorts attempts to release all port bindings, does not stop on failure
func (n *bridgeNetwork) releasePorts(ep *bridgeEndpoint) error {
n.Lock()
@@ -442,11 +693,20 @@ func (n *bridgeNetwork) releasePorts(ep *bridgeEndpoint) error {
func (n *bridgeNetwork) releasePortBindings(pbs []portBinding) error {
var errs []error
for _, pb := range pbs {
var errP error
var errS, errPD, errP error
if pb.boundSocket != nil {
errS = pb.boundSocket.Close()
if errS != nil {
errS = fmt.Errorf("failed to close socket for port mapping %s: %w", pb, errS)
}
}
if pb.portDriverRemove != nil {
errPD = pb.portDriverRemove()
}
if pb.stopProxy != nil {
errP = pb.stopProxy()
if errP != nil {
errP = fmt.Errorf("failed to stop docker-proxy for port mapping %s: %w", pb, errP)
errP = fmt.Errorf("failed to stop userland proxy for port mapping %s: %w", pb, errP)
}
}
errN := n.setPerPortIptables(pb, false)
@@ -454,9 +714,9 @@ func (n *bridgeNetwork) releasePortBindings(pbs []portBinding) error {
errN = fmt.Errorf("failed to remove iptables rules for port mapping %s: %w", pb, errN)
}
if pb.HostPort > 0 {
portallocator.Get().ReleasePort(pb.HostIP, pb.Proto.String(), int(pb.HostPort))
portallocator.Get().ReleasePort(pb.childHostIP, pb.Proto.String(), int(pb.HostPort))
}
errs = append(errs, errP, errN)
errs = append(errs, errS, errPD, errP, errN)
}
return errors.Join(errs...)
}
@@ -498,8 +758,8 @@ func setPerPortNAT(b portBinding, ipv iptables.IPVersion, proxyPath string, brid
// want "0.0.0.0/0". "0/0" is correctly interpreted as "any
// value" by both iptables and ip6tables.
hostIP := "0/0"
if !b.HostIP.IsUnspecified() {
hostIP = b.HostIP.String()
if !b.childHostIP.IsUnspecified() {
hostIP = b.childHostIP.String()
}
args := []string{
"-p", b.Proto.String(),

View File

@@ -5,7 +5,11 @@ import (
"errors"
"fmt"
"net"
"net/netip"
"os"
"strconv"
"strings"
"syscall"
"testing"
"github.com/docker/docker/internal/testutils/netnsutils"
@@ -14,6 +18,7 @@ import (
"github.com/docker/docker/libnetwork/ns"
"github.com/docker/docker/libnetwork/portallocator"
"github.com/docker/docker/libnetwork/types"
"github.com/vishvananda/netlink"
"gotest.tools/v3/assert"
is "gotest.tools/v3/assert/cmp"
)
@@ -420,6 +425,8 @@ func TestAddPortMappings(t *testing.T) {
defHostIP net.IP
proxyPath string
busyPortIPv4 int
rootless bool
hostAddrs []string
expErr string
expPBs []types.PortBinding
@@ -438,6 +445,7 @@ func TestAddPortMappings(t *testing.T) {
{Proto: types.TCP, Port: 22},
{Proto: types.TCP, Port: 80},
},
proxyPath: "/dummy/path/to/proxy",
expPBs: []types.PortBinding{
{Proto: types.TCP, IP: ctrIP4.IP, Port: 22, HostIP: net.IPv4zero, HostPort: firstEphemPort},
{Proto: types.TCP, IP: ctrIP6.IP, Port: 22, HostIP: net.IPv6zero, HostPort: firstEphemPort},
@@ -446,22 +454,24 @@ func TestAddPortMappings(t *testing.T) {
},
},
{
name: "specific host port",
epAddrV4: ctrIP4,
epAddrV6: ctrIP6,
cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080}},
name: "specific host port",
epAddrV4: ctrIP4,
epAddrV6: ctrIP6,
cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080}},
proxyPath: "/dummy/path/to/proxy",
expPBs: []types.PortBinding{
{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8080, HostPortEnd: 8080},
{Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: net.IPv6zero, HostPort: 8080, HostPortEnd: 8080},
},
},
{
name: "nat explicitly enabled",
epAddrV4: ctrIP4,
epAddrV6: ctrIP6,
cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080}},
gwMode4: gwModeNAT,
gwMode6: gwModeNAT,
name: "nat explicitly enabled",
epAddrV4: ctrIP4,
epAddrV6: ctrIP6,
cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080}},
gwMode4: gwModeNAT,
gwMode6: gwModeNAT,
proxyPath: "/dummy/path/to/proxy",
expPBs: []types.PortBinding{
{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8080, HostPortEnd: 8080},
{Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: net.IPv6zero, HostPort: 8080, HostPortEnd: 8080},
@@ -472,24 +482,27 @@ func TestAddPortMappings(t *testing.T) {
epAddrV4: ctrIP4,
epAddrV6: ctrIP6,
cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080}},
proxyPath: "/dummy/path/to/proxy",
busyPortIPv4: 8080,
expErr: "failed to bind port 0.0.0.0:8080/tcp: busy port",
expErr: "failed to bind host port for 0.0.0.0:8080:172.19.0.2:80/tcp: address already in use",
},
{
name: "ipv4 mapped container address with specific host port",
epAddrV4: ctrIP4Mapped,
epAddrV6: ctrIP6,
cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080}},
name: "ipv4 mapped container address with specific host port",
epAddrV4: ctrIP4Mapped,
epAddrV6: ctrIP6,
cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080}},
proxyPath: "/dummy/path/to/proxy",
expPBs: []types.PortBinding{
{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8080, HostPortEnd: 8080},
{Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: net.IPv6zero, HostPort: 8080, HostPortEnd: 8080},
},
},
{
name: "ipv4 mapped host address with specific host port",
epAddrV4: ctrIP4,
epAddrV6: ctrIP6,
cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostIP: newIPNet(t, "::ffff:127.0.0.1/128").IP, HostPort: 8080}},
name: "ipv4 mapped host address with specific host port",
epAddrV4: ctrIP4,
epAddrV6: ctrIP6,
cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostIP: newIPNet(t, "::ffff:127.0.0.1/128").IP, HostPort: 8080}},
proxyPath: "/dummy/path/to/proxy",
expPBs: []types.PortBinding{
{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: newIPNet(t, "127.0.0.1/32").IP, HostPort: 8080, HostPortEnd: 8080},
},
@@ -499,6 +512,7 @@ func TestAddPortMappings(t *testing.T) {
epAddrV4: ctrIP4,
epAddrV6: ctrIP6,
cfg: []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080, HostPortEnd: 8081}},
proxyPath: "/dummy/path/to/proxy",
busyPortIPv4: 8080,
expPBs: []types.PortBinding{
{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8081, HostPortEnd: 8081},
@@ -513,6 +527,7 @@ func TestAddPortMappings(t *testing.T) {
{Proto: types.TCP, Port: 80, HostIP: net.IPv4zero, HostPort: 8080, HostPortEnd: 8081},
{Proto: types.TCP, Port: 80, HostIP: net.IPv6zero, HostPort: 8080, HostPortEnd: 8081},
},
proxyPath: "/dummy/path/to/proxy",
busyPortIPv4: 8080,
expPBs: []types.PortBinding{
{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8081},
@@ -531,6 +546,7 @@ func TestAddPortMappings(t *testing.T) {
{Proto: types.UDP, Port: 81, HostPort: 8080, HostPortEnd: 8083},
{Proto: types.UDP, Port: 82, HostPort: 8080, HostPortEnd: 8083},
},
proxyPath: "/dummy/path/to/proxy",
busyPortIPv4: 8082,
expPBs: []types.PortBinding{
{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8080, HostPortEnd: 8080},
@@ -556,8 +572,9 @@ func TestAddPortMappings(t *testing.T) {
{Proto: types.TCP, Port: 81, HostPort: 8080, HostPortEnd: 8082},
{Proto: types.TCP, Port: 82, HostPort: 8080, HostPortEnd: 8082},
},
proxyPath: "/dummy/path/to/proxy",
busyPortIPv4: 8081,
expErr: "failed to bind port 0.0.0.0:8081/tcp: busy port",
expErr: "failed to bind host port 8081 for 0.0.0.0:8080-8082:172.19.0.2:82/tcp",
},
{
name: "map host ipv6 to ipv4 container with proxy",
@@ -588,9 +605,10 @@ func TestAddPortMappings(t *testing.T) {
epAddrV4: ctrIP4,
epAddrV6: ctrIP6,
cfg: []types.PortBinding{{Proto: types.TCP, Port: 80}},
defHostIP: newIPNet(t, "10.11.12.13/24").IP,
proxyPath: "/dummy/path/to/proxy",
defHostIP: newIPNet(t, "127.0.0.1/8").IP,
expPBs: []types.PortBinding{
{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: newIPNet(t, "10.11.12.13/24").IP, HostPort: firstEphemPort},
{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: newIPNet(t, "127.0.0.1/8").IP, HostPort: firstEphemPort},
},
},
{
@@ -598,9 +616,10 @@ func TestAddPortMappings(t *testing.T) {
epAddrV4: ctrIP4,
epAddrV6: ctrIP6,
cfg: []types.PortBinding{{Proto: types.TCP, Port: 80}},
defHostIP: newIPNet(t, "::ffff:10.11.12.13/120").IP,
proxyPath: "/dummy/path/to/proxy",
defHostIP: newIPNet(t, "::ffff:127.0.0.1/72").IP,
expPBs: []types.PortBinding{
{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: newIPNet(t, "10.11.12.13/24").IP, HostPort: firstEphemPort},
{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: newIPNet(t, "127.0.0.1/8").IP, HostPort: firstEphemPort},
},
},
{
@@ -608,6 +627,7 @@ func TestAddPortMappings(t *testing.T) {
epAddrV4: ctrIP4,
epAddrV6: ctrIP6,
cfg: []types.PortBinding{{Proto: types.TCP, Port: 80}},
proxyPath: "/dummy/path/to/proxy",
defHostIP: net.IPv6zero,
expPBs: []types.PortBinding{
{Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: net.IPv6zero, HostPort: firstEphemPort},
@@ -618,6 +638,7 @@ func TestAddPortMappings(t *testing.T) {
epAddrV4: ctrIP4,
epAddrV6: ctrIP6,
cfg: []types.PortBinding{{Proto: types.TCP, Port: 80}},
proxyPath: "/dummy/path/to/proxy",
defHostIP: newIPNet(t, "::1/128").IP,
expPBs: []types.PortBinding{
{Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: newIPNet(t, "::1/128").IP, HostPort: firstEphemPort},
@@ -631,16 +652,17 @@ func TestAddPortMappings(t *testing.T) {
{Proto: types.TCP, Port: 80, HostPort: 8080},
{Proto: types.TCP, Port: 22, HostPort: 2222},
},
proxyPath: "/dummy/path/to/proxy",
expPBs: []types.PortBinding{
{Proto: types.TCP, IP: ctrIP4.IP, Port: 22, HostIP: net.IPv4zero, HostPort: 2222},
{Proto: types.TCP, IP: ctrIP6.IP, Port: 22, HostIP: net.IPv6zero, HostPort: 2222},
{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8080},
{Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: net.IPv6zero, HostPort: 8080},
},
expReleaseErr: "failed to stop docker-proxy for port mapping 0.0.0.0:2222:172.19.0.2:22/tcp: can't stop now\n" +
"failed to stop docker-proxy for port mapping [::]:2222:[fdf8:b88e:bb5c:3483::2]:22/tcp: can't stop now\n" +
"failed to stop docker-proxy for port mapping 0.0.0.0:8080:172.19.0.2:80/tcp: can't stop now\n" +
"failed to stop docker-proxy for port mapping [::]:8080:[fdf8:b88e:bb5c:3483::2]:80/tcp: can't stop now",
expReleaseErr: "failed to stop userland proxy for port mapping 0.0.0.0:2222:172.19.0.2:22/tcp: can't stop now\n" +
"failed to stop userland proxy for port mapping [::]:2222:[fdf8:b88e:bb5c:3483::2]:22/tcp: can't stop now\n" +
"failed to stop userland proxy for port mapping 0.0.0.0:8080:172.19.0.2:80/tcp: can't stop now\n" +
"failed to stop userland proxy for port mapping [::]:8080:[fdf8:b88e:bb5c:3483::2]:80/tcp: can't stop now",
},
{
name: "disable nat6",
@@ -650,7 +672,8 @@ func TestAddPortMappings(t *testing.T) {
{Proto: types.TCP, Port: 22},
{Proto: types.TCP, Port: 80},
},
gwMode6: gwModeRouted,
proxyPath: "/dummy/path/to/proxy",
gwMode6: gwModeRouted,
expPBs: []types.PortBinding{
{Proto: types.TCP, IP: ctrIP4.IP, Port: 22, HostIP: net.IPv4zero, HostPort: firstEphemPort},
{Proto: types.TCP, IP: ctrIP6.IP, Port: 22, HostIP: net.IPv6zero},
@@ -666,7 +689,8 @@ func TestAddPortMappings(t *testing.T) {
{Proto: types.TCP, Port: 22},
{Proto: types.TCP, Port: 80},
},
gwMode4: gwModeRouted,
proxyPath: "/dummy/path/to/proxy",
gwMode4: gwModeRouted,
expPBs: []types.PortBinding{
{Proto: types.TCP, IP: ctrIP4.IP, Port: 22, HostIP: net.IPv4zero},
{Proto: types.TCP, IP: ctrIP6.IP, Port: 22, HostIP: net.IPv6zero, HostPort: firstEphemPort},
@@ -682,8 +706,9 @@ func TestAddPortMappings(t *testing.T) {
{Proto: types.TCP, Port: 22},
{Proto: types.TCP, Port: 80},
},
gwMode4: gwModeRouted,
gwMode6: gwModeRouted,
proxyPath: "/dummy/path/to/proxy",
gwMode4: gwModeRouted,
gwMode6: gwModeRouted,
expPBs: []types.PortBinding{
{Proto: types.TCP, IP: ctrIP4.IP, Port: 22, HostIP: net.IPv4zero},
{Proto: types.TCP, IP: ctrIP6.IP, Port: 22, HostIP: net.IPv6zero},
@@ -692,9 +717,10 @@ func TestAddPortMappings(t *testing.T) {
},
},
{
name: "same ports for matching mappings with different host addresses",
epAddrV4: ctrIP4,
epAddrV6: ctrIP6,
name: "same ports for matching mappings with different host addresses",
epAddrV4: ctrIP4,
epAddrV6: ctrIP6,
hostAddrs: []string{"192.168.1.2/24", "fd0c:9167:5b11::2/64", "fd0c:9167:5b11::3/64"},
cfg: []types.PortBinding{
// These two should both get the same host port.
{Proto: types.TCP, Port: 80, HostIP: newIPNet(t, "fd0c:9167:5b11::2/64").IP},
@@ -708,6 +734,7 @@ func TestAddPortMappings(t *testing.T) {
{Proto: types.TCP, Port: 12345, HostPort: 12345, HostPortEnd: 12346},
{Proto: types.TCP, Port: 12345, HostPort: 12345},
},
proxyPath: "/dummy/path/to/proxy",
expPBs: []types.PortBinding{
{Proto: types.TCP, IP: ctrIP4.IP, Port: 12345, HostIP: net.IPv4zero, HostPort: 12345},
{Proto: types.TCP, IP: ctrIP6.IP, Port: 12345, HostIP: net.IPv6zero, HostPort: 12345},
@@ -720,6 +747,39 @@ func TestAddPortMappings(t *testing.T) {
{Proto: types.TCP, IP: ctrIP6.IP, Port: 12345, HostIP: net.IPv6zero, HostPort: 12346},
},
},
{
name: "rootless",
epAddrV4: ctrIP4,
epAddrV6: ctrIP6,
cfg: []types.PortBinding{
{Proto: types.TCP, Port: 22},
{Proto: types.TCP, Port: 80},
},
proxyPath: "/dummy/path/to/proxy",
rootless: true,
expPBs: []types.PortBinding{
{Proto: types.TCP, IP: ctrIP4.IP, Port: 22, HostIP: net.IPv4zero, HostPort: firstEphemPort},
{Proto: types.TCP, IP: ctrIP6.IP, Port: 22, HostIP: net.IPv6zero, HostPort: firstEphemPort},
{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: firstEphemPort + 1},
{Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: net.IPv6zero, HostPort: firstEphemPort + 1},
},
},
{
name: "rootless without proxy",
epAddrV4: ctrIP4,
epAddrV6: ctrIP6,
cfg: []types.PortBinding{
{Proto: types.TCP, Port: 22},
{Proto: types.TCP, Port: 80},
},
rootless: true,
expPBs: []types.PortBinding{
{Proto: types.TCP, IP: ctrIP4.IP, Port: 22, HostIP: net.IPv4zero, HostPort: firstEphemPort},
{Proto: types.TCP, IP: ctrIP6.IP, Port: 22, HostIP: net.IPv6zero, HostPort: firstEphemPort},
{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: firstEphemPort + 1},
{Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: net.IPv6zero, HostPort: firstEphemPort + 1},
},
},
}
for _, tc := range testcases {
@@ -731,15 +791,14 @@ func TestAddPortMappings(t *testing.T) {
origStartProxy := startProxy
defer func() { startProxy = origStartProxy }()
proxies := map[proxyCall]bool{} // proxy -> is not stopped
startProxy = func(proto string,
hostIP net.IP, hostPort int,
containerIP net.IP, containerPort int,
startProxy = func(pb types.PortBinding,
proxyPath string,
listenSock *os.File,
) (stop func() error, retErr error) {
if tc.busyPortIPv4 > 0 && tc.busyPortIPv4 == hostPort && hostIP.To4() != nil {
if tc.busyPortIPv4 > 0 && tc.busyPortIPv4 == int(pb.HostPort) && pb.HostIP.To4() != nil {
return nil, errors.New("busy port")
}
c := newProxyCall(proto, hostIP, hostPort, containerIP, containerPort, proxyPath)
c := newProxyCall(pb.Proto.String(), pb.HostIP, int(pb.HostPort), pb.IP, int(pb.Port), proxyPath)
if _, ok := proxies[c]; ok {
return nil, fmt.Errorf("duplicate proxy: %#v", c)
}
@@ -756,6 +815,35 @@ func TestAddPortMappings(t *testing.T) {
}, nil
}
// Mock the RootlessKit port driver.
origNewPortDriverClient := newPortDriverClient
defer func() { newPortDriverClient = origNewPortDriverClient }()
newPortDriverClient = func(ctx context.Context) (portDriverClient, error) {
return newMockPortDriverClient(ctx)
}
if len(tc.hostAddrs) > 0 {
dummyLink := &netlink.Bridge{LinkAttrs: netlink.LinkAttrs{Name: "br-dummy"}}
err := netlink.LinkAdd(dummyLink)
assert.NilError(t, err)
for _, addr := range tc.hostAddrs {
// Add with NODAD so that the address is available immediately.
err := netlink.AddrAdd(dummyLink,
&netlink.Addr{IPNet: newIPNet(t, addr), Flags: syscall.IFA_F_NODAD})
assert.NilError(t, err)
}
err = netlink.LinkSetUp(dummyLink)
assert.NilError(t, err)
}
if tc.busyPortIPv4 != 0 {
tl, err := net.ListenTCP("tcp4", &net.TCPAddr{IP: net.IPv4zero, Port: tc.busyPortIPv4})
assert.NilError(t, err)
defer tl.Close()
ul, err := net.ListenUDP("udp4", &net.UDPAddr{IP: net.IPv4zero, Port: tc.busyPortIPv4})
assert.NilError(t, err)
defer ul.Close()
}
n := &bridgeNetwork{
config: &networkConfiguration{
BridgeName: "dummybridge",
@@ -771,11 +859,23 @@ func TestAddPortMappings(t *testing.T) {
EnableIP6Tables: true,
EnableUserlandProxy: tc.proxyPath != "",
UserlandProxyPath: tc.proxyPath,
Rootless: tc.rootless,
},
}
err := n.driver.configure(genericOption)
assert.NilError(t, err)
assert.Check(t, is.Equal(n.driver.portDriverClient == nil, !tc.rootless))
expChildIP := func(hostIP net.IP) net.IP {
if !tc.rootless {
return hostIP
}
if hostIP.To4() == nil {
return net.ParseIP("::1")
}
return net.ParseIP("127.0.0.1")
}
err = portallocator.Get().ReleaseAll()
assert.NilError(t, err)
@@ -792,20 +892,21 @@ func TestAddPortMappings(t *testing.T) {
var disableNAT bool
var addrM, addrD, addrH string
var ipv iptables.IPVersion
hip := expChildIP(expPB.HostIP)
if expPB.IP.To4() == nil {
disableNAT = tc.gwMode6.natDisabled()
ipv = iptables.IPv6
addrM = ctrIP6.IP.String() + "/128"
addrD = "[" + ctrIP6.IP.String() + "]"
addrH = expPB.HostIP.String() + "/128"
addrH = hip.String() + "/128"
} else {
disableNAT = tc.gwMode4.natDisabled()
ipv = iptables.IPv4
addrM = ctrIP4.IP.String() + "/32"
addrD = ctrIP4.IP.String()
addrH = expPB.HostIP.String() + "/32"
addrH = hip.String() + "/32"
}
if expPB.HostIP.IsUnspecified() {
if hip.IsUnspecified() {
addrH = "0/0"
}
@@ -850,18 +951,41 @@ func TestAddPortMappings(t *testing.T) {
}
// Check a docker-proxy was started and stopped for each expected port binding.
expProxies := map[proxyCall]bool{}
for _, expPB := range tc.expPBs {
is4 := expPB.HostIP.To4() != nil
if (is4 && tc.gwMode4.natDisabled()) || (!is4 && tc.gwMode6.natDisabled()) {
continue
if tc.proxyPath != "" {
expProxies := map[proxyCall]bool{}
for _, expPB := range tc.expPBs {
hip := expChildIP(expPB.HostIP)
is4 := hip.To4() != nil
if (is4 && tc.gwMode4.natDisabled()) || (!is4 && tc.gwMode6.natDisabled()) {
continue
}
p := newProxyCall(expPB.Proto.String(),
hip, int(expPB.HostPort),
expPB.IP, int(expPB.Port), tc.proxyPath)
expProxies[p] = tc.expReleaseErr != ""
}
p := newProxyCall(expPB.Proto.String(),
expPB.HostIP, int(expPB.HostPort),
expPB.IP, int(expPB.Port), tc.proxyPath)
expProxies[p] = tc.expReleaseErr != ""
assert.Check(t, is.DeepEqual(expProxies, proxies))
}
// Check the port driver has seen the expected port mappings and no others,
// and that they have all been closed.
if n.driver.portDriverClient != nil {
pdc := n.driver.portDriverClient.(*mockPortDriverClient)
expPorts := map[mockPortDriverPort]bool{}
for _, expPB := range tc.expPBs {
if expPB.HostPort == 0 {
continue
}
pdp := mockPortDriverPort{
proto: expPB.Proto.String(),
hostIP: expPB.HostIP.String(),
childIP: expChildIP(expPB.HostIP).String(),
hostPort: int(expPB.HostPort),
}
expPorts[pdp] = false
}
assert.Check(t, is.DeepEqual(pdc.openPorts, expPorts))
}
assert.Check(t, is.DeepEqual(expProxies, proxies))
})
}
}
@@ -881,3 +1005,48 @@ func newProxyCall(proto string,
proxyPath: proxyPath,
}
}
// Types for tracking calls to the port driver client (mock for RootlessKit client).

// mockPortDriverPort identifies a single port mapping handed to the mock
// port driver. It holds only comparable fields, so it can be used directly
// as a map key. The two addresses are kept in their string form.
type mockPortDriverPort struct {
	proto, hostIP, childIP string
	hostPort               int
}

// String renders the mapping as "<hostIP>:<hostPort>/<proto>". The child IP
// is not part of the output, and IPv6 host addresses are not bracketed.
func (p mockPortDriverPort) String() string {
	port := strconv.Itoa(p.hostPort)
	return p.hostIP + ":" + port + "/" + p.proto
}
// mockPortDriverClient is a test double for the RootlessKit port driver
// client. It performs no I/O; it only remembers which ports it was asked
// to open.
type mockPortDriverClient struct {
	// openPorts has an entry for every port passed to AddPort. The value is
	// true while the port is open and false once its close function has run.
	openPorts map[mockPortDriverPort]bool
}

// newMockPortDriverClient returns a mock client with an empty, non-nil port
// table. The context is ignored and the returned error is always nil.
func newMockPortDriverClient(_ context.Context) (*mockPortDriverClient, error) {
	client := &mockPortDriverClient{
		openPorts: make(map[mockPortDriverPort]bool),
	}
	return client, nil
}
// ChildHostIP maps a host address to the loopback address of the same
// family: "::1" when hostIP.Is6() reports true (which, per net/netip, also
// covers IPv4-mapped IPv6 addresses), and "127.0.0.1" for everything else,
// including the zero Addr.
func (c *mockPortDriverClient) ChildHostIP(hostIP netip.Addr) netip.Addr {
	if !hostIP.Is6() {
		return netip.MustParseAddr("127.0.0.1")
	}
	return netip.IPv6Loopback()
}
// AddPort records the given mapping as open and returns a function that
// marks it closed again.
//
// An error is returned if the same mapping has been added before, whether or
// not it has been closed since: the check is on presence in the table, not on
// the open flag. The returned close function fails if the mapping is not
// currently open, so calling it twice reports an error. The context is
// ignored and no locking is done around the port table.
func (c *mockPortDriverClient) AddPort(_ context.Context, proto string, hostIP, childIP netip.Addr, hostPort int) (func() error, error) {
	port := mockPortDriverPort{
		proto:    proto,
		hostIP:   hostIP.String(),
		childIP:  childIP.String(),
		hostPort: hostPort,
	}
	if _, seen := c.openPorts[port]; seen {
		return nil, fmt.Errorf("mockPortDriverClient: port %s is already open", port)
	}
	c.openPorts[port] = true

	closePort := func() error {
		if open := c.openPorts[port]; !open {
			return fmt.Errorf("mockPortDriverClient: port %s is not open", port)
		}
		// Keep the entry, flipped to false, so the test can tell "opened and
		// closed" apart from "never seen".
		c.openPorts[port] = false
		return nil
	}
	return closePort, nil
}

View File

@@ -624,10 +624,10 @@ func (ep *Endpoint) sbJoin(ctx context.Context, sb *Sandbox, options ...Endpoint
}
if !n.internal {
log.G(ctx).Debugf("Programming external connectivity on endpoint %s (%s)", ep.Name(), ep.ID())
if err := d.ProgramExternalConnectivity(ctx, n.ID(), ep.ID(), sb.Labels()); err != nil {
return types.InternalErrorf(
if err = d.ProgramExternalConnectivity(ctx, n.ID(), ep.ID(), sb.Labels()); err != nil {
return errdefs.System(fmt.Errorf(
"driver failed programming external connectivity on endpoint %s (%s): %v",
ep.Name(), ep.ID(), err)
ep.Name(), ep.ID(), err))
}
}
}

View File

@@ -1,78 +1,61 @@
package portmapper
import (
"errors"
"fmt"
"io"
"net"
"os"
"os/exec"
"runtime"
"strconv"
"syscall"
"time"
"github.com/docker/docker/libnetwork/types"
)
// StartProxy starts the proxy process at proxyPath, or instantiates a dummy proxy
// to bind the host port if proxyPath is the empty string.
func StartProxy(
proto string,
hostIP net.IP, hostPort int,
containerIP net.IP, containerPort int,
proxyPath string,
) (stop func() error, retErr error) {
if proxyPath == "" {
return newDummyProxy(proto, hostIP, hostPort)
}
return newProxyCommand(proto, hostIP, hostPort, containerIP, containerPort, proxyPath)
}
func newProxyCommand(
proto string,
hostIP net.IP, hostPort int,
containerIP net.IP, containerPort int,
// StartProxy starts the proxy process at proxyPath.
// If listenSock is not nil, it must be a bound socket that can be passed to
// the proxy process for it to listen on.
func StartProxy(pb types.PortBinding,
proxyPath string,
listenSock *os.File,
) (stop func() error, retErr error) {
if proxyPath == "" {
return nil, fmt.Errorf("no path provided for userland-proxy binary")
}
p := &proxyCommand{
cmd: &exec.Cmd{
Path: proxyPath,
Args: []string{
proxyPath,
"-proto", proto,
"-host-ip", hostIP.String(),
"-host-port", strconv.Itoa(hostPort),
"-container-ip", containerIP.String(),
"-container-port", strconv.Itoa(containerPort),
},
SysProcAttr: &syscall.SysProcAttr{
Pdeathsig: syscall.SIGTERM, // send a sigterm to the proxy if the creating thread in the daemon process dies (https://go.dev/issue/27505)
},
},
wait: make(chan error, 1),
}
if err := p.start(); err != nil {
return nil, err
}
return p.stop, nil
}
// proxyCommand wraps an exec.Cmd to run the userland TCP and UDP
// proxies as separate processes.
type proxyCommand struct {
cmd *exec.Cmd
wait chan error
}
func (p *proxyCommand) start() error {
r, w, err := os.Pipe()
if err != nil {
return fmt.Errorf("proxy unable to open os.Pipe %s", err)
return nil, fmt.Errorf("proxy unable to open os.Pipe %s", err)
}
defer r.Close()
p.cmd.ExtraFiles = []*os.File{w}
defer func() {
if w != nil {
w.Close()
}
r.Close()
}()
cmd := &exec.Cmd{
Path: proxyPath,
Args: []string{
proxyPath,
"-proto", pb.Proto.String(),
"-host-ip", pb.HostIP.String(),
"-host-port", strconv.FormatUint(uint64(pb.HostPort), 10),
"-container-ip", pb.IP.String(),
"-container-port", strconv.FormatUint(uint64(pb.Port), 10),
},
ExtraFiles: []*os.File{w},
SysProcAttr: &syscall.SysProcAttr{
Pdeathsig: syscall.SIGTERM, // send a sigterm to the proxy if the creating thread in the daemon process dies (https://go.dev/issue/27505)
},
}
if listenSock != nil {
cmd.Args = append(cmd.Args, "-use-listen-fd")
cmd.ExtraFiles = append(cmd.ExtraFiles, listenSock)
}
wait := make(chan error, 1)
// As cmd.SysProcAttr.Pdeathsig is set, the signal will be sent to the
// process when the OS thread on which cmd.Start() was executed dies.
@@ -88,17 +71,18 @@ func (p *proxyCommand) start() error {
go func() {
runtime.LockOSThread()
defer runtime.UnlockOSThread()
err := p.cmd.Start()
err := cmd.Start()
started <- err
if err != nil {
return
}
p.wait <- p.cmd.Wait()
wait <- cmd.Wait()
}()
if err := <-started; err != nil {
return err
return nil, err
}
w.Close()
w = nil
errchan := make(chan error, 1)
go func() {
@@ -108,11 +92,16 @@ func (p *proxyCommand) start() error {
if string(buf) != "0\n" {
errStr, err := io.ReadAll(r)
if err != nil {
errchan <- fmt.Errorf("Error reading exit status from userland proxy: %v", err)
errchan <- fmt.Errorf("error reading exit status from userland proxy: %v", err)
return
}
errchan <- fmt.Errorf("Error starting userland proxy: %s", errStr)
// If the user has an old docker-proxy in their PATH, and we passed "-use-listen-fd"
// on the command line, it exits with no response on the pipe.
if listenSock != nil && buf[0] == 0 && len(errStr) == 0 {
errchan <- errors.New("failed to start docker-proxy, check that the current version is in your $PATH")
return
}
errchan <- fmt.Errorf("error starting userland proxy: %s", errStr)
return
}
errchan <- nil
@@ -120,18 +109,21 @@ func (p *proxyCommand) start() error {
select {
case err := <-errchan:
return err
if err != nil {
return nil, err
}
case <-time.After(16 * time.Second):
return fmt.Errorf("Timed out proxy starting the userland proxy")
return nil, fmt.Errorf("timed out starting the userland proxy")
}
}
func (p *proxyCommand) stop() error {
if p.cmd.Process != nil {
if err := p.cmd.Process.Signal(os.Interrupt); err != nil {
stopFn := func() error {
if cmd.Process == nil {
return nil
}
if err := cmd.Process.Signal(os.Interrupt); err != nil {
return err
}
return <-p.wait
return <-wait
}
return nil
return stopFn, nil
}

View File

@@ -2,9 +2,6 @@ package rootless // import "github.com/docker/docker/pkg/rootless"
import "os"
// RootlessKitDockerProxyBinary is the binary name of rootlesskit-docker-proxy
const RootlessKitDockerProxyBinary = "rootlesskit-docker-proxy"
// RunningWithRootlessKit returns true if running under RootlessKit namespaces.
func RunningWithRootlessKit() bool {
return os.Getenv("ROOTLESSKIT_STATE_DIR") != ""