mirror of
https://github.com/moby/moby.git
synced 2026-01-11 18:51:37 +00:00
libnet/d/bridge: port mappings: filter by input iface
When a NAT-based port mapping is created with a HostIP specified, we insert a DNAT rule in nat-DOCKER to replace the dest addr with the container IP. Then, in filter chains, we allow access to the container port for any packet not coming from the container's network itself (if hairpinning is disabled), nor from another host bridge. However we don't set any rule that prevents a rogue neighbor that shares a L2 segment with the host, but not the one where the port binding is expected to be published, from sending packets destined to that HostIP. For instance, if a port binding is created with HostIP == '127.0.0.1', this port should not be accessible from anything but the lo interface. That's currently not the case and this provides a false sense of security. Since nat-DOCKER mangles the dest addr, and the nat table rejects DROP rules, this change adds rules into raw-PREROUTING to filter ingress packets destined to mapped ports based on the input interface, the dest addr and the dest port. Interfaces are dynamically resolved when packets hit the host, thanks to iptables' addrtype extension. This extension does a fib lookup of the dest addr and checks that it's associated with the interface reached. Also, when a proxy-based port mapping is created, as is the case when an IPv6 HostIP is specified but the container is only IPv4-capable, we don't set any sort of filtering. So the same issue might happen. The reason is a bit different - in that case, that's just how the kernel works. But, in order to stay consistent with NAT-based mappings, these rules are also applied. The env var `DOCKER_DISABLE_INPUT_IFACE_FILTERING` can be set to any true-ish value to globally disable this behavior. Signed-off-by: Albin Kerouanton <albinker@gmail.com>
This commit is contained in:
@@ -542,6 +542,7 @@ RUN --mount=type=cache,sharing=locked,id=moby-dev-aptlib,target=/var/lib/apt \
|
||||
libprotobuf-c1 \
|
||||
libyajl2 \
|
||||
net-tools \
|
||||
netcat-openbsd \
|
||||
patch \
|
||||
pigz \
|
||||
sudo \
|
||||
|
||||
@@ -9,6 +9,7 @@ import (
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
runcoptions "github.com/containerd/containerd/api/types/runc/options"
|
||||
@@ -159,6 +160,12 @@ func (daemon *Daemon) fillPlatformInfo(ctx context.Context, v *system.Info, sysI
|
||||
if !v.IPv4Forwarding {
|
||||
v.Warnings = append(v.Warnings, "WARNING: IPv4 forwarding is disabled")
|
||||
}
|
||||
if filtering, _ := strconv.ParseBool(os.Getenv("DOCKER_DISABLE_INPUT_IFACE_FILTERING")); filtering {
|
||||
v.Warnings = append(v.Warnings,
|
||||
"WARNING: input interface filtering is disabled on port mappings, this might be insecure",
|
||||
"DEPRECATED: DOCKER_DISABLE_INPUT_IFACE_FILTERING is deprecated and will be removed in a future release",
|
||||
)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -27,6 +27,14 @@ func WithIPv6() func(*network.CreateOptions) {
|
||||
}
|
||||
}
|
||||
|
||||
// WithIPv6Disabled makes sure IPv6 is disabled on the network.
|
||||
func WithIPv6Disabled() func(*network.CreateOptions) {
|
||||
return func(n *network.CreateOptions) {
|
||||
enable := false
|
||||
n.EnableIPv6 = &enable
|
||||
}
|
||||
}
|
||||
|
||||
// WithInternal enables Internal flag on the create network request
|
||||
func WithInternal() func(*network.CreateOptions) {
|
||||
return func(n *network.CreateOptions) {
|
||||
|
||||
@@ -0,0 +1,147 @@
|
||||
## Container on a user-defined network, with a port published on a specific HostIP
|
||||
|
||||
Adding a network running a container with a mapped port, equivalent to:
|
||||
|
||||
docker network create \
|
||||
-o com.docker.network.bridge.name=bridge1 \
|
||||
--subnet 192.0.2.0/24 --gateway 192.0.2.1 bridge1
|
||||
docker run --network bridge1 -p 127.0.0.1:8080:80 --name c1 busybox
|
||||
|
||||
The filter and nat tables are the same as with no HostIP specified.
|
||||
|
||||
<details>
|
||||
<summary>Filter table</summary>
|
||||
|
||||
Chain INPUT (policy ACCEPT 0 packets, 0 bytes)
|
||||
num pkts bytes target prot opt in out source destination
|
||||
|
||||
Chain FORWARD (policy ACCEPT 0 packets, 0 bytes)
|
||||
num pkts bytes target prot opt in out source destination
|
||||
1 0 0 DOCKER-USER 0 -- * * 0.0.0.0/0 0.0.0.0/0
|
||||
2 0 0 ACCEPT 0 -- * * 0.0.0.0/0 0.0.0.0/0 match-set docker-ext-bridges-v4 dst ctstate RELATED,ESTABLISHED
|
||||
3 0 0 DOCKER-ISOLATION-STAGE-1 0 -- * * 0.0.0.0/0 0.0.0.0/0
|
||||
4 0 0 DOCKER 0 -- * * 0.0.0.0/0 0.0.0.0/0 match-set docker-ext-bridges-v4 dst
|
||||
5 0 0 ACCEPT 0 -- docker0 * 0.0.0.0/0 0.0.0.0/0
|
||||
6 0 0 ACCEPT 0 -- bridge1 * 0.0.0.0/0 0.0.0.0/0
|
||||
|
||||
Chain OUTPUT (policy ACCEPT 0 packets, 0 bytes)
|
||||
num pkts bytes target prot opt in out source destination
|
||||
|
||||
Chain DOCKER (1 references)
|
||||
num pkts bytes target prot opt in out source destination
|
||||
1 0 0 ACCEPT 6 -- !bridge1 bridge1 0.0.0.0/0 192.0.2.2 tcp dpt:80
|
||||
2 0 0 DROP 0 -- !docker0 docker0 0.0.0.0/0 0.0.0.0/0
|
||||
3 0 0 DROP 0 -- !bridge1 bridge1 0.0.0.0/0 0.0.0.0/0
|
||||
|
||||
Chain DOCKER-ISOLATION-STAGE-1 (1 references)
|
||||
num pkts bytes target prot opt in out source destination
|
||||
1 0 0 DOCKER-ISOLATION-STAGE-2 0 -- docker0 !docker0 0.0.0.0/0 0.0.0.0/0
|
||||
2 0 0 DOCKER-ISOLATION-STAGE-2 0 -- bridge1 !bridge1 0.0.0.0/0 0.0.0.0/0
|
||||
|
||||
Chain DOCKER-ISOLATION-STAGE-2 (2 references)
|
||||
num pkts bytes target prot opt in out source destination
|
||||
1 0 0 DROP 0 -- * bridge1 0.0.0.0/0 0.0.0.0/0
|
||||
2 0 0 DROP 0 -- * docker0 0.0.0.0/0 0.0.0.0/0
|
||||
|
||||
Chain DOCKER-USER (1 references)
|
||||
num pkts bytes target prot opt in out source destination
|
||||
1 0 0 RETURN 0 -- * * 0.0.0.0/0 0.0.0.0/0
|
||||
|
||||
|
||||
-P INPUT ACCEPT
|
||||
-P FORWARD ACCEPT
|
||||
-P OUTPUT ACCEPT
|
||||
-N DOCKER
|
||||
-N DOCKER-ISOLATION-STAGE-1
|
||||
-N DOCKER-ISOLATION-STAGE-2
|
||||
-N DOCKER-USER
|
||||
-A FORWARD -j DOCKER-USER
|
||||
-A FORWARD -m set --match-set docker-ext-bridges-v4 dst -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
|
||||
-A FORWARD -j DOCKER-ISOLATION-STAGE-1
|
||||
-A FORWARD -m set --match-set docker-ext-bridges-v4 dst -j DOCKER
|
||||
-A FORWARD -i docker0 -j ACCEPT
|
||||
-A FORWARD -i bridge1 -j ACCEPT
|
||||
-A DOCKER -d 192.0.2.2/32 ! -i bridge1 -o bridge1 -p tcp -m tcp --dport 80 -j ACCEPT
|
||||
-A DOCKER ! -i docker0 -o docker0 -j DROP
|
||||
-A DOCKER ! -i bridge1 -o bridge1 -j DROP
|
||||
-A DOCKER-ISOLATION-STAGE-1 -i docker0 ! -o docker0 -j DOCKER-ISOLATION-STAGE-2
|
||||
-A DOCKER-ISOLATION-STAGE-1 -i bridge1 ! -o bridge1 -j DOCKER-ISOLATION-STAGE-2
|
||||
-A DOCKER-ISOLATION-STAGE-2 -o bridge1 -j DROP
|
||||
-A DOCKER-ISOLATION-STAGE-2 -o docker0 -j DROP
|
||||
-A DOCKER-USER -j RETURN
|
||||
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary>NAT table</summary>
|
||||
|
||||
Chain PREROUTING (policy ACCEPT 0 packets, 0 bytes)
|
||||
num pkts bytes target prot opt in out source destination
|
||||
1 0 0 DOCKER 0 -- * * 0.0.0.0/0 0.0.0.0/0 ADDRTYPE match dst-type LOCAL
|
||||
|
||||
Chain INPUT (policy ACCEPT 0 packets, 0 bytes)
|
||||
num pkts bytes target prot opt in out source destination
|
||||
|
||||
Chain OUTPUT (policy ACCEPT 0 packets, 0 bytes)
|
||||
num pkts bytes target prot opt in out source destination
|
||||
1 0 0 DOCKER 0 -- * * 0.0.0.0/0 !127.0.0.0/8 ADDRTYPE match dst-type LOCAL
|
||||
|
||||
Chain POSTROUTING (policy ACCEPT 0 packets, 0 bytes)
|
||||
num pkts bytes target prot opt in out source destination
|
||||
1 0 0 MASQUERADE 0 -- * !bridge1 192.0.2.0/24 0.0.0.0/0
|
||||
2 0 0 MASQUERADE 0 -- * !docker0 172.17.0.0/16 0.0.0.0/0
|
||||
|
||||
Chain DOCKER (2 references)
|
||||
num pkts bytes target prot opt in out source destination
|
||||
1 0 0 RETURN 0 -- bridge1 * 0.0.0.0/0 0.0.0.0/0
|
||||
2 0 0 RETURN 0 -- docker0 * 0.0.0.0/0 0.0.0.0/0
|
||||
3 0 0 DNAT 6 -- !bridge1 * 0.0.0.0/0 127.0.0.1 tcp dpt:8080 to:192.0.2.2:80
|
||||
|
||||
|
||||
-P PREROUTING ACCEPT
|
||||
-P INPUT ACCEPT
|
||||
-P OUTPUT ACCEPT
|
||||
-P POSTROUTING ACCEPT
|
||||
-N DOCKER
|
||||
-A PREROUTING -m addrtype --dst-type LOCAL -j DOCKER
|
||||
-A OUTPUT ! -d 127.0.0.0/8 -m addrtype --dst-type LOCAL -j DOCKER
|
||||
-A POSTROUTING -s 192.0.2.0/24 ! -o bridge1 -j MASQUERADE
|
||||
-A POSTROUTING -s 172.17.0.0/16 ! -o docker0 -j MASQUERADE
|
||||
-A DOCKER -i bridge1 -j RETURN
|
||||
-A DOCKER -i docker0 -j RETURN
|
||||
-A DOCKER -d 127.0.0.1/32 ! -i bridge1 -p tcp -m tcp --dport 8080 -j DNAT --to-destination 192.0.2.2:80
|
||||
|
||||
|
||||
</details>
|
||||
|
||||
The raw table is:
|
||||
|
||||
Chain PREROUTING (policy ACCEPT 0 packets, 0 bytes)
|
||||
num pkts bytes target prot opt in out source destination
|
||||
1 0 0 ACCEPT 6 -- * * 0.0.0.0/0 127.0.0.1 tcp dpt:8080 ADDRTYPE match dst-type LOCAL limit-in
|
||||
2 0 0 DROP 6 -- * * 0.0.0.0/0 127.0.0.1 tcp dpt:8080
|
||||
|
||||
Chain OUTPUT (policy ACCEPT 0 packets, 0 bytes)
|
||||
num pkts bytes target prot opt in out source destination
|
||||
|
||||
|
||||
<details>
|
||||
<summary>iptables commands</summary>
|
||||
|
||||
-P PREROUTING ACCEPT
|
||||
-P OUTPUT ACCEPT
|
||||
-A PREROUTING -d 127.0.0.1/32 -p tcp -m tcp --dport 8080 -m addrtype --dst-type LOCAL --limit-iface-in -j ACCEPT
|
||||
-A PREROUTING -d 127.0.0.1/32 -p tcp -m tcp --dport 8080 -j DROP
|
||||
|
||||
|
||||
</details>
|
||||
|
||||
The difference from [port mapping with no HostIP][0] is:
|
||||
|
||||
- An ACCEPT rule is added to the PREROUTING chain to accept packets targeting the
|
||||
mapped port and coming from the interface that has the HostIP assigned.
|
||||
- And a DROP rule is added too, to drop packets targeting the mapped port but
|
||||
that didn't pass the previous check.
|
||||
|
||||
[0]: usernet-portmap.md
|
||||
@@ -46,3 +46,4 @@ Scenarios:
|
||||
- [Container on a routed-mode network, with a published port](generated/usernet-portmap-routed.md)
|
||||
- [Container on a nat-unprotected network, with a published port](generated/usernet-portmap-natunprot.md)
|
||||
- [Swarm service, with a published port](generated/swarm-portmap.md)
|
||||
- [Container on a user-defined network, with a port published on a specific HostIP](generated/usernet-portmap-hostip.md)
|
||||
|
||||
@@ -174,6 +174,18 @@ var index = []section{
|
||||
},
|
||||
}},
|
||||
},
|
||||
{
|
||||
name: "usernet-portmap-hostip.md",
|
||||
networks: []networkDesc{{
|
||||
name: "bridge1",
|
||||
containers: []ctrDesc{
|
||||
{
|
||||
name: "c1",
|
||||
portMappings: nat.PortMap{"80/tcp": {{HostIP: "127.0.0.1", HostPort: "8080"}}},
|
||||
},
|
||||
},
|
||||
}},
|
||||
},
|
||||
}
|
||||
|
||||
// iptCmdType is used to look up iptCmds in the markdown (can't use an int
|
||||
@@ -188,6 +200,8 @@ const (
|
||||
iptCmdSFilterDocker4 iptCmdType = "SFilterDocker4"
|
||||
iptCmdLNat4 iptCmdType = "LNat4"
|
||||
iptCmdSNat4 iptCmdType = "SNat4"
|
||||
iptCmdLRaw4 iptCmdType = "LRaw4"
|
||||
iptCmdSRaw4 iptCmdType = "SRaw4"
|
||||
)
|
||||
|
||||
var iptCmds = map[iptCmdType][]string{
|
||||
@@ -198,6 +212,8 @@ var iptCmds = map[iptCmdType][]string{
|
||||
iptCmdSFilterDocker4: {"iptables", "-S", "DOCKER"},
|
||||
iptCmdLNat4: {"iptables", "-nvL", "--line-numbers", "-t", "nat"},
|
||||
iptCmdSNat4: {"iptables", "-S", "-t", "nat"},
|
||||
iptCmdLRaw4: {"iptables", "-nvL", "--line-numbers", "-t", "raw"},
|
||||
iptCmdSRaw4: {"iptables", "-S", "-t", "raw"},
|
||||
}
|
||||
|
||||
func TestBridgeIptablesDoc(t *testing.T) {
|
||||
|
||||
@@ -0,0 +1,48 @@
|
||||
## Container on a user-defined network, with a port published on a specific HostIP
|
||||
|
||||
Adding a network running a container with a mapped port, equivalent to:
|
||||
|
||||
docker network create \
|
||||
-o com.docker.network.bridge.name=bridge1 \
|
||||
--subnet 192.0.2.0/24 --gateway 192.0.2.1 bridge1
|
||||
docker run --network bridge1 -p 127.0.0.1:8080:80 --name c1 busybox
|
||||
|
||||
The filter and nat tables are the same as with no HostIP specified.
|
||||
|
||||
<details>
|
||||
<summary>Filter table</summary>
|
||||
|
||||
{{index . "LFilter4"}}
|
||||
|
||||
{{index . "SFilter4"}}
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary>NAT table</summary>
|
||||
|
||||
{{index . "LNat4"}}
|
||||
|
||||
{{index . "SNat4"}}
|
||||
|
||||
</details>
|
||||
|
||||
The raw table is:
|
||||
|
||||
{{index . "LRaw4"}}
|
||||
|
||||
<details>
|
||||
<summary>iptables commands</summary>
|
||||
|
||||
{{index . "SRaw4"}}
|
||||
|
||||
</details>
|
||||
|
||||
The difference from [port mapping with no HostIP][0] is:
|
||||
|
||||
- An ACCEPT rule is added to the PREROUTING chain to accept packets targeting the
|
||||
mapped port and coming from the interface that has the HostIP assigned.
|
||||
- And a DROP rule is added too, to drop packets targeting the mapped port but
|
||||
that didn't pass the previous check.
|
||||
|
||||
[0]: usernet-portmap.md
|
||||
@@ -1,6 +1,7 @@
|
||||
package networking
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"net"
|
||||
@@ -16,10 +17,12 @@ import (
|
||||
|
||||
containertypes "github.com/docker/docker/api/types/container"
|
||||
networktypes "github.com/docker/docker/api/types/network"
|
||||
"github.com/docker/docker/client"
|
||||
"github.com/docker/docker/integration/internal/container"
|
||||
"github.com/docker/docker/integration/internal/network"
|
||||
"github.com/docker/docker/internal/testutils/networking"
|
||||
"github.com/docker/docker/libnetwork/drivers/bridge"
|
||||
"github.com/docker/docker/pkg/stdcopy"
|
||||
"github.com/docker/docker/testutil"
|
||||
"github.com/docker/docker/testutil/daemon"
|
||||
"github.com/docker/go-connections/nat"
|
||||
@@ -781,3 +784,185 @@ func TestDirectRoutingOpenPorts(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestAccessPublishedPortFromNonMatchingIface checks that, on multi-homed
|
||||
// network hosts, PBs created with a specific HostIP aren't accessible from
|
||||
// interfaces that don't match the HostIP.
|
||||
//
|
||||
// Regression test for https://github.com/moby/moby/issues/45610.
|
||||
func TestAccessPublishedPortFromNonMatchingIface(t *testing.T) {
|
||||
// This test checks iptables rules that live in dockerd's netns. In the case
|
||||
// of rootlesskit, this is not the same netns as the host, so they don't
|
||||
// have any effect.
|
||||
// TODO(aker): we need to figure out what we want to do for rootlesskit.
|
||||
skip.If(t, testEnv.IsRootless, "rootlesskit has its own netns")
|
||||
|
||||
ctx := setupTest(t)
|
||||
|
||||
const (
|
||||
hostIPv4 = "192.168.120.2"
|
||||
hostIPv6 = "fdbc:277b:d40b::2"
|
||||
)
|
||||
|
||||
// l3Good is where the port will be published.
|
||||
l3Good := networking.NewL3Segment(t, "test-matching-iface-br",
|
||||
netip.MustParsePrefix("192.168.120.1/24"),
|
||||
netip.MustParsePrefix("fdbc:277b:d40b::1/64"))
|
||||
defer l3Good.Destroy(t)
|
||||
// "docker" is the host where dockerd is running. Suffix the iface name to
|
||||
// not collide with the L3 segment below.
|
||||
l3Good.AddHost(t, "docker", networking.CurrentNetns, "eth-test1",
|
||||
netip.MustParsePrefix(hostIPv4+"/24"),
|
||||
netip.MustParsePrefix(hostIPv6+"/64"))
|
||||
l3Good.AddHost(t, "neigh", "test-matching-iface-neighbor", "eth0",
|
||||
netip.MustParsePrefix("192.168.120.3/24"),
|
||||
netip.MustParsePrefix("fdbc:277b:d40b::3/64"))
|
||||
|
||||
// l3Bad is another L3Segment, from which the published port should be
|
||||
// inaccessible.
|
||||
l3Bad := networking.NewL3Segment(t, "test-non-matching-iface-br",
|
||||
netip.MustParsePrefix("192.168.123.1/24"),
|
||||
netip.MustParsePrefix("fde8:19ff:6e09::1/64"))
|
||||
defer l3Bad.Destroy(t)
|
||||
// "docker" is the host where dockerd is running. Suffix the iface name to
|
||||
// not collide with the L3 segment above.
|
||||
l3Bad.AddHost(t, "docker", networking.CurrentNetns, "eth-test2",
|
||||
netip.MustParsePrefix("192.168.123.2/24"),
|
||||
netip.MustParsePrefix("fde8:19ff:6e09::2/64"))
|
||||
l3Bad.AddHost(t, "attacker", "test-non-matching-iface-attacker", "eth0",
|
||||
netip.MustParsePrefix("192.168.123.3/24"),
|
||||
netip.MustParsePrefix("fde8:19ff:6e09::3/64"))
|
||||
|
||||
testAccess := func(t *testing.T, c *client.Client, host networking.Host, hostAddr string, escapeHatch, expAccess bool, nwOpts ...func(*networktypes.CreateOptions)) {
|
||||
testutil.StartSpan(ctx, t)
|
||||
|
||||
const bridgeName = "brattacked"
|
||||
network.CreateNoError(ctx, t, c, bridgeName, append(nwOpts,
|
||||
network.WithDriver("bridge"),
|
||||
network.WithOption(bridge.BridgeName, bridgeName),
|
||||
)...)
|
||||
defer network.RemoveNoError(ctx, t, c, bridgeName)
|
||||
|
||||
const hostPort = "5000"
|
||||
// Create the victim container, with a non-empty / non-unspecified
|
||||
// HostIP in its port binding.
|
||||
serverID := container.Run(ctx, t, c,
|
||||
container.WithName(sanitizeCtrName(t.Name()+"-server")),
|
||||
container.WithCmd("nc", "-lup", "5000"),
|
||||
container.WithExposedPorts("5000/udp"),
|
||||
container.WithPortMap(nat.PortMap{"5000/udp": {{HostIP: hostAddr, HostPort: hostPort}}}),
|
||||
container.WithNetworkMode(bridgeName))
|
||||
defer c.ContainerRemove(ctx, serverID, containertypes.RemoveOptions{Force: true})
|
||||
|
||||
// Send a UDP datagram to the published port, from the 'host' passed
|
||||
// as argument.
|
||||
//
|
||||
// Here UDP is preferred, because it's a one-way, connectionless
|
||||
// protocol. With TCP the three-way handshake has to be completed
|
||||
// before sending a payload. But since some of the test cases try to
|
||||
// spoof the loopback address, the 'attacker host' will drop the
|
||||
// SYN-ACK by default (because the source addr will be considered
|
||||
// invalid / non-routable). This would require further tuning to make
|
||||
// it work. But with UDP, this problem doesn't exist - the payload can
|
||||
// be sent straight away.
|
||||
host.Do(t, func() {
|
||||
// Send a payload to the victim container from the attacker host.
|
||||
for i := 0; i < 10; i++ {
|
||||
t.Logf("Sending probe #%d to %s:%s from host %s", i, hostAddr, hostPort, host.Name)
|
||||
|
||||
// For some unexplainable reason, the first few packets might
|
||||
// not reach the container (ie. the container returns an ICMP
|
||||
// 'Port Unreachable' message).
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
icmd.RunCommand("/bin/sh", "-c", fmt.Sprintf("echo foobar | nc -w1 -u %s %s", hostAddr, hostPort)).Assert(t, icmd.Success)
|
||||
}
|
||||
})
|
||||
|
||||
// Check whether the payload was received by the victim container.
|
||||
logReader, err := c.ContainerLogs(ctx, serverID, containertypes.LogsOptions{ShowStdout: true})
|
||||
assert.NilError(t, err)
|
||||
defer logReader.Close()
|
||||
|
||||
var actualStdout bytes.Buffer
|
||||
_, err = stdcopy.StdCopy(&actualStdout, nil, logReader)
|
||||
assert.NilError(t, err)
|
||||
|
||||
stdOut := strings.TrimSpace(actualStdout.String())
|
||||
if expAccess {
|
||||
assert.Assert(t, strings.Contains(stdOut, "foobar"), "Host %s should have access to the container, but the payload wasn't received by the docker host", host.Name)
|
||||
} else {
|
||||
assert.Assert(t, !strings.Contains(stdOut, "foobar"), "Host %s should not have access to the container, but the payload was received by the docker host", host.Name)
|
||||
}
|
||||
}
|
||||
|
||||
for _, escapeHatch := range []bool{false, true} {
|
||||
var dopts []daemon.Option
|
||||
if escapeHatch {
|
||||
dopts = []daemon.Option{daemon.WithEnvVars("DOCKER_DISABLE_INPUT_IFACE_FILTERING=1")}
|
||||
}
|
||||
|
||||
d := daemon.New(t, dopts...)
|
||||
d.StartWithBusybox(ctx, t)
|
||||
defer d.Stop(t)
|
||||
|
||||
c := d.NewClientT(t)
|
||||
defer c.Close()
|
||||
|
||||
t.Run(fmt.Sprintf("NAT/IPv4/lo/EscapeHatch=%t", escapeHatch), func(t *testing.T) {
|
||||
const hostAddr = "127.0.10.1"
|
||||
|
||||
l3Bad.Hosts["attacker"].Run(t, "ip", "route", "add", hostAddr+"/32", "via", "192.168.123.2", "dev", "eth0")
|
||||
defer l3Bad.Hosts["attacker"].Run(t, "ip", "route", "delete", hostAddr+"/32", "via", "192.168.123.2", "dev", "eth0")
|
||||
|
||||
testAccess(t, c, l3Bad.Hosts["attacker"], hostAddr, escapeHatch, escapeHatch)
|
||||
// Test access from the L3 segment where the port is published to
|
||||
// make sure that the test works properly (otherwise we might
|
||||
// reintroduce the security issue without realizing).
|
||||
testAccess(t, c, l3Good.Hosts["docker"], hostAddr, escapeHatch, true)
|
||||
})
|
||||
|
||||
t.Run(fmt.Sprintf("NAT/IPv4/HostAddr/EscapeHatch=%t", escapeHatch), func(t *testing.T) {
|
||||
l3Bad.Hosts["attacker"].Run(t, "ip", "route", "add", hostIPv4+"/32", "via", "192.168.123.2", "dev", "eth0")
|
||||
defer l3Bad.Hosts["attacker"].Run(t, "ip", "route", "delete", hostIPv4+"/32", "via", "192.168.123.2", "dev", "eth0")
|
||||
|
||||
testAccess(t, c, l3Bad.Hosts["attacker"], hostIPv4, escapeHatch, escapeHatch)
|
||||
// Test access from the L3 segment where the port is published to
|
||||
// make sure that the test works properly (otherwise we might
|
||||
// reintroduce the security issue without realizing).
|
||||
testAccess(t, c, l3Good.Hosts["neigh"], hostIPv4, escapeHatch, true)
|
||||
})
|
||||
|
||||
t.Run(fmt.Sprintf("NAT/IPv6/HostAddr/EscapeHatch=%t", escapeHatch), func(t *testing.T) {
|
||||
l3Bad.Hosts["attacker"].Run(t, "ip", "route", "add", hostIPv6+"/128", "via", "fde8:19ff:6e09::2", "dev", "eth0")
|
||||
defer l3Bad.Hosts["attacker"].Run(t, "ip", "route", "delete", hostIPv6+"/128", "via", "fde8:19ff:6e09::2", "dev", "eth0")
|
||||
|
||||
nwOpts := []func(*networktypes.CreateOptions){
|
||||
network.WithIPv6(),
|
||||
network.WithIPAM("fd1d:b78f:79e3::/64", "fd1d:b78f:79e3::1"),
|
||||
}
|
||||
|
||||
testAccess(t, c, l3Bad.Hosts["attacker"], hostIPv6, escapeHatch, escapeHatch, nwOpts...)
|
||||
// Test access from the L3 segment where the port is published to
|
||||
// make sure that the test works properly (otherwise we might
|
||||
// reintroduce the security issue without realizing).
|
||||
testAccess(t, c, l3Good.Hosts["neigh"], hostIPv6, escapeHatch, true, nwOpts...)
|
||||
})
|
||||
|
||||
// IPv6 port-bindings to IPv4-only containers (ie. not attached to any
|
||||
// IPv6 network) aren't NATed, but go through docker-proxy.
|
||||
t.Run(fmt.Sprintf("Proxy/IPv6/HostAddr/EscapeHatch=%t", escapeHatch), func(t *testing.T) {
|
||||
l3Bad.Hosts["attacker"].Run(t, "ip", "route", "add", hostIPv6+"/128", "via", "fde8:19ff:6e09::2", "dev", "eth0")
|
||||
defer l3Bad.Hosts["attacker"].Run(t, "ip", "route", "delete", hostIPv6+"/128", "via", "fde8:19ff:6e09::2", "dev", "eth0")
|
||||
|
||||
testAccess(t, c, l3Bad.Hosts["attacker"], hostIPv6, escapeHatch, escapeHatch, network.WithIPv6Disabled())
|
||||
// Test access from the L3 segment where the port is published to
|
||||
// make sure that the test works properly (otherwise we might
|
||||
// reintroduce the security issue without realizing).
|
||||
testAccess(t, c, l3Good.Hosts["neigh"], hostIPv6, escapeHatch, true, network.WithIPv6Disabled())
|
||||
})
|
||||
|
||||
// IPv6 loopback address is non routable, so the kernel will block any
|
||||
// packet spoofing it without the need for any iptables rules. No need
|
||||
// to test that case here.
|
||||
}
|
||||
}
|
||||
|
||||
@@ -45,7 +45,7 @@ func NewL3Segment(t *testing.T, nsName string, addrs ...netip.Prefix) *L3Segment
|
||||
Hosts: map[string]Host{},
|
||||
}
|
||||
|
||||
l3.bridge = newHost(t, nsName, "br0")
|
||||
l3.bridge = newHost(t, "bridge", nsName, "br0")
|
||||
defer func() {
|
||||
if t.Failed() {
|
||||
l3.Destroy(t)
|
||||
@@ -70,12 +70,13 @@ func (l3 *L3Segment) AddHost(t *testing.T, hostname, nsName, ifname string, addr
|
||||
t.Fatalf("hostname too long")
|
||||
}
|
||||
|
||||
host := newHost(t, nsName, ifname)
|
||||
host := newHost(t, hostname, nsName, ifname)
|
||||
l3.Hosts[hostname] = host
|
||||
|
||||
host.MustRun(t, "ip", "link", "add", hostname, "netns", l3.bridge.ns, "type", "veth", "peer", "name", host.Iface)
|
||||
l3.bridge.MustRun(t, "ip", "link", "set", hostname, "up", "master", l3.bridge.Iface)
|
||||
host.MustRun(t, "ip", "link", "set", host.Iface, "up")
|
||||
host.MustRun(t, "ip", "link", "set", "lo", "up")
|
||||
|
||||
for _, addr := range addrs {
|
||||
host.MustRun(t, "ip", "addr", "add", addr.String(), "dev", host.Iface, "nodad")
|
||||
@@ -83,6 +84,7 @@ func (l3 *L3Segment) AddHost(t *testing.T, hostname, nsName, ifname string, addr
|
||||
}
|
||||
|
||||
func (l3 *L3Segment) Destroy(t *testing.T) {
|
||||
t.Helper()
|
||||
for _, host := range l3.Hosts {
|
||||
host.Destroy(t)
|
||||
}
|
||||
@@ -90,11 +92,12 @@ func (l3 *L3Segment) Destroy(t *testing.T) {
|
||||
}
|
||||
|
||||
type Host struct {
|
||||
Name string
|
||||
Iface string // Iface is the interface name in the host network namespace.
|
||||
ns string // ns is the network namespace name.
|
||||
}
|
||||
|
||||
func newHost(t *testing.T, nsName, ifname string) Host {
|
||||
func newHost(t *testing.T, hostname, nsName, ifname string) Host {
|
||||
t.Helper()
|
||||
|
||||
if len(ifname) >= syscall.IFNAMSIZ {
|
||||
@@ -109,6 +112,7 @@ func newHost(t *testing.T, nsName, ifname string) Host {
|
||||
}
|
||||
|
||||
return Host{
|
||||
Name: hostname,
|
||||
Iface: ifname,
|
||||
ns: nsName,
|
||||
}
|
||||
@@ -142,25 +146,27 @@ func (h Host) MustRun(t *testing.T, cmd string, args ...string) string {
|
||||
func (h Host) Do(t *testing.T, fn func()) {
|
||||
t.Helper()
|
||||
|
||||
targetNs, err := netns.GetFromName(h.ns)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get netns handle: %v", err)
|
||||
}
|
||||
defer targetNs.Close()
|
||||
if h.ns != CurrentNetns {
|
||||
targetNs, err := netns.GetFromName(h.ns)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get netns handle: %v", err)
|
||||
}
|
||||
defer targetNs.Close()
|
||||
|
||||
origNs, err := netns.Get()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get current netns: %v", err)
|
||||
}
|
||||
defer origNs.Close()
|
||||
origNs, err := netns.Get()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get current netns: %v", err)
|
||||
}
|
||||
defer origNs.Close()
|
||||
|
||||
runtime.LockOSThread()
|
||||
defer runtime.UnlockOSThread()
|
||||
runtime.LockOSThread()
|
||||
defer runtime.UnlockOSThread()
|
||||
|
||||
if err := netns.Set(targetNs); err != nil {
|
||||
t.Fatalf("failed to enter netns: %v", err)
|
||||
if err := netns.Set(targetNs); err != nil {
|
||||
t.Fatalf("failed to enter netns: %v", err)
|
||||
}
|
||||
defer netns.Set(origNs)
|
||||
}
|
||||
defer netns.Set(origNs)
|
||||
|
||||
fn()
|
||||
}
|
||||
|
||||
@@ -176,9 +176,9 @@ func (n *bridgeNetwork) addPortMappings(
|
||||
}
|
||||
|
||||
for i := range bindings {
|
||||
if pdc != nil && bindings[i].HostPort != 0 {
|
||||
b := bindings[i]
|
||||
if pdc != nil && b.HostPort != 0 {
|
||||
var err error
|
||||
b := &bindings[i]
|
||||
hip, ok := netip.AddrFromSlice(b.HostIP)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("invalid host IP address in %s", b)
|
||||
@@ -187,12 +187,18 @@ func (n *bridgeNetwork) addPortMappings(
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("invalid child host IP address %s in %s", b.childHostIP, b)
|
||||
}
|
||||
b.portDriverRemove, err = pdc.AddPort(ctx, b.Proto.String(), hip, chip, int(b.HostPort))
|
||||
bindings[i].portDriverRemove, err = pdc.AddPort(ctx, b.Proto.String(), hip, chip, int(b.HostPort))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
if err := n.setPerPortIptables(bindings[i], true); err != nil {
|
||||
if err := n.setPerPortIptables(b, true); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// When the port is proxied, and not NATed (eg. when an IPv6 HostIP is
|
||||
// specified, but the container has no IPv6 address), the container's
|
||||
// port must be protected by filterPortByInputIface.
|
||||
if err := n.filterPortByInputIface(b, true); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
@@ -746,10 +752,11 @@ func (n *bridgeNetwork) releasePortBindings(pbs []portBinding) error {
|
||||
if errN != nil {
|
||||
errN = fmt.Errorf("failed to remove iptables rules for port mapping %s: %w", pb, errN)
|
||||
}
|
||||
errF := n.filterPortByInputIface(pb, false)
|
||||
if pb.HostPort > 0 {
|
||||
portallocator.Get().ReleasePort(pb.childHostIP, pb.Proto.String(), int(pb.HostPort))
|
||||
}
|
||||
errs = append(errs, errS, errPD, errP, errN)
|
||||
errs = append(errs, errS, errPD, errP, errN, errF)
|
||||
}
|
||||
return errors.Join(errs...)
|
||||
}
|
||||
@@ -869,6 +876,59 @@ func setPerPortForwarding(b portBinding, ipv iptables.IPVersion, bridgeName stri
|
||||
return nil
|
||||
}
|
||||
|
||||
// filterPortByInputIface adds a couple of iptables rules to accept packets
|
||||
// destined to a mapped port with a dest addr that matches the interface
|
||||
// they're received on. If that doesn't match, the second rule drops them
|
||||
// unceremoniously.
|
||||
//
|
||||
// These rules will block rogue hosts that try to access a mapped port while
|
||||
// they aren't part of the L2 segment where the mapped port is exposed.
|
||||
// For instance, if HostIP == 127.0.0.1, no ingress should come from anything
|
||||
// but lo.
|
||||
func (n *bridgeNetwork) filterPortByInputIface(b portBinding, enable bool) error {
|
||||
hostIP := b.childHostIP
|
||||
if b.HostPort == 0 {
|
||||
// Direct routing mode is used, we can't filter based on the input iface.
|
||||
return nil
|
||||
}
|
||||
|
||||
// DOCKER_DISABLE_INPUT_IFACE_FILTERING can be used as an escape hatch if
|
||||
// this filtering doesn't work out well for some users.
|
||||
if v, _ := strconv.ParseBool(os.Getenv("DOCKER_DISABLE_INPUT_IFACE_FILTERING")); v {
|
||||
log.G(context.TODO()).Warn("DOCKER_DISABLE_INPUT_IFACE_FILTERING is set, skipping input iface filtering.")
|
||||
return nil
|
||||
}
|
||||
|
||||
ipv := iptables.IPv4
|
||||
if b.childHostIP.To4() == nil {
|
||||
ipv = iptables.IPv6
|
||||
}
|
||||
accept := iptables.Rule{IPVer: ipv, Table: iptables.Raw, Chain: "PREROUTING", Args: []string{
|
||||
"-p", b.Proto.String(),
|
||||
"-d", hostIP.String(),
|
||||
"--dport", strconv.Itoa(int(b.HostPort)),
|
||||
"-m", "addrtype",
|
||||
"--dst-type", "LOCAL",
|
||||
"--limit-iface-in",
|
||||
"-j", "ACCEPT",
|
||||
}}
|
||||
if err := appendOrDelChainRule(accept, "INPUT IFACE FILTERING - ACCEPT", enable); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
drop := iptables.Rule{IPVer: ipv, Table: iptables.Raw, Chain: "PREROUTING", Args: []string{
|
||||
"-p", b.Proto.String(),
|
||||
"-d", hostIP.String(),
|
||||
"--dport", strconv.Itoa(int(b.HostPort)),
|
||||
"-j", "DROP",
|
||||
}}
|
||||
if err := appendOrDelChainRule(drop, "INPUT IFACE FILTERING - DROP", enable); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (n *bridgeNetwork) reapplyPerPortIptables4() {
|
||||
n.reapplyPerPortIptables(func(b portBinding) bool { return b.IP.To4() != nil })
|
||||
}
|
||||
|
||||
@@ -13,6 +13,7 @@ import (
|
||||
"testing"
|
||||
|
||||
"github.com/containerd/log"
|
||||
"github.com/docker/docker/internal/nlwrap"
|
||||
"github.com/docker/docker/internal/testutils/netnsutils"
|
||||
"github.com/docker/docker/internal/testutils/storeutils"
|
||||
"github.com/docker/docker/libnetwork/iptables"
|
||||
@@ -821,6 +822,12 @@ func TestAddPortMappings(t *testing.T) {
|
||||
defer ul.Close()
|
||||
}
|
||||
|
||||
var err error
|
||||
d := newDriver(storeutils.NewTempStore(t))
|
||||
d.nlh, err = nlwrap.NewHandle()
|
||||
assert.NilError(t, err)
|
||||
defer d.nlh.Close()
|
||||
|
||||
n := &bridgeNetwork{
|
||||
config: &networkConfiguration{
|
||||
BridgeName: "dummybridge",
|
||||
@@ -829,7 +836,7 @@ func TestAddPortMappings(t *testing.T) {
|
||||
GwModeIPv4: tc.gwMode4,
|
||||
GwModeIPv6: tc.gwMode6,
|
||||
},
|
||||
driver: newDriver(storeutils.NewTempStore(t)),
|
||||
driver: d,
|
||||
}
|
||||
genericOption := map[string]interface{}{
|
||||
netlabel.GenericData: &configuration{
|
||||
@@ -840,7 +847,7 @@ func TestAddPortMappings(t *testing.T) {
|
||||
Rootless: tc.rootless,
|
||||
},
|
||||
}
|
||||
err := n.driver.configure(genericOption)
|
||||
err = n.driver.configure(genericOption)
|
||||
assert.NilError(t, err)
|
||||
|
||||
assert.Check(t, is.Equal(n.driver.portDriverClient == nil, !tc.rootless))
|
||||
|
||||
@@ -49,6 +49,8 @@ const (
|
||||
Filter Table = "filter"
|
||||
// Mangle table is used for mangling the packet.
|
||||
Mangle Table = "mangle"
|
||||
// Raw table is used for filtering packets before they are NATed.
|
||||
Raw Table = "raw"
|
||||
)
|
||||
|
||||
// IPVersion refers to IP version, v4 or v6
|
||||
|
||||
Reference in New Issue
Block a user