mirror of
https://github.com/moby/moby.git
synced 2026-01-11 18:51:37 +00:00
Revert "libnet/d/bridge: port mappings: filter by input iface"
This reverts commit 433b1f9b17.
Signed-off-by: Albin Kerouanton <albinker@gmail.com>
This commit is contained in:
@@ -542,7 +542,6 @@ RUN --mount=type=cache,sharing=locked,id=moby-dev-aptlib,target=/var/lib/apt \
|
||||
libprotobuf-c1 \
|
||||
libyajl2 \
|
||||
net-tools \
|
||||
netcat-openbsd \
|
||||
patch \
|
||||
pigz \
|
||||
sudo \
|
||||
|
||||
@@ -9,7 +9,6 @@ import (
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
runcoptions "github.com/containerd/containerd/api/types/runc/options"
|
||||
@@ -160,12 +159,6 @@ func (daemon *Daemon) fillPlatformInfo(ctx context.Context, v *system.Info, sysI
|
||||
if !v.IPv4Forwarding {
|
||||
v.Warnings = append(v.Warnings, "WARNING: IPv4 forwarding is disabled")
|
||||
}
|
||||
if filtering, _ := strconv.ParseBool(os.Getenv("DOCKER_DISABLE_INPUT_IFACE_FILTERING")); filtering {
|
||||
v.Warnings = append(v.Warnings,
|
||||
"WARNING: input interface filtering is disabled on port mappings, this might be insecure",
|
||||
"DEPRECATED: DOCKER_DISABLE_INPUT_IFACE_FILTERING is deprecated and will be removed in a future release",
|
||||
)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -27,14 +27,6 @@ func WithIPv6() func(*network.CreateOptions) {
|
||||
}
|
||||
}
|
||||
|
||||
// WithIPv6Disabled makes sure IPv6 is disabled on the network.
|
||||
func WithIPv6Disabled() func(*network.CreateOptions) {
|
||||
return func(n *network.CreateOptions) {
|
||||
enable := false
|
||||
n.EnableIPv6 = &enable
|
||||
}
|
||||
}
|
||||
|
||||
// WithInternal enables Internal flag on the create network request
|
||||
func WithInternal() func(*network.CreateOptions) {
|
||||
return func(n *network.CreateOptions) {
|
||||
|
||||
@@ -1,147 +0,0 @@
|
||||
## Container on a user-defined network, with a port published on a specific HostIP
|
||||
|
||||
Adding a network running a container with a mapped port, equivalent to:
|
||||
|
||||
docker network create \
|
||||
-o com.docker.network.bridge.name=bridge1 \
|
||||
--subnet 192.0.2.0/24 --gateway 192.0.2.1 bridge1
|
||||
docker run --network bridge1 -p 127.0.0.1:8080:80 --name c1 busybox
|
||||
|
||||
The filter and nat tables are the same as with no HostIP specified.
|
||||
|
||||
<details>
|
||||
<summary>Filter table</summary>
|
||||
|
||||
Chain INPUT (policy ACCEPT 0 packets, 0 bytes)
|
||||
num pkts bytes target prot opt in out source destination
|
||||
|
||||
Chain FORWARD (policy ACCEPT 0 packets, 0 bytes)
|
||||
num pkts bytes target prot opt in out source destination
|
||||
1 0 0 DOCKER-USER 0 -- * * 0.0.0.0/0 0.0.0.0/0
|
||||
2 0 0 ACCEPT 0 -- * * 0.0.0.0/0 0.0.0.0/0 match-set docker-ext-bridges-v4 dst ctstate RELATED,ESTABLISHED
|
||||
3 0 0 DOCKER-ISOLATION-STAGE-1 0 -- * * 0.0.0.0/0 0.0.0.0/0
|
||||
4 0 0 DOCKER 0 -- * * 0.0.0.0/0 0.0.0.0/0 match-set docker-ext-bridges-v4 dst
|
||||
5 0 0 ACCEPT 0 -- docker0 * 0.0.0.0/0 0.0.0.0/0
|
||||
6 0 0 ACCEPT 0 -- bridge1 * 0.0.0.0/0 0.0.0.0/0
|
||||
|
||||
Chain OUTPUT (policy ACCEPT 0 packets, 0 bytes)
|
||||
num pkts bytes target prot opt in out source destination
|
||||
|
||||
Chain DOCKER (1 references)
|
||||
num pkts bytes target prot opt in out source destination
|
||||
1 0 0 ACCEPT 6 -- !bridge1 bridge1 0.0.0.0/0 192.0.2.2 tcp dpt:80
|
||||
2 0 0 DROP 0 -- !docker0 docker0 0.0.0.0/0 0.0.0.0/0
|
||||
3 0 0 DROP 0 -- !bridge1 bridge1 0.0.0.0/0 0.0.0.0/0
|
||||
|
||||
Chain DOCKER-ISOLATION-STAGE-1 (1 references)
|
||||
num pkts bytes target prot opt in out source destination
|
||||
1 0 0 DOCKER-ISOLATION-STAGE-2 0 -- docker0 !docker0 0.0.0.0/0 0.0.0.0/0
|
||||
2 0 0 DOCKER-ISOLATION-STAGE-2 0 -- bridge1 !bridge1 0.0.0.0/0 0.0.0.0/0
|
||||
|
||||
Chain DOCKER-ISOLATION-STAGE-2 (2 references)
|
||||
num pkts bytes target prot opt in out source destination
|
||||
1 0 0 DROP 0 -- * bridge1 0.0.0.0/0 0.0.0.0/0
|
||||
2 0 0 DROP 0 -- * docker0 0.0.0.0/0 0.0.0.0/0
|
||||
|
||||
Chain DOCKER-USER (1 references)
|
||||
num pkts bytes target prot opt in out source destination
|
||||
1 0 0 RETURN 0 -- * * 0.0.0.0/0 0.0.0.0/0
|
||||
|
||||
|
||||
-P INPUT ACCEPT
|
||||
-P FORWARD ACCEPT
|
||||
-P OUTPUT ACCEPT
|
||||
-N DOCKER
|
||||
-N DOCKER-ISOLATION-STAGE-1
|
||||
-N DOCKER-ISOLATION-STAGE-2
|
||||
-N DOCKER-USER
|
||||
-A FORWARD -j DOCKER-USER
|
||||
-A FORWARD -m set --match-set docker-ext-bridges-v4 dst -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
|
||||
-A FORWARD -j DOCKER-ISOLATION-STAGE-1
|
||||
-A FORWARD -m set --match-set docker-ext-bridges-v4 dst -j DOCKER
|
||||
-A FORWARD -i docker0 -j ACCEPT
|
||||
-A FORWARD -i bridge1 -j ACCEPT
|
||||
-A DOCKER -d 192.0.2.2/32 ! -i bridge1 -o bridge1 -p tcp -m tcp --dport 80 -j ACCEPT
|
||||
-A DOCKER ! -i docker0 -o docker0 -j DROP
|
||||
-A DOCKER ! -i bridge1 -o bridge1 -j DROP
|
||||
-A DOCKER-ISOLATION-STAGE-1 -i docker0 ! -o docker0 -j DOCKER-ISOLATION-STAGE-2
|
||||
-A DOCKER-ISOLATION-STAGE-1 -i bridge1 ! -o bridge1 -j DOCKER-ISOLATION-STAGE-2
|
||||
-A DOCKER-ISOLATION-STAGE-2 -o bridge1 -j DROP
|
||||
-A DOCKER-ISOLATION-STAGE-2 -o docker0 -j DROP
|
||||
-A DOCKER-USER -j RETURN
|
||||
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary>NAT table</summary>
|
||||
|
||||
Chain PREROUTING (policy ACCEPT 0 packets, 0 bytes)
|
||||
num pkts bytes target prot opt in out source destination
|
||||
1 0 0 DOCKER 0 -- * * 0.0.0.0/0 0.0.0.0/0 ADDRTYPE match dst-type LOCAL
|
||||
|
||||
Chain INPUT (policy ACCEPT 0 packets, 0 bytes)
|
||||
num pkts bytes target prot opt in out source destination
|
||||
|
||||
Chain OUTPUT (policy ACCEPT 0 packets, 0 bytes)
|
||||
num pkts bytes target prot opt in out source destination
|
||||
1 0 0 DOCKER 0 -- * * 0.0.0.0/0 !127.0.0.0/8 ADDRTYPE match dst-type LOCAL
|
||||
|
||||
Chain POSTROUTING (policy ACCEPT 0 packets, 0 bytes)
|
||||
num pkts bytes target prot opt in out source destination
|
||||
1 0 0 MASQUERADE 0 -- * !bridge1 192.0.2.0/24 0.0.0.0/0
|
||||
2 0 0 MASQUERADE 0 -- * !docker0 172.17.0.0/16 0.0.0.0/0
|
||||
|
||||
Chain DOCKER (2 references)
|
||||
num pkts bytes target prot opt in out source destination
|
||||
1 0 0 RETURN 0 -- bridge1 * 0.0.0.0/0 0.0.0.0/0
|
||||
2 0 0 RETURN 0 -- docker0 * 0.0.0.0/0 0.0.0.0/0
|
||||
3 0 0 DNAT 6 -- !bridge1 * 0.0.0.0/0 127.0.0.1 tcp dpt:8080 to:192.0.2.2:80
|
||||
|
||||
|
||||
-P PREROUTING ACCEPT
|
||||
-P INPUT ACCEPT
|
||||
-P OUTPUT ACCEPT
|
||||
-P POSTROUTING ACCEPT
|
||||
-N DOCKER
|
||||
-A PREROUTING -m addrtype --dst-type LOCAL -j DOCKER
|
||||
-A OUTPUT ! -d 127.0.0.0/8 -m addrtype --dst-type LOCAL -j DOCKER
|
||||
-A POSTROUTING -s 192.0.2.0/24 ! -o bridge1 -j MASQUERADE
|
||||
-A POSTROUTING -s 172.17.0.0/16 ! -o docker0 -j MASQUERADE
|
||||
-A DOCKER -i bridge1 -j RETURN
|
||||
-A DOCKER -i docker0 -j RETURN
|
||||
-A DOCKER -d 127.0.0.1/32 ! -i bridge1 -p tcp -m tcp --dport 8080 -j DNAT --to-destination 192.0.2.2:80
|
||||
|
||||
|
||||
</details>
|
||||
|
||||
The raw table is:
|
||||
|
||||
Chain PREROUTING (policy ACCEPT 0 packets, 0 bytes)
|
||||
num pkts bytes target prot opt in out source destination
|
||||
1 0 0 ACCEPT 6 -- * * 0.0.0.0/0 127.0.0.1 tcp dpt:8080 ADDRTYPE match dst-type LOCAL limit-in
|
||||
2 0 0 DROP 6 -- * * 0.0.0.0/0 127.0.0.1 tcp dpt:8080
|
||||
|
||||
Chain OUTPUT (policy ACCEPT 0 packets, 0 bytes)
|
||||
num pkts bytes target prot opt in out source destination
|
||||
|
||||
|
||||
<details>
|
||||
<summary>iptables commands</summary>
|
||||
|
||||
-P PREROUTING ACCEPT
|
||||
-P OUTPUT ACCEPT
|
||||
-A PREROUTING -d 127.0.0.1/32 -p tcp -m tcp --dport 8080 -m addrtype --dst-type LOCAL --limit-iface-in -j ACCEPT
|
||||
-A PREROUTING -d 127.0.0.1/32 -p tcp -m tcp --dport 8080 -j DROP
|
||||
|
||||
|
||||
</details>
|
||||
|
||||
The difference from [port mapping with no HostIP][0] is:
|
||||
|
||||
- An ACCEPT rule is added to the PREROUTING chain to accept packets targeting the
|
||||
mapped port and coming from the interface that has the HostIP assigned.
|
||||
- And a DROP rule is added too, to drop packets targeting the mapped port but
|
||||
that didn't pass the previous check.
|
||||
|
||||
[0]: usernet-portmap.md
|
||||
@@ -46,4 +46,3 @@ Scenarios:
|
||||
- [Container on a routed-mode network, with a published port](generated/usernet-portmap-routed.md)
|
||||
- [Container on a nat-unprotected network, with a published port](generated/usernet-portmap-natunprot.md)
|
||||
- [Swarm service, with a published port](generated/swarm-portmap.md)
|
||||
- [Container on a user-defined network, with a port published on a specific HostIP](generated/usernet-portmap-hostip.md)
|
||||
|
||||
@@ -174,18 +174,6 @@ var index = []section{
|
||||
},
|
||||
}},
|
||||
},
|
||||
{
|
||||
name: "usernet-portmap-hostip.md",
|
||||
networks: []networkDesc{{
|
||||
name: "bridge1",
|
||||
containers: []ctrDesc{
|
||||
{
|
||||
name: "c1",
|
||||
portMappings: nat.PortMap{"80/tcp": {{HostIP: "127.0.0.1", HostPort: "8080"}}},
|
||||
},
|
||||
},
|
||||
}},
|
||||
},
|
||||
}
|
||||
|
||||
// iptCmdType is used to look up iptCmds in the markdown (can't use an int
|
||||
@@ -200,8 +188,6 @@ const (
|
||||
iptCmdSFilterDocker4 iptCmdType = "SFilterDocker4"
|
||||
iptCmdLNat4 iptCmdType = "LNat4"
|
||||
iptCmdSNat4 iptCmdType = "SNat4"
|
||||
iptCmdLRaw4 iptCmdType = "LRaw4"
|
||||
iptCmdSRaw4 iptCmdType = "SRaw4"
|
||||
)
|
||||
|
||||
var iptCmds = map[iptCmdType][]string{
|
||||
@@ -212,8 +198,6 @@ var iptCmds = map[iptCmdType][]string{
|
||||
iptCmdSFilterDocker4: {"iptables", "-S", "DOCKER"},
|
||||
iptCmdLNat4: {"iptables", "-nvL", "--line-numbers", "-t", "nat"},
|
||||
iptCmdSNat4: {"iptables", "-S", "-t", "nat"},
|
||||
iptCmdLRaw4: {"iptables", "-nvL", "--line-numbers", "-t", "raw"},
|
||||
iptCmdSRaw4: {"iptables", "-S", "-t", "raw"},
|
||||
}
|
||||
|
||||
func TestBridgeIptablesDoc(t *testing.T) {
|
||||
|
||||
@@ -1,48 +0,0 @@
|
||||
## Container on a user-defined network, with a port published on a specific HostIP
|
||||
|
||||
Adding a network running a container with a mapped port, equivalent to:
|
||||
|
||||
docker network create \
|
||||
-o com.docker.network.bridge.name=bridge1 \
|
||||
--subnet 192.0.2.0/24 --gateway 192.0.2.1 bridge1
|
||||
docker run --network bridge1 -p 127.0.0.1:8080:80 --name c1 busybox
|
||||
|
||||
The filter and nat tables are the same as with no HostIP specified.
|
||||
|
||||
<details>
|
||||
<summary>Filter table</summary>
|
||||
|
||||
{{index . "LFilter4"}}
|
||||
|
||||
{{index . "SFilter4"}}
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary>NAT table</summary>
|
||||
|
||||
{{index . "LNat4"}}
|
||||
|
||||
{{index . "SNat4"}}
|
||||
|
||||
</details>
|
||||
|
||||
The raw table is:
|
||||
|
||||
{{index . "LRaw4"}}
|
||||
|
||||
<details>
|
||||
<summary>iptables commands</summary>
|
||||
|
||||
{{index . "SRaw4"}}
|
||||
|
||||
</details>
|
||||
|
||||
The difference from [port mapping with no HostIP][0] is:
|
||||
|
||||
- An ACCEPT rule is added to the PREROUTING chain to accept packets targeting the
|
||||
mapped port and coming from the interface that has the HostIP assigned.
|
||||
- And a DROP rule is added too, to drop packets targeting the mapped port but
|
||||
that didn't pass the previous check.
|
||||
|
||||
[0]: usernet-portmap.md
|
||||
@@ -1,7 +1,6 @@
|
||||
package networking
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"net"
|
||||
@@ -17,12 +16,10 @@ import (
|
||||
|
||||
containertypes "github.com/docker/docker/api/types/container"
|
||||
networktypes "github.com/docker/docker/api/types/network"
|
||||
"github.com/docker/docker/client"
|
||||
"github.com/docker/docker/integration/internal/container"
|
||||
"github.com/docker/docker/integration/internal/network"
|
||||
"github.com/docker/docker/internal/testutils/networking"
|
||||
"github.com/docker/docker/libnetwork/drivers/bridge"
|
||||
"github.com/docker/docker/pkg/stdcopy"
|
||||
"github.com/docker/docker/testutil"
|
||||
"github.com/docker/docker/testutil/daemon"
|
||||
"github.com/docker/go-connections/nat"
|
||||
@@ -784,185 +781,3 @@ func TestDirectRoutingOpenPorts(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestAccessPublishedPortFromNonMatchingIface checks that, on multi-homed
|
||||
// network hosts, port bindings created with a specific HostIP aren't accessible from
|
||||
// interfaces that don't match the HostIP.
|
||||
//
|
||||
// Regression test for https://github.com/moby/moby/issues/45610.
|
||||
func TestAccessPublishedPortFromNonMatchingIface(t *testing.T) {
|
||||
// This test checks iptables rules that live in dockerd's netns. In the case
|
||||
// of rootlesskit, this is not the same netns as the host, so they don't
|
||||
// have any effect.
|
||||
// TODO(aker): we need to figure out what we want to do for rootlesskit.
|
||||
skip.If(t, testEnv.IsRootless, "rootlesskit has its own netns")
|
||||
|
||||
ctx := setupTest(t)
|
||||
|
||||
const (
|
||||
hostIPv4 = "192.168.120.2"
|
||||
hostIPv6 = "fdbc:277b:d40b::2"
|
||||
)
|
||||
|
||||
// l3Good is where the port will be published.
|
||||
l3Good := networking.NewL3Segment(t, "test-matching-iface-br",
|
||||
netip.MustParsePrefix("192.168.120.1/24"),
|
||||
netip.MustParsePrefix("fdbc:277b:d40b::1/64"))
|
||||
defer l3Good.Destroy(t)
|
||||
// "docker" is the host where dockerd is running. Suffix the iface name to
|
||||
// not collide with the L3 segment below.
|
||||
l3Good.AddHost(t, "docker", networking.CurrentNetns, "eth-test1",
|
||||
netip.MustParsePrefix(hostIPv4+"/24"),
|
||||
netip.MustParsePrefix(hostIPv6+"/64"))
|
||||
l3Good.AddHost(t, "neigh", "test-matching-iface-neighbor", "eth0",
|
||||
netip.MustParsePrefix("192.168.120.3/24"),
|
||||
netip.MustParsePrefix("fdbc:277b:d40b::3/64"))
|
||||
|
||||
// l3Bad is another L3Segment, from which the published port should be
|
||||
// inaccessible.
|
||||
l3Bad := networking.NewL3Segment(t, "test-non-matching-iface-br",
|
||||
netip.MustParsePrefix("192.168.123.1/24"),
|
||||
netip.MustParsePrefix("fde8:19ff:6e09::1/64"))
|
||||
defer l3Bad.Destroy(t)
|
||||
// "docker" is the host where dockerd is running. Suffix the iface name to
|
||||
// not collide with the L3 segment above.
|
||||
l3Bad.AddHost(t, "docker", networking.CurrentNetns, "eth-test2",
|
||||
netip.MustParsePrefix("192.168.123.2/24"),
|
||||
netip.MustParsePrefix("fde8:19ff:6e09::2/64"))
|
||||
l3Bad.AddHost(t, "attacker", "test-non-matching-iface-attacker", "eth0",
|
||||
netip.MustParsePrefix("192.168.123.3/24"),
|
||||
netip.MustParsePrefix("fde8:19ff:6e09::3/64"))
|
||||
|
||||
testAccess := func(t *testing.T, c *client.Client, host networking.Host, hostAddr string, escapeHatch, expAccess bool, nwOpts ...func(*networktypes.CreateOptions)) {
|
||||
testutil.StartSpan(ctx, t)
|
||||
|
||||
const bridgeName = "brattacked"
|
||||
network.CreateNoError(ctx, t, c, bridgeName, append(nwOpts,
|
||||
network.WithDriver("bridge"),
|
||||
network.WithOption(bridge.BridgeName, bridgeName),
|
||||
)...)
|
||||
defer network.RemoveNoError(ctx, t, c, bridgeName)
|
||||
|
||||
const hostPort = "5000"
|
||||
// Create the victim container, with a non-empty / non-unspecified
|
||||
// HostIP in its port binding.
|
||||
serverID := container.Run(ctx, t, c,
|
||||
container.WithName(sanitizeCtrName(t.Name()+"-server")),
|
||||
container.WithCmd("nc", "-lup", "5000"),
|
||||
container.WithExposedPorts("5000/udp"),
|
||||
container.WithPortMap(nat.PortMap{"5000/udp": {{HostIP: hostAddr, HostPort: hostPort}}}),
|
||||
container.WithNetworkMode(bridgeName))
|
||||
defer c.ContainerRemove(ctx, serverID, containertypes.RemoveOptions{Force: true})
|
||||
|
||||
// Send a UDP datagram to the published port, from the 'host' passed
|
||||
// as argument.
|
||||
//
|
||||
// Here UDP is preferred, because it's a one-way, connectionless
|
||||
// protocol. With TCP the three-way handshake has to be completed
|
||||
// before sending a payload. But since some of the test cases try to
|
||||
// spoof the loopback address, the 'attacker host' will drop the
|
||||
// SYN-ACK by default (because the source addr will be considered
|
||||
// invalid / non-routable). This would require further tuning to make
|
||||
// it work. But with UDP, this problem doesn't exist - the payload can
|
||||
// be sent straight away.
|
||||
host.Do(t, func() {
|
||||
// Send a payload to the victim container from the attacker host.
|
||||
for i := 0; i < 10; i++ {
|
||||
t.Logf("Sending probe #%d to %s:%s from host %s", i, hostAddr, hostPort, host.Name)
|
||||
|
||||
// For some unexplainable reason, the first few packets might
|
||||
// not reach the container (ie. the container returns an ICMP
|
||||
// 'Port Unreachable' message).
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
icmd.RunCommand("/bin/sh", "-c", fmt.Sprintf("echo foobar | nc -w1 -u %s %s", hostAddr, hostPort)).Assert(t, icmd.Success)
|
||||
}
|
||||
})
|
||||
|
||||
// Check whether the payload was received by the victim container.
|
||||
logReader, err := c.ContainerLogs(ctx, serverID, containertypes.LogsOptions{ShowStdout: true})
|
||||
assert.NilError(t, err)
|
||||
defer logReader.Close()
|
||||
|
||||
var actualStdout bytes.Buffer
|
||||
_, err = stdcopy.StdCopy(&actualStdout, nil, logReader)
|
||||
assert.NilError(t, err)
|
||||
|
||||
stdOut := strings.TrimSpace(actualStdout.String())
|
||||
if expAccess {
|
||||
assert.Assert(t, strings.Contains(stdOut, "foobar"), "Host %s should have access to the container, but the payload wasn't received by the docker host", host.Name)
|
||||
} else {
|
||||
assert.Assert(t, !strings.Contains(stdOut, "foobar"), "Host %s should not have access to the container, but the payload was received by the docker host", host.Name)
|
||||
}
|
||||
}
|
||||
|
||||
for _, escapeHatch := range []bool{false, true} {
|
||||
var dopts []daemon.Option
|
||||
if escapeHatch {
|
||||
dopts = []daemon.Option{daemon.WithEnvVars("DOCKER_DISABLE_INPUT_IFACE_FILTERING=1")}
|
||||
}
|
||||
|
||||
d := daemon.New(t, dopts...)
|
||||
d.StartWithBusybox(ctx, t)
|
||||
defer d.Stop(t)
|
||||
|
||||
c := d.NewClientT(t)
|
||||
defer c.Close()
|
||||
|
||||
t.Run(fmt.Sprintf("NAT/IPv4/lo/EscapeHatch=%t", escapeHatch), func(t *testing.T) {
|
||||
const hostAddr = "127.0.10.1"
|
||||
|
||||
l3Bad.Hosts["attacker"].Run(t, "ip", "route", "add", hostAddr+"/32", "via", "192.168.123.2", "dev", "eth0")
|
||||
defer l3Bad.Hosts["attacker"].Run(t, "ip", "route", "delete", hostAddr+"/32", "via", "192.168.123.2", "dev", "eth0")
|
||||
|
||||
testAccess(t, c, l3Bad.Hosts["attacker"], hostAddr, escapeHatch, escapeHatch)
|
||||
// Test access from the L3 segment where the port is published to
|
||||
// make sure that the test works properly (otherwise we might
|
||||
// reintroduce the security issue without realizing).
|
||||
testAccess(t, c, l3Good.Hosts["docker"], hostAddr, escapeHatch, true)
|
||||
})
|
||||
|
||||
t.Run(fmt.Sprintf("NAT/IPv4/HostAddr/EscapeHatch=%t", escapeHatch), func(t *testing.T) {
|
||||
l3Bad.Hosts["attacker"].Run(t, "ip", "route", "add", hostIPv4+"/32", "via", "192.168.123.2", "dev", "eth0")
|
||||
defer l3Bad.Hosts["attacker"].Run(t, "ip", "route", "delete", hostIPv4+"/32", "via", "192.168.123.2", "dev", "eth0")
|
||||
|
||||
testAccess(t, c, l3Bad.Hosts["attacker"], hostIPv4, escapeHatch, escapeHatch)
|
||||
// Test access from the L3 segment where the port is published to
|
||||
// make sure that the test works properly (otherwise we might
|
||||
// reintroduce the security issue without realizing).
|
||||
testAccess(t, c, l3Good.Hosts["neigh"], hostIPv4, escapeHatch, true)
|
||||
})
|
||||
|
||||
t.Run(fmt.Sprintf("NAT/IPv6/HostAddr/EscapeHatch=%t", escapeHatch), func(t *testing.T) {
|
||||
l3Bad.Hosts["attacker"].Run(t, "ip", "route", "add", hostIPv6+"/128", "via", "fde8:19ff:6e09::2", "dev", "eth0")
|
||||
defer l3Bad.Hosts["attacker"].Run(t, "ip", "route", "delete", hostIPv6+"/128", "via", "fde8:19ff:6e09::2", "dev", "eth0")
|
||||
|
||||
nwOpts := []func(*networktypes.CreateOptions){
|
||||
network.WithIPv6(),
|
||||
network.WithIPAM("fd1d:b78f:79e3::/64", "fd1d:b78f:79e3::1"),
|
||||
}
|
||||
|
||||
testAccess(t, c, l3Bad.Hosts["attacker"], hostIPv6, escapeHatch, escapeHatch, nwOpts...)
|
||||
// Test access from the L3 segment where the port is published to
|
||||
// make sure that the test works properly (otherwise we might
|
||||
// reintroduce the security issue without realizing).
|
||||
testAccess(t, c, l3Good.Hosts["neigh"], hostIPv6, escapeHatch, true, nwOpts...)
|
||||
})
|
||||
|
||||
// IPv6 port-bindings to IPv4-only containers (ie. not attached to any
|
||||
// IPv6 network) aren't NATed, but go through docker-proxy.
|
||||
t.Run(fmt.Sprintf("Proxy/IPv6/HostAddr/EscapeHatch=%t", escapeHatch), func(t *testing.T) {
|
||||
l3Bad.Hosts["attacker"].Run(t, "ip", "route", "add", hostIPv6+"/128", "via", "fde8:19ff:6e09::2", "dev", "eth0")
|
||||
defer l3Bad.Hosts["attacker"].Run(t, "ip", "route", "delete", hostIPv6+"/128", "via", "fde8:19ff:6e09::2", "dev", "eth0")
|
||||
|
||||
testAccess(t, c, l3Bad.Hosts["attacker"], hostIPv6, escapeHatch, escapeHatch, network.WithIPv6Disabled())
|
||||
// Test access from the L3 segment where the port is published to
|
||||
// make sure that the test works properly (otherwise we might
|
||||
// reintroduce the security issue without realizing).
|
||||
testAccess(t, c, l3Good.Hosts["neigh"], hostIPv6, escapeHatch, true, network.WithIPv6Disabled())
|
||||
})
|
||||
|
||||
// IPv6 loopback address is non routable, so the kernel will block any
|
||||
// packet spoofing it without the need for any iptables rules. No need
|
||||
// to test that case here.
|
||||
}
|
||||
}
|
||||
|
||||
@@ -45,7 +45,7 @@ func NewL3Segment(t *testing.T, nsName string, addrs ...netip.Prefix) *L3Segment
|
||||
Hosts: map[string]Host{},
|
||||
}
|
||||
|
||||
l3.bridge = newHost(t, "bridge", nsName, "br0")
|
||||
l3.bridge = newHost(t, nsName, "br0")
|
||||
defer func() {
|
||||
if t.Failed() {
|
||||
l3.Destroy(t)
|
||||
@@ -70,13 +70,12 @@ func (l3 *L3Segment) AddHost(t *testing.T, hostname, nsName, ifname string, addr
|
||||
t.Fatalf("hostname too long")
|
||||
}
|
||||
|
||||
host := newHost(t, hostname, nsName, ifname)
|
||||
host := newHost(t, nsName, ifname)
|
||||
l3.Hosts[hostname] = host
|
||||
|
||||
host.MustRun(t, "ip", "link", "add", hostname, "netns", l3.bridge.ns, "type", "veth", "peer", "name", host.Iface)
|
||||
l3.bridge.MustRun(t, "ip", "link", "set", hostname, "up", "master", l3.bridge.Iface)
|
||||
host.MustRun(t, "ip", "link", "set", host.Iface, "up")
|
||||
host.MustRun(t, "ip", "link", "set", "lo", "up")
|
||||
|
||||
for _, addr := range addrs {
|
||||
host.MustRun(t, "ip", "addr", "add", addr.String(), "dev", host.Iface, "nodad")
|
||||
@@ -84,7 +83,6 @@ func (l3 *L3Segment) AddHost(t *testing.T, hostname, nsName, ifname string, addr
|
||||
}
|
||||
|
||||
func (l3 *L3Segment) Destroy(t *testing.T) {
|
||||
t.Helper()
|
||||
for _, host := range l3.Hosts {
|
||||
host.Destroy(t)
|
||||
}
|
||||
@@ -92,12 +90,11 @@ func (l3 *L3Segment) Destroy(t *testing.T) {
|
||||
}
|
||||
|
||||
type Host struct {
|
||||
Name string
|
||||
Iface string // Iface is the interface name in the host network namespace.
|
||||
ns string // ns is the network namespace name.
|
||||
}
|
||||
|
||||
func newHost(t *testing.T, hostname, nsName, ifname string) Host {
|
||||
func newHost(t *testing.T, nsName, ifname string) Host {
|
||||
t.Helper()
|
||||
|
||||
if len(ifname) >= syscall.IFNAMSIZ {
|
||||
@@ -112,7 +109,6 @@ func newHost(t *testing.T, hostname, nsName, ifname string) Host {
|
||||
}
|
||||
|
||||
return Host{
|
||||
Name: hostname,
|
||||
Iface: ifname,
|
||||
ns: nsName,
|
||||
}
|
||||
@@ -146,27 +142,25 @@ func (h Host) MustRun(t *testing.T, cmd string, args ...string) string {
|
||||
func (h Host) Do(t *testing.T, fn func()) {
|
||||
t.Helper()
|
||||
|
||||
if h.ns != CurrentNetns {
|
||||
targetNs, err := netns.GetFromName(h.ns)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get netns handle: %v", err)
|
||||
}
|
||||
defer targetNs.Close()
|
||||
|
||||
origNs, err := netns.Get()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get current netns: %v", err)
|
||||
}
|
||||
defer origNs.Close()
|
||||
|
||||
runtime.LockOSThread()
|
||||
defer runtime.UnlockOSThread()
|
||||
|
||||
if err := netns.Set(targetNs); err != nil {
|
||||
t.Fatalf("failed to enter netns: %v", err)
|
||||
}
|
||||
defer netns.Set(origNs)
|
||||
targetNs, err := netns.GetFromName(h.ns)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get netns handle: %v", err)
|
||||
}
|
||||
defer targetNs.Close()
|
||||
|
||||
origNs, err := netns.Get()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get current netns: %v", err)
|
||||
}
|
||||
defer origNs.Close()
|
||||
|
||||
runtime.LockOSThread()
|
||||
defer runtime.UnlockOSThread()
|
||||
|
||||
if err := netns.Set(targetNs); err != nil {
|
||||
t.Fatalf("failed to enter netns: %v", err)
|
||||
}
|
||||
defer netns.Set(origNs)
|
||||
|
||||
fn()
|
||||
}
|
||||
|
||||
@@ -176,9 +176,9 @@ func (n *bridgeNetwork) addPortMappings(
|
||||
}
|
||||
|
||||
for i := range bindings {
|
||||
b := bindings[i]
|
||||
if pdc != nil && b.HostPort != 0 {
|
||||
if pdc != nil && bindings[i].HostPort != 0 {
|
||||
var err error
|
||||
b := &bindings[i]
|
||||
hip, ok := netip.AddrFromSlice(b.HostIP)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("invalid host IP address in %s", b)
|
||||
@@ -187,18 +187,12 @@ func (n *bridgeNetwork) addPortMappings(
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("invalid child host IP address %s in %s", b.childHostIP, b)
|
||||
}
|
||||
bindings[i].portDriverRemove, err = pdc.AddPort(ctx, b.Proto.String(), hip, chip, int(b.HostPort))
|
||||
b.portDriverRemove, err = pdc.AddPort(ctx, b.Proto.String(), hip, chip, int(b.HostPort))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
if err := n.setPerPortIptables(b, true); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// When the port is proxied, and not NATed (eg. when an IPv6 HostIP is
|
||||
// specified, but the container has no IPv6 address), the container's
|
||||
// port must be protected by filterPortByInputIface.
|
||||
if err := n.filterPortByInputIface(b, true); err != nil {
|
||||
if err := n.setPerPortIptables(bindings[i], true); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
@@ -752,11 +746,10 @@ func (n *bridgeNetwork) releasePortBindings(pbs []portBinding) error {
|
||||
if errN != nil {
|
||||
errN = fmt.Errorf("failed to remove iptables rules for port mapping %s: %w", pb, errN)
|
||||
}
|
||||
errF := n.filterPortByInputIface(pb, false)
|
||||
if pb.HostPort > 0 {
|
||||
portallocator.Get().ReleasePort(pb.childHostIP, pb.Proto.String(), int(pb.HostPort))
|
||||
}
|
||||
errs = append(errs, errS, errPD, errP, errN, errF)
|
||||
errs = append(errs, errS, errPD, errP, errN)
|
||||
}
|
||||
return errors.Join(errs...)
|
||||
}
|
||||
@@ -876,59 +869,6 @@ func setPerPortForwarding(b portBinding, ipv iptables.IPVersion, bridgeName stri
|
||||
return nil
|
||||
}
|
||||
|
||||
// filterPortByInputIface adds a couple of iptables rules to accept packets
|
||||
// destined to a mapped port with a dest addr that matches the interface
|
||||
// they're received on. If that doesn't match, the second rule drops them
|
||||
// unceremoniously.
|
||||
//
|
||||
// These rules will block rogue hosts that try to access a mapped port while
|
||||
// they aren't part of the L2 segment where the mapped port is exposed.
|
||||
// For instance, if HostIP == 127.0.0.1, no ingress should come from anything
|
||||
// but lo.
|
||||
func (n *bridgeNetwork) filterPortByInputIface(b portBinding, enable bool) error {
|
||||
hostIP := b.childHostIP
|
||||
if b.HostPort == 0 {
|
||||
// Direct routing mode is used, we can't filter based on the input iface.
|
||||
return nil
|
||||
}
|
||||
|
||||
// DOCKER_DISABLE_INPUT_IFACE_FILTERING can be used as an escape hatch if
|
||||
// this filtering doesn't work out well for some users.
|
||||
if v, _ := strconv.ParseBool(os.Getenv("DOCKER_DISABLE_INPUT_IFACE_FILTERING")); v {
|
||||
log.G(context.TODO()).Warn("DOCKER_DISABLE_INPUT_IFACE_FILTERING is set, skipping input iface filtering.")
|
||||
return nil
|
||||
}
|
||||
|
||||
ipv := iptables.IPv4
|
||||
if b.childHostIP.To4() == nil {
|
||||
ipv = iptables.IPv6
|
||||
}
|
||||
accept := iptables.Rule{IPVer: ipv, Table: iptables.Raw, Chain: "PREROUTING", Args: []string{
|
||||
"-p", b.Proto.String(),
|
||||
"-d", hostIP.String(),
|
||||
"--dport", strconv.Itoa(int(b.HostPort)),
|
||||
"-m", "addrtype",
|
||||
"--dst-type", "LOCAL",
|
||||
"--limit-iface-in",
|
||||
"-j", "ACCEPT",
|
||||
}}
|
||||
if err := appendOrDelChainRule(accept, "INPUT IFACE FILTERING - ACCEPT", enable); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
drop := iptables.Rule{IPVer: ipv, Table: iptables.Raw, Chain: "PREROUTING", Args: []string{
|
||||
"-p", b.Proto.String(),
|
||||
"-d", hostIP.String(),
|
||||
"--dport", strconv.Itoa(int(b.HostPort)),
|
||||
"-j", "DROP",
|
||||
}}
|
||||
if err := appendOrDelChainRule(drop, "INPUT IFACE FILTERING - DROP", enable); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (n *bridgeNetwork) reapplyPerPortIptables4() {
|
||||
n.reapplyPerPortIptables(func(b portBinding) bool { return b.IP.To4() != nil })
|
||||
}
|
||||
|
||||
@@ -13,7 +13,6 @@ import (
|
||||
"testing"
|
||||
|
||||
"github.com/containerd/log"
|
||||
"github.com/docker/docker/internal/nlwrap"
|
||||
"github.com/docker/docker/internal/testutils/netnsutils"
|
||||
"github.com/docker/docker/internal/testutils/storeutils"
|
||||
"github.com/docker/docker/libnetwork/iptables"
|
||||
@@ -822,12 +821,6 @@ func TestAddPortMappings(t *testing.T) {
|
||||
defer ul.Close()
|
||||
}
|
||||
|
||||
var err error
|
||||
d := newDriver(storeutils.NewTempStore(t))
|
||||
d.nlh, err = nlwrap.NewHandle()
|
||||
assert.NilError(t, err)
|
||||
defer d.nlh.Close()
|
||||
|
||||
n := &bridgeNetwork{
|
||||
config: &networkConfiguration{
|
||||
BridgeName: "dummybridge",
|
||||
@@ -836,7 +829,7 @@ func TestAddPortMappings(t *testing.T) {
|
||||
GwModeIPv4: tc.gwMode4,
|
||||
GwModeIPv6: tc.gwMode6,
|
||||
},
|
||||
driver: d,
|
||||
driver: newDriver(storeutils.NewTempStore(t)),
|
||||
}
|
||||
genericOption := map[string]interface{}{
|
||||
netlabel.GenericData: &configuration{
|
||||
@@ -847,7 +840,7 @@ func TestAddPortMappings(t *testing.T) {
|
||||
Rootless: tc.rootless,
|
||||
},
|
||||
}
|
||||
err = n.driver.configure(genericOption)
|
||||
err := n.driver.configure(genericOption)
|
||||
assert.NilError(t, err)
|
||||
|
||||
assert.Check(t, is.Equal(n.driver.portDriverClient == nil, !tc.rootless))
|
||||
|
||||
@@ -49,8 +49,6 @@ const (
|
||||
Filter Table = "filter"
|
||||
// Mangle table is used for mangling the packet.
|
||||
Mangle Table = "mangle"
|
||||
// Raw table is used for filtering packets before they are NATed.
|
||||
Raw Table = "raw"
|
||||
)
|
||||
|
||||
// IPVersion refers to IP version, v4 or v6
|
||||
|
||||
Reference in New Issue
Block a user