Routed networks accept traffic from anywhere.

Create ipsets containing the subnet of each non-internal bridge network.

Signed-off-by: Rob Murray <rob.murray@docker.com>
This commit is contained in:
Rob Murray
2024-09-20 15:14:34 +01:00
parent 10338053f0
commit 0546d9084f
16 changed files with 297 additions and 213 deletions

View File

@@ -11,9 +11,9 @@ Table `filter`:
Chain FORWARD (policy ACCEPT 0 packets, 0 bytes)
num pkts bytes target prot opt in out source destination
1 0 0 DOCKER-USER 0 -- * * 0.0.0.0/0 0.0.0.0/0
2 0 0 DOCKER-ISOLATION-STAGE-1 0 -- * * 0.0.0.0/0 0.0.0.0/0
3 0 0 ACCEPT 0 -- * docker0 0.0.0.0/0 0.0.0.0/0 ctstate RELATED,ESTABLISHED
4 0 0 DOCKER 0 -- * docker0 0.0.0.0/0 0.0.0.0/0
2 0 0 ACCEPT 0 -- * * 0.0.0.0/0 0.0.0.0/0 match-set docker-ext-bridges-v4 dst ctstate RELATED,ESTABLISHED
3 0 0 DOCKER-ISOLATION-STAGE-1 0 -- * * 0.0.0.0/0 0.0.0.0/0
4 0 0 DOCKER 0 -- * * 0.0.0.0/0 0.0.0.0/0 match-set docker-ext-bridges-v4 dst
5 0 0 ACCEPT 0 -- docker0 !docker0 0.0.0.0/0 0.0.0.0/0
6 0 0 ACCEPT 0 -- docker0 docker0 0.0.0.0/0 0.0.0.0/0
@@ -48,9 +48,9 @@ Table `filter`:
-N DOCKER-ISOLATION-STAGE-2
-N DOCKER-USER
-A FORWARD -j DOCKER-USER
-A FORWARD -m set --match-set docker-ext-bridges-v4 dst -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
-A FORWARD -j DOCKER-ISOLATION-STAGE-1
-A FORWARD -o docker0 -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
-A FORWARD -o docker0 -j DOCKER
-A FORWARD -m set --match-set docker-ext-bridges-v4 dst -j DOCKER
-A FORWARD -i docker0 ! -o docker0 -j ACCEPT
-A FORWARD -i docker0 -o docker0 -j ACCEPT
-A DOCKER ! -i docker0 -o docker0 -j DROP
@@ -78,17 +78,19 @@ The FORWARD chain rules are numbered in the output above, they are:
Docker won't add rules to the DOCKER-USER chain, it's only for user-defined rules.
It's (mostly) kept at the top of the chain by deleting it and re-creating it after each
new network is created, while traffic may be running for other networks.
2. Unconditional jump to DOCKER-ISOLATION-STAGE-1.
Set up during network creation by [setupIPTables][11], which ensures it appears
2. Early ACCEPT for any RELATED,ESTABLISHED traffic to a docker bridge. This rule
matches against an `ipset` called `docker-ext-bridges-v4` (`v6` for IPv6). The
set contains the CIDR address of each non-internal docker bridge network, and it is updated as networks
are created and deleted.
So, this rule could be set up during bridge driver initialisation. But, it is
currently set up when a network is created, in [setupIPTables][11].
3. Unconditional jump to DOCKER-ISOLATION-STAGE-1.
Set up during network creation by [setupIPTables][12], which ensures it appears
after the jump to DOCKER-USER (by deleting it and re-creating, while traffic
may be running for other networks).
3. ACCEPT RELATED,ESTABLISHED packets into a specific bridge network.
Allows responses to outgoing requests, and continuation of incoming requests,
without needing to process any further rules.
This rule is also added during network creation, but the code to do it
is in libnetwork, [ProgramChain][12].
4. Jump to DOCKER, for any packet destined for a bridge network. Added when
the network is created, in [ProgramChain][13] ("filterChain" is the DOCKER chain).
4. Jump to DOCKER, for any packet destined for any bridge network, identified by
matching against the `docker-ext-bridges-v[46]` set. Added when the network is
created, in [setupIPTables][13].
The DOCKER chain implements per-port/protocol filtering for each container.
5. ACCEPT any packet leaving a network, also set up when the network is created, in
[setupIPTablesInternal][14].
@@ -97,9 +99,9 @@ The FORWARD chain rules are numbered in the output above, they are:
[setIcc][15].
[10]: https://github.com/moby/moby/blob/e05848c0025b67a16aaafa8cdff95d5e2c064105/libnetwork/firewall_linux.go#L50
[11]: https://github.com/moby/moby/blob/333cfa640239153477bf635a8131734d0e9d099d/libnetwork/drivers/bridge/setup_ip_tables_linux.go#L201
[12]: https://github.com/moby/moby/blob/e05848c0025b67a16aaafa8cdff95d5e2c064105/libnetwork/iptables/iptables.go#L270
[13]: https://github.com/moby/moby/blob/e05848c0025b67a16aaafa8cdff95d5e2c064105/libnetwork/iptables/iptables.go#L251-L255
[11]: https://github.com/robmry/moby/blob/52c89d467fc5326149e4bbb8903d23589b66ff0d/libnetwork/drivers/bridge/setup_ip_tables_linux.go#L230-L232
[12]: https://github.com/robmry/moby/blob/52c89d467fc5326149e4bbb8903d23589b66ff0d/libnetwork/drivers/bridge/setup_ip_tables_linux.go#L227-L229
[13]: https://github.com/robmry/moby/blob/52c89d467fc5326149e4bbb8903d23589b66ff0d/libnetwork/drivers/bridge/setup_ip_tables_linux.go#L223-L226
[14]: https://github.com/moby/moby/blob/333cfa640239153477bf635a8131734d0e9d099d/libnetwork/drivers/bridge/setup_ip_tables_linux.go#L264
[15]: https://github.com/moby/moby/blob/333cfa640239153477bf635a8131734d0e9d099d/libnetwork/drivers/bridge/setup_ip_tables_linux.go#L343

View File

@@ -16,10 +16,10 @@ The filter table is updated as follows:
Chain FORWARD (policy ACCEPT 0 packets, 0 bytes)
num pkts bytes target prot opt in out source destination
1 0 0 DOCKER-USER 0 -- * * 0.0.0.0/0 0.0.0.0/0
2 0 0 DOCKER-ISOLATION-STAGE-1 0 -- * * 0.0.0.0/0 0.0.0.0/0
3 0 0 ACCEPT 0 -- bridge1 bridge1 0.0.0.0/0 0.0.0.0/0
4 0 0 ACCEPT 0 -- * docker0 0.0.0.0/0 0.0.0.0/0 ctstate RELATED,ESTABLISHED
5 0 0 DOCKER 0 -- * docker0 0.0.0.0/0 0.0.0.0/0
2 0 0 ACCEPT 0 -- * * 0.0.0.0/0 0.0.0.0/0 match-set docker-ext-bridges-v4 dst ctstate RELATED,ESTABLISHED
3 0 0 DOCKER-ISOLATION-STAGE-1 0 -- * * 0.0.0.0/0 0.0.0.0/0
4 0 0 DOCKER 0 -- * * 0.0.0.0/0 0.0.0.0/0 match-set docker-ext-bridges-v4 dst
5 0 0 ACCEPT 0 -- bridge1 bridge1 0.0.0.0/0 0.0.0.0/0
6 0 0 ACCEPT 0 -- docker0 !docker0 0.0.0.0/0 0.0.0.0/0
7 0 0 ACCEPT 0 -- docker0 docker0 0.0.0.0/0 0.0.0.0/0
@@ -56,10 +56,10 @@ The filter table is updated as follows:
-N DOCKER-ISOLATION-STAGE-2
-N DOCKER-USER
-A FORWARD -j DOCKER-USER
-A FORWARD -m set --match-set docker-ext-bridges-v4 dst -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
-A FORWARD -j DOCKER-ISOLATION-STAGE-1
-A FORWARD -m set --match-set docker-ext-bridges-v4 dst -j DOCKER
-A FORWARD -i bridge1 -o bridge1 -j ACCEPT
-A FORWARD -o docker0 -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
-A FORWARD -o docker0 -j DOCKER
-A FORWARD -i docker0 ! -o docker0 -j ACCEPT
-A FORWARD -i docker0 -o docker0 -j ACCEPT
-A DOCKER ! -i docker0 -o docker0 -j DROP

View File

@@ -16,20 +16,18 @@ The filter table is:
Chain FORWARD (policy ACCEPT 0 packets, 0 bytes)
num pkts bytes target prot opt in out source destination
1 0 0 DOCKER-USER 0 -- * * 0.0.0.0/0 0.0.0.0/0
2 0 0 DOCKER-ISOLATION-STAGE-1 0 -- * * 0.0.0.0/0 0.0.0.0/0
3 0 0 ACCEPT 0 -- * bridge1 0.0.0.0/0 0.0.0.0/0 ctstate RELATED,ESTABLISHED
4 0 0 DOCKER 0 -- * bridge1 0.0.0.0/0 0.0.0.0/0
2 0 0 ACCEPT 0 -- * * 0.0.0.0/0 0.0.0.0/0 match-set docker-ext-bridges-v4 dst ctstate RELATED,ESTABLISHED
3 0 0 DOCKER-ISOLATION-STAGE-1 0 -- * * 0.0.0.0/0 0.0.0.0/0
4 0 0 DOCKER 0 -- * * 0.0.0.0/0 0.0.0.0/0 match-set docker-ext-bridges-v4 dst
5 0 0 ACCEPT 0 -- bridge1 !bridge1 0.0.0.0/0 0.0.0.0/0
6 0 0 ACCEPT 0 -- * docker0 0.0.0.0/0 0.0.0.0/0 ctstate RELATED,ESTABLISHED
7 0 0 DOCKER 0 -- * docker0 0.0.0.0/0 0.0.0.0/0
8 0 0 ACCEPT 0 -- docker0 !docker0 0.0.0.0/0 0.0.0.0/0
9 0 0 ACCEPT 0 -- docker0 docker0 0.0.0.0/0 0.0.0.0/0
10 0 0 DROP 0 -- bridge1 bridge1 0.0.0.0/0 0.0.0.0/0
6 0 0 ACCEPT 0 -- docker0 !docker0 0.0.0.0/0 0.0.0.0/0
7 0 0 ACCEPT 0 -- docker0 docker0 0.0.0.0/0 0.0.0.0/0
8 0 0 DROP 0 -- bridge1 bridge1 0.0.0.0/0 0.0.0.0/0
Chain OUTPUT (policy ACCEPT 0 packets, 0 bytes)
num pkts bytes target prot opt in out source destination
Chain DOCKER (2 references)
Chain DOCKER (1 references)
num pkts bytes target prot opt in out source destination
1 0 0 ACCEPT 6 -- !bridge1 bridge1 0.0.0.0/0 192.0.2.2 tcp dpt:80
2 0 0 DROP 0 -- !docker0 docker0 0.0.0.0/0 0.0.0.0/0
@@ -37,8 +35,8 @@ The filter table is:
Chain DOCKER-ISOLATION-STAGE-1 (1 references)
num pkts bytes target prot opt in out source destination
1 0 0 DOCKER-ISOLATION-STAGE-2 0 -- bridge1 !bridge1 0.0.0.0/0 0.0.0.0/0
2 0 0 DOCKER-ISOLATION-STAGE-2 0 -- docker0 !docker0 0.0.0.0/0 0.0.0.0/0
1 0 0 DOCKER-ISOLATION-STAGE-2 0 -- docker0 !docker0 0.0.0.0/0 0.0.0.0/0
2 0 0 DOCKER-ISOLATION-STAGE-2 0 -- bridge1 !bridge1 0.0.0.0/0 0.0.0.0/0
Chain DOCKER-ISOLATION-STAGE-2 (2 references)
num pkts bytes target prot opt in out source destination
@@ -61,20 +59,18 @@ The filter table is:
-N DOCKER-ISOLATION-STAGE-2
-N DOCKER-USER
-A FORWARD -j DOCKER-USER
-A FORWARD -m set --match-set docker-ext-bridges-v4 dst -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
-A FORWARD -j DOCKER-ISOLATION-STAGE-1
-A FORWARD -o bridge1 -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
-A FORWARD -o bridge1 -j DOCKER
-A FORWARD -m set --match-set docker-ext-bridges-v4 dst -j DOCKER
-A FORWARD -i bridge1 ! -o bridge1 -j ACCEPT
-A FORWARD -o docker0 -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
-A FORWARD -o docker0 -j DOCKER
-A FORWARD -i docker0 ! -o docker0 -j ACCEPT
-A FORWARD -i docker0 -o docker0 -j ACCEPT
-A FORWARD -i bridge1 -o bridge1 -j DROP
-A DOCKER -d 192.0.2.2/32 ! -i bridge1 -o bridge1 -p tcp -m tcp --dport 80 -j ACCEPT
-A DOCKER ! -i docker0 -o docker0 -j DROP
-A DOCKER ! -i bridge1 -o bridge1 -j DROP
-A DOCKER-ISOLATION-STAGE-1 -i bridge1 ! -o bridge1 -j DOCKER-ISOLATION-STAGE-2
-A DOCKER-ISOLATION-STAGE-1 -i docker0 ! -o docker0 -j DOCKER-ISOLATION-STAGE-2
-A DOCKER-ISOLATION-STAGE-1 -i bridge1 ! -o bridge1 -j DOCKER-ISOLATION-STAGE-2
-A DOCKER-ISOLATION-STAGE-2 -o bridge1 -j DROP
-A DOCKER-ISOLATION-STAGE-2 -o docker0 -j DROP
-A DOCKER-USER -j RETURN
@@ -84,7 +80,7 @@ The filter table is:
By comparison with [ICC=true][1]:
- Rule 10 in the FORWARD chain replaces an ACCEPT rule that would have followed rule 5, matching the same packets.
- Rule 8 in the FORWARD chain replaces an ACCEPT rule that would have followed rule 5, matching the same packets.
- Added in [setIcc][2]
[1]: usernet-portmap.md

View File

@@ -19,20 +19,18 @@ The filter table is the same as with the userland proxy enabled.
Chain FORWARD (policy ACCEPT 0 packets, 0 bytes)
num pkts bytes target prot opt in out source destination
1 0 0 DOCKER-USER 0 -- * * 0.0.0.0/0 0.0.0.0/0
2 0 0 DOCKER-ISOLATION-STAGE-1 0 -- * * 0.0.0.0/0 0.0.0.0/0
3 0 0 ACCEPT 0 -- * bridge1 0.0.0.0/0 0.0.0.0/0 ctstate RELATED,ESTABLISHED
4 0 0 DOCKER 0 -- * bridge1 0.0.0.0/0 0.0.0.0/0
2 0 0 ACCEPT 0 -- * * 0.0.0.0/0 0.0.0.0/0 match-set docker-ext-bridges-v4 dst ctstate RELATED,ESTABLISHED
3 0 0 DOCKER-ISOLATION-STAGE-1 0 -- * * 0.0.0.0/0 0.0.0.0/0
4 0 0 DOCKER 0 -- * * 0.0.0.0/0 0.0.0.0/0 match-set docker-ext-bridges-v4 dst
5 0 0 ACCEPT 0 -- bridge1 !bridge1 0.0.0.0/0 0.0.0.0/0
6 0 0 ACCEPT 0 -- bridge1 bridge1 0.0.0.0/0 0.0.0.0/0
7 0 0 ACCEPT 0 -- * docker0 0.0.0.0/0 0.0.0.0/0 ctstate RELATED,ESTABLISHED
8 0 0 DOCKER 0 -- * docker0 0.0.0.0/0 0.0.0.0/0
9 0 0 ACCEPT 0 -- docker0 !docker0 0.0.0.0/0 0.0.0.0/0
10 0 0 ACCEPT 0 -- docker0 docker0 0.0.0.0/0 0.0.0.0/0
7 0 0 ACCEPT 0 -- docker0 !docker0 0.0.0.0/0 0.0.0.0/0
8 0 0 ACCEPT 0 -- docker0 docker0 0.0.0.0/0 0.0.0.0/0
Chain OUTPUT (policy ACCEPT 0 packets, 0 bytes)
num pkts bytes target prot opt in out source destination
Chain DOCKER (2 references)
Chain DOCKER (1 references)
num pkts bytes target prot opt in out source destination
1 0 0 ACCEPT 6 -- !bridge1 bridge1 0.0.0.0/0 192.0.2.2 tcp dpt:80
2 0 0 DROP 0 -- !docker0 docker0 0.0.0.0/0 0.0.0.0/0
@@ -40,8 +38,8 @@ The filter table is the same as with the userland proxy enabled.
Chain DOCKER-ISOLATION-STAGE-1 (1 references)
num pkts bytes target prot opt in out source destination
1 0 0 DOCKER-ISOLATION-STAGE-2 0 -- bridge1 !bridge1 0.0.0.0/0 0.0.0.0/0
2 0 0 DOCKER-ISOLATION-STAGE-2 0 -- docker0 !docker0 0.0.0.0/0 0.0.0.0/0
1 0 0 DOCKER-ISOLATION-STAGE-2 0 -- docker0 !docker0 0.0.0.0/0 0.0.0.0/0
2 0 0 DOCKER-ISOLATION-STAGE-2 0 -- bridge1 !bridge1 0.0.0.0/0 0.0.0.0/0
Chain DOCKER-ISOLATION-STAGE-2 (2 references)
num pkts bytes target prot opt in out source destination
@@ -61,20 +59,18 @@ The filter table is the same as with the userland proxy enabled.
-N DOCKER-ISOLATION-STAGE-2
-N DOCKER-USER
-A FORWARD -j DOCKER-USER
-A FORWARD -m set --match-set docker-ext-bridges-v4 dst -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
-A FORWARD -j DOCKER-ISOLATION-STAGE-1
-A FORWARD -o bridge1 -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
-A FORWARD -o bridge1 -j DOCKER
-A FORWARD -m set --match-set docker-ext-bridges-v4 dst -j DOCKER
-A FORWARD -i bridge1 ! -o bridge1 -j ACCEPT
-A FORWARD -i bridge1 -o bridge1 -j ACCEPT
-A FORWARD -o docker0 -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
-A FORWARD -o docker0 -j DOCKER
-A FORWARD -i docker0 ! -o docker0 -j ACCEPT
-A FORWARD -i docker0 -o docker0 -j ACCEPT
-A DOCKER -d 192.0.2.2/32 ! -i bridge1 -o bridge1 -p tcp -m tcp --dport 80 -j ACCEPT
-A DOCKER ! -i docker0 -o docker0 -j DROP
-A DOCKER ! -i bridge1 -o bridge1 -j DROP
-A DOCKER-ISOLATION-STAGE-1 -i bridge1 ! -o bridge1 -j DOCKER-ISOLATION-STAGE-2
-A DOCKER-ISOLATION-STAGE-1 -i docker0 ! -o docker0 -j DOCKER-ISOLATION-STAGE-2
-A DOCKER-ISOLATION-STAGE-1 -i bridge1 ! -o bridge1 -j DOCKER-ISOLATION-STAGE-2
-A DOCKER-ISOLATION-STAGE-2 -o bridge1 -j DROP
-A DOCKER-ISOLATION-STAGE-2 -o docker0 -j DROP
-A DOCKER-USER -j RETURN

View File

@@ -8,15 +8,7 @@ Running the daemon with the userland proxy disabled then, as before, adding a ne
--subnet 192.0.2.0/24 --gateway 192.0.2.1 bridge1
docker run --network bridge1 -p 8080:80 --name c1 busybox
The filter table is largely the same as with the userland proxy enabled.
_Note that this means inter-network communication is disabled as-normal so,
although published ports will be directly accessible from a remote host
they are not accessible from containers in neighbouring docker networks
on the same host._
<details>
<summary>Filter table</summary>
The filter table is:
Chain INPUT (policy ACCEPT 0 packets, 0 bytes)
num pkts bytes target prot opt in out source destination
@@ -24,20 +16,18 @@ on the same host._
Chain FORWARD (policy ACCEPT 0 packets, 0 bytes)
num pkts bytes target prot opt in out source destination
1 0 0 DOCKER-USER 0 -- * * 0.0.0.0/0 0.0.0.0/0
2 0 0 DOCKER-ISOLATION-STAGE-1 0 -- * * 0.0.0.0/0 0.0.0.0/0
3 0 0 ACCEPT 0 -- * bridge1 0.0.0.0/0 0.0.0.0/0 ctstate RELATED,ESTABLISHED
4 0 0 DOCKER 0 -- * bridge1 0.0.0.0/0 0.0.0.0/0
2 0 0 ACCEPT 0 -- * * 0.0.0.0/0 0.0.0.0/0 match-set docker-ext-bridges-v4 dst ctstate RELATED,ESTABLISHED
3 0 0 DOCKER-ISOLATION-STAGE-1 0 -- * * 0.0.0.0/0 0.0.0.0/0
4 0 0 DOCKER 0 -- * * 0.0.0.0/0 0.0.0.0/0 match-set docker-ext-bridges-v4 dst
5 0 0 ACCEPT 0 -- bridge1 !bridge1 0.0.0.0/0 0.0.0.0/0
6 0 0 ACCEPT 0 -- bridge1 bridge1 0.0.0.0/0 0.0.0.0/0
7 0 0 ACCEPT 0 -- * docker0 0.0.0.0/0 0.0.0.0/0 ctstate RELATED,ESTABLISHED
8 0 0 DOCKER 0 -- * docker0 0.0.0.0/0 0.0.0.0/0
9 0 0 ACCEPT 0 -- docker0 !docker0 0.0.0.0/0 0.0.0.0/0
10 0 0 ACCEPT 0 -- docker0 docker0 0.0.0.0/0 0.0.0.0/0
7 0 0 ACCEPT 0 -- docker0 !docker0 0.0.0.0/0 0.0.0.0/0
8 0 0 ACCEPT 0 -- docker0 docker0 0.0.0.0/0 0.0.0.0/0
Chain OUTPUT (policy ACCEPT 0 packets, 0 bytes)
num pkts bytes target prot opt in out source destination
Chain DOCKER (2 references)
Chain DOCKER (1 references)
num pkts bytes target prot opt in out source destination
1 0 0 ACCEPT 6 -- !bridge1 bridge1 0.0.0.0/0 192.0.2.2 tcp dpt:80
2 0 0 DROP 0 -- !docker0 docker0 0.0.0.0/0 0.0.0.0/0
@@ -46,8 +36,10 @@ on the same host._
Chain DOCKER-ISOLATION-STAGE-1 (1 references)
num pkts bytes target prot opt in out source destination
1 0 0 DOCKER-ISOLATION-STAGE-2 0 -- bridge1 !bridge1 0.0.0.0/0 0.0.0.0/0
2 0 0 DOCKER-ISOLATION-STAGE-2 0 -- docker0 !docker0 0.0.0.0/0 0.0.0.0/0
1 0 0 ACCEPT 0 -- bridge1 * 0.0.0.0/0 0.0.0.0/0 ctstate RELATED,ESTABLISHED
2 0 0 RETURN 0 -- * bridge1 0.0.0.0/0 0.0.0.0/0
3 0 0 DOCKER-ISOLATION-STAGE-2 0 -- docker0 !docker0 0.0.0.0/0 0.0.0.0/0
4 0 0 DOCKER-ISOLATION-STAGE-2 0 -- bridge1 !bridge1 0.0.0.0/0 0.0.0.0/0
Chain DOCKER-ISOLATION-STAGE-2 (2 references)
num pkts bytes target prot opt in out source destination
@@ -59,6 +51,9 @@ on the same host._
1 0 0 RETURN 0 -- * * 0.0.0.0/0 0.0.0.0/0
<details>
<summary>iptables commands</summary>
-P INPUT ACCEPT
-P FORWARD ACCEPT
-P OUTPUT ACCEPT
@@ -67,21 +62,21 @@ on the same host._
-N DOCKER-ISOLATION-STAGE-2
-N DOCKER-USER
-A FORWARD -j DOCKER-USER
-A FORWARD -m set --match-set docker-ext-bridges-v4 dst -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
-A FORWARD -j DOCKER-ISOLATION-STAGE-1
-A FORWARD -o bridge1 -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
-A FORWARD -o bridge1 -j DOCKER
-A FORWARD -m set --match-set docker-ext-bridges-v4 dst -j DOCKER
-A FORWARD -i bridge1 ! -o bridge1 -j ACCEPT
-A FORWARD -i bridge1 -o bridge1 -j ACCEPT
-A FORWARD -o docker0 -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
-A FORWARD -o docker0 -j DOCKER
-A FORWARD -i docker0 ! -o docker0 -j ACCEPT
-A FORWARD -i docker0 -o docker0 -j ACCEPT
-A DOCKER -d 192.0.2.2/32 ! -i bridge1 -o bridge1 -p tcp -m tcp --dport 80 -j ACCEPT
-A DOCKER ! -i docker0 -o docker0 -j DROP
-A DOCKER -o bridge1 -p icmp -j ACCEPT
-A DOCKER ! -i bridge1 -o bridge1 -j DROP
-A DOCKER-ISOLATION-STAGE-1 -i bridge1 ! -o bridge1 -j DOCKER-ISOLATION-STAGE-2
-A DOCKER-ISOLATION-STAGE-1 -i bridge1 -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
-A DOCKER-ISOLATION-STAGE-1 -o bridge1 -j RETURN
-A DOCKER-ISOLATION-STAGE-1 -i docker0 ! -o docker0 -j DOCKER-ISOLATION-STAGE-2
-A DOCKER-ISOLATION-STAGE-1 -i bridge1 ! -o bridge1 -j DOCKER-ISOLATION-STAGE-2
-A DOCKER-ISOLATION-STAGE-2 -o bridge1 -j DROP
-A DOCKER-ISOLATION-STAGE-2 -o docker0 -j DROP
-A DOCKER-USER -j RETURN
@@ -89,17 +84,25 @@ on the same host._
</details>
However, a rule is added by [setICMP][5] to the DOCKER chain (shown below) to
allow ICMP. The equivalent IPv6 rule uses `-p icmpv6` rather than `-p icmp`,
so *ALL* ICMP message types are allowed.
Compared to the equivalent [nat mode network][1]:
_The ACCEPT rule as shown by `iptables -L` looks alarming until you spot that it's
for `prot 1`._
Because the ICMP rule (rule 3) is per-network, it is appended to the chain along
with the default-DROP rule (rule 4). So, it is likely to be separated from
per-port/protocol ACCEPT rules for published ports on the same network. But it
will always appear before the default-DROP.
- In DOCKER-ISOLATION-STAGE-1:
- Rule 1 accepts outgoing packets related to established connections. This
is for responses to containers on NAT networks that would not normally
accept packets from another network, and may have port/protocol filtering
rules in place that would otherwise drop these responses.
- Rule 2 skips the jump to DOCKER-ISOLATION-STAGE-2 for any packet routed
to the routed-mode network. So, it will accept packets from other networks,
if they make it through the port/protocol filtering rules in the DOCKER
chain.
- In the DOCKER chain:
- A rule is added by [setICMP][5] to allow ICMP.
*ALL* ICMP message types are allowed.
The equivalent IPv6 rule uses `-p icmpv6` rather than `-p icmp`.
- Because the ICMP rule (rule 3) is per-network, it is appended to the chain along
with the default-DROP rule (rule 4). So, it is likely to be separated from
per-port/protocol ACCEPT rules for published ports on the same network. But it
will always appear before the default-DROP.
_[RFC 4890 section 4.3][6] makes recommendations for filtering ICMPv6. These
have been considered, but the host firewall is not a network boundary in the
@@ -107,7 +110,10 @@ sense used by the RFC. So, Node Information and Router Renumbering messages are
not discarded, and experimental/unused types are allowed because they may be
needed._
Chain DOCKER (2 references)
The ICMP rule, as shown by `iptables -L`, looks alarming until you spot that it's
for `prot 1`:
Chain DOCKER (1 references)
num pkts bytes target prot opt in out source destination
1 0 0 ACCEPT 6 -- !bridge1 bridge1 0.0.0.0/0 192.0.2.2 tcp dpt:80
2 0 0 DROP 0 -- !docker0 docker0 0.0.0.0/0 0.0.0.0/0
@@ -141,7 +147,8 @@ The nat table is:
Chain DOCKER (2 references)
num pkts bytes target prot opt in out source destination
1 0 0 RETURN 0 -- docker0 * 0.0.0.0/0 0.0.0.0/0
1 0 0 RETURN 0 -- bridge1 * 0.0.0.0/0 0.0.0.0/0
2 0 0 RETURN 0 -- docker0 * 0.0.0.0/0 0.0.0.0/0
<details>
@@ -155,6 +162,7 @@ The nat table is:
-A PREROUTING -m addrtype --dst-type LOCAL -j DOCKER
-A OUTPUT ! -d 127.0.0.0/8 -m addrtype --dst-type LOCAL -j DOCKER
-A POSTROUTING -s 172.17.0.0/16 ! -o docker0 -j MASQUERADE
-A DOCKER -i bridge1 -j RETURN
-A DOCKER -i docker0 -j RETURN

View File

@@ -15,20 +15,18 @@ The filter table is updated as follows:
Chain FORWARD (policy ACCEPT 0 packets, 0 bytes)
num pkts bytes target prot opt in out source destination
1 0 0 DOCKER-USER 0 -- * * 0.0.0.0/0 0.0.0.0/0
2 0 0 DOCKER-ISOLATION-STAGE-1 0 -- * * 0.0.0.0/0 0.0.0.0/0
3 0 0 ACCEPT 0 -- * bridge1 0.0.0.0/0 0.0.0.0/0 ctstate RELATED,ESTABLISHED
4 0 0 DOCKER 0 -- * bridge1 0.0.0.0/0 0.0.0.0/0
2 0 0 ACCEPT 0 -- * * 0.0.0.0/0 0.0.0.0/0 match-set docker-ext-bridges-v4 dst ctstate RELATED,ESTABLISHED
3 0 0 DOCKER-ISOLATION-STAGE-1 0 -- * * 0.0.0.0/0 0.0.0.0/0
4 0 0 DOCKER 0 -- * * 0.0.0.0/0 0.0.0.0/0 match-set docker-ext-bridges-v4 dst
5 0 0 ACCEPT 0 -- bridge1 !bridge1 0.0.0.0/0 0.0.0.0/0
6 0 0 ACCEPT 0 -- bridge1 bridge1 0.0.0.0/0 0.0.0.0/0
7 0 0 ACCEPT 0 -- * docker0 0.0.0.0/0 0.0.0.0/0 ctstate RELATED,ESTABLISHED
8 0 0 DOCKER 0 -- * docker0 0.0.0.0/0 0.0.0.0/0
9 0 0 ACCEPT 0 -- docker0 !docker0 0.0.0.0/0 0.0.0.0/0
10 0 0 ACCEPT 0 -- docker0 docker0 0.0.0.0/0 0.0.0.0/0
7 0 0 ACCEPT 0 -- docker0 !docker0 0.0.0.0/0 0.0.0.0/0
8 0 0 ACCEPT 0 -- docker0 docker0 0.0.0.0/0 0.0.0.0/0
Chain OUTPUT (policy ACCEPT 0 packets, 0 bytes)
num pkts bytes target prot opt in out source destination
Chain DOCKER (2 references)
Chain DOCKER (1 references)
num pkts bytes target prot opt in out source destination
1 0 0 ACCEPT 6 -- !bridge1 bridge1 0.0.0.0/0 192.0.2.2 tcp dpt:80
2 0 0 DROP 0 -- !docker0 docker0 0.0.0.0/0 0.0.0.0/0
@@ -36,8 +34,8 @@ The filter table is updated as follows:
Chain DOCKER-ISOLATION-STAGE-1 (1 references)
num pkts bytes target prot opt in out source destination
1 0 0 DOCKER-ISOLATION-STAGE-2 0 -- bridge1 !bridge1 0.0.0.0/0 0.0.0.0/0
2 0 0 DOCKER-ISOLATION-STAGE-2 0 -- docker0 !docker0 0.0.0.0/0 0.0.0.0/0
1 0 0 DOCKER-ISOLATION-STAGE-2 0 -- docker0 !docker0 0.0.0.0/0 0.0.0.0/0
2 0 0 DOCKER-ISOLATION-STAGE-2 0 -- bridge1 !bridge1 0.0.0.0/0 0.0.0.0/0
Chain DOCKER-ISOLATION-STAGE-2 (2 references)
num pkts bytes target prot opt in out source destination
@@ -60,20 +58,18 @@ The filter table is updated as follows:
-N DOCKER-ISOLATION-STAGE-2
-N DOCKER-USER
-A FORWARD -j DOCKER-USER
-A FORWARD -m set --match-set docker-ext-bridges-v4 dst -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
-A FORWARD -j DOCKER-ISOLATION-STAGE-1
-A FORWARD -o bridge1 -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
-A FORWARD -o bridge1 -j DOCKER
-A FORWARD -m set --match-set docker-ext-bridges-v4 dst -j DOCKER
-A FORWARD -i bridge1 ! -o bridge1 -j ACCEPT
-A FORWARD -i bridge1 -o bridge1 -j ACCEPT
-A FORWARD -o docker0 -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
-A FORWARD -o docker0 -j DOCKER
-A FORWARD -i docker0 ! -o docker0 -j ACCEPT
-A FORWARD -i docker0 -o docker0 -j ACCEPT
-A DOCKER -d 192.0.2.2/32 ! -i bridge1 -o bridge1 -p tcp -m tcp --dport 80 -j ACCEPT
-A DOCKER ! -i docker0 -o docker0 -j DROP
-A DOCKER ! -i bridge1 -o bridge1 -j DROP
-A DOCKER-ISOLATION-STAGE-1 -i bridge1 ! -o bridge1 -j DOCKER-ISOLATION-STAGE-2
-A DOCKER-ISOLATION-STAGE-1 -i docker0 ! -o docker0 -j DOCKER-ISOLATION-STAGE-2
-A DOCKER-ISOLATION-STAGE-1 -i bridge1 ! -o bridge1 -j DOCKER-ISOLATION-STAGE-2
-A DOCKER-ISOLATION-STAGE-2 -o bridge1 -j DROP
-A DOCKER-ISOLATION-STAGE-2 -o docker0 -j DROP
-A DOCKER-USER -j RETURN
@@ -83,9 +79,9 @@ The filter table is updated as follows:
Note that:
- In the FORWARD chain, rules 3-6 for the new network have been inserted at
- In the FORWARD chain, rules 5-6 for the new network have been inserted at
the top of the chain, pushing the equivalent docker0 rules down to positions
7-10. (Rules 3-6 were inserted at the top of the chain, then rules 1-2 were
7-8. (Rules 5-6 were inserted at the top of the chain, then rules 1-4 were
shuffled back to the top by deleting/recreating, as described above.)
- In the DOCKER-ISOLATION chains, rules equivalent to the docker0 rules have
also been inserted for the new bridge.

View File

@@ -31,17 +31,19 @@ The FORWARD chain rules are numbered in the output above, they are:
Docker won't add rules to the DOCKER-USER chain, it's only for user-defined rules.
It's (mostly) kept at the top of the chain by deleting it and re-creating it after each
new network is created, while traffic may be running for other networks.
2. Unconditional jump to DOCKER-ISOLATION-STAGE-1.
Set up during network creation by [setupIPTables][11], which ensures it appears
2. Early ACCEPT for any RELATED,ESTABLISHED traffic to a docker bridge. This rule
matches against an `ipset` called `docker-ext-bridges-v4` (`v6` for IPv6). The
set contains the CIDR address of each non-internal docker bridge network, and it is updated as networks
are created and deleted.
So, this rule could be set up during bridge driver initialisation. But, it is
currently set up when a network is created, in [setupIPTables][11].
3. Unconditional jump to DOCKER-ISOLATION-STAGE-1.
Set up during network creation by [setupIPTables][12], which ensures it appears
after the jump to DOCKER-USER (by deleting it and re-creating, while traffic
may be running for other networks).
3. ACCEPT RELATED,ESTABLISHED packets into a specific bridge network.
Allows responses to outgoing requests, and continuation of incoming requests,
without needing to process any further rules.
This rule is also added during network creation, but the code to do it
is in libnetwork, [ProgramChain][12].
4. Jump to DOCKER, for any packet destined for a bridge network. Added when
the network is created, in [ProgramChain][13] ("filterChain" is the DOCKER chain).
4. Jump to DOCKER, for any packet destined for any bridge network, identified by
matching against the `docker-ext-bridges-v[46]` set. Added when the network is
created, in [setupIPTables][13].
The DOCKER chain implements per-port/protocol filtering for each container.
5. ACCEPT any packet leaving a network, also set up when the network is created, in
[setupIPTablesInternal][14].
@@ -50,9 +52,9 @@ The FORWARD chain rules are numbered in the output above, they are:
[setIcc][15].
[10]: https://github.com/moby/moby/blob/e05848c0025b67a16aaafa8cdff95d5e2c064105/libnetwork/firewall_linux.go#L50
[11]: https://github.com/moby/moby/blob/333cfa640239153477bf635a8131734d0e9d099d/libnetwork/drivers/bridge/setup_ip_tables_linux.go#L201
[12]: https://github.com/moby/moby/blob/e05848c0025b67a16aaafa8cdff95d5e2c064105/libnetwork/iptables/iptables.go#L270
[13]: https://github.com/moby/moby/blob/e05848c0025b67a16aaafa8cdff95d5e2c064105/libnetwork/iptables/iptables.go#L251-L255
[11]: https://github.com/robmry/moby/blob/52c89d467fc5326149e4bbb8903d23589b66ff0d/libnetwork/drivers/bridge/setup_ip_tables_linux.go#L230-L232
[12]: https://github.com/robmry/moby/blob/52c89d467fc5326149e4bbb8903d23589b66ff0d/libnetwork/drivers/bridge/setup_ip_tables_linux.go#L227-L229
[13]: https://github.com/robmry/moby/blob/52c89d467fc5326149e4bbb8903d23589b66ff0d/libnetwork/drivers/bridge/setup_ip_tables_linux.go#L223-L226
[14]: https://github.com/moby/moby/blob/333cfa640239153477bf635a8131734d0e9d099d/libnetwork/drivers/bridge/setup_ip_tables_linux.go#L264
[15]: https://github.com/moby/moby/blob/333cfa640239153477bf635a8131734d0e9d099d/libnetwork/drivers/bridge/setup_ip_tables_linux.go#L343

View File

@@ -21,7 +21,7 @@ The filter table is:
By comparison with [ICC=true][1]:
- Rule 10 in the FORWARD chain replaces an ACCEPT rule that would have followed rule 5, matching the same packets.
- Rule 8 in the FORWARD chain replaces an ACCEPT rule that would have followed rule 5, matching the same packets.
- Added in [setIcc][2]
[1]: usernet-portmap.md

View File

@@ -8,33 +8,36 @@ Running the daemon with the userland proxy disabled then, as before, adding a ne
--subnet 192.0.2.0/24 --gateway 192.0.2.1 bridge1
docker run --network bridge1 -p 8080:80 --name c1 busybox
The filter table is largely the same as with the userland proxy enabled.
_Note that this means inter-network communication is disabled as-normal so,
although published ports will be directly accessible from a remote host
they are not accessible from containers in neighbouring docker networks
on the same host._
<details>
<summary>Filter table</summary>
The filter table is:
{{index . "LFilter4"}}
<details>
<summary>iptables commands</summary>
{{index . "SFilter4"}}
</details>
However, a rule is added by [setICMP][5] to the DOCKER chain (shown below) to
allow ICMP. The equivalent IPv6 rule uses `-p icmpv6` rather than `-p icmp`,
so *ALL* ICMP message types are allowed.
Compared to the equivalent [nat mode network][1]:
_The ACCEPT rule as shown by `iptables -L` looks alarming until you spot that it's
for `prot 1`._
Because the ICMP rule (rule 3) is per-network, it is appended to the chain along
with the default-DROP rule (rule 4). So, it is likely to be separated from
per-port/protocol ACCEPT rules for published ports on the same network. But it
will always appear before the default-DROP.
- In DOCKER-ISOLATION-STAGE-1:
- Rule 1 accepts outgoing packets related to established connections. This
is for responses to containers on NAT networks that would not normally
accept packets from another network, and may have port/protocol filtering
rules in place that would otherwise drop these responses.
- Rule 2 skips the jump to DOCKER-ISOLATION-STAGE-2 for any packet routed
to the routed-mode network. So, it will accept packets from other networks,
if they make it through the port/protocol filtering rules in the DOCKER
chain.
- In the DOCKER chain:
- A rule is added by [setICMP][5] to allow ICMP.
*ALL* ICMP message types are allowed.
The equivalent IPv6 rule uses `-p icmpv6` rather than `-p icmp`.
- Because the ICMP rule (rule 3) is per-network, it is appended to the chain along
with the default-DROP rule (rule 4). So, it is likely to be separated from
per-port/protocol ACCEPT rules for published ports on the same network. But it
will always appear before the default-DROP.
_[RFC 4890 section 4.3][6] makes recommendations for filtering ICMPv6. These
have been considered, but the host firewall is not a network boundary in the
@@ -42,6 +45,9 @@ sense used by the RFC. So, Node Information and Router Renumbering messages are
not discarded, and experimental/unused types are allowed because they may be
needed._
The ICMP rule, as shown by `iptables -L`, looks alarming until you spot that it's
for `prot 1`:
{{index . "LFilterDocker4"}}
{{index . "SFilterDocker4"}}

View File

@@ -20,9 +20,9 @@ The filter table is updated as follows:
Note that:
- In the FORWARD chain, rules 3-6 for the new network have been inserted at
- In the FORWARD chain, rules 5-6 for the new network have been inserted at
the top of the chain, pushing the equivalent docker0 rules down to positions
7-10. (Rules 3-6 were inserted at the top of the chain, then rules 1-2 were
7-8. (Rules 5-6 were inserted at the top of the chain, then rules 1-4 were
shuffled back to the top by deleting/recreating, as described above.)
- In the DOCKER-ISOLATION chains, rules equivalent to the docker0 rules have
also been inserted for the new bridge.

View File

@@ -27,6 +27,7 @@ import (
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
"golang.org/x/sys/unix"
)
const (
@@ -472,12 +473,12 @@ func (n *bridgeNetwork) isolateNetwork(enable bool) error {
// Install the rules to isolate this network against each of the other networks
if n.driver.config.EnableIPTables {
if err := setINC(iptables.IPv4, thisConfig.BridgeName, enable); err != nil {
if err := setINC(iptables.IPv4, thisConfig.BridgeName, thisConfig.GwModeIPv4, enable); err != nil {
return err
}
}
if n.driver.config.EnableIP6Tables {
if err := setINC(iptables.IPv6, thisConfig.BridgeName, enable); err != nil {
if err := setINC(iptables.IPv6, thisConfig.BridgeName, thisConfig.GwModeIPv6, enable); err != nil {
return err
}
}
@@ -518,6 +519,9 @@ func (d *driver) configure(option map[string]interface{}) error {
if config.EnableIPTables {
removeIPChains(iptables.IPv4)
if err := setupHashNetIpset(ipsetExtBridges4, unix.AF_INET); err != nil {
return err
}
natChain, filterChain, isolationChain1, isolationChain2, err = setupIPChains(config, iptables.IPv4)
if err != nil {
return err
@@ -535,22 +539,27 @@ func (d *driver) configure(option map[string]interface{}) error {
if config.EnableIP6Tables {
removeIPChains(iptables.IPv6)
natChainV6, filterChainV6, isolationChain1V6, isolationChain2V6, err = setupIPChains(config, iptables.IPv6)
if err != nil {
// If the chains couldn't be set up, it's probably because the kernel has no IPv6
// support, or it doesn't have module ip6_tables loaded. It won't be possible to
// create IPv6 networks without enabling ip6_tables in the kernel, or disabling
// ip6tables in the daemon config. But, allow the daemon to start because IPv4
// will work. So, log the problem, and continue.
log.G(context.TODO()).WithError(err).Warn("ip6tables is enabled, but cannot set up ip6tables chains")
if err := setupHashNetIpset(ipsetExtBridges6, unix.AF_INET6); err != nil {
// Continue, IPv4 will work (as below).
log.G(context.TODO()).WithError(err).Warn("ip6tables is enabled, but cannot set up IPv6 ipset")
} else {
// Make sure on firewall reload, first thing being re-played is chains creation
iptables.OnReloaded(func() {
log.G(context.TODO()).Debugf("Recreating ip6tables chains on firewall reload")
if _, _, _, _, err := setupIPChains(config, iptables.IPv6); err != nil {
log.G(context.TODO()).WithError(err).Error("Error reloading ip6tables chains")
}
})
natChainV6, filterChainV6, isolationChain1V6, isolationChain2V6, err = setupIPChains(config, iptables.IPv6)
if err != nil {
// If the chains couldn't be set up, it's probably because the kernel has no IPv6
// support, or it doesn't have module ip6_tables loaded. It won't be possible to
// create IPv6 networks without enabling ip6_tables in the kernel, or disabling
// ip6tables in the daemon config. But, allow the daemon to start because IPv4
// will work. So, log the problem, and continue.
log.G(context.TODO()).WithError(err).Warn("ip6tables is enabled, but cannot set up ip6tables chains")
} else {
// Make sure on firewall reload, first thing being re-played is chains creation
iptables.OnReloaded(func() {
log.G(context.TODO()).Debugf("Recreating ip6tables chains on firewall reload")
if _, _, _, _, err := setupIPChains(config, iptables.IPv6); err != nil {
log.G(context.TODO()).WithError(err).Error("Error reloading ip6tables chains")
}
})
}
}
}
@@ -578,6 +587,19 @@ func (d *driver) configure(option map[string]interface{}) error {
return d.initStore(option)
}
// setupHashNetIpset creates the "hash:net" ipset called name for the given
// address family (callers pass unix.AF_INET or unix.AF_INET6), then flushes it
// so that the set starts out with no entries.
//
// The set is created with the Replace option, presumably so that a set left
// over from a previous run is not treated as an error - TODO confirm against
// the netlink package documentation.
//
// The first error returned by netlink is passed back to the caller unchanged.
func setupHashNetIpset(name string, family uint8) error {
	opts := netlink.IpsetCreateOptions{
		Replace: true,
		Family:  family,
	}
	if err := netlink.IpsetCreate(name, "hash:net", opts); err != nil {
		return err
	}
	// Empty the set, bridge subnets are added to it as networks are set up.
	return netlink.IpsetFlush(name)
}
func (d *driver) getNetwork(id string) (*bridgeNetwork, error) {
d.Lock()
defer d.Unlock()

View File

@@ -32,6 +32,11 @@ const (
IsolationChain1 = "DOCKER-ISOLATION-STAGE-1"
IsolationChain2 = "DOCKER-ISOLATION-STAGE-2"
// ipset names for IPv4 and IPv6 bridge subnets that don't belong
// to --internal networks.
ipsetExtBridges4 = "docker-ext-bridges-v4"
ipsetExtBridges6 = "docker-ext-bridges-v6"
)
// Path to the executable installed in Linux under WSL2 that reports on
@@ -154,6 +159,10 @@ func (n *bridgeNetwork) setupIPTables(ipVersion iptables.IPVersion, maskedAddr *
hairpinMode := !driverConfig.EnableUserlandProxy
iptable := iptables.GetIptable(ipVersion)
ipsetName := ipsetExtBridges4
if ipVersion == iptables.IPv6 {
ipsetName = ipsetExtBridges6
}
if config.Internal {
if err = setupInternalNetworkRules(config.BridgeName, maskedAddr, config.EnableICC, true); err != nil {
@@ -194,12 +203,40 @@ func (n *bridgeNetwork) setupIPTables(ipVersion iptables.IPVersion, maskedAddr *
n.registerIptCleanFunc(func() error {
return defaultDrop(ipVersion, config.BridgeName, false)
})
cidr, _ := maskedAddr.Mask.Size()
if cidr == 0 {
return fmt.Errorf("no CIDR for bridge %s addr %s", config.BridgeName, maskedAddr)
}
ipsetEntry := &netlink.IPSetEntry{
IP: maskedAddr.IP,
CIDR: uint8(cidr),
}
if err := netlink.IpsetAdd(ipsetName, ipsetEntry); err != nil {
return fmt.Errorf("failed to add bridge %s (%s) to ipset: %w",
config.BridgeName, maskedAddr, err)
}
n.registerIptCleanFunc(func() error {
return netlink.IpsetDel(ipsetName, ipsetEntry)
})
}
d.Lock()
err = iptable.EnsureJumpRule("FORWARD", IsolationChain1)
d.Unlock()
return err
defer d.Unlock()
if err := iptable.EnsureJumpRule("FORWARD", DockerChain,
"-m", "set", "--match-set", ipsetName, "dst"); err != nil {
return err
}
if err := iptable.EnsureJumpRule("FORWARD", IsolationChain1); err != nil {
return err
}
if err := iptable.EnsureJumpRule("FORWARD", "ACCEPT",
"-m", "set", "--match-set", ipsetName, "dst",
"-m", "conntrack", "--ctstate", "RELATED,ESTABLISHED",
); err != nil {
return err
}
return nil
}
func setICMP(ipv iptables.IPVersion, bridgeName string, enable bool) error {
@@ -313,10 +350,10 @@ func setupIPTablesInternal(ipVer iptables.IPVersion, config *networkConfiguratio
if err := programChainRule(natRule, "NAT", enable); err != nil {
return err
}
if !hairpin {
if err := programChainRule(skipDNAT, "SKIP DNAT", enable); err != nil {
return err
}
}
if !nat || (config.EnableIPMasquerade && !hairpin) {
if err := programChainRule(skipDNAT, "SKIP DNAT", enable); err != nil {
return err
}
}
@@ -399,16 +436,43 @@ func setIcc(version iptables.IPVersion, bridgeIface string, iccEnable, insert bo
// Install rules only if they aren't present, remove only if they are.
// If this method returns an error, it doesn't roll back any rules it has added.
// No error is returned if rules cannot be removed (errors are just logged).
func setINC(version iptables.IPVersion, iface string, enable bool) (retErr error) {
func setINC(version iptables.IPVersion, iface string, gwm gwMode, enable bool) (retErr error) {
iptable := iptables.GetIptable(version)
action := iptables.Insert
actionI, actionA := iptables.Insert, iptables.Append
actionMsg := "add"
if !enable {
action = iptables.Delete
actionI, actionA = iptables.Delete, iptables.Delete
actionMsg = "remove"
}
if err := iptable.ProgramRule(iptables.Filter, IsolationChain1, action, []string{
if gwm.routed() {
// Anything is allowed into a routed network at this stage, so RETURN. Port
// filtering rules in the DOCKER chain will drop anything that's not destined
// for an open port.
if err := iptable.ProgramRule(iptables.Filter, IsolationChain1, actionI, []string{
"-o", iface,
"-j", "RETURN",
}); err != nil {
log.G(context.TODO()).WithError(err).Warnf("Failed to %s inter-network communication rule", actionMsg)
if enable {
return fmt.Errorf("%s inter-network communication rule: %w", actionMsg, err)
}
}
// Allow responses from the routed network into whichever network made the request.
if err := iptable.ProgramRule(iptables.Filter, IsolationChain1, actionI, []string{
"-i", iface,
"-m", "conntrack", "--ctstate", "RELATED,ESTABLISHED",
"-j", "ACCEPT",
}); err != nil {
log.G(context.TODO()).WithError(err).Warnf("Failed to %s inter-network communication rule", actionMsg)
if enable {
return fmt.Errorf("%s inter-network communication rule: %w", actionMsg, err)
}
}
}
if err := iptable.ProgramRule(iptables.Filter, IsolationChain1, actionA, []string{
"-i", iface,
"!", "-o", iface,
"-j", IsolationChain2,
@@ -419,7 +483,7 @@ func setINC(version iptables.IPVersion, iface string, enable bool) (retErr error
}
}
if err := iptable.ProgramRule(iptables.Filter, IsolationChain2, action, []string{
if err := iptable.ProgramRule(iptables.Filter, IsolationChain2, actionI, []string{
"-o", iface,
"-j", "DROP",
}); err != nil {

View File

@@ -12,6 +12,7 @@ import (
"github.com/docker/docker/libnetwork/iptables"
"github.com/docker/docker/libnetwork/netlabel"
"github.com/vishvananda/netlink"
"golang.org/x/sys/unix"
"gotest.tools/v3/assert"
is "gotest.tools/v3/assert/cmp"
)
@@ -90,15 +91,22 @@ func TestSetupIPChains(t *testing.T) {
createTestBridge(config, br, t)
assertBridgeConfig(config, br, d, t)
// The purpose of this test is unclear but, now there's an ipset of bridges, it's
// an error to create a bridge that's already been created. That can't happen in
// normal running. So, just flush the set between each step.
assert.NilError(t, netlink.IpsetFlush(ipsetExtBridges4))
config.EnableIPMasquerade = true
assertBridgeConfig(config, br, d, t)
assert.NilError(t, netlink.IpsetFlush(ipsetExtBridges4))
config.EnableICC = true
assertBridgeConfig(config, br, d, t)
assert.NilError(t, netlink.IpsetFlush(ipsetExtBridges4))
config.EnableIPMasquerade = false
assertBridgeConfig(config, br, d, t)
assert.NilError(t, netlink.IpsetFlush(ipsetExtBridges4))
}
func getBasicTestConfig() *networkConfiguration {
@@ -148,15 +156,16 @@ func assertIPTableChainProgramming(rule iptRule, descr string, t *testing.T) {
func assertChainConfig(d *driver, t *testing.T) {
var err error
err = setupHashNetIpset(ipsetExtBridges4, unix.AF_INET)
assert.NilError(t, err)
d.natChain, d.filterChain, d.isolationChain1, d.isolationChain2, err = setupIPChains(d.config, iptables.IPv4)
if err != nil {
t.Fatal(err)
}
assert.NilError(t, err)
if d.config.EnableIP6Tables {
err = setupHashNetIpset(ipsetExtBridges6, unix.AF_INET6)
assert.NilError(t, err)
d.natChainV6, d.filterChainV6, d.isolationChain1V6, d.isolationChain2V6, err = setupIPChains(d.config, iptables.IPv6)
if err != nil {
t.Fatal(err)
}
assert.NilError(t, err)
}
}

View File

@@ -38,12 +38,15 @@ func TestReloaded(t *testing.T) {
if err != nil {
t.Fatal(err)
}
defer fwdChain.Remove()
err = iptable.ProgramChain(fwdChain, bridgeName, false, true)
// This jump from the FORWARD chain prevents FWD from being deleted by
// "iptables -X", called from fwdChain.Remove().
err = iptable.EnsureJumpRule("FORWARD", "FWD")
if err != nil {
t.Fatal(err)
}
defer fwdChain.Remove()
defer iptable.Raw("-D", "FORWARD", "-j", "FWD")
// copy-pasted from iptables_test:TestLink
ip1 := net.ParseIP("192.168.1.1")

View File

@@ -248,18 +248,14 @@ func (iptable IPTable) ProgramChain(c *ChainInfo, bridgeName string, hairpinMode
if bridgeName == "" {
return fmt.Errorf("could not program chain %s/%s, missing bridge name", c.Table, c.Name)
}
// Delete legacy per-bridge jump to the DOCKER chain from the FORWARD chain, if it exists.
// These rules have been replaced by an ipset-matching rule.
link := []string{
"-o", bridgeName,
"-j", c.Name,
}
if !iptable.Exists(Filter, "FORWARD", link...) && enable {
insert := append([]string{string(Insert), "FORWARD"}, link...)
if output, err := iptable.Raw(insert...); err != nil {
return err
} else if len(output) != 0 {
return fmt.Errorf("could not create linking rule to %s/%s: %s", c.Table, c.Name, output)
}
} else if iptable.Exists(Filter, "FORWARD", link...) && !enable {
if iptable.Exists(Filter, "FORWARD", link...) {
del := append([]string{string(Delete), "FORWARD"}, link...)
if output, err := iptable.Raw(del...); err != nil {
return err
@@ -267,20 +263,16 @@ func (iptable IPTable) ProgramChain(c *ChainInfo, bridgeName string, hairpinMode
return fmt.Errorf("could not delete linking rule from %s/%s: %s", c.Table, c.Name, output)
}
}
// Delete legacy per-bridge related/established rule if it exists. These rules
// have been replaced by an ipset-matching rule.
establish := []string{
"-o", bridgeName,
"-m", "conntrack",
"--ctstate", "RELATED,ESTABLISHED",
"-j", "ACCEPT",
}
if !iptable.Exists(Filter, "FORWARD", establish...) && enable {
insert := append([]string{string(Insert), "FORWARD"}, establish...)
if output, err := iptable.Raw(insert...); err != nil {
return err
} else if len(output) != 0 {
return fmt.Errorf("could not create establish rule to %s: %s", c.Table, output)
}
} else if iptable.Exists(Filter, "FORWARD", establish...) && !enable {
if iptable.Exists(Filter, "FORWARD", establish...) {
del := append([]string{string(Delete), "FORWARD"}, establish...)
if output, err := iptable.Raw(del...); err != nil {
return err
@@ -534,13 +526,14 @@ func (iptable IPTable) AddReturnRule(chain string) error {
}
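// EnsureJumpRule ensures the rule jumping from fromChain to toChain, with any
// additional match arguments given in rule, is at the top of fromChain. If the
// rule already exists it is deleted, then it is (re-)inserted at the top.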
func (iptable IPTable) EnsureJumpRule(fromChain, toChain string) error {
if iptable.Exists(Filter, fromChain, "-j", toChain) {
if err := iptable.RawCombinedOutput("-D", fromChain, "-j", toChain); err != nil {
func (iptable IPTable) EnsureJumpRule(fromChain, toChain string, rule ...string) error {
rule = append(rule, "-j", toChain)
if iptable.Exists(Filter, fromChain, rule...) {
if err := iptable.RawCombinedOutput(append([]string{"-D", fromChain}, rule...)...); err != nil {
return fmt.Errorf("unable to remove jump to %s rule in %s chain: %v", toChain, fromChain, err)
}
}
if err := iptable.RawCombinedOutput("-I", fromChain, "-j", toChain); err != nil {
if err := iptable.RawCombinedOutput(append([]string{"-I", fromChain}, rule...)...); err != nil {
return fmt.Errorf("unable to insert jump to %s rule in %s chain: %v", toChain, fromChain, err)
}
return nil

View File

@@ -212,19 +212,6 @@ func addSomeRules(c *ChainInfo, ip net.IP, port int, proto, destAddr string, des
func TestCleanup(t *testing.T) {
iptable, _, filterChain := createNewChain(t)
var rules []byte
// Cleanup filter/FORWARD first otherwise output of iptables-save is dirty
link := []string{
"-t", string(filterChain.Table),
string(Delete), "FORWARD",
"-o", bridgeName,
"-j", filterChain.Name,
}
if _, err := iptable.Raw(link...); err != nil {
t.Fatal(err)
}
filterChain.Remove()
err := iptable.RemoveExistingChain(chainName, Nat)
@@ -232,7 +219,7 @@ func TestCleanup(t *testing.T) {
t.Fatal(err)
}
rules, err = exec.Command("iptables-save").Output()
rules, err := exec.Command("iptables-save").Output()
if err != nil {
t.Fatal(err)
}