mirror of
https://github.com/moby/moby.git
synced 2026-01-11 18:51:37 +00:00
iptables: Direct routing DROP rules per-container, not per-port
Commit 27adcd5 ("libnet/d/bridge: drop connections to lo mappings, and
direct remote connections") introduced an iptables rule to drop 'direct'
remote connections made to the container's IP address - for each
published port on the container.
The normal filter-FORWARD rules would then drop packets sent directly to
unpublished ports. This rule was only created along with the rest of port
publishing (when a container's endpoint was selected as its gateway). Until
then, all packets addressed directly to the container's ports were dropped
by the filter-FORWARD rules.
But, the rule doesn't need to be per-port. Just drop packets sent
directly to a container's address unless they originate on the host.
That means fewer rules, and they can be created along with the endpoint (so
directly-routed packets get dropped at the same point whether or not the
endpoint is currently the gateway - very slightly earlier when it's not the
gateway).
Signed-off-by: Rob Murray <rob.murray@docker.com>
This commit is contained in:
@@ -139,8 +139,8 @@ The filter and nat tables are identical to [nat mode][0]:
|
||||
|
||||
Chain PREROUTING (policy ACCEPT 0 packets, 0 bytes)
|
||||
num pkts bytes target prot opt in out source destination
|
||||
1 0 0 DROP 6 -- !lo * 0.0.0.0/0 127.0.0.1 tcp dpt:8080
|
||||
2 0 0 DROP 6 -- !bridge1 * 0.0.0.0/0 192.0.2.2 tcp dpt:80
|
||||
1 0 0 DROP 0 -- !bridge1 * 0.0.0.0/0 192.0.2.2
|
||||
2 0 0 DROP 6 -- !lo * 0.0.0.0/0 127.0.0.1 tcp dpt:8080
|
||||
|
||||
Chain OUTPUT (policy ACCEPT 0 packets, 0 bytes)
|
||||
num pkts bytes target prot opt in out source destination
|
||||
@@ -151,8 +151,8 @@ The filter and nat tables are identical to [nat mode][0]:
|
||||
|
||||
-P PREROUTING ACCEPT
|
||||
-P OUTPUT ACCEPT
|
||||
-A PREROUTING -d 192.0.2.2/32 ! -i bridge1 -j DROP
|
||||
-A PREROUTING -d 127.0.0.1/32 ! -i lo -p tcp -m tcp --dport 8080 -j DROP
|
||||
-A PREROUTING -d 192.0.2.2/32 ! -i bridge1 -p tcp -m tcp --dport 80 -j DROP
|
||||
|
||||
|
||||
</details>
|
||||
|
||||
@@ -163,7 +163,7 @@ And the raw table:
|
||||
|
||||
Chain PREROUTING (policy ACCEPT 0 packets, 0 bytes)
|
||||
num pkts bytes target prot opt in out source destination
|
||||
1 0 0 DROP 6 -- !bridge1 * 0.0.0.0/0 192.0.2.2 tcp dpt:80
|
||||
1 0 0 DROP 0 -- !bridge1 * 0.0.0.0/0 192.0.2.2
|
||||
|
||||
Chain OUTPUT (policy ACCEPT 0 packets, 0 bytes)
|
||||
num pkts bytes target prot opt in out source destination
|
||||
@@ -174,7 +174,7 @@ And the raw table:
|
||||
|
||||
-P PREROUTING ACCEPT
|
||||
-P OUTPUT ACCEPT
|
||||
-A PREROUTING -d 192.0.2.2/32 ! -i bridge1 -p tcp -m tcp --dport 80 -j DROP
|
||||
-A PREROUTING -d 192.0.2.2/32 ! -i bridge1 -j DROP
|
||||
|
||||
|
||||
</details>
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
-P PREROUTING ACCEPT
|
||||
-P OUTPUT ACCEPT
|
||||
-A PREROUTING -d 192.168.0.2/32 ! -i docker0 -j DROP
|
||||
-A PREROUTING -d 127.0.0.1/32 ! -i lo -p tcp -m tcp --dport 8080 -j DROP
|
||||
-A PREROUTING -d 192.168.0.2/32 ! -i docker0 -p tcp -m tcp --dport 80 -j DROP
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
-P PREROUTING ACCEPT
|
||||
-P OUTPUT ACCEPT
|
||||
-A PREROUTING -d fd30:1159:a755::2/128 ! -i docker0 -p tcp -m tcp --dport 80 -j DROP
|
||||
-A PREROUTING -d fd30:1159:a755::2/128 ! -i docker0 -j DROP
|
||||
|
||||
@@ -1174,6 +1174,11 @@ func (d *driver) CreateEndpoint(ctx context.Context, nid, eid string, ifInfo dri
|
||||
}
|
||||
}
|
||||
|
||||
netip4, netip6 := endpoint.netipAddrs()
|
||||
if err := n.iptablesNetwork.AddEndpoint(ctx, netip4, netip6); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Up the host interface after finishing all netlink configuration
|
||||
if err = d.linkUp(ctx, host); err != nil {
|
||||
return fmt.Errorf("could not set link up for host interface %s: %v", hostIfName, err)
|
||||
@@ -1190,6 +1195,18 @@ func (d *driver) CreateEndpoint(ctx context.Context, nid, eid string, ifInfo dri
|
||||
return nil
|
||||
}
|
||||
|
||||
// netipAddrs converts ep.addr and ep.addrv6 from net.IPNet to netip.Addr. If an address
|
||||
// is non-nil, it's assumed to be valid.
|
||||
func (ep *bridgeEndpoint) netipAddrs() (v4, v6 netip.Addr) {
|
||||
if ep.addr != nil {
|
||||
v4, _ = netip.AddrFromSlice(ep.addr.IP)
|
||||
}
|
||||
if ep.addrv6 != nil {
|
||||
v6, _ = netip.AddrFromSlice(ep.addrv6.IP)
|
||||
}
|
||||
return v4, v6
|
||||
}
|
||||
|
||||
// createVeth creates a veth device with one end in the container's network namespace,
|
||||
// if it can get hold of the netns path and open the handles. In that case, it returns
|
||||
// a netlink handle in the container's namespace that must be closed by the caller.
|
||||
@@ -1282,6 +1299,11 @@ func (d *driver) DeleteEndpoint(nid, eid string) error {
|
||||
return endpointNotFoundError(eid)
|
||||
}
|
||||
|
||||
netip4, netip6 := ep.netipAddrs()
|
||||
if err := n.iptablesNetwork.DelEndpoint(context.TODO(), netip4, netip6); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Remove it
|
||||
n.Lock()
|
||||
delete(n.endpoints, eid)
|
||||
|
||||
@@ -87,6 +87,13 @@ func (d *driver) populateEndpoints() error {
|
||||
continue
|
||||
}
|
||||
n.endpoints[ep.id] = ep
|
||||
netip4, netip6 := ep.netipAddrs()
|
||||
if err := n.iptablesNetwork.AddEndpoint(context.TODO(), netip4, netip6); err != nil {
|
||||
log.G(context.TODO()).WithFields(log.Fields{
|
||||
"error": err,
|
||||
"ep.id": ep.id,
|
||||
}).Warn("Failed to restore per-endpoint firewall rules")
|
||||
}
|
||||
n.restorePortAllocations(ep)
|
||||
log.G(context.TODO()).Debugf("Endpoint (%.7s) restored to network (%.7s)", ep.id, ep.nid)
|
||||
}
|
||||
|
||||
64
libnetwork/drivers/bridge/internal/iptabler/endpoint.go
Normal file
64
libnetwork/drivers/bridge/internal/iptabler/endpoint.go
Normal file
@@ -0,0 +1,64 @@
|
||||
//go:build linux
|
||||
|
||||
package iptabler
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/netip"
|
||||
|
||||
"github.com/docker/docker/libnetwork/iptables"
|
||||
)
|
||||
|
||||
func (n *Network) AddEndpoint(ctx context.Context, epIPv4, epIPv6 netip.Addr) error {
|
||||
return n.modEndpoint(ctx, epIPv4, epIPv6, true)
|
||||
}
|
||||
|
||||
func (n *Network) DelEndpoint(ctx context.Context, epIPv4, epIPv6 netip.Addr) error {
|
||||
return n.modEndpoint(ctx, epIPv4, epIPv6, false)
|
||||
}
|
||||
|
||||
func (n *Network) modEndpoint(ctx context.Context, epIPv4, epIPv6 netip.Addr, enable bool) error {
|
||||
if n.ipt.IPv4 && epIPv4.IsValid() {
|
||||
if err := n.filterDirectAccess(ctx, iptables.IPv4, n.Config4, epIPv4, enable); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if n.ipt.IPv6 && epIPv6.IsValid() {
|
||||
if err := n.filterDirectAccess(ctx, iptables.IPv6, n.Config6, epIPv6, enable); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// filterDirectAccess drops packets addressed directly to the container's IP address,
|
||||
// when direct routing is not permitted by network configuration.
|
||||
//
|
||||
// It is a no-op if:
|
||||
// - the network is internal
|
||||
// - gateway mode is "nat-unprotected" or "routed".
|
||||
// - "raw" rules are disabled (possibly because the host doesn't have the necessary
|
||||
// kernel support).
|
||||
//
|
||||
// Packets originating on the bridge's own interface and addressed directly to the
|
||||
// container are allowed - the host always has direct access to its own containers
|
||||
// (it doesn't need to use the port mapped to its own addresses, although it can).
|
||||
func (n *Network) filterDirectAccess(ctx context.Context, ipv iptables.IPVersion, config NetworkConfigFam, epIP netip.Addr, enable bool) error {
|
||||
if n.Internal || config.Unprotected || config.Routed {
|
||||
return nil
|
||||
}
|
||||
// For config that may change between daemon restarts, make sure rules are
|
||||
// removed - if the container was left running when the daemon stopped, and
|
||||
// direct routing has since been disabled, the rules need to be deleted when
|
||||
// cleanup happens on restart. This also means a change in config over a
|
||||
// live-restore restart will take effect.
|
||||
if rawRulesDisabled(ctx) {
|
||||
enable = false
|
||||
}
|
||||
accept := iptables.Rule{IPVer: ipv, Table: iptables.Raw, Chain: "PREROUTING", Args: []string{
|
||||
"-d", epIP.String(),
|
||||
"!", "-i", n.IfName,
|
||||
"-j", "DROP",
|
||||
}}
|
||||
return appendOrDelChainRule(accept, "DIRECT ACCESS FILTERING - DROP", enable)
|
||||
}
|
||||
@@ -51,7 +51,7 @@ func (n *Network) setPerPortIptables(ctx context.Context, b types.PortBinding, e
|
||||
return err
|
||||
}
|
||||
|
||||
if err := n.filterDirectAccess(ctx, b, enable); err != nil {
|
||||
if err := n.dropLegacyFilterDirectAccess(ctx, b); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -203,12 +203,28 @@ func filterPortMappedOnLoopback(ctx context.Context, b types.PortBinding, hostIP
|
||||
return nil
|
||||
}
|
||||
|
||||
// filterDirectAccess adds an iptables rule that drops 'direct' remote
|
||||
// connections made to the container's IP address, when the network gateway
|
||||
// mode is "nat".
|
||||
// dropLegacyFilterDirectAccess deletes a rule that was introduced in 28.0.0 to
|
||||
// drop 'direct' remote connections made to the container's IP address - for
|
||||
// each published port on the container.
|
||||
//
|
||||
// This is a no-op if the gw_mode is "nat-unprotected" or "routed".
|
||||
func (n *Network) filterDirectAccess(ctx context.Context, b types.PortBinding, enable bool) error {
|
||||
// The normal filter-FORWARD rules would then drop packets sent directly to
|
||||
// unpublished ports. This rule was only created along with the rest of port
|
||||
// publishing (when a container's endpoint was selected as its gateway). Until
|
||||
// then, all packets addressed directly to the container's ports were dropped
|
||||
// by the filter-FORWARD rules.
|
||||
//
|
||||
// Since 28.0.2, direct routed packets sent to a container's address are all
|
||||
// dropped in a raw-PREROUTING rule - it doesn't need to be per-port (so, fewer
|
||||
// rules), and it can be created along with the endpoint (so directly-routed
|
||||
// packets are dropped at the same point whether or not the endpoint is currently
|
||||
// the gateway - so, very slightly earlier when it's not the gateway).
|
||||
//
|
||||
// This function was a no-op if the gw_mode was "nat-unprotected" or "routed".
|
||||
// It still is, but it now always deletes the rule if it might have been created
|
||||
// by an older version of the daemon.
|
||||
//
|
||||
// TODO(robmry) - remove this once there's no upgrade path from 28.0.x or 28.1.x.
|
||||
func (n *Network) dropLegacyFilterDirectAccess(ctx context.Context, b types.PortBinding) error {
|
||||
if rawRulesDisabled(ctx) {
|
||||
return nil
|
||||
}
|
||||
@@ -232,7 +248,7 @@ func (n *Network) filterDirectAccess(ctx context.Context, b types.PortBinding, e
|
||||
"!", "-i", n.IfName,
|
||||
"-j", "DROP",
|
||||
}}
|
||||
if err := appendOrDelChainRule(drop, "DIRECT ACCESS FILTERING - DROP", enable); err != nil {
|
||||
if err := appendOrDelChainRule(drop, "LEGACY DIRECT ACCESS FILTERING - DROP", false); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user