Add option --bridge-accept-fwmark

Packets with the given firewall mark are accepted by the bridge
driver's filter-FORWARD rules.

The value can either be an integer mark, or it can include a
mask in the format "<mark>/<mask>".

Signed-off-by: Rob Murray <rob.murray@docker.com>
This commit is contained in:
Rob Murray
2025-07-22 12:17:20 +01:00
parent 0c60a0e482
commit cf1695bef1
11 changed files with 291 additions and 4 deletions

View File

@@ -39,6 +39,7 @@ func installConfigFlags(conf *config.Config, flags *pflag.FlagSet) {
flags.BoolVar(&conf.BridgeConfig.EnableUserlandProxy, "userland-proxy", true, "Use userland proxy for loopback traffic") flags.BoolVar(&conf.BridgeConfig.EnableUserlandProxy, "userland-proxy", true, "Use userland proxy for loopback traffic")
flags.StringVar(&conf.BridgeConfig.UserlandProxyPath, "userland-proxy-path", conf.BridgeConfig.UserlandProxyPath, "Path to the userland proxy binary") flags.StringVar(&conf.BridgeConfig.UserlandProxyPath, "userland-proxy-path", conf.BridgeConfig.UserlandProxyPath, "Path to the userland proxy binary")
flags.BoolVar(&conf.BridgeConfig.AllowDirectRouting, "allow-direct-routing", false, "Allow remote access to published ports on container IP addresses") flags.BoolVar(&conf.BridgeConfig.AllowDirectRouting, "allow-direct-routing", false, "Allow remote access to published ports on container IP addresses")
flags.StringVar(&conf.BridgeConfig.BridgeAcceptFwMark, "bridge-accept-fwmark", "", "In bridge networks, accept packets with this firewall mark/mask")
flags.StringVar(&conf.CgroupParent, "cgroup-parent", "", "Set parent cgroup for all containers") flags.StringVar(&conf.CgroupParent, "cgroup-parent", "", "Set parent cgroup for all containers")
flags.StringVar(&conf.RemappedRoot, "userns-remap", "", "User/Group setting for user namespaces") flags.StringVar(&conf.RemappedRoot, "userns-remap", "", "User/Group setting for user namespaces")
flags.BoolVar(&conf.LiveRestoreEnabled, "live-restore", false, "Enable live restore of docker when containers are still running") flags.BoolVar(&conf.LiveRestoreEnabled, "live-restore", false, "Enable live restore of docker when containers are still running")

View File

@@ -6,6 +6,7 @@ import (
"net" "net"
"os/exec" "os/exec"
"path/filepath" "path/filepath"
"strconv"
"strings" "strings"
"github.com/containerd/cgroups/v3" "github.com/containerd/cgroups/v3"
@@ -49,6 +50,7 @@ type BridgeConfig struct {
EnableUserlandProxy bool `json:"userland-proxy,omitempty"` EnableUserlandProxy bool `json:"userland-proxy,omitempty"`
UserlandProxyPath string `json:"userland-proxy-path,omitempty"` UserlandProxyPath string `json:"userland-proxy-path,omitempty"`
AllowDirectRouting bool `json:"allow-direct-routing,omitempty"` AllowDirectRouting bool `json:"allow-direct-routing,omitempty"`
BridgeAcceptFwMark string `json:"bridge-accept-fwmark,omitempty"`
} }
// DefaultBridgeConfig stores all the parameters for the default bridge network. // DefaultBridgeConfig stores all the parameters for the default bridge network.
@@ -243,15 +245,15 @@ func validatePlatformConfig(conf *Config) error {
if err := verifyDefaultIpcMode(conf.IpcMode); err != nil { if err := verifyDefaultIpcMode(conf.IpcMode); err != nil {
return err return err
} }
if err := bridge.ValidateFixedCIDRV6(conf.FixedCIDRv6); err != nil { if err := bridge.ValidateFixedCIDRV6(conf.FixedCIDRv6); err != nil {
return errors.Wrap(err, "invalid fixed-cidr-v6") return errors.Wrap(err, "invalid fixed-cidr-v6")
} }
if err := validateFirewallBackend(conf.FirewallBackend); err != nil { if err := validateFirewallBackend(conf.FirewallBackend); err != nil {
return errors.Wrap(err, "invalid firewall-backend") return errors.Wrap(err, "invalid firewall-backend")
} }
if err := validateFwMarkMask(conf.BridgeAcceptFwMark); err != nil {
return errors.Wrap(err, "invalid bridge-accept-fwmark")
}
return verifyDefaultCgroupNsMode(conf.CgroupNamespaceMode) return verifyDefaultCgroupNsMode(conf.CgroupNamespaceMode)
} }
@@ -311,6 +313,22 @@ func validateFirewallBackend(val string) error {
return errors.New(`allowed values are "iptables" and "nftables"`) return errors.New(`allowed values are "iptables" and "nftables"`)
} }
// validateFwMarkMask checks the value of the bridge-accept-fwmark option.
//
// An empty value is valid (the option is unset). Otherwise val must be a
// firewall mark, optionally followed by "/" and a mask. The mark and the mask
// must each parse as an unsigned 32-bit integer; the base is taken from the
// number's prefix (for example "0x" for hex, or a leading "0" for octal).
//
// The returned error quotes the whole of val, and wraps the strconv error for
// the part that failed to parse.
func validateFwMarkMask(val string) error {
	if val == "" {
		return nil
	}

	markPart, maskPart, hasMask := strings.Cut(val, "/")

	// The mark is always required, so check it first.
	_, markErr := strconv.ParseUint(markPart, 0, 32)
	if markErr != nil {
		return fmt.Errorf("invalid firewall mark %q: %w", val, markErr)
	}
	if !hasMask {
		return nil
	}

	// A "/" was present, so whatever follows it (even "") must be a number.
	_, maskErr := strconv.ParseUint(maskPart, 0, 32)
	if maskErr != nil {
		return fmt.Errorf("invalid firewall mask %q: %w", val, maskErr)
	}
	return nil
}
func verifyDefaultCgroupNsMode(mode string) error { func verifyDefaultCgroupNsMode(mode string) error {
cm := container.CgroupnsMode(mode) cm := container.CgroupnsMode(mode)
if !cm.Valid() { if !cm.Valid() {

View File

@@ -396,3 +396,71 @@ func TestDaemonLegacyOptions(t *testing.T) {
}) })
} }
} }
// TestValidateAcceptFwMarkMark checks that validateFwMarkMask accepts an empty
// value, a mark, or a "mark/mask" pair in the integer formats understood by
// strconv.ParseUint with base 0, and that it reports malformed values with an
// error naming the part (mark or mask) that could not be parsed.
func TestValidateAcceptFwMarkMark(t *testing.T) {
	tests := []struct {
		name string
		val  string
		// expErr is the expected error text; empty means no error is expected.
		expErr string
	}{
		{
			name: "empty",
			val:  "",
		},
		{
			name: "dec/no-mask",
			val:  "1",
		},
		{
			name: "hex/no-mask",
			val:  "0x1",
		},
		{
			name: "dec/mask",
			val:  "1/2",
		},
		{
			name: "hex/mask",
			val:  "0x1/0x2",
		},
		{
			name: "octal/mask",
			val:  "010/0xff",
		},
		{
			name:   "bad/mark",
			val:    "hello/0x2",
			expErr: `invalid firewall mark "hello/0x2": strconv.ParseUint: parsing "hello": invalid syntax`,
		},
		{
			// This case has a valid mark and a bad mask. It needs its own
			// name, a second "bad/mark" would be reported as "bad/mark#01".
			name:   "bad/mask",
			val:    "1/hello",
			expErr: `invalid firewall mask "1/hello": strconv.ParseUint: parsing "hello": invalid syntax`,
		},
		{
			name:   "bad/sep",
			val:    "1+hello",
			expErr: `invalid firewall mark "1+hello": strconv.ParseUint: parsing "1+hello": invalid syntax`,
		},
		{
			name:   "bad/no-mask",
			val:    "1/",
			expErr: `invalid firewall mask "1/": strconv.ParseUint: parsing "": invalid syntax`,
		},
		{
			name:   "bad/negative",
			val:    "-1",
			expErr: `invalid firewall mark "-1": strconv.ParseUint: parsing "-1": invalid syntax`,
		},
	}
	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			err := validateFwMarkMask(tc.val)
			if tc.expErr == "" {
				assert.NilError(t, err)
				return
			}
			assert.Check(t, is.ErrorContains(err, tc.expErr))
		})
	}
}

View File

@@ -938,6 +938,7 @@ func networkPlatformOptions(conf *config.Config) []nwconfig.Option {
"EnableIP6Tables": conf.BridgeConfig.EnableIP6Tables, "EnableIP6Tables": conf.BridgeConfig.EnableIP6Tables,
"Hairpin": !conf.EnableUserlandProxy || conf.UserlandProxyPath == "", "Hairpin": !conf.EnableUserlandProxy || conf.UserlandProxyPath == "",
"AllowDirectRouting": conf.BridgeConfig.AllowDirectRouting, "AllowDirectRouting": conf.BridgeConfig.AllowDirectRouting,
"AcceptFwMark": conf.BridgeConfig.BridgeAcceptFwMark,
}, },
}), }),
} }

View File

@@ -77,6 +77,7 @@ type configuration struct {
// hairpinned. // hairpinned.
Hairpin bool Hairpin bool
AllowDirectRouting bool AllowDirectRouting bool
AcceptFwMark string
} }
// networkConfiguration for network specific configuration // networkConfiguration for network specific configuration
@@ -429,6 +430,7 @@ func (n *bridgeNetwork) newFirewallerNetwork(ctx context.Context) (_ firewaller.
ICC: n.config.EnableICC, ICC: n.config.EnableICC,
Masquerade: n.config.EnableIPMasquerade, Masquerade: n.config.EnableIPMasquerade,
TrustedHostInterfaces: n.config.TrustedHostInterfaces, TrustedHostInterfaces: n.config.TrustedHostInterfaces,
AcceptFwMark: n.driver.config.AcceptFwMark,
Config4: config4, Config4: config4,
Config6: config6, Config6: config6,
}) })

View File

@@ -48,6 +48,10 @@ type NetworkConfig struct {
// bridge itself). In particular, these are not external interfaces for the purpose of // bridge itself). In particular, these are not external interfaces for the purpose of
// blocking direct-routing to a container's IP address. // blocking direct-routing to a container's IP address.
TrustedHostInterfaces []string TrustedHostInterfaces []string
// AcceptFwMark is a firewall mark/mask. Packets with this mark will not be dropped by
// per-port blocking rules. So, packets with this mark have access to unpublished
// container ports.
AcceptFwMark string
// Config4 contains IPv4-specific configuration for the network. // Config4 contains IPv4-specific configuration for the network.
Config4 NetworkConfigFam Config4 NetworkConfigFam
// Config6 contains IPv6-specific configuration for the network. // Config6 contains IPv6-specific configuration for the network.

View File

@@ -7,6 +7,8 @@ import (
"errors" "errors"
"fmt" "fmt"
"net/netip" "net/netip"
"strconv"
"strings"
"github.com/containerd/log" "github.com/containerd/log"
"github.com/docker/docker/daemon/libnetwork/drivers/bridge/internal/firewaller" "github.com/docker/docker/daemon/libnetwork/drivers/bridge/internal/firewaller"
@@ -263,6 +265,18 @@ func setDefaultForwardRule(ipVersion iptables.IPVersion, ifName string, unprotec
} }
func (n *network) setupNonInternalNetworkRules(ctx context.Context, ipVer iptables.IPVersion, config firewaller.NetworkConfigFam, enable bool) error { func (n *network) setupNonInternalNetworkRules(ctx context.Context, ipVer iptables.IPVersion, config firewaller.NetworkConfigFam, enable bool) error {
if n.config.AcceptFwMark != "" {
fwm, err := iptablesFwMark(n.config.AcceptFwMark)
if err != nil {
return err
}
if err := programChainRule(iptables.Rule{IPVer: ipVer, Table: iptables.Filter, Chain: DockerForwardChain, Args: []string{
"-m", "mark", "--mark", fwm, "-j", "ACCEPT",
}}, "ALLOW FW MARK", enable); err != nil {
return err
}
}
var natArgs, hpNatArgs []string var natArgs, hpNatArgs []string
if config.HostIP.IsValid() { if config.HostIP.IsValid() {
// The user wants IPv4/IPv6 SNAT with the given address. // The user wants IPv4/IPv6 SNAT with the given address.
@@ -459,3 +473,23 @@ func setupInternalNetworkRules(ctx context.Context, bridgeIface string, prefix n
// Set Inter Container Communication. // Set Inter Container Communication.
return setIcc(ctx, version, bridgeIface, icc, true, insert) return setIcc(ctx, version, bridgeIface, icc, true, insert)
} }
// iptablesFwMark converts a firewall mark with an optional "/mask" suffix
// into the form passed to the iptables "mark" match.
//
// The mark and mask are parsed as unsigned 32-bit integers and written back
// out in decimal, as "mark" or "mark/mask", because strconv.ParseUint accepts
// more integer formats than iptables.
//
// An error is returned if either number cannot be parsed; it quotes the whole
// of val and wraps the strconv error.
func iptablesFwMark(val string) (string, error) {
	markPart, maskPart, hasMask := strings.Cut(val, "/")

	markNum, err := strconv.ParseUint(markPart, 0, 32)
	if err != nil {
		return "", fmt.Errorf("invalid firewall mark %q: %w", val, err)
	}
	if !hasMask {
		return strconv.FormatUint(markNum, 10), nil
	}

	maskNum, err := strconv.ParseUint(maskPart, 0, 32)
	if err != nil {
		return "", fmt.Errorf("invalid firewall mask %q: %w", val, err)
	}
	return strconv.FormatUint(markNum, 10) + "/" + strconv.FormatUint(maskNum, 10), nil
}

View File

@@ -5,6 +5,8 @@ package nftabler
import ( import (
"context" "context"
"fmt" "fmt"
"strconv"
"strings"
"github.com/containerd/log" "github.com/containerd/log"
"github.com/docker/docker/daemon/libnetwork/drivers/bridge/internal/firewaller" "github.com/docker/docker/daemon/libnetwork/drivers/bridge/internal/firewaller"
@@ -157,6 +159,20 @@ func (n *network) configure(ctx context.Context, table nftables.TableRef, conf f
} }
cleanup.Add(cf) cleanup.Add(cf)
} else { } else {
// AcceptFwMark
if n.config.AcceptFwMark != "" {
fwm, err := nftFwMark(n.config.AcceptFwMark)
if err != nil {
return nil, fmt.Errorf("adding fwmark %q for %q: %w", n.config.AcceptFwMark, n.config.IfName, err)
}
cf, err = fwdInChain.AppendRuleCf(ctx, fwdInAcceptFwMarkRuleGroup,
`meta mark %s counter accept comment "ALLOW FW MARK"`, fwm)
if err != nil {
return nil, fmt.Errorf("adding ALLOW FW MARK rule for %q: %w", n.config.IfName, err)
}
cleanup.Add(cf)
}
// Inter-Container Communication // Inter-Container Communication
cf, err = fwdInChain.AppendRuleCf(ctx, fwdInICCRuleGroup, "iifname == %s counter %s comment ICC", cf, err = fwdInChain.AppendRuleCf(ctx, fwdInICCRuleGroup, "iifname == %s counter %s comment ICC",
n.config.IfName, iccVerdict) n.config.IfName, iccVerdict)
@@ -270,3 +286,23 @@ func chainNatPostRtOut(ifName string) string {
func chainNatPostRtIn(ifName string) string { func chainNatPostRtIn(ifName string) string {
return "nat-postrouting-in__" + ifName return "nat-postrouting-in__" + ifName
} }
// nftFwMark converts a firewall mark with an optional "/mask" suffix into the
// text that follows "meta mark" in an nftables rule.
//
// Without a mask the result is just the mark. With a mask it is the
// expression "and <mask> == <mark>". Both numbers are parsed as unsigned
// 32-bit integers and written back out in decimal, because strconv.ParseUint
// accepts more integer formats than nft.
//
// An error is returned if either number cannot be parsed; it quotes the whole
// of val and wraps the strconv error.
func nftFwMark(val string) (string, error) {
	markPart, maskPart, hasMask := strings.Cut(val, "/")

	markNum, err := strconv.ParseUint(markPart, 0, 32)
	if err != nil {
		return "", fmt.Errorf("invalid firewall mark %q: %w", val, err)
	}
	if !hasMask {
		return strconv.FormatUint(markNum, 10), nil
	}

	maskNum, err := strconv.ParseUint(maskPart, 0, 32)
	if err != nil {
		return "", fmt.Errorf("invalid firewall mask %q: %w", val, err)
	}
	// Note the order: the mask is applied first, then compared to the mark.
	return "and " + strconv.FormatUint(maskNum, 10) + " == " + strconv.FormatUint(markNum, 10), nil
}

View File

@@ -34,7 +34,8 @@ const (
) )
const ( const (
fwdInLegacyLinksRuleGroup = iota + initialRuleGroup + 1 fwdInAcceptFwMarkRuleGroup = iota + initialRuleGroup + 1
fwdInLegacyLinksRuleGroup
fwdInICCRuleGroup fwdInICCRuleGroup
fwdInPortsRuleGroup fwdInPortsRuleGroup
fwdInFinalRuleGroup fwdInFinalRuleGroup

View File

@@ -798,6 +798,124 @@ func TestDirectRoutingOpenPorts(t *testing.T) {
} }
} }
// TestAcceptFwMark starts a daemon with "--bridge-accept-fwmark=2/3" and runs
// a container with no published ports on a bridge network. From a simulated
// remote host it then checks that:
//   - without a firewall mark, ping and HTTP requests to the container's
//     addresses fail, and
//   - once packets arriving on the test interface are given a mark that
//     matches 2/3, ping and HTTP requests succeed.
func TestAcceptFwMark(t *testing.T) {
skip.If(t, testEnv.IsRootless())
ctx := setupTest(t)
d := daemon.New(t)
d.StartWithBusybox(ctx, t, "--bridge-accept-fwmark=2/3")
t.Cleanup(func() { d.Stop(t) })
c := d.NewClientT(t)
t.Cleanup(func() { c.Close() })
// Create the L3 segment that connects the docker host to the simulated remote host.
// NOTE(review): the segment name looks copied from another test - confirm it
// cannot clash, or rename it.
l3 := networking.NewL3Segment(t, "test-routed-open-ports",
netip.MustParsePrefix("192.168.124.1/24"),
netip.MustParsePrefix("fdc0:36dc:a4dd::1/64"))
t.Cleanup(func() { l3.Destroy(t) })
// "docker" is the host where dockerd is running.
l3.AddHost(t, "docker", networking.CurrentNetns, "eth-test",
netip.MustParsePrefix("192.168.124.2/24"),
netip.MustParsePrefix("fdc0:36dc:a4dd::2/64"))
// "remote" simulates the remote host.
l3.AddHost(t, "remote", "test-remote-host", "eth0",
netip.MustParsePrefix("192.168.124.3/24"),
netip.MustParsePrefix("fdc0:36dc:a4dd::3/64"))
// Add default routes to the "docker" Host from the "remote" Host.
l3.Hosts["remote"].MustRun(t, "ip", "route", "add", "default", "via", "192.168.124.2")
l3.Hosts["remote"].MustRun(t, "ip", "-6", "route", "add", "default", "via", "fdc0:36dc:a4dd::2")
// Create a network and run a container on it.
// Don't publish any ports.
const netName = "test-acceptfwmark"
network.CreateNoError(ctx, t, c, netName,
network.WithOption(bridge.BridgeName, "br-acceptfwmark"),
network.WithOption(bridge.TrustedHostInterfaces, "eth-test"),
network.WithIPv6(),
)
t.Cleanup(func() {
network.RemoveNoError(ctx, t, c, netName)
})
ctrId := container.Run(ctx, t, c,
container.WithNetworkMode(netName),
container.WithCmd("httpd", "-f"),
)
t.Cleanup(func() {
c.ContainerRemove(ctx, ctrId, containertypes.RemoveOptions{Force: true})
})
insp := container.Inspect(ctx, t, c, ctrId)
ctrIPv4 := insp.NetworkSettings.Networks[netName].IPAddress
ctrIPv6 := insp.NetworkSettings.Networks[netName].GlobalIPv6Address
// Expected results: text to look for in curl's output, and ping's exit code.
// Presumably httpd has nothing to serve, so a 404 response shows that the
// request reached the container - TODO confirm.
const (
httpSuccess = "404 Not Found"
httpFail = "Connection timed out"
pingSuccess = 0
pingFail = 1
)
// testPing runs cmd ("ping" or "ping6") against addr from the "remote" host,
// as a parallel subtest, and checks its exit code is expExit.
testPing := func(t *testing.T, cmd, addr string, expExit int) {
t.Helper()
t.Parallel()
l3.Hosts["remote"].Do(t, func() {
t.Helper()
pingRes := icmd.RunCommand(cmd, "--numeric", "--count=1", "--timeout=3", addr)
assert.Check(t, pingRes.ExitCode == expExit, "%s %s -> out:%s err:%s",
cmd, addr, pingRes.Stdout(), pingRes.Stderr())
})
}
// testHttp runs curl against addr:port from the "remote" host, as a parallel
// subtest, and checks that curl's combined output contains expOut.
testHttp := func(t *testing.T, addr, port, expOut string) {
t.Helper()
t.Parallel()
l3.Hosts["remote"].Do(t, func() {
t.Helper()
u := "http://" + net.JoinHostPort(addr, port)
res := icmd.RunCommand("curl", "--max-time", "3", "--show-error", "--silent", u)
assert.Check(t, is.Contains(res.Combined(), expOut), "url:%s", u)
})
}
// test runs the IPv4 and IPv6 ping and HTTP checks as subtests of name.
test := func(name string, expPing int, expHttp string) {
t.Run(name, func(t *testing.T) {
t.Run("v4/ping", func(t *testing.T) {
testPing(t, "ping", ctrIPv4, expPing)
})
t.Run("v6/ping", func(t *testing.T) {
testPing(t, "ping6", ctrIPv6, expPing)
})
t.Run("v4/http", func(t *testing.T) {
testHttp(t, ctrIPv4, "80", expHttp)
})
t.Run("v6/http", func(t *testing.T) {
testHttp(t, ctrIPv6, "80", expHttp)
})
})
}
// With no mark on the packets, nothing should reach the container.
test("nofwmark", pingFail, httpFail)
// Mark packets arriving on "eth-test" with 0xe, which matches the daemon's
// mark/mask of 2/3 (0xe & 3 == 2).
// This nftables rule also works with --firewall-backend=iptables, as long as
// iptables is iptables-nft.
cmd := icmd.Command("nft", "-f", "-")
res := icmd.RunCmd(cmd, icmd.WithStdin(strings.NewReader(`
table inet test-acceptfwmark {
chain raw-PREROUTING {
type filter hook prerouting priority raw
iifname "eth-test" counter mark set 0xe
}
}
`)))
res.Assert(t, icmd.Success)
defer func() {
icmd.RunCommand("nft", "delete table inet test-acceptfwmark").Assert(t, icmd.Success)
}()
// With the mark set, the unpublished port and ping should be reachable.
test("fwmark", pingSuccess, httpSuccess)
}
// TestRoutedNonGateway checks whether a published container port on an endpoint in a // TestRoutedNonGateway checks whether a published container port on an endpoint in a
// gateway mode "routed" network is accessible when the routed network is not providing // gateway mode "routed" network is accessible when the routed network is not providing
// the container's default gateway. // the container's default gateway.

View File

@@ -10,6 +10,7 @@ dockerd - Enable daemon mode
[**-b**|**--bridge**[=*BRIDGE*]] [**-b**|**--bridge**[=*BRIDGE*]]
[**--bip**[=*BIP*]] [**--bip**[=*BIP*]]
[**--bip6**[=*BIP*]] [**--bip6**[=*BIP*]]
[**--bridge-accept-fwmark**[=*[]*]]
[**--cgroup-parent**[=*[]*]] [**--cgroup-parent**[=*[]*]]
[**--config-file**[=*path*]] [**--config-file**[=*path*]]
[**--containerd**[=*SOCKET-PATH*]] [**--containerd**[=*SOCKET-PATH*]]
@@ -140,6 +141,9 @@ $ sudo dockerd --add-runtime runc=runc --add-runtime custom=/usr/local/bin/my-ru
Use the provided CIDR notation IPv6 address for the default bridge network; Use the provided CIDR notation IPv6 address for the default bridge network;
Mutually exclusive of \-b Mutually exclusive of \-b
**--bridge-accept-fwmark**=""
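Bridge networks will accept packets with this firewall mark. The value is
either an integer mark, or a mark and mask in the format "<mark>/<mask>".
Packets with a matching mark are not dropped by per-port blocking rules, so
they have access to unpublished container ports.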
**--cgroup-parent**="" **--cgroup-parent**=""
Set parent cgroup for all containers. Default is "/docker" for fs cgroup Set parent cgroup for all containers. Default is "/docker" for fs cgroup
driver and "system.slice" for systemd cgroup driver. driver and "system.slice" for systemd cgroup driver.