diff --git a/daemon/command/config_unix.go b/daemon/command/config_unix.go index fa9e3c5f9f..ec8ce8c4f8 100644 --- a/daemon/command/config_unix.go +++ b/daemon/command/config_unix.go @@ -39,6 +39,7 @@ func installConfigFlags(conf *config.Config, flags *pflag.FlagSet) { flags.BoolVar(&conf.BridgeConfig.EnableUserlandProxy, "userland-proxy", true, "Use userland proxy for loopback traffic") flags.StringVar(&conf.BridgeConfig.UserlandProxyPath, "userland-proxy-path", conf.BridgeConfig.UserlandProxyPath, "Path to the userland proxy binary") flags.BoolVar(&conf.BridgeConfig.AllowDirectRouting, "allow-direct-routing", false, "Allow remote access to published ports on container IP addresses") + flags.StringVar(&conf.BridgeConfig.BridgeAcceptFwMark, "bridge-accept-fwmark", "", "In bridge networks, accept packets with this firewall mark/mask") flags.StringVar(&conf.CgroupParent, "cgroup-parent", "", "Set parent cgroup for all containers") flags.StringVar(&conf.RemappedRoot, "userns-remap", "", "User/Group setting for user namespaces") flags.BoolVar(&conf.LiveRestoreEnabled, "live-restore", false, "Enable live restore of docker when containers are still running") diff --git a/daemon/config/config_linux.go b/daemon/config/config_linux.go index b9278e2c8d..139d9a62a8 100644 --- a/daemon/config/config_linux.go +++ b/daemon/config/config_linux.go @@ -6,6 +6,7 @@ import ( "net" "os/exec" "path/filepath" + "strconv" "strings" "github.com/containerd/cgroups/v3" @@ -49,6 +50,7 @@ type BridgeConfig struct { EnableUserlandProxy bool `json:"userland-proxy,omitempty"` UserlandProxyPath string `json:"userland-proxy-path,omitempty"` AllowDirectRouting bool `json:"allow-direct-routing,omitempty"` + BridgeAcceptFwMark string `json:"bridge-accept-fwmark,omitempty"` } // DefaultBridgeConfig stores all the parameters for the default bridge network. 
@@ -243,15 +245,15 @@ func validatePlatformConfig(conf *Config) error { if err := verifyDefaultIpcMode(conf.IpcMode); err != nil { return err } - if err := bridge.ValidateFixedCIDRV6(conf.FixedCIDRv6); err != nil { return errors.Wrap(err, "invalid fixed-cidr-v6") } - if err := validateFirewallBackend(conf.FirewallBackend); err != nil { return errors.Wrap(err, "invalid firewall-backend") } - + if err := validateFwMarkMask(conf.BridgeAcceptFwMark); err != nil { + return errors.Wrap(err, "invalid bridge-accept-fwmark") + } return verifyDefaultCgroupNsMode(conf.CgroupNamespaceMode) } @@ -311,6 +313,22 @@ func validateFirewallBackend(val string) error { return errors.New(`allowed values are "iptables" and "nftables"`) } +func validateFwMarkMask(val string) error { + if val == "" { + return nil + } + mark, mask, haveMask := strings.Cut(val, "/") + if _, err := strconv.ParseUint(mark, 0, 32); err != nil { + return fmt.Errorf("invalid firewall mark %q: %w", val, err) + } + if haveMask { + if _, err := strconv.ParseUint(mask, 0, 32); err != nil { + return fmt.Errorf("invalid firewall mask %q: %w", val, err) + } + } + return nil +} + func verifyDefaultCgroupNsMode(mode string) error { cm := container.CgroupnsMode(mode) if !cm.Valid() { diff --git a/daemon/config/config_linux_test.go b/daemon/config/config_linux_test.go index 5317c3287d..2e36031c2d 100644 --- a/daemon/config/config_linux_test.go +++ b/daemon/config/config_linux_test.go @@ -396,3 +396,71 @@ func TestDaemonLegacyOptions(t *testing.T) { }) } } + +func TestValidateAcceptFwMarkMark(t *testing.T) { + tests := []struct { + name string + val string + expErr string + }{ + { + name: "empty", + val: "", + }, + { + name: "dec/no-mask", + val: "1", + }, + { + name: "hex/no-mask", + val: "0x1", + }, + { + name: "dec/mask", + val: "1/2", + }, + { + name: "hex/mask", + val: "0x1/0x2", + }, + { + name: "octal/mask", + val: "010/0xff", + }, + { + name: "bad/mark", + val: "hello/0x2", + expErr: `invalid firewall mark 
"hello/0x2": strconv.ParseUint: parsing "hello": invalid syntax`, + }, + { + name: "bad/mark", + val: "1/hello", + expErr: `invalid firewall mask "1/hello": strconv.ParseUint: parsing "hello": invalid syntax`, + }, + { + name: "bad/sep", + val: "1+hello", + expErr: `invalid firewall mark "1+hello": strconv.ParseUint: parsing "1+hello": invalid syntax`, + }, + { + name: "bad/no-mask", + val: "1/", + expErr: `invalid firewall mask "1/": strconv.ParseUint: parsing "": invalid syntax`, + }, + { + name: "bad/negative", + val: "-1", + expErr: `invalid firewall mark "-1": strconv.ParseUint: parsing "-1": invalid syntax`, + }, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + err := validateFwMarkMask(tc.val) + if tc.expErr == "" { + assert.NilError(t, err) + } else { + assert.Check(t, is.ErrorContains(err, tc.expErr)) + } + }) + } +} diff --git a/daemon/daemon_unix.go b/daemon/daemon_unix.go index ae294f3339..2d3c83b60c 100644 --- a/daemon/daemon_unix.go +++ b/daemon/daemon_unix.go @@ -938,6 +938,7 @@ func networkPlatformOptions(conf *config.Config) []nwconfig.Option { "EnableIP6Tables": conf.BridgeConfig.EnableIP6Tables, "Hairpin": !conf.EnableUserlandProxy || conf.UserlandProxyPath == "", "AllowDirectRouting": conf.BridgeConfig.AllowDirectRouting, + "AcceptFwMark": conf.BridgeConfig.BridgeAcceptFwMark, }, }), } diff --git a/daemon/libnetwork/drivers/bridge/bridge_linux.go b/daemon/libnetwork/drivers/bridge/bridge_linux.go index 3c815c908b..2ca5a07a5a 100644 --- a/daemon/libnetwork/drivers/bridge/bridge_linux.go +++ b/daemon/libnetwork/drivers/bridge/bridge_linux.go @@ -77,6 +77,7 @@ type configuration struct { // hairpinned. Hairpin bool AllowDirectRouting bool + AcceptFwMark string } // networkConfiguration for network specific configuration @@ -429,6 +430,7 @@ func (n *bridgeNetwork) newFirewallerNetwork(ctx context.Context) (_ firewaller. 
ICC: n.config.EnableICC, Masquerade: n.config.EnableIPMasquerade, TrustedHostInterfaces: n.config.TrustedHostInterfaces, + AcceptFwMark: n.driver.config.AcceptFwMark, Config4: config4, Config6: config6, }) diff --git a/daemon/libnetwork/drivers/bridge/internal/firewaller/firewaller.go b/daemon/libnetwork/drivers/bridge/internal/firewaller/firewaller.go index 89622cae33..5b66e1a4e5 100644 --- a/daemon/libnetwork/drivers/bridge/internal/firewaller/firewaller.go +++ b/daemon/libnetwork/drivers/bridge/internal/firewaller/firewaller.go @@ -48,6 +48,10 @@ type NetworkConfig struct { // bridge itself). In particular, these are not external interfaces for the purpose of // blocking direct-routing to a container's IP address. TrustedHostInterfaces []string + // AcceptFwMark is a firewall mark/mask. Packets with this mark will not be dropped by + // per-port blocking rules. So, packets with this mark have access to unpublished + // container ports. + AcceptFwMark string // Config4 contains IPv4-specific configuration for the network. Config4 NetworkConfigFam // Config6 contains IPv6-specific configuration for the network. 
diff --git a/daemon/libnetwork/drivers/bridge/internal/iptabler/network.go b/daemon/libnetwork/drivers/bridge/internal/iptabler/network.go index f8fbd7c3bc..f819dfa150 100644 --- a/daemon/libnetwork/drivers/bridge/internal/iptabler/network.go +++ b/daemon/libnetwork/drivers/bridge/internal/iptabler/network.go @@ -7,6 +7,8 @@ import ( "errors" "fmt" "net/netip" + "strconv" + "strings" "github.com/containerd/log" "github.com/docker/docker/daemon/libnetwork/drivers/bridge/internal/firewaller" @@ -263,6 +265,18 @@ func setDefaultForwardRule(ipVersion iptables.IPVersion, ifName string, unprotec } func (n *network) setupNonInternalNetworkRules(ctx context.Context, ipVer iptables.IPVersion, config firewaller.NetworkConfigFam, enable bool) error { + if n.config.AcceptFwMark != "" { + fwm, err := iptablesFwMark(n.config.AcceptFwMark) + if err != nil { + return err + } + if err := programChainRule(iptables.Rule{IPVer: ipVer, Table: iptables.Filter, Chain: DockerForwardChain, Args: []string{ + "-m", "mark", "--mark", fwm, "-j", "ACCEPT", + }}, "ALLOW FW MARK", enable); err != nil { + return err + } + } + var natArgs, hpNatArgs []string if config.HostIP.IsValid() { // The user wants IPv4/IPv6 SNAT with the given address. @@ -459,3 +473,23 @@ func setupInternalNetworkRules(ctx context.Context, bridgeIface string, prefix n // Set Inter Container Communication. return setIcc(ctx, version, bridgeIface, icc, true, insert) } + +// iptablesFwMark takes a string representing a firewall mark with an optional +// "/mask", parses the mark and mask, and returns the same "mark/mask" with the +// numbers converted to decimal, because strconv.ParseUint accepts more integer +// formats than iptables. 
+func iptablesFwMark(val string) (string, error) { + markStr, maskStr, haveMask := strings.Cut(val, "/") + mark, err := strconv.ParseUint(markStr, 0, 32) + if err != nil { + return "", fmt.Errorf("invalid firewall mark %q: %w", val, err) + } + if haveMask { + mask, err := strconv.ParseUint(maskStr, 0, 32) + if err != nil { + return "", fmt.Errorf("invalid firewall mask %q: %w", val, err) + } + return fmt.Sprintf("%d/%d", mark, mask), nil + } + return strconv.FormatUint(mark, 10), nil +} diff --git a/daemon/libnetwork/drivers/bridge/internal/nftabler/network.go b/daemon/libnetwork/drivers/bridge/internal/nftabler/network.go index e9ced708e9..0e9b55f2c0 100644 --- a/daemon/libnetwork/drivers/bridge/internal/nftabler/network.go +++ b/daemon/libnetwork/drivers/bridge/internal/nftabler/network.go @@ -5,6 +5,8 @@ package nftabler import ( "context" "fmt" + "strconv" + "strings" "github.com/containerd/log" "github.com/docker/docker/daemon/libnetwork/drivers/bridge/internal/firewaller" @@ -157,6 +159,20 @@ func (n *network) configure(ctx context.Context, table nftables.TableRef, conf f } cleanup.Add(cf) } else { + // AcceptFwMark + if n.config.AcceptFwMark != "" { + fwm, err := nftFwMark(n.config.AcceptFwMark) + if err != nil { + return nil, fmt.Errorf("adding fwmark %q for %q: %w", n.config.AcceptFwMark, n.config.IfName, err) + } + cf, err = fwdInChain.AppendRuleCf(ctx, fwdInAcceptFwMarkRuleGroup, + `meta mark %s counter accept comment "ALLOW FW MARK"`, fwm) + if err != nil { + return nil, fmt.Errorf("adding ALLOW FW MARK rule for %q: %w", n.config.IfName, err) + } + cleanup.Add(cf) + } + // Inter-Container Communication cf, err = fwdInChain.AppendRuleCf(ctx, fwdInICCRuleGroup, "iifname == %s counter %s comment ICC", n.config.IfName, iccVerdict) @@ -270,3 +286,23 @@ func chainNatPostRtOut(ifName string) string { func chainNatPostRtIn(ifName string) string { return "nat-postrouting-in__" + ifName } + +// nftFwMark takes a string representing a firewall mark with an 
optional +// "/mask", parses the mark and mask, and returns an nftables expression +// representing the same mark/mask. Numbers are converted to decimal, because +// strconv.ParseUint accepts more integer formats than nft. +func nftFwMark(val string) (string, error) { + markStr, maskStr, haveMask := strings.Cut(val, "/") + mark, err := strconv.ParseUint(markStr, 0, 32) + if err != nil { + return "", fmt.Errorf("invalid firewall mark %q: %w", val, err) + } + if haveMask { + mask, err := strconv.ParseUint(maskStr, 0, 32) + if err != nil { + return "", fmt.Errorf("invalid firewall mask %q: %w", val, err) + } + return fmt.Sprintf("and %d == %d", mask, mark), nil + } + return strconv.FormatUint(mark, 10), nil +} diff --git a/daemon/libnetwork/drivers/bridge/internal/nftabler/nftabler.go b/daemon/libnetwork/drivers/bridge/internal/nftabler/nftabler.go index 1be24340cf..a83c2b3b71 100644 --- a/daemon/libnetwork/drivers/bridge/internal/nftabler/nftabler.go +++ b/daemon/libnetwork/drivers/bridge/internal/nftabler/nftabler.go @@ -34,7 +34,8 @@ const ( ) const ( - fwdInLegacyLinksRuleGroup = iota + initialRuleGroup + 1 + fwdInAcceptFwMarkRuleGroup = iota + initialRuleGroup + 1 + fwdInLegacyLinksRuleGroup fwdInICCRuleGroup fwdInPortsRuleGroup fwdInFinalRuleGroup diff --git a/integration/networking/port_mapping_linux_test.go b/integration/networking/port_mapping_linux_test.go index fdf5f787c4..ed54261997 100644 --- a/integration/networking/port_mapping_linux_test.go +++ b/integration/networking/port_mapping_linux_test.go @@ -798,6 +798,124 @@ func TestDirectRoutingOpenPorts(t *testing.T) { } } +func TestAcceptFwMark(t *testing.T) { + skip.If(t, testEnv.IsRootless()) + ctx := setupTest(t) + + d := daemon.New(t) + d.StartWithBusybox(ctx, t, "--bridge-accept-fwmark=2/3") + t.Cleanup(func() { d.Stop(t) }) + + c := d.NewClientT(t) + t.Cleanup(func() { c.Close() }) + + // Simulate the remote host. 
+ + l3 := networking.NewL3Segment(t, "test-routed-open-ports", + netip.MustParsePrefix("192.168.124.1/24"), + netip.MustParsePrefix("fdc0:36dc:a4dd::1/64")) + t.Cleanup(func() { l3.Destroy(t) }) + + // "docker" is the host where dockerd is running. + l3.AddHost(t, "docker", networking.CurrentNetns, "eth-test", + netip.MustParsePrefix("192.168.124.2/24"), + netip.MustParsePrefix("fdc0:36dc:a4dd::2/64")) + // "remote" simulates the remote host. + l3.AddHost(t, "remote", "test-remote-host", "eth0", + netip.MustParsePrefix("192.168.124.3/24"), + netip.MustParsePrefix("fdc0:36dc:a4dd::3/64")) + // Add default routes to the "docker" Host from the "remote" Host. + l3.Hosts["remote"].MustRun(t, "ip", "route", "add", "default", "via", "192.168.124.2") + l3.Hosts["remote"].MustRun(t, "ip", "-6", "route", "add", "default", "via", "fdc0:36dc:a4dd::2") + + // Create a network and run a container on it. + // Don't publish any ports. + const netName = "test-acceptfwmark" + network.CreateNoError(ctx, t, c, netName, + network.WithOption(bridge.BridgeName, "br-acceptfwmark"), + network.WithOption(bridge.TrustedHostInterfaces, "eth-test"), + network.WithIPv6(), + ) + t.Cleanup(func() { + network.RemoveNoError(ctx, t, c, netName) + }) + + ctrId := container.Run(ctx, t, c, + container.WithNetworkMode(netName), + container.WithCmd("httpd", "-f"), + ) + t.Cleanup(func() { + c.ContainerRemove(ctx, ctrId, containertypes.RemoveOptions{Force: true}) + }) + + insp := container.Inspect(ctx, t, c, ctrId) + ctrIPv4 := insp.NetworkSettings.Networks[netName].IPAddress + ctrIPv6 := insp.NetworkSettings.Networks[netName].GlobalIPv6Address + + const ( + httpSuccess = "404 Not Found" + httpFail = "Connection timed out" + pingSuccess = 0 + pingFail = 1 + ) + + testPing := func(t *testing.T, cmd, addr string, expExit int) { + t.Helper() + t.Parallel() + l3.Hosts["remote"].Do(t, func() { + t.Helper() + pingRes := icmd.RunCommand(cmd, "--numeric", "--count=1", "--timeout=3", addr) + assert.Check(t, 
pingRes.ExitCode == expExit, "%s %s -> out:%s err:%s", + cmd, addr, pingRes.Stdout(), pingRes.Stderr()) + }) + } + testHttp := func(t *testing.T, addr, port, expOut string) { + t.Helper() + t.Parallel() + l3.Hosts["remote"].Do(t, func() { + t.Helper() + u := "http://" + net.JoinHostPort(addr, port) + res := icmd.RunCommand("curl", "--max-time", "3", "--show-error", "--silent", u) + assert.Check(t, is.Contains(res.Combined(), expOut), "url:%s", u) + }) + } + + test := func(name string, expPing int, expHttp string) { + t.Run(name, func(t *testing.T) { + t.Run("v4/ping", func(t *testing.T) { + testPing(t, "ping", ctrIPv4, expPing) + }) + t.Run("v6/ping", func(t *testing.T) { + testPing(t, "ping6", ctrIPv6, expPing) + }) + t.Run("v4/http", func(t *testing.T) { + testHttp(t, ctrIPv4, "80", expHttp) + }) + t.Run("v6/http", func(t *testing.T) { + testHttp(t, ctrIPv6, "80", expHttp) + }) + }) + } + test("nofwmark", pingFail, httpFail) + + // This nftables rule will work if --firewall-backend=iptables, as long as it's iptables-nft. + cmd := icmd.Command("nft", "-f", "-") + res := icmd.RunCmd(cmd, icmd.WithStdin(strings.NewReader(` + table inet test-acceptfwmark { + chain raw-PREROUTING { + type filter hook prerouting priority raw + iifname "eth-test" counter mark set 0xe + } + } + `))) + res.Assert(t, icmd.Success) + defer func() { + icmd.RunCommand("nft", "delete table inet test-acceptfwmark").Assert(t, icmd.Success) + }() + + test("fwmark", pingSuccess, httpSuccess) +} + // TestRoutedNonGateway checks whether a published container port on an endpoint in a // gateway mode "routed" network is accessible when the routed network is not providing // the container's default gateway. 
diff --git a/man/dockerd.8.md b/man/dockerd.8.md index 042f6ed0b7..c6043ca215 100644 --- a/man/dockerd.8.md +++ b/man/dockerd.8.md @@ -10,6 +10,7 @@ dockerd - Enable daemon mode [**-b**|**--bridge**[=*BRIDGE*]] [**--bip**[=*BIP*]] [**--bip6**[=*BIP*]] +[**--bridge-accept-fwmark**[=*[]*]] [**--cgroup-parent**[=*[]*]] [**--config-file**[=*path*]] [**--containerd**[=*SOCKET-PATH*]] @@ -140,6 +141,9 @@ $ sudo dockerd --add-runtime runc=runc --add-runtime custom=/usr/local/bin/my-ru Use the provided CIDR notation IPv6 address for the default bridge network; Mutually exclusive of \-b +**--bridge-accept-fwmark**="" +Bridge networks will accept packets with this firewall mark/mask. + **--cgroup-parent**="" Set parent cgroup for all containers. Default is "/docker" for fs cgroup driver and "system.slice" for systemd cgroup driver.