From 976f855f685b7f49e9adf949d544f480d47958e4 Mon Sep 17 00:00:00 2001 From: Rob Murray Date: Wed, 14 May 2025 10:03:07 +0100 Subject: [PATCH 1/3] Add OTEL span for nftables updates Signed-off-by: Rob Murray --- libnetwork/internal/nftables/nftables_linux.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/libnetwork/internal/nftables/nftables_linux.go b/libnetwork/internal/nftables/nftables_linux.go index 62065f0c96..884263d748 100644 --- a/libnetwork/internal/nftables/nftables_linux.go +++ b/libnetwork/internal/nftables/nftables_linux.go @@ -52,8 +52,12 @@ import ( "text/template" "github.com/containerd/log" + "go.opentelemetry.io/otel" ) +// Prefix for OTEL span names. +const spanPrefix = "libnetwork.internal.nftables" + var ( // nftPath is the path of the "nft" tool, set by [Enable] and left empty if the tool // is not present - in which case, nftables is disabled. @@ -696,6 +700,9 @@ func parseTemplate() error { // nftApply runs the "nft" command. func nftApply(ctx context.Context, nftCmd []byte) error { + ctx, span := otel.Tracer("").Start(ctx, spanPrefix+".nftApply") + defer span.End() + if !Enabled() { return errors.New("nftables is not enabled") } From 06afbe9618ce8dcb3da420ebe18dace1ef7a3efc Mon Sep 17 00:00:00 2001 From: Rob Murray Date: Wed, 14 May 2025 10:03:42 +0100 Subject: [PATCH 2/3] Check nftables is enabled before applying updates Signed-off-by: Rob Murray --- libnetwork/internal/nftables/nftables_linux.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/libnetwork/internal/nftables/nftables_linux.go b/libnetwork/internal/nftables/nftables_linux.go index 884263d748..c78656a128 100644 --- a/libnetwork/internal/nftables/nftables_linux.go +++ b/libnetwork/internal/nftables/nftables_linux.go @@ -259,9 +259,12 @@ table {{$family}} {{$tableName}} { // Apply makes incremental updates to nftables, corresponding to changes to the [TableRef] // since Apply was last called. func (t TableRef) Apply(ctx context.Context) error { - var buf bytes.Buffer + if !Enabled() { + return errors.New("nftables is not enabled") + } // Update nftables. + var buf bytes.Buffer if err := incrementalUpdateTempl.Execute(&buf, t.t); err != nil { return fmt.Errorf("failed to execute template nft ruleset: %w", err) } From 350bb5197a76e2771e21456eb6634f202d28cab5 Mon Sep 17 00:00:00 2001 From: Rob Murray Date: Thu, 24 Apr 2025 17:13:14 +0100 Subject: [PATCH 3/3] nftables: attempt a table-reload after an Apply error Signed-off-by: Rob Murray --- .../internal/nftables/nftables_linux.go | 117 +++++++++++++++--- .../internal/nftables/nftables_linux_test.go | 49 +++++++- .../testdata/TestReload_created.golden | 18 +++ .../testdata/TestReload_recovered.golden | 17 +++ .../testdata/TestReload_reloaded.golden | 18 +++ .../testdata/TestTable_created.golden | 4 - 6 files changed, 203 insertions(+), 20 deletions(-) create mode 100644 libnetwork/internal/nftables/testdata/TestReload_created.golden create mode 100644 libnetwork/internal/nftables/testdata/TestReload_recovered.golden create mode 100644 libnetwork/internal/nftables/testdata/TestReload_reloaded.golden delete mode 100644 libnetwork/internal/nftables/testdata/TestTable_created.golden diff --git a/libnetwork/internal/nftables/nftables_linux.go b/libnetwork/internal/nftables/nftables_linux.go index c78656a128..8bf8179cee 100644 --- a/libnetwork/internal/nftables/nftables_linux.go +++ b/libnetwork/internal/nftables/nftables_linux.go @@ -64,6 +64,8 @@ var ( nftPath string // incrementalUpdateTempl is a parsed text/template, used to apply incremental updates. incrementalUpdateTempl *template.Template + // reloadTempl is a parsed text/template, used to apply a whole table. + reloadTempl *template.Template // enableOnce is used by [Enable] to avoid checking the path for "nft" more than once. enableOnce sync.Once ) @@ -256,6 +258,42 @@ table {{$family}} {{$tableName}} { {{end}}{{end}} ` +// reloadTemplText is used with text/template to generate an nftables command file +// (which will be applied atomically), to fully re-create a table. +// +// It first declares the table so if it doesn't already exist, it can be deleted. +// Then it deletes the table and re-creates it. +const reloadTemplText = `{{$family := .Family}}{{$tableName := .Name}} +table {{$family}} {{$tableName}} {} +delete table {{$family}} {{$tableName}} +table {{$family}} {{$tableName}} { + {{range .VMaps}}map {{.Name}} { + type {{.ElementType}} : verdict + {{if len .Flags}}flags{{range .Flags}} {{.}}{{end}}{{end}} + {{if .Elements}}elements = { + {{range $k,$v := .Elements}}{{$k}} : {{$v}}, + {{end -}} + }{{end}} + } + {{end}} + {{range .Sets}}set {{.Name}} { + type {{.ElementType}} + {{if len .Flags}}flags{{range .Flags}} {{.}}{{end}}{{end}} + {{if .Elements}}elements = { + {{range $k,$v := .Elements}}{{$k}}, + {{end -}} + }{{end}} + } + {{end}} + {{range .Chains}}chain {{.Name}} { + {{if .ChainType}}type {{.ChainType}} hook {{.Hook}} priority {{.Priority}}; policy {{.Policy}}{{end}} + {{range .Rules}}{{.}} + {{end}} + } + {{end}} +} +` + // Apply makes incremental updates to nftables, corresponding to changes to the [TableRef] // since Apply was last called. func (t TableRef) Apply(ctx context.Context) error { @@ -268,6 +306,7 @@ func (t TableRef) Apply(ctx context.Context) error { if err := incrementalUpdateTempl.Execute(&buf, t.t); err != nil { return fmt.Errorf("failed to execute template nft ruleset: %w", err) } + if err := nftApply(ctx, buf.Bytes()); err != nil { // On error, log a line-numbered version of the generated "nft" input (because // nft error messages refer to line numbers). @@ -278,25 +317,51 @@ func (t TableRef) Apply(ctx context.Context) error { sb.Write(line) } log.G(ctx).Error("nftables: failed to update nftables:\n", sb.String(), "\n", err) + + // It's possible something destructive has happened to nftables. For example, in + // integration-cli tests, tests start daemons in the same netns as the integration + // test's own daemon. They don't always use their own daemon, but they tend to leave + // behind networks for the test infrastructure to clean up between tests. Starting + // a daemon flushes the "docker-bridges" table, so the cleanup fails to delete a + // rule that's been flushed. So, try reloading the whole table to get back in-sync. + return t.Reload(ctx) + } + + // Note that updates have been applied. + t.t.updatesApplied() + return nil +} + +// Reload deletes the table, then re-creates it, atomically. +func (t TableRef) Reload(ctx context.Context) error { + if !Enabled() { + return errors.New("nftables is not enabled") + } + + ctx = log.WithLogger(ctx, log.G(ctx).WithFields(log.Fields{"table": t.t.Name, "family": t.t.Family})) + log.G(ctx).Warn("nftables: reloading table") + + // Build the update. + var buf bytes.Buffer + if err := reloadTempl.Execute(&buf, t.t); err != nil { + return fmt.Errorf("failed to execute reload template: %w", err) + } + + if err := nftApply(ctx, buf.Bytes()); err != nil { + // On error, log a line-numbered version of the generated "nft" input (because + // nft error messages refer to line numbers). + var sb strings.Builder + for i, line := range bytes.SplitAfter(buf.Bytes(), []byte("\n")) { + sb.WriteString(strconv.Itoa(i + 1)) + sb.WriteString(":\t") + sb.Write(line) + } + log.G(ctx).Error("nftables: failed to reload nftable:\n", sb.String(), "\n", err) return err } // Note that updates have been applied. - t.t.DeleteChainCommands = t.t.DeleteChainCommands[:0] - for _, c := range t.t.Chains { - c.Dirty = false - } - for _, m := range t.t.VMaps { - m.Dirty = false - m.AddedElements = map[string]string{} - m.DeletedElements = map[string]struct{}{} - } - for _, s := range t.t.Sets { - s.Dirty = false - s.AddedElements = map[string]struct{}{} - s.DeletedElements = map[string]struct{}{} - } - t.t.Dirty = false + t.t.updatesApplied() return nil } @@ -657,6 +722,24 @@ func (s SetRef) DeleteElement(element string) error { // //////////////////////////// // Internal +func (t *table) updatesApplied() { + t.DeleteChainCommands = t.DeleteChainCommands[:0] + for _, c := range t.Chains { + c.Dirty = false + } + for _, m := range t.VMaps { + m.Dirty = false + m.AddedElements = map[string]string{} + m.DeletedElements = map[string]struct{}{} + } + for _, s := range t.Sets { + s.Dirty = false + s.AddedElements = map[string]struct{}{} + s.DeletedElements = map[string]struct{}{} + } + t.Dirty = false +} + /* Can't make text/template range over this, not sure why ... func (c *chain) Rules() iter.Seq[string] { groups := make([]int, 0, len(c.ruleGroups)) @@ -698,6 +781,10 @@ func parseTemplate() error { if err != nil { return fmt.Errorf("parsing 'incrementalUpdateTemplText': %w", err) } + reloadTempl, err = template.New("ruleset").Parse(reloadTemplText) + if err != nil { + return fmt.Errorf("parsing 'reloadTemplText': %w", err) + } return nil } diff --git a/libnetwork/internal/nftables/nftables_linux_test.go b/libnetwork/internal/nftables/nftables_linux_test.go index af694b25cc..b73afd2e9c 100644 --- a/libnetwork/internal/nftables/nftables_linux_test.go +++ b/libnetwork/internal/nftables/nftables_linux_test.go @@ -36,6 +36,7 @@ func testSetup(t *testing.T) func() { func disable() { incrementalUpdateTempl = nil nftPath = "" + reloadTempl = nil enableOnce = sync.Once{} } @@ -44,7 +45,7 @@ func applyAndCheck(t *testing.T, tbl TableRef, goldenFilename string) { err := tbl.Apply(context.Background()) assert.Check(t, err) res := icmd.RunCommand("nft", "list", "ruleset") - assert.Check(t, is.Equal(res.ExitCode, 0)) + res.Assert(t, icmd.Success) golden.Assert(t, res.Combined(), goldenFilename) } @@ -250,3 +251,49 @@ func TestSet(t *testing.T) { applyAndCheck(t, tbl4, t.Name()+"_deleted4.golden") applyAndCheck(t, tbl6, t.Name()+"_deleted46.golden") } + +func TestReload(t *testing.T) { + defer testSetup(t)() + + // Create a table with some stuff in it. + const tableName = "this_is_a_table" + tbl, err := NewTable(IPv4, tableName) + assert.NilError(t, err) + bc, err := tbl.BaseChain("a_base_chain", BaseChainTypeFilter, BaseChainHookForward, BaseChainPriorityFilter) + assert.NilError(t, err) + err = bc.AppendRule(0, "counter") + assert.NilError(t, err) + m := tbl.InterfaceVMap("this_is_a_vmap") + err = m.AddElement("eth0", "return") + assert.Check(t, err) + err = m.AddElement("eth1", "return") + assert.Check(t, err) + err = tbl.PrefixSet("set4").AddElement("192.0.2.0/24") + assert.Check(t, err) + applyAndCheck(t, tbl, t.Name()+"_created.golden") + + // Delete the underlying nftables table. + deleteTable := func() { + t.Helper() + res := icmd.RunCommand("nft", "delete", "table", string(IPv4), tableName) + res.Assert(t, icmd.Success) + res = icmd.RunCommand("nft", "list", "ruleset") + res.Assert(t, icmd.Success) + assert.Check(t, is.Equal(res.Combined(), "")) + } + deleteTable() + + // Reconstruct the nftables table. + err = tbl.Reload(context.Background()) + assert.Check(t, err) + applyAndCheck(t, tbl, t.Name()+"_reloaded.golden") + + // Delete again. + deleteTable() + + // Check implicit/recovery reload - only deleting something that's gone missing + // from a vmap/set will trigger this. + err = m.DeleteElement("eth1") + assert.Check(t, err) + applyAndCheck(t, tbl, t.Name()+"_recovered.golden") +} diff --git a/libnetwork/internal/nftables/testdata/TestReload_created.golden b/libnetwork/internal/nftables/testdata/TestReload_created.golden new file mode 100644 index 0000000000..f69a290c70 --- /dev/null +++ b/libnetwork/internal/nftables/testdata/TestReload_created.golden @@ -0,0 +1,18 @@ +table ip this_is_a_table { + map this_is_a_vmap { + type ifname : verdict + elements = { "eth0" : return, + "eth1" : return } + } + + set set4 { + type ipv4_addr + flags interval + elements = { 192.0.2.0/24 } + } + + chain a_base_chain { + type filter hook forward priority filter; policy accept; + counter packets 0 bytes 0 + } +} diff --git a/libnetwork/internal/nftables/testdata/TestReload_recovered.golden b/libnetwork/internal/nftables/testdata/TestReload_recovered.golden new file mode 100644 index 0000000000..98d7036061 --- /dev/null +++ b/libnetwork/internal/nftables/testdata/TestReload_recovered.golden @@ -0,0 +1,17 @@ +table ip this_is_a_table { + map this_is_a_vmap { + type ifname : verdict + elements = { "eth0" : return } + } + + set set4 { + type ipv4_addr + flags interval + elements = { 192.0.2.0/24 } + } + + chain a_base_chain { + type filter hook forward priority filter; policy accept; + counter packets 0 bytes 0 + } +} diff --git a/libnetwork/internal/nftables/testdata/TestReload_reloaded.golden b/libnetwork/internal/nftables/testdata/TestReload_reloaded.golden new file mode 100644 index 0000000000..f69a290c70 --- /dev/null +++ b/libnetwork/internal/nftables/testdata/TestReload_reloaded.golden @@ -0,0 +1,18 @@ +table ip this_is_a_table { + map this_is_a_vmap { + type ifname : verdict + elements = { "eth0" : return, + "eth1" : return } + } + + set set4 { + type ipv4_addr + flags interval + elements = { 192.0.2.0/24 } + } + + chain a_base_chain { + type filter hook forward priority filter; policy accept; + counter packets 0 bytes 0 + } +} diff --git a/libnetwork/internal/nftables/testdata/TestTable_created.golden b/libnetwork/internal/nftables/testdata/TestTable_created.golden deleted file mode 100644 index 3bb18c8761..0000000000 --- a/libnetwork/internal/nftables/testdata/TestTable_created.golden +++ /dev/null @@ -1,4 +0,0 @@ -table ip ipv4_table { -} -table ip6 ipv6_table { -}