From 224d7291df2170929d395a0c4b79edea930d5b8b Mon Sep 17 00:00:00 2001 From: Albin Kerouanton Date: Mon, 22 Apr 2024 07:06:34 +0200 Subject: [PATCH] container: add a span to CheckpointTo Signed-off-by: Albin Kerouanton --- container/container.go | 10 +++++++++- container/view_test.go | 11 ++++++----- daemon/container.go | 2 +- daemon/container_operations.go | 4 ++-- daemon/daemon.go | 8 ++++---- daemon/delete.go | 2 +- daemon/kill.go | 2 +- daemon/monitor.go | 12 ++++++------ daemon/pause.go | 2 +- daemon/rename.go | 4 ++-- daemon/start.go | 4 ++-- daemon/start_unix.go | 4 +++- daemon/unpause.go | 2 +- daemon/update.go | 4 ++-- 14 files changed, 41 insertions(+), 30 deletions(-) diff --git a/container/container.go b/container/container.go index 9809124d8e..dfc056a427 100644 --- a/container/container.go +++ b/container/container.go @@ -42,6 +42,9 @@ import ( "github.com/moby/sys/symlink" ocispec "github.com/opencontainers/image-spec/specs-go/v1" "github.com/pkg/errors" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" ) const ( @@ -200,7 +203,12 @@ func (container *Container) toDisk() (*Container, error) { // CheckpointTo makes the Container's current state visible to queries, and persists state. // Callers must hold a Container lock. -func (container *Container) CheckpointTo(store *ViewDB) error { +func (container *Container) CheckpointTo(ctx context.Context, store *ViewDB) error { + ctx, span := otel.Tracer("").Start(ctx, "container.CheckpointTo", trace.WithAttributes( + attribute.String("container.ID", container.ID), + attribute.String("container.Name", container.Name))) + defer span.End() + deepCopy, err := container.toDisk() if err != nil { return err diff --git a/container/view_test.go b/container/view_test.go index 2c815478f1..d489ec7e84 100644 --- a/container/view_test.go +++ b/container/view_test.go @@ -1,6 +1,7 @@ package container // import "github.com/docker/docker/container" import ( + "context" "math/rand" "os" "path/filepath" @@ -46,7 +47,7 @@ func TestViewSaveDelete(t *testing.T) { t.Fatal(err) } c := newContainer(t) - if err := c.CheckpointTo(db); err != nil { + if err := c.CheckpointTo(context.Background(), db); err != nil { t.Fatal(err) } if err := db.Delete(c); err != nil { @@ -61,11 +62,11 @@ func TestViewAll(t *testing.T) { two = newContainer(t) ) one.Pid = 10 - if err := one.CheckpointTo(db); err != nil { + if err := one.CheckpointTo(context.Background(), db); err != nil { t.Fatal(err) } two.Pid = 20 - if err := two.CheckpointTo(db); err != nil { + if err := two.CheckpointTo(context.Background(), db); err != nil { t.Fatal(err) } @@ -94,7 +95,7 @@ func TestViewGet(t *testing.T) { one = newContainer(t) ) one.ImageID = "some-image-123" - if err := one.CheckpointTo(db); err != nil { + if err := one.CheckpointTo(context.Background(), db); err != nil { t.Fatal(err) } s, err := db.Snapshot().Get(one.ID) @@ -174,7 +175,7 @@ func TestViewWithHealthCheck(t *testing.T) { Status: "starting", }, } - if err := one.CheckpointTo(db); err != nil { + if err := one.CheckpointTo(context.Background(), db); err != nil { t.Fatal(err) } s, err := db.Snapshot().Get(one.ID) diff --git a/daemon/container.go b/daemon/container.go index f8df74e4be..0b568120fd 100644 --- a/daemon/container.go +++ b/daemon/container.go @@ -117,7 +117,7 @@ func (daemon *Daemon) Register(c *container.Container) error { defer c.Unlock() daemon.containers.Add(c.ID, c) - return c.CheckpointTo(daemon.containersReplica) + return c.CheckpointTo(context.TODO(), daemon.containersReplica) } func (daemon *Daemon) newContainer(name string, operatingSystem string, config *containertypes.Config, hostConfig *containertypes.HostConfig, imgID image.ID, managed bool) (*container.Container, error) { diff --git a/daemon/container_operations.go b/daemon/container_operations.go index a2e494b7a8..54b6af2b00 100644 --- a/daemon/container_operations.go +++ b/daemon/container_operations.go @@ -1085,7 +1085,7 @@ func (daemon *Daemon) ConnectToNetwork(ctx context.Context, container *container } } - return container.CheckpointTo(daemon.containersReplica) + return container.CheckpointTo(ctx, daemon.containersReplica) } // DisconnectFromNetwork disconnects container from network n. @@ -1119,7 +1119,7 @@ func (daemon *Daemon) DisconnectFromNetwork(ctx context.Context, container *cont return err } - if err := container.CheckpointTo(daemon.containersReplica); err != nil { + if err := container.CheckpointTo(ctx, daemon.containersReplica); err != nil { return err } diff --git a/daemon/daemon.go b/daemon/daemon.go index e0dff8024e..3d91bdd7c1 100644 --- a/daemon/daemon.go +++ b/daemon/daemon.go @@ -473,7 +473,7 @@ func (daemon *Daemon) restore(cfg *configStore) error { c.Paused = false daemon.setStateCounter(c) daemon.initHealthMonitor(c) - if err := c.CheckpointTo(daemon.containersReplica); err != nil { + if err := c.CheckpointTo(context.TODO(), daemon.containersReplica); err != nil { baseLogger.WithError(err).Error("failed to update paused container state") } c.Unlock() @@ -497,7 +497,7 @@ func (daemon *Daemon) restore(cfg *configStore) error { } c.SetStopped(&ces) daemon.Cleanup(context.TODO(), c) - if err := c.CheckpointTo(daemon.containersReplica); err != nil { + if err := c.CheckpointTo(context.TODO(), daemon.containersReplica); err != nil { baseLogger.WithError(err).Error("failed to update stopped container state") } c.Unlock() @@ -564,7 +564,7 @@ func (daemon *Daemon) restore(cfg *configStore) error { // state and leave further processing up to them. c.RemovalInProgress = false c.Dead = true - if err := c.CheckpointTo(daemon.containersReplica); err != nil { + if err := c.CheckpointTo(context.TODO(), daemon.containersReplica); err != nil { baseLogger.WithError(err).Error("failed to update RemovalInProgress container state") } else { baseLogger.Debugf("reset RemovalInProgress state for container") @@ -1615,7 +1615,7 @@ func RemapContainerdNamespaces(config *config.Config) (ns string, pluginNs strin func (daemon *Daemon) checkpointAndSave(container *container.Container) error { container.Lock() defer container.Unlock() - if err := container.CheckpointTo(daemon.containersReplica); err != nil { + if err := container.CheckpointTo(context.TODO(), daemon.containersReplica); err != nil { return fmt.Errorf("Error saving container state: %v", err) } return nil diff --git a/daemon/delete.go b/daemon/delete.go index 9ea8c395c1..c1925cee40 100644 --- a/daemon/delete.go +++ b/daemon/delete.go @@ -128,7 +128,7 @@ func (daemon *Daemon) cleanupContainer(container *container.Container, config ba // Save container state to disk. So that if error happens before // container meta file got removed from disk, then a restart of // docker should not make a dead container alive. - if err := container.CheckpointTo(daemon.containersReplica); err != nil && !os.IsNotExist(err) { + if err := container.CheckpointTo(context.WithoutCancel(context.TODO()), daemon.containersReplica); err != nil && !os.IsNotExist(err) { log.G(context.TODO()).Errorf("Error saving dying container to disk: %v", err) } container.Unlock() diff --git a/daemon/kill.go b/daemon/kill.go index 420fdfedb5..cd80c6fdcb 100644 --- a/daemon/kill.go +++ b/daemon/kill.go @@ -88,7 +88,7 @@ func (daemon *Daemon) killWithSignal(container *containerpkg.Container, stopSign if !daemon.IsShuttingDown() { container.HasBeenManuallyStopped = true - if err := container.CheckpointTo(daemon.containersReplica); err != nil { + if err := container.CheckpointTo(context.WithoutCancel(context.TODO()), daemon.containersReplica); err != nil { log.G(context.TODO()).WithFields(log.Fields{ "error": err, "container": container.ID, diff --git a/daemon/monitor.go b/daemon/monitor.go index 65c82fdf85..de15f36c7e 100644 --- a/daemon/monitor.go +++ b/daemon/monitor.go @@ -109,7 +109,7 @@ func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontaine defer c.Unlock() // needs to be called before autoRemove daemon.setStateCounter(c) - checkpointErr := c.CheckpointTo(daemon.containersReplica) + checkpointErr := c.CheckpointTo(context.TODO(), daemon.containersReplica) daemon.LogContainerEventWithAttributes(c, events.ActionDie, attributes) @@ -134,7 +134,7 @@ func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontaine c.Lock() c.SetStopped(&exitStatus) daemon.setStateCounter(c) - c.CheckpointTo(daemon.containersReplica) + c.CheckpointTo(context.TODO(), daemon.containersReplica) c.Unlock() defer daemon.autoRemove(&cfg.Config, c) if err != restartmanager.ErrRestartCanceled { @@ -165,7 +165,7 @@ func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei defer c.Unlock() c.OOMKilled = true daemon.updateHealthMonitor(c) - if err := c.CheckpointTo(daemon.containersReplica); err != nil { + if err := c.CheckpointTo(context.TODO(), daemon.containersReplica); err != nil { return err } @@ -261,7 +261,7 @@ func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei daemon.initHealthMonitor(c) - if err := c.CheckpointTo(daemon.containersReplica); err != nil { + if err := c.CheckpointTo(context.TODO(), daemon.containersReplica); err != nil { return err } daemon.LogContainerEvent(c, events.ActionStart) @@ -275,7 +275,7 @@ func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei c.Paused = true daemon.setStateCounter(c) daemon.updateHealthMonitor(c) - if err := c.CheckpointTo(daemon.containersReplica); err != nil { + if err := c.CheckpointTo(context.TODO(), daemon.containersReplica); err != nil { return err } daemon.LogContainerEvent(c, events.ActionPause) @@ -289,7 +289,7 @@ func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei daemon.setStateCounter(c) daemon.updateHealthMonitor(c) - if err := c.CheckpointTo(daemon.containersReplica); err != nil { + if err := c.CheckpointTo(context.TODO(), daemon.containersReplica); err != nil { return err } daemon.LogContainerEvent(c, events.ActionUnPause) diff --git a/daemon/pause.go b/daemon/pause.go index 1071d53ba1..a81a1678d8 100644 --- a/daemon/pause.go +++ b/daemon/pause.go @@ -49,7 +49,7 @@ func (daemon *Daemon) containerPause(container *container.Container) error { daemon.updateHealthMonitor(container) daemon.LogContainerEvent(container, events.ActionPause) - if err := container.CheckpointTo(daemon.containersReplica); err != nil { + if err := container.CheckpointTo(context.WithoutCancel(context.TODO()), daemon.containersReplica); err != nil { log.G(context.TODO()).WithError(err).Warn("could not save container to disk") } diff --git a/daemon/rename.go b/daemon/rename.go index 4983e58d5e..befb73dfd3 100644 --- a/daemon/rename.go +++ b/daemon/rename.go @@ -78,7 +78,7 @@ func (daemon *Daemon) ContainerRename(oldName, newName string) (retErr error) { daemon.linkIndex.unlink(oldName+k, v, container) daemon.containersReplica.ReleaseName(oldName + k) } - if err := container.CheckpointTo(daemon.containersReplica); err != nil { + if err := container.CheckpointTo(context.TODO(), daemon.containersReplica); err != nil { return err } @@ -92,7 +92,7 @@ func (daemon *Daemon) ContainerRename(oldName, newName string) (retErr error) { defer func() { if retErr != nil { container.Name = oldName - if err := container.CheckpointTo(daemon.containersReplica); err != nil { + if err := container.CheckpointTo(context.WithoutCancel(context.TODO()), daemon.containersReplica); err != nil { log.G(context.TODO()).WithFields(log.Fields{ "containerID": container.ID, "error": err, diff --git a/daemon/start.go b/daemon/start.go index 64f0721d1e..c0119e4fdf 100644 --- a/daemon/start.go +++ b/daemon/start.go @@ -93,7 +93,7 @@ func (daemon *Daemon) containerStart(ctx context.Context, daemonCfg *configStore if container.ExitCode() == 0 { container.SetExitCode(exitUnknown) } - if err := container.CheckpointTo(daemon.containersReplica); err != nil { + if err := container.CheckpointTo(context.WithoutCancel(ctx), daemon.containersReplica); err != nil { log.G(ctx).Errorf("%s: failed saving state on start failure: %v", container.ID, err) } container.Reset(false) @@ -211,7 +211,7 @@ func (daemon *Daemon) containerStart(ctx context.Context, daemonCfg *configStore daemon.initHealthMonitor(container) - if err := container.CheckpointTo(daemon.containersReplica); err != nil { + if err := container.CheckpointTo(context.WithoutCancel(ctx), daemon.containersReplica); err != nil { log.G(ctx).WithError(err).WithField("container", container.ID). Errorf("failed to store container") } diff --git a/daemon/start_unix.go b/daemon/start_unix.go index fb59425a29..d585ddbd35 100644 --- a/daemon/start_unix.go +++ b/daemon/start_unix.go @@ -3,6 +3,8 @@ package daemon // import "github.com/docker/docker/daemon" import ( + "context" + "github.com/docker/docker/container" ) @@ -11,7 +13,7 @@ func (daemon *Daemon) getLibcontainerdCreateOptions(daemonCfg *configStore, cont // Ensure a runtime has been assigned to this container if container.HostConfig.Runtime == "" { container.HostConfig.Runtime = daemonCfg.Runtimes.Default - container.CheckpointTo(daemon.containersReplica) + container.CheckpointTo(context.WithoutCancel(context.TODO()), daemon.containersReplica) } shim, opts, err := daemonCfg.Runtimes.Get(container.HostConfig.Runtime) diff --git a/daemon/unpause.go b/daemon/unpause.go index 2ea14c1fe7..784a8cd410 100644 --- a/daemon/unpause.go +++ b/daemon/unpause.go @@ -41,7 +41,7 @@ func (daemon *Daemon) containerUnpause(ctr *container.Container) error { daemon.updateHealthMonitor(ctr) daemon.LogContainerEvent(ctr, events.ActionUnPause) - if err := ctr.CheckpointTo(daemon.containersReplica); err != nil { + if err := ctr.CheckpointTo(context.WithoutCancel(context.TODO()), daemon.containersReplica); err != nil { log.G(context.TODO()).WithError(err).Warn("could not save container to disk") } diff --git a/daemon/update.go b/daemon/update.go index 69fef16bcc..c07b636525 100644 --- a/daemon/update.go +++ b/daemon/update.go @@ -45,7 +45,7 @@ func (daemon *Daemon) update(name string, hostConfig *container.HostConfig) erro ctr.Lock() if !ctr.RemovalInProgress && !ctr.Dead { ctr.HostConfig = &backupHostConfig - ctr.CheckpointTo(daemon.containersReplica) + ctr.CheckpointTo(context.WithoutCancel(context.TODO()), daemon.containersReplica) } ctr.Unlock() } @@ -63,7 +63,7 @@ func (daemon *Daemon) update(name string, hostConfig *container.HostConfig) erro ctr.Unlock() return errCannotUpdate(ctr.ID, err) } - if err := ctr.CheckpointTo(daemon.containersReplica); err != nil { + if err := ctr.CheckpointTo(context.TODO(), daemon.containersReplica); err != nil { restoreConfig = true ctr.Unlock() return errCannotUpdate(ctr.ID, err)