Merge pull request #47906 from akerouanton/libnet-add-otel-spans-v3

api, daemon, libnet: Create OTel spans at various places
Albin Kerouanton
2024-06-14 17:03:56 +02:00
committed by GitHub
77 changed files with 914 additions and 542 deletions

@@ -11,6 +11,9 @@ import (
 	"github.com/docker/docker/errdefs"
 	"github.com/docker/docker/libcontainerd"
 	"github.com/pkg/errors"
+	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/attribute"
+	"go.opentelemetry.io/otel/trace"
 )
 
 // validateState verifies if the container is in a non-conflicting state.
@@ -66,6 +69,11 @@ func (daemon *Daemon) ContainerStart(ctx context.Context, name string, checkpoin
 // between containers. The container is left waiting for a signal to
 // begin running.
 func (daemon *Daemon) containerStart(ctx context.Context, daemonCfg *configStore, container *container.Container, checkpoint string, checkpointDir string, resetRestartManager bool) (retErr error) {
+	ctx, span := otel.Tracer("").Start(ctx, "daemon.containerStart", trace.WithAttributes(
+		attribute.String("container.ID", container.ID),
+		attribute.String("container.Name", container.Name)))
+	defer span.End()
+
 	start := time.Now()
 	container.Lock()
 	defer container.Unlock()
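The hunk above shows the pattern this PR repeats across api, daemon, and libnetwork: start a span from the globally registered tracer, attach identifying attributes, and end the span when the operation returns. Below is a minimal, self-contained sketch of that same pattern, not code from this diff; the startContainer/doWork helpers and the "demo.*" span names are made up for illustration.

package main

import (
	"context"

	"go.opentelemetry.io/otel"
	"go.opentelemetry.io/otel/attribute"
	"go.opentelemetry.io/otel/trace"
)

// startContainer mirrors what daemon.containerStart now does: open a span on
// the global tracer, tag it with the container identity, and close it when
// the function returns.
func startContainer(ctx context.Context, id, name string) error {
	ctx, span := otel.Tracer("").Start(ctx, "demo.startContainer", trace.WithAttributes(
		attribute.String("container.ID", id),
		attribute.String("container.Name", name)))
	defer span.End()

	return doWork(ctx) // spans started from ctx downstream nest under this one
}

func doWork(ctx context.Context) error {
	_, span := otel.Tracer("").Start(ctx, "demo.doWork")
	defer span.End()
	return nil
}

func main() {
	// Without a TracerProvider registered via otel.SetTracerProvider, the
	// global tracer is a no-op, so this runs but records nothing.
	_ = startContainer(context.Background(), "abc123", "/demo")
}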
@@ -92,7 +100,7 @@ func (daemon *Daemon) containerStart(ctx context.Context, daemonCfg *configStore
 			if container.ExitCode() == 0 {
 				container.SetExitCode(exitUnknown)
 			}
-			if err := container.CheckpointTo(daemon.containersReplica); err != nil {
+			if err := container.CheckpointTo(context.WithoutCancel(ctx), daemon.containersReplica); err != nil {
 				log.G(ctx).Errorf("%s: failed saving state on start failure: %v", container.ID, err)
 			}
 			container.Reset(false)
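This hunk, and several below it, swap context.TODO() or the raw request context for context.WithoutCancel(ctx). WithoutCancel keeps the parent's values, including the OTel span just attached to ctx, but detaches its cancellation and deadline, so best-effort persistence and cleanup still run (and still land in the right trace) even when the caller's context has already been cancelled. A small standalone sketch of that behaviour, unrelated to the Moby code itself:

package main

import (
	"context"
	"fmt"
)

type traceKey struct{}

func main() {
	// The parent carries a value (for the daemon, the active span) and gets
	// cancelled, as a request context would be when the client disconnects.
	parent := context.WithValue(context.Background(), traceKey{}, "span-data")
	parent, cancel := context.WithCancel(parent)
	cancel()
	fmt.Println("parent err:", parent.Err()) // context canceled

	// WithoutCancel (Go 1.21+) keeps the values but drops cancellation and
	// deadline, so follow-up work such as state checkpointing can still finish.
	detached := context.WithoutCancel(parent)
	fmt.Println("detached err:", detached.Err())               // <nil>
	fmt.Println("detached value:", detached.Value(traceKey{})) // span-data
}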
@@ -113,7 +121,7 @@ func (daemon *Daemon) containerStart(ctx context.Context, daemonCfg *configStore
 		return err
 	}
 
-	if err := daemon.initializeNetworking(&daemonCfg.Config, container); err != nil {
+	if err := daemon.initializeNetworking(ctx, &daemonCfg.Config, container); err != nil {
 		return err
 	}
@@ -180,7 +188,7 @@ func (daemon *Daemon) containerStart(ctx context.Context, daemonCfg *configStore
 	startupTime := time.Now()
 	// TODO(mlaventure): we need to specify checkpoint options here
-	tsk, err := ctr.NewTask(context.TODO(), // Passing ctx caused integration tests to be stuck in the cleanup phase
+	tsk, err := ctr.NewTask(context.WithoutCancel(ctx), // passing a cancelable ctx caused integration tests to be stuck in the cleanup phase
 		checkpointDir, container.StreamConfig.Stdin() != nil || container.Config.Tty,
 		container.InitializeStdio)
 	if err != nil {
@@ -199,7 +207,7 @@ func (daemon *Daemon) containerStart(ctx context.Context, daemonCfg *configStore
 		return err
 	}
 
-	if err := tsk.Start(context.TODO()); err != nil { // passing ctx caused integration tests to be stuck in the cleanup phase
+	if err := tsk.Start(context.WithoutCancel(ctx)); err != nil { // passing a cancelable ctx caused integration tests to be stuck in the cleanup phase
 		return setExitCodeFromError(container.SetExitCode, err)
 	}
@@ -210,7 +218,7 @@ func (daemon *Daemon) containerStart(ctx context.Context, daemonCfg *configStore
 	daemon.initHealthMonitor(container)
 
-	if err := container.CheckpointTo(daemon.containersReplica); err != nil {
+	if err := container.CheckpointTo(context.WithoutCancel(ctx), daemon.containersReplica); err != nil {
 		log.G(ctx).WithError(err).WithField("container", container.ID).
 			Errorf("failed to store container")
 	}
@@ -232,7 +240,7 @@ func (daemon *Daemon) Cleanup(ctx context.Context, container *container.Containe
 		}
 	}
 
-	daemon.releaseNetwork(container)
+	daemon.releaseNetwork(ctx, container)
 
 	if err := container.UnmountIpcMount(); err != nil {
 		log.G(ctx).Warnf("%s cleanup: failed to unmount IPC: %s", container.ID, err)