mirror of
https://github.com/moby/moby.git
synced 2026-01-11 18:51:37 +00:00
NRI: add ContainerCreate hook, allow env-var adjustments
Signed-off-by: Rob Murray <rob.murray@docker.com>
This commit is contained in:
@@ -261,6 +261,10 @@ func (daemon *Daemon) create(ctx context.Context, daemonCfg *config.Config, opts
|
||||
}
|
||||
daemon.updateContainerNetworkSettings(ctr, endpointsConfigs)
|
||||
|
||||
if err := daemon.nri.CreateContainer(ctx, ctr); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := daemon.registerMountPoints(ctr, opts.params.DefaultReadOnlyNonRecursive); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -1,3 +1,19 @@
|
||||
// Package nri integrates the daemon with the NRI (Node Resource Interface) framework.
|
||||
//
|
||||
// NRI allows external plugins to observe and adjust container resources and settings
|
||||
// at creation time, and to observe container lifecycle events. These plugins run with
|
||||
// the same level of trust as the daemon itself, because they can make arbitrary
|
||||
// modifications to container settings.
|
||||
//
|
||||
// The NRI framework is implemented by https://github.com/containerd/nri - see that
|
||||
// package for more details about NRI and the framework.
|
||||
//
|
||||
// Plugins are long-running processed (not instantiated per-request like runtime shims,
|
||||
// so they can maintain state across container events). They can either be started by
|
||||
// the NRI framework itself, it is configured with directories to search for plugins
|
||||
// and config for those plugins. Or, plugins can independently, and connect to the
|
||||
// daemon via a listening socket. By default, the listening socket is disabled in this
|
||||
// implementation.
|
||||
package nri
|
||||
|
||||
import (
|
||||
@@ -5,6 +21,7 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/containerd/log"
|
||||
@@ -43,6 +60,7 @@ type Config struct {
|
||||
ContainerLister ContainerLister
|
||||
}
|
||||
|
||||
// NewNRI creates and starts a new NRI instance based on the provided configuration.
|
||||
func NewNRI(ctx context.Context, cfg Config) (*NRI, error) {
|
||||
n := &NRI{cfg: cfg}
|
||||
if !n.cfg.DaemonConfig.Enable {
|
||||
@@ -71,6 +89,7 @@ func NewNRI(ctx context.Context, cfg Config) (*NRI, error) {
|
||||
return n, nil
|
||||
}
|
||||
|
||||
// Shutdown stops the NRI instance and releases its resources.
|
||||
func (n *NRI) Shutdown(ctx context.Context) {
|
||||
n.mu.Lock()
|
||||
defer n.mu.Unlock()
|
||||
@@ -82,10 +101,76 @@ func (n *NRI) Shutdown(ctx context.Context) {
|
||||
n.nri = nil
|
||||
}
|
||||
|
||||
func (n *NRI) syncFn(ctx context.Context, syncCB adaptation.SyncCB) error {
|
||||
// CreateContainer notifies plugins of a "creation" NRI-lifecycle event for a container,
|
||||
// allowing the plugin to adjust settings before the container is created.
|
||||
func (n *NRI) CreateContainer(ctx context.Context, ctr *container.Container) error {
|
||||
n.mu.RLock()
|
||||
defer n.mu.RUnlock()
|
||||
if n.nri == nil {
|
||||
return nil
|
||||
}
|
||||
// ctr.State can safely be locked here, but there's no need because it's expected
|
||||
// to be newly created and not yet accessible in any other thread.
|
||||
|
||||
nriPod, nriCtr, err := containerToNRI(ctr)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// TODO(robmry): call RunPodSandbox?
|
||||
|
||||
resp, err := n.nri.CreateContainer(ctx, &adaptation.CreateContainerRequest{
|
||||
Pod: nriPod,
|
||||
Container: nriCtr,
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if resp.GetUpdate() != nil {
|
||||
return errors.New("container update is not supported")
|
||||
}
|
||||
if err := applyAdjustments(ctx, ctr, resp.GetAdjust()); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// syncFn is called when a plugin registers, allowing the plugin to learn the
|
||||
// current state of all containers.
|
||||
func (n *NRI) syncFn(ctx context.Context, syncCB adaptation.SyncCB) error {
|
||||
// Claim a write lock so containers can't be created/removed until sync is done.
|
||||
// The plugin will get create/remove events after the sync, so won't miss anything.
|
||||
//
|
||||
// If a container's state changes during the sync, the plugin may see already-modified
|
||||
// state, then get a change notification with no changes.
|
||||
n.mu.Lock()
|
||||
defer n.mu.Unlock()
|
||||
|
||||
containers := n.cfg.ContainerLister.List()
|
||||
nriPods := make([]*adaptation.PodSandbox, 0, len(containers))
|
||||
nriCtrs := make([]*adaptation.Container, 0, len(containers))
|
||||
for _, ctr := range containers {
|
||||
ctr.State.Lock()
|
||||
nriPod, nriCtr, err := containerToNRI(ctr)
|
||||
ctr.State.Unlock()
|
||||
if err != nil {
|
||||
return fmt.Errorf("converting container %s to NRI: %w", ctr.ID, err)
|
||||
}
|
||||
nriPods = append(nriPods, nriPod)
|
||||
nriCtrs = append(nriCtrs, nriCtr)
|
||||
}
|
||||
updates, err := syncCB(ctx, nriPods, nriCtrs)
|
||||
if err != nil {
|
||||
return fmt.Errorf("synchronizing NRI state: %w", err)
|
||||
}
|
||||
if len(updates) > 0 {
|
||||
return errors.New("container updates during sync are not implemented")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// updateFn may be called asynchronously by plugins.
|
||||
func (n *NRI) updateFn(context.Context, []*adaptation.ContainerUpdate) ([]*adaptation.ContainerUpdate, error) {
|
||||
return nil, errors.New("not implemented")
|
||||
}
|
||||
@@ -128,3 +213,95 @@ func nriOptions(cfg opts.NRIOpts) []adaptation.Option {
|
||||
}
|
||||
return res
|
||||
}
|
||||
|
||||
func containerToNRI(ctr *container.Container) (*adaptation.PodSandbox, *adaptation.Container, error) {
|
||||
// TODO(robmry) - this implementation is incomplete, most fields are not populated.
|
||||
//
|
||||
// Many of these fields have straightforward mappings from Docker container fields,
|
||||
// but each will need consideration and tests for both outgoing settings and
|
||||
// adjutments from plugins.
|
||||
//
|
||||
// Docker doesn't have pods - but PodSandbox is how plugins will learn the container's
|
||||
// network namespace. So, the intent is to represent each container as having its own
|
||||
// PodSandbox, with the same ID and lifecycle as the container. We can probably represent
|
||||
// container-networking as containers sharing a pod.
|
||||
nriPod := &adaptation.PodSandbox{
|
||||
Id: ctr.ID,
|
||||
Name: ctr.Name,
|
||||
Uid: "",
|
||||
Namespace: "",
|
||||
Labels: nil,
|
||||
Annotations: nil,
|
||||
RuntimeHandler: "",
|
||||
Linux: nil,
|
||||
Pid: 0,
|
||||
Ips: nil,
|
||||
}
|
||||
|
||||
nriCtr := &adaptation.Container{
|
||||
Id: ctr.ID,
|
||||
PodSandboxId: ctr.ID,
|
||||
Name: ctr.Name,
|
||||
State: adaptation.ContainerState_CONTAINER_UNKNOWN,
|
||||
Labels: ctr.Config.Labels,
|
||||
Annotations: ctr.HostConfig.Annotations,
|
||||
Args: ctr.Config.Cmd,
|
||||
Env: ctr.Config.Env,
|
||||
Hooks: nil,
|
||||
Linux: &adaptation.LinuxContainer{
|
||||
Namespaces: nil,
|
||||
Devices: nil,
|
||||
Resources: nil,
|
||||
OomScoreAdj: nil,
|
||||
CgroupsPath: "",
|
||||
IoPriority: nil,
|
||||
SeccompProfile: nil,
|
||||
SeccompPolicy: nil,
|
||||
},
|
||||
Mounts: nil,
|
||||
Pid: uint32(ctr.Pid),
|
||||
Rlimits: nil,
|
||||
CreatedAt: 0,
|
||||
StartedAt: 0,
|
||||
FinishedAt: 0,
|
||||
ExitCode: 0,
|
||||
StatusReason: "",
|
||||
StatusMessage: "",
|
||||
CDIDevices: nil,
|
||||
}
|
||||
return nriPod, nriCtr, nil
|
||||
}
|
||||
|
||||
func applyAdjustments(ctx context.Context, ctr *container.Container, adj *adaptation.ContainerAdjustment) error {
|
||||
if adj == nil {
|
||||
return nil
|
||||
}
|
||||
if err := applyEnvVars(ctx, ctr, adj.Env); err != nil {
|
||||
return fmt.Errorf("applying environment variable adjustments: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func applyEnvVars(ctx context.Context, ctr *container.Container, envVars []*adaptation.KeyValue) error {
|
||||
if len(envVars) == 0 {
|
||||
return nil
|
||||
}
|
||||
existing := make(map[string]int, len(ctr.Config.Env))
|
||||
for i, e := range ctr.Config.Env {
|
||||
k, _, _ := strings.Cut(e, "=")
|
||||
existing[k] = i
|
||||
}
|
||||
for _, kv := range envVars {
|
||||
if kv.Key == "" {
|
||||
return errors.New("empty environment variable key")
|
||||
}
|
||||
val := kv.Key + "=" + kv.Value
|
||||
log.G(ctx).Debugf("Applying NRI env var adjustment to %s", kv.Key)
|
||||
if i, found := existing[kv.Key]; found {
|
||||
ctr.Config.Env[i] = val
|
||||
} else {
|
||||
ctr.Config.Env = append(ctr.Config.Env, val)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user