Merge pull request #49302 from robmry/create_veth_in_container

Create bridge veth in container netns
This commit is contained in:
Albin Kerouanton
2025-01-28 11:47:33 +01:00
committed by GitHub
12 changed files with 265 additions and 36 deletions

View File

@@ -912,6 +912,10 @@ func buildCreateEndpointOptions(c *container.Container, n *libnetwork.Network, e
}
}
if path, ok := sb.NetnsPath(); ok {
createOptions = append(createOptions, libnetwork.WithNetnsPath(path))
}
return createOptions, nil
}

View File

@@ -127,6 +127,14 @@ type InterfaceInfo interface {
// AddressIPv6 returns the IPv6 address.
AddressIPv6() *net.IPNet
// NetnsPath returns the path of the network namespace, if there is one. Else "".
NetnsPath() string
// SetCreatedInContainer can be called by the driver to indicate that it's
// created the network interface in the container's network namespace (so,
// it doesn't need to be moved there).
SetCreatedInContainer(bool)
}
// InterfaceNameInfo provides a go interface for the drivers to assign names

View File

@@ -8,6 +8,7 @@ import (
"os"
"strconv"
"sync"
"syscall"
"github.com/containerd/log"
"github.com/docker/docker/errdefs"
@@ -26,6 +27,7 @@ import (
"github.com/docker/docker/libnetwork/types"
"github.com/pkg/errors"
"github.com/vishvananda/netlink"
"github.com/vishvananda/netns"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
@@ -1157,12 +1159,12 @@ func (d *driver) CreateEndpoint(ctx context.Context, nid, eid string, ifInfo dri
}
// Generate and add the interface pipe host <-> sandbox
veth := &netlink.Veth{
LinkAttrs: netlink.LinkAttrs{Name: hostIfName, TxQLen: 0},
PeerName: containerIfName,
}
if err = d.nlh.LinkAdd(veth); err != nil {
return types.InternalErrorf("failed to add the host (%s) <=> sandbox (%s) pair interfaces: %v", hostIfName, containerIfName, err)
nlhSb := d.nlh
if nlh, err := createVeth(ctx, hostIfName, containerIfName, ifInfo, d.nlh); err != nil {
return err
} else if nlh != nil {
defer nlh.Close()
nlhSb = *nlh
}
// Get the host side pipe interface handler
@@ -1179,13 +1181,13 @@ func (d *driver) CreateEndpoint(ctx context.Context, nid, eid string, ifInfo dri
}()
// Get the sandbox side pipe interface handler
sbox, err := d.nlh.LinkByName(containerIfName)
sbox, err := nlhSb.LinkByName(containerIfName)
if err != nil {
return types.InternalErrorf("failed to find sandbox side interface %s: %v", containerIfName, err)
}
defer func() {
if err != nil {
if err := d.nlh.LinkDel(sbox); err != nil {
if err := nlhSb.LinkDel(sbox); err != nil {
log.G(ctx).WithError(err).Warnf("Failed to delete sandbox side interface (%s)'s link", containerIfName)
}
}
@@ -1201,7 +1203,7 @@ func (d *driver) CreateEndpoint(ctx context.Context, nid, eid string, ifInfo dri
if err != nil {
return types.InternalErrorf("failed to set MTU on host interface %s: %v", hostIfName, err)
}
err = d.nlh.LinkSetMTU(sbox, config.Mtu)
err = nlhSb.LinkSetMTU(sbox, config.Mtu)
if err != nil {
return types.InternalErrorf("failed to set MTU on sandbox interface %s: %v", containerIfName, err)
}
@@ -1248,6 +1250,58 @@ func (d *driver) CreateEndpoint(ctx context.Context, nid, eid string, ifInfo dri
return nil
}
// createVeth creates a veth pair named hostIfName/containerIfName using nlh.
//
// If ifInfo reports a netns path, and both a netns handle and a netlink handle
// can be opened for it, the peer (containerIfName) end is created directly in
// that namespace. In that case ifInfo.SetCreatedInContainer(true) is called
// once the veth has been created, and a netlink handle in the container's
// namespace is returned; the caller must close it.
//
// If the netns path isn't available, possibly because the netns hasn't been
// created yet, or it's not possible to get a netns or netlink handle in the
// container's namespace, both ends of the veth device are created in nlh's
// netns and no netlink handle is returned (nlhCtr is nil).
//
// Only the error from creating the interface is returned. Failure to set up
// creation in the container's netns is logged, and is not an error. When an
// error is returned, no handle is returned and ifInfo is left untouched.
func createVeth(ctx context.Context, hostIfName, containerIfName string, ifInfo driverapi.InterfaceInfo, nlh nlwrap.Handle) (nlhCtr *nlwrap.Handle, retErr error) {
	veth := &netlink.Veth{
		LinkAttrs: netlink.LinkAttrs{Name: hostIfName, TxQLen: 0},
		PeerName:  containerIfName,
	}

	if nspath := ifInfo.NetnsPath(); nspath == "" {
		log.G(ctx).WithField("ifname", containerIfName).Debug("No container netns path, creating interface in host netns")
	} else if netnsh, err := netns.GetFromPath(nspath); err != nil {
		log.G(ctx).WithFields(log.Fields{
			"error":  err,
			"netns":  nspath,
			"ifname": containerIfName,
		}).Warn("No container netns, creating interface in host netns")
	} else {
		// The netns handle must stay open until LinkAdd has used it as the
		// peer namespace; it is released when this function returns.
		defer netnsh.Close()

		if nh, err := nlwrap.NewHandleAt(netnsh, syscall.NETLINK_ROUTE); err != nil {
			log.G(ctx).WithFields(log.Fields{
				"error": err,
				"netns": nspath,
			}).Warn("No netlink handle for container, creating interface in host netns")
		} else {
			veth.PeerNamespace = netlink.NsFd(netnsh)
			nlhCtr = &nh
		}
	}

	if err := nlh.LinkAdd(veth); err != nil {
		// Nothing was created, so don't leak the container-side handle.
		if nlhCtr != nil {
			nlhCtr.Close()
		}
		return nil, types.InternalErrorf("failed to add the host (%s) <=> sandbox (%s) pair interfaces: %v", hostIfName, containerIfName, err)
	}

	// Only report "created in container" once the interface really exists
	// there, so a failed LinkAdd can't leave the flag set.
	if nlhCtr != nil {
		ifInfo.SetCreatedInContainer(true)
	}
	return nlhCtr, nil
}
func (d *driver) linkUp(ctx context.Context, host netlink.Link) error {
ctx, span := otel.Tracer("").Start(ctx, "libnetwork.drivers.bridge.linkUp", trace.WithAttributes(
attribute.String("host", host.Attrs().Name)))

View File

@@ -26,9 +26,11 @@ import (
"github.com/docker/docker/libnetwork/portallocator"
"github.com/docker/docker/libnetwork/types"
"github.com/vishvananda/netlink"
"github.com/vishvananda/netns"
"golang.org/x/sys/unix"
"gotest.tools/v3/assert"
is "gotest.tools/v3/assert/cmp"
"gotest.tools/v3/icmd"
)
func TestEndpointMarshalling(t *testing.T) {
@@ -418,6 +420,84 @@ func TestCreateFullOptionsLabels(t *testing.T) {
assert.Check(t, is.Equal(te2.iface.mac.String(), macAddr))
}
// TestCreateVeth checks where createVeth places the two ends of the veth pair.
// Each case runs against a freshly created "host" netns (testnshost):
//   - no netns path: both ends are expected in the host netns, no handle returned;
//   - netns path of an existing netns: the host end is expected in the host netns,
//     the container end only in the container netns, and a handle is returned;
//   - netns path of a netns that was not created: same expectations as the
//     first case (fallback to the host netns).
func TestCreateVeth(t *testing.T) {
tests := []struct {
name string
// netnsName, if set, is used to build the netns path given to createVeth.
netnsName string
// createNetns says whether the netns named netnsName is really created.
createNetns bool
// expCreatedInContainer is the expected value of the interface's createdInContainer flag.
expCreatedInContainer bool
}{
{
name: "host netns",
},
{
name: "container netns",
netnsName: "testnsctr",
createNetns: true,
expCreatedInContainer: true,
},
{
name: "netns not created",
netnsName: "testnsctr",
},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
// Create a "host" network namespace with a netlink handle.
const hostNsName = "testnshost"
res := icmd.RunCommand("ip", "netns", "add", hostNsName)
assert.Assert(t, is.Equal(res.ExitCode, 0))
defer icmd.RunCommand("ip", "netns", "del", hostNsName)
nsh, err := netns.GetFromPath("/var/run/netns/" + hostNsName)
assert.NilError(t, err)
defer nsh.Close()
nlh, err := nlwrap.NewHandleAt(nsh)
assert.NilError(t, err)
defer nlh.Close()
// The path is passed to createVeth whether or not the netns exists.
netnsPath := ""
if tc.netnsName != "" {
netnsPath = "/var/run/netns/" + tc.netnsName
}
if tc.createNetns {
res := icmd.RunCommand("ip", "netns", "add", tc.netnsName)
assert.Assert(t, is.Equal(res.ExitCode, 0))
defer icmd.RunCommand("ip", "netns", "del", tc.netnsName)
}
const hostIfName = "vethtesth"
const containerIfName = "vethtestc"
defer func() {
// Just in case anything ends up in the host's netns, make sure it doesn't hang around ...
icmd.RunCommand("ip", "link", "del", hostIfName)
icmd.RunCommand("ip", "link", "del", containerIfName)
}()
iface := &testInterface{netnsPath: netnsPath}
nlhCtr, err := createVeth(context.Background(), hostIfName, containerIfName, iface, nlh)
assert.Check(t, err)
assert.Check(t, is.Equal(iface.createdInContainer, tc.expCreatedInContainer))
if tc.expCreatedInContainer {
// Host end in the host netns, container end only in the container netns.
assert.Check(t, nlhCtr != nil)
res := icmd.RunCommand("ip", "netns", "exec", hostNsName, "ip", "link", "show", hostIfName)
assert.Check(t, is.Equal(res.ExitCode, 0))
res = icmd.RunCommand("ip", "netns", "exec", hostNsName, "ip", "link", "show", containerIfName)
assert.Check(t, is.Equal(res.ExitCode, 1))
res = icmd.RunCommand("ip", "netns", "exec", tc.netnsName, "ip", "link", "show", containerIfName)
assert.Check(t, is.Equal(res.ExitCode, 0))
} else {
// Fallback: both ends are expected in the host netns.
assert.Check(t, nlhCtr == nil)
res := icmd.RunCommand("ip", "netns", "exec", hostNsName, "ip", "link", "show", hostIfName)
assert.Check(t, is.Equal(res.ExitCode, 0))
res = icmd.RunCommand("ip", "netns", "exec", hostNsName, "ip", "link", "show", containerIfName)
assert.Check(t, is.Equal(res.ExitCode, 0))
}
})
}
}
func TestCreate(t *testing.T) {
defer netnsutils.SetupTestOSContext(t)()
@@ -558,11 +638,13 @@ func verifyV4INCEntries(networks map[string]*bridgeNetwork, t *testing.T) {
}
type testInterface struct {
mac net.HardwareAddr
addr *net.IPNet
addrv6 *net.IPNet
srcName string
dstName string
mac net.HardwareAddr
addr *net.IPNet
addrv6 *net.IPNet
srcName string
dstName string
createdInContainer bool
netnsPath string
}
type testEndpoint struct {
@@ -637,6 +719,14 @@ func setAddress(ifaceAddr **net.IPNet, address *net.IPNet) error {
return nil
}
// NetnsPath returns the value of the netnsPath field, which is "" unless the
// test set it.
func (i *testInterface) NetnsPath() string {
return i.netnsPath
}
// SetCreatedInContainer stores cic in the createdInContainer field, so that a
// test can check what the driver reported.
func (i *testInterface) SetCreatedInContainer(cic bool) {
i.createdInContainer = cic
}
func (i *testInterface) SetNames(srcName string, dstName string) error {
i.srcName = srcName
i.dstName = dstName

View File

@@ -183,6 +183,10 @@ func (test *testEndpoint) SetGatewayIPv6(ipv6 net.IP) error {
return nil
}
// NetnsPath always returns "": this test endpoint supplies no netns path.
func (test *testEndpoint) NetnsPath() string { return "" }
// SetCreatedInContainer is a no-op; the argument is ignored.
func (test *testEndpoint) SetCreatedInContainer(bool) {}
func (test *testEndpoint) SetNames(src string, dst string) error {
if test.src != src {
test.t.Fatalf(`Wrong SrcName; expected "%s", got "%s"`, test.src, src)
@@ -571,6 +575,10 @@ func (r *rollbackEndpoint) SetIPAddress(ip *net.IPNet) error {
return errors.New("invalid ip")
}
// NetnsPath always returns "": this test endpoint supplies no netns path.
func (r *rollbackEndpoint) NetnsPath() string { return "" }
// SetCreatedInContainer is a no-op; the argument is ignored.
func (r *rollbackEndpoint) SetCreatedInContainer(bool) {}
func TestRollback(t *testing.T) {
plugin := "test-net-driver-rollback"

View File

@@ -145,3 +145,10 @@ func (test *testEndpoint) AddStaticRoute(destination *net.IPNet, routeType int,
func (test *testEndpoint) DisableGatewayService() {
test.disableGatewayService = true
}
// NetnsPath always returns "": this test endpoint supplies no netns path.
func (test *testEndpoint) NetnsPath() string {
return ""
}
// SetCreatedInContainer is a no-op; the argument is ignored.
func (test *testEndpoint) SetCreatedInContainer(bool) {
}

View File

@@ -1254,6 +1254,12 @@ func JoinOptionPriority(prio int) EndpointOption {
}
}
// WithNetnsPath returns an EndpointOption that stores path as the network
// namespace path on the endpoint's interface (ep.iface.netnsPath).
func WithNetnsPath(path string) EndpointOption {
	setPath := func(endpoint *Endpoint) {
		endpoint.iface.netnsPath = path
	}
	return setPath
}
func (ep *Endpoint) assignAddress(ipam ipamapi.Ipam, assignIPv4, assignIPv6 bool) error {
n := ep.getNetwork()
if n.hasSpecialDriver() {

View File

@@ -37,15 +37,17 @@ type EndpointInfo interface {
// EndpointInterface holds interface addresses bound to the endpoint.
type EndpointInterface struct {
mac net.HardwareAddr
addr *net.IPNet
addrv6 *net.IPNet
llAddrs []*net.IPNet
srcName string
dstPrefix string
routes []*net.IPNet
v4PoolID string
v6PoolID string
mac net.HardwareAddr
addr *net.IPNet
addrv6 *net.IPNet
llAddrs []*net.IPNet
srcName string
dstPrefix string
routes []*net.IPNet
v4PoolID string
v6PoolID string
netnsPath string
createdInContainer bool
}
func (epi *EndpointInterface) MarshalJSON() ([]byte, error) {
@@ -75,6 +77,7 @@ func (epi *EndpointInterface) MarshalJSON() ([]byte, error) {
epMap["routes"] = routes
epMap["v4PoolID"] = epi.v4PoolID
epMap["v6PoolID"] = epi.v6PoolID
epMap["createdInContainer"] = epi.createdInContainer
return json.Marshal(epMap)
}
@@ -132,6 +135,9 @@ func (epi *EndpointInterface) UnmarshalJSON(b []byte) error {
epi.v4PoolID = epMap["v4PoolID"].(string)
epi.v6PoolID = epMap["v6PoolID"].(string)
if v, ok := epMap["createdInContainer"]; ok {
epi.createdInContainer = v.(bool)
}
return nil
}
@@ -143,6 +149,7 @@ func (epi *EndpointInterface) CopyTo(dstEpi *EndpointInterface) error {
dstEpi.dstPrefix = epi.dstPrefix
dstEpi.v4PoolID = epi.v4PoolID
dstEpi.v6PoolID = epi.v6PoolID
dstEpi.createdInContainer = epi.createdInContainer
if len(epi.llAddrs) != 0 {
dstEpi.llAddrs = make([]*net.IPNet, 0, len(epi.llAddrs))
dstEpi.llAddrs = append(dstEpi.llAddrs, epi.llAddrs...)
@@ -269,6 +276,18 @@ func (epi *EndpointInterface) SetNames(srcName string, dstPrefix string) error {
return nil
}
// NetnsPath returns the network namespace path stored in the netnsPath field.
// It returns "" when no path has been stored.
func (epi *EndpointInterface) NetnsPath() string {
return epi.netnsPath
}
// SetCreatedInContainer can be called by the driver to indicate whether it
// has created the network interface in the container's network namespace
// (in which case the interface doesn't need to be moved there). The value
// is stored in the createdInContainer field.
func (epi *EndpointInterface) SetCreatedInContainer(cic bool) {
epi.createdInContainer = cic
}
func (ep *Endpoint) InterfaceName() driverapi.InterfaceNameInfo {
ep.mu.Lock()
defer ep.mu.Unlock()

View File

@@ -106,6 +106,7 @@ type Interface struct {
// advertiseAddrInterval is the interval between unsolicited ARP/NA messages sent to
// advertise the interface's addresses.
advertiseAddrInterval time.Duration
createdInContainer bool
ns *Namespace
}
@@ -205,19 +206,15 @@ func (n *Namespace) findDst(srcName string, isBridge bool) string {
return ""
}
func moveLink(ctx context.Context, nlhHost nlwrap.Handle, iface netlink.Link, i *Interface, path string) (netns.NsHandle, error) {
func moveLink(ctx context.Context, nlhHost nlwrap.Handle, iface netlink.Link, i *Interface, nsh netns.NsHandle) error {
ctx, span := otel.Tracer("").Start(ctx, "libnetwork.osl.moveLink", trace.WithAttributes(
attribute.String("ifaceName", i.DstName())))
defer span.End()
newNs, err := netns.GetFromPath(path)
if err != nil {
return netns.None(), fmt.Errorf("failed get network namespace %q: %v", path, err)
if err := nlhHost.LinkSetNsFd(iface, int(nsh)); err != nil {
return fmt.Errorf("failed to set namespace on link %q: %v", i.srcName, err)
}
if err := nlhHost.LinkSetNsFd(iface, int(newNs)); err != nil {
return netns.None(), fmt.Errorf("failed to set namespace on link %q: %v", i.srcName, err)
}
return newNs, nil
return nil
}
// AddInterface adds an existing Interface to the sandbox. The operation will rename
@@ -251,6 +248,13 @@ func (n *Namespace) AddInterface(ctx context.Context, srcName, dstPrefix string,
n.mu.Unlock()
newNs := netns.None()
if !isDefault {
newNs, err = netns.GetFromPath(path)
if err != nil {
return fmt.Errorf("failed get network namespace %q: %v", path, err)
}
defer newNs.Close()
}
// If it is a bridge interface we have to create the bridge inside
// the namespace so don't try to lookup the interface using srcName
@@ -262,7 +266,7 @@ func (n *Namespace) AddInterface(ctx context.Context, srcName, dstPrefix string,
}); err != nil {
return fmt.Errorf("failed to create bridge %q: %v", i.srcName, err)
}
} else {
} else if !i.createdInContainer {
// Find the network interface identified by the SrcName attribute.
iface, err := nlhHost.LinkByName(i.srcName)
if err != nil {
@@ -273,12 +277,9 @@ func (n *Namespace) AddInterface(ctx context.Context, srcName, dstPrefix string,
// namespace only if the namespace is not a default
// type
if !isDefault {
var err error
newNs, err = moveLink(ctx, nlhHost, iface, i, path)
if err != nil {
if err := moveLink(ctx, nlhHost, iface, i, newNs); err != nil {
return err
}
defer newNs.Close()
}
}
@@ -350,6 +351,9 @@ func (n *Namespace) AddInterface(ctx context.Context, srcName, dstPrefix string,
}
func waitForIfUpped(ctx context.Context, ns netns.NsHandle, ifIndex int) (bool, error) {
ctx, span := otel.Tracer("").Start(context.WithoutCancel(ctx), "libnetwork.osl.waitforIfUpped")
defer span.End()
update := make(chan netlink.LinkUpdate, 100)
upped := make(chan struct{})
opts := netlink.LinkSubscribeOptions{
@@ -398,6 +402,7 @@ func waitForIfUpped(ctx context.Context, ns netns.NsHandle, ifIndex int) (bool,
for {
select {
case <-timerC:
log.G(ctx).Warnf("timeout in waitForIfUpped")
return false, nil
case u, ok := <-update:
if !ok {

View File

@@ -119,3 +119,13 @@ func WithAdvertiseAddrInterval(interval time.Duration) IfaceOption {
return nil
}
}
// WithCreatedInContainer returns an IfaceOption that sets the Interface's
// createdInContainer flag to cic. A true value says the network driver
// created the interface in the container's network namespace, so it does not
// need to be moved into that namespace. The returned option never fails.
func WithCreatedInContainer(cic bool) IfaceOption {
	setFlag := func(iface *Interface) error {
		iface.createdInContainer = cic
		return nil
	}
	return setFlag
}

View File

@@ -207,6 +207,18 @@ func (sb *Sandbox) SetKey(ctx context.Context, basePath string) error {
return nil
}
// NetnsPath returns the network namespace's path (the OS sandbox's key) and
// true if the Sandbox has an OS sandbox. Otherwise it returns the empty
// string and false.
func (sb *Sandbox) NetnsPath() (path string, ok bool) {
	// Take a snapshot of the field under the lock; Key is called after the
	// lock has been released.
	sb.mu.Lock()
	ns := sb.osSbox
	sb.mu.Unlock()

	if ns != nil {
		return ns.Key(), true
	}
	return "", false
}
// IPv6Enabled determines whether a container supports IPv6.
// IPv6 support can always be determined for host networking. For other network
// types it can only be determined once there's a container namespace to probe,
@@ -348,6 +360,7 @@ func (sb *Sandbox) populateNetworkResources(ctx context.Context, ep *Endpoint) e
ifaceOptions = append(ifaceOptions, osl.WithAdvertiseAddrInterval(interval))
}
}
ifaceOptions = append(ifaceOptions, osl.WithCreatedInContainer(i.createdInContainer))
if err := sb.osSbox.AddInterface(ctx, i.srcName, i.dstPrefix, ifaceOptions...); err != nil {
return fmt.Errorf("failed to add interface %s to sandbox: %v", i.srcName, err)

View File

@@ -28,6 +28,11 @@ func (sb *Sandbox) restoreOslSandbox() error {
return nil
}
// NetnsPath is not implemented on Windows (Sandbox.osSbox is always nil), so
// it always returns the empty string and false.
func (sb *Sandbox) NetnsPath() (path string, ok bool) {
return "", false
}
func (sb *Sandbox) populateNetworkResources(context.Context, *Endpoint) error {
// not implemented on Windows (Sandbox.osSbox is always nil)
return nil