Skip to content

Fix local IP address depletion by cleaning up network bridges for aws… #4741

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 2 additions & 7 deletions ecs-agent/netlib/platform/common_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -697,10 +697,8 @@ func (c *common) configureRegularENI(ctx context.Context, netNSPath string, eni
cniNetConf = append(cniNetConf, createENIPluginConfigs(netNSPath, eni))
add = true
case status.NetworkDeleted:
if eni.IsPrimary() {
cniNetConf = append(cniNetConf, createBridgePluginConfig(netNSPath))
}
cniNetConf = append(cniNetConf, createENIPluginConfigs(netNSPath, eni))
// Regular ENIs are used in single-use warmpool instances, so cleanup isn't necessary.
cniNetConf = nil
add = false
}

Expand Down Expand Up @@ -737,9 +735,6 @@ func (c *common) configureBranchENI(ctx context.Context, netNSPath string, eni *
// We block IMDS access in awsvpc tasks.
cniNetConf = append(cniNetConf, createBranchENIConfig(netNSPath, eni, VPCBranchENIInterfaceTypeVlan, blockInstanceMetadataDefault))
case status.NetworkDeleted:
if eni.IsPrimary() {
cniNetConf = append(cniNetConf, createBridgePluginConfig(netNSPath))
}
cniNetConf = append(cniNetConf, createBranchENIConfig(netNSPath, eni, VPCBranchENIInterfaceTypeVlan, blockInstanceMetadataDefault))
add = false
}
Expand Down
3 changes: 0 additions & 3 deletions ecs-agent/netlib/platform/common_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -409,8 +409,6 @@ func testRegularENIConfiguration(t *testing.T) {
gomock.InOrder(
osWrapper.EXPECT().Setenv("ECS_CNI_LOG_FILE", ecscni.PluginLogPath).Times(1),
osWrapper.EXPECT().Setenv("IPAM_DB_PATH", filepath.Join(commonPlatform.stateDBDir, "eni-ipam.db")),
cniClient.EXPECT().Del(gomock.Any(), bridgeConfig).Return(nil).Times(1),
cniClient.EXPECT().Del(gomock.Any(), eniConfig).Return(nil).Times(1),
)
err = commonPlatform.configureInterface(ctx, netNSPath, eni, nil)
require.NoError(t, err)
Expand Down Expand Up @@ -450,7 +448,6 @@ func testBranchENIConfiguration(t *testing.T) {
// Delete workflow.
branchENI.DesiredStatus = status.NetworkDeleted
osWrapper.EXPECT().Setenv("IPAM_DB_PATH", filepath.Join(commonPlatform.stateDBDir, "eni-ipam.db"))
cniClient.EXPECT().Del(gomock.Any(), bridgeConfig).Return(nil).Times(1)
cniClient.EXPECT().Del(gomock.Any(), cniConfig).Return(nil).Times(1)
err = commonPlatform.configureInterface(ctx, netNSPath, branchENI, nil)
require.NoError(t, err)
Expand Down
104 changes: 103 additions & 1 deletion ecs-agent/netlib/platform/managed_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,15 @@ import (
"context"
goErr "errors"
"fmt"
"path/filepath"

"github.com/aws/amazon-ecs-agent/ecs-agent/acs/model/ecsacs"
"github.com/aws/amazon-ecs-agent/ecs-agent/ec2"
"github.com/aws/amazon-ecs-agent/ecs-agent/logger"
loggerfield "github.com/aws/amazon-ecs-agent/ecs-agent/logger/field"
netlibdata "github.com/aws/amazon-ecs-agent/ecs-agent/netlib/data"
"github.com/aws/amazon-ecs-agent/ecs-agent/netlib/model/appmesh"
"github.com/aws/amazon-ecs-agent/ecs-agent/netlib/model/ecscni"
"github.com/aws/amazon-ecs-agent/ecs-agent/netlib/model/networkinterface"
"github.com/aws/amazon-ecs-agent/ecs-agent/netlib/model/serviceconnect"
"github.com/aws/amazon-ecs-agent/ecs-agent/netlib/model/status"
Expand Down Expand Up @@ -80,7 +82,107 @@ func (m *managedLinux) ConfigureInterface(
) error {
// Set the network interface name on the task network namespace to eth1.
iface.DeviceName = NetworkInterfaceDeviceName
return m.common.configureInterface(ctx, netNSPath, iface, netDAO)
return m.configureInterface(ctx, netNSPath, iface, netDAO)
}

func (m *managedLinux) configureInterface(
ctx context.Context,
netNSPath string,
iface *networkinterface.NetworkInterface,
netDAO netlibdata.NetworkDataClient,
) error {
var err error
switch iface.InterfaceAssociationProtocol {
case networkinterface.DefaultInterfaceAssociationProtocol:
err = m.configureRegularENI(ctx, netNSPath, iface)
case networkinterface.VLANInterfaceAssociationProtocol:
err = m.configureBranchENI(ctx, netNSPath, iface)
case networkinterface.V2NInterfaceAssociationProtocol:
err = m.common.configureGENEVEInterface(ctx, netNSPath, iface, netDAO)
case networkinterface.VETHInterfaceAssociationProtocol:
// Do nothing.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: can we have a comment about why?

return nil
default:
err = errors.New("invalid interface association protocol " + iface.InterfaceAssociationProtocol)
}
return err
}

func (m *managedLinux) configureRegularENI(ctx context.Context, netNSPath string, eni *networkinterface.NetworkInterface) error {
logger.Info("Configuring regular ENI", map[string]interface{}{
"ENIName": eni.Name,
"NetNSPath": netNSPath,
})

var cniNetConf []ecscni.PluginConfig
var add bool
var err error

m.common.os.Setenv(CNIPluginLogFileEnv, ecscni.PluginLogPath)
m.common.os.Setenv(IPAMDataPathEnv, filepath.Join(m.common.stateDBDir, IPAMDataFileName))

switch eni.DesiredStatus {
case status.NetworkReadyPull:
// The task metadata interface setup by bridge plugin is required only for the primary ENI.
if eni.IsPrimary() {
cniNetConf = append(cniNetConf, createBridgePluginConfig(netNSPath))
}
cniNetConf = append(cniNetConf, createENIPluginConfigs(netNSPath, eni))
add = true
case status.NetworkDeleted:
if eni.IsPrimary() {
cniNetConf = append(cniNetConf, createBridgePluginConfig(netNSPath))
}
cniNetConf = append(cniNetConf, createENIPluginConfigs(netNSPath, eni))
add = false
}

_, err = m.common.executeCNIPlugin(ctx, add, cniNetConf...)
if err != nil {
err = errors.Wrap(err, "failed to setup regular eni")
}

return err
}

// configureBranchENI configures a network interface for a branch ENI.
func (m *managedLinux) configureBranchENI(ctx context.Context, netNSPath string, eni *networkinterface.NetworkInterface) error {
logger.Info("Configuring branch ENI", map[string]interface{}{
"ENIName": eni.Name,
"NetNSPath": netNSPath,
})

// Set the path for the IPAM CNI local db to track assigned IPs.
// Default path is /data but in some linux distros (i.e.Amazon BottleRocket) the root volume is read-only.
m.common.os.Setenv(IPAMDataPathEnv, filepath.Join(m.common.stateDBDir, IPAMDataFileName))

var cniNetConf []ecscni.PluginConfig
var err error
add := true

// Generate CNI network configuration based on the ENI's desired state.
switch eni.DesiredStatus {
case status.NetworkReadyPull:
// Setup bridge to connect task network namespace to TMDS running in host's primary netns.
if eni.IsPrimary() {
cniNetConf = append(cniNetConf, createBridgePluginConfig(netNSPath))
}
// We block IMDS access in awsvpc tasks.
cniNetConf = append(cniNetConf, createBranchENIConfig(netNSPath, eni, VPCBranchENIInterfaceTypeVlan, blockInstanceMetadataDefault))
case status.NetworkDeleted:
if eni.IsPrimary() {
cniNetConf = append(cniNetConf, createBridgePluginConfig(netNSPath))
}
cniNetConf = append(cniNetConf, createBranchENIConfig(netNSPath, eni, VPCBranchENIInterfaceTypeVlan, blockInstanceMetadataDefault))
add = false
}

_, err = m.common.executeCNIPlugin(ctx, add, cniNetConf...)
if err != nil {
err = errors.Wrap(err, "failed to setup branch eni")
}

return err
}

func (m *managedLinux) ConfigureAppMesh(ctx context.Context,
Expand Down
108 changes: 108 additions & 0 deletions ecs-agent/netlib/platform/managed_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,20 @@
package platform

import (
"context"
"fmt"
"net"
"path/filepath"
"testing"

mock_ec2 "github.com/aws/amazon-ecs-agent/ecs-agent/ec2/mocks"
"github.com/aws/amazon-ecs-agent/ecs-agent/netlib/model/ecscni"
mock_ecscni2 "github.com/aws/amazon-ecs-agent/ecs-agent/netlib/model/ecscni/mocks_ecscni"
"github.com/aws/amazon-ecs-agent/ecs-agent/netlib/model/networkinterface"
"github.com/aws/amazon-ecs-agent/ecs-agent/netlib/model/status"
mock_netlinkwrapper "github.com/aws/amazon-ecs-agent/ecs-agent/utils/netlinkwrapper/mocks"
mock_netwrapper "github.com/aws/amazon-ecs-agent/ecs-agent/utils/netwrapper/mocks"
mock_oswrapper "github.com/aws/amazon-ecs-agent/ecs-agent/utils/oswrapper/mocks"

"github.com/golang/mock/gomock"
"github.com/stretchr/testify/assert"
Expand All @@ -36,6 +42,88 @@ const (
macAddress = "0a:1b:2c:3d:4e:5f"
)

func TestManagedLinux_TestConfigureInterface(t *testing.T) {
t.Run("regular-eni", testManagedLinuxRegularENIConfiguration)
t.Run("branch-eni", testManagedLinuxBranchENIConfiguration)
}

// testRegularENIConfiguration verifies the precise list of operations are invoked
// with the correct arguments while configuring a regular ENI on a host.
func testManagedLinuxRegularENIConfiguration(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()

ctx, osWrapper, cniClient, eni, managedLinuxPlatform := setupManagedLinuxTestConfigureInterface(ctrl, getTestRegularV4ENI)

// When the ENI is the primary ENI.
eniConfig := createENIPluginConfigs(netNSPath, eni)
bridgeConfig := createBridgePluginConfig(netNSPath)
gomock.InOrder(
osWrapper.EXPECT().Setenv("ECS_CNI_LOG_FILE", ecscni.PluginLogPath).Times(1),
osWrapper.EXPECT().Setenv("IPAM_DB_PATH", filepath.Join(managedLinuxPlatform.stateDBDir, "eni-ipam.db")),
cniClient.EXPECT().Add(gomock.Any(), bridgeConfig).Return(nil, nil).Times(1),
cniClient.EXPECT().Add(gomock.Any(), eniConfig).Return(nil, nil).Times(1),
)
err := managedLinuxPlatform.configureInterface(ctx, netNSPath, eni, nil)
require.NoError(t, err)

// Non-primary ENI case.
eni.Default = false
eniConfig = createENIPluginConfigs(netNSPath, eni)
gomock.InOrder(
osWrapper.EXPECT().Setenv("ECS_CNI_LOG_FILE", ecscni.PluginLogPath).Times(1),
osWrapper.EXPECT().Setenv("IPAM_DB_PATH", filepath.Join(managedLinuxPlatform.stateDBDir, "eni-ipam.db")),
cniClient.EXPECT().Add(gomock.Any(), eniConfig).Return(nil, nil).Times(1),
)
err = managedLinuxPlatform.configureInterface(ctx, netNSPath, eni, nil)
require.NoError(t, err)

// Delete workflow.
eni.Default = true
eni.DesiredStatus = status.NetworkDeleted
eniConfig = createENIPluginConfigs(netNSPath, eni)
gomock.InOrder(
osWrapper.EXPECT().Setenv("ECS_CNI_LOG_FILE", ecscni.PluginLogPath).Times(1),
osWrapper.EXPECT().Setenv("IPAM_DB_PATH", filepath.Join(managedLinuxPlatform.stateDBDir, "eni-ipam.db")),
cniClient.EXPECT().Del(gomock.Any(), bridgeConfig).Return(nil).Times(1),
cniClient.EXPECT().Del(gomock.Any(), eniConfig).Return(nil).Times(1),
)
err = managedLinuxPlatform.configureInterface(ctx, netNSPath, eni, nil)
require.NoError(t, err)
}

func testManagedLinuxBranchENIConfiguration(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()

ctx, osWrapper, cniClient, eni, managedLinuxPlatform := setupManagedLinuxTestConfigureInterface(ctrl, getTestBranchV4ENI)

eni.DesiredStatus = status.NetworkReadyPull
bridgeConfig := createBridgePluginConfig(netNSPath)
cniConfig := createBranchENIConfig(netNSPath, eni, VPCBranchENIInterfaceTypeVlan, blockInstanceMetadataDefault)
gomock.InOrder(
osWrapper.EXPECT().Setenv("IPAM_DB_PATH", filepath.Join(managedLinuxPlatform.stateDBDir, "eni-ipam.db")),
cniClient.EXPECT().Add(gomock.Any(), bridgeConfig).Return(nil, nil).Times(1),
cniClient.EXPECT().Add(gomock.Any(), cniConfig).Return(nil, nil).Times(1),
)
err := managedLinuxPlatform.configureInterface(ctx, netNSPath, eni, nil)
require.NoError(t, err)

// Ready-Pull to Ready transition
eni.DesiredStatus = status.NetworkReady
osWrapper.EXPECT().Setenv("IPAM_DB_PATH", filepath.Join(managedLinuxPlatform.stateDBDir, "eni-ipam.db"))
err = managedLinuxPlatform.configureInterface(ctx, netNSPath, eni, nil)
require.NoError(t, err)

// Delete workflow.
eni.DesiredStatus = status.NetworkDeleted
osWrapper.EXPECT().Setenv("IPAM_DB_PATH", filepath.Join(managedLinuxPlatform.stateDBDir, "eni-ipam.db"))
cniClient.EXPECT().Del(gomock.Any(), bridgeConfig).Return(nil).Times(1)
cniClient.EXPECT().Del(gomock.Any(), cniConfig).Return(nil).Times(1)
err = managedLinuxPlatform.configureInterface(ctx, netNSPath, eni, nil)
require.NoError(t, err)
}

func TestBuildDefaultNetworkNamespace(t *testing.T) {
tests := []struct {
name string
Expand Down Expand Up @@ -226,3 +314,23 @@ func TestBuildDefaultNetworkNamespace(t *testing.T) {
})
}
}

// setupManagedLinuxTestConfigureInterface provisions all the resources needed to facilitate the two
// subtests in TestManagedLinux_TestConfigureInterface.
func setupManagedLinuxTestConfigureInterface(
ctrl *gomock.Controller, getTestENI func() *networkinterface.NetworkInterface) (
context.Context, *mock_oswrapper.MockOS, *mock_ecscni2.MockCNI, *networkinterface.NetworkInterface, *managedLinux) {
ctx := context.TODO()
osWrapper := mock_oswrapper.NewMockOS(ctrl)
cniClient := mock_ecscni2.NewMockCNI(ctrl)
eni := getTestENI()
managedLinuxPlatform := &managedLinux{
common: common{
os: osWrapper,
cniClient: cniClient,
stateDBDir: "dummy-db-dir",
},
client: nil,
}
return ctx, osWrapper, cniClient, eni, managedLinuxPlatform
}
Loading