From 2a86c35768b6d9179ed3c3e83df44a5cd42d433f Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Tue, 7 Jan 2025 19:15:33 -0800 Subject: [PATCH 1/6] libct: document initConfig and friends This is one of the dark corners of runc / libcontainer, so let's shed some light on it. initConfig is a structure which is filled in [mostly] by newInitConfig, and one of its hidden aspects is it contains a process config which is the result of merge between the container and the process configs. Let's document how all this happens, where the fields are coming from, which one has a preference, and how it all works. Signed-off-by: Kir Kolyshkin --- libcontainer/container_linux.go | 9 ++++ libcontainer/init_linux.go | 74 +++++++++++++++++++++++---------- libcontainer/process.go | 43 ++++++++++++------- 3 files changed, 88 insertions(+), 38 deletions(-) diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index a411d40813d..15090fe2ebd 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -689,6 +689,9 @@ func (c *Container) newSetnsProcess(p *Process, cmd *exec.Cmd, comm *processComm } func (c *Container) newInitConfig(process *Process) *initConfig { + // Set initial properties. For those properties that exist + // both in the container config and the process, use the ones + // from the container config first, and override them later. cfg := &initConfig{ Config: c.config, Args: process.Args, @@ -710,6 +713,9 @@ func (c *Container) newInitConfig(process *Process) *initConfig { ConsoleWidth: process.ConsoleWidth, ConsoleHeight: process.ConsoleHeight, } + + // Overwrite config properties with ones from process. + if process.NoNewPrivileges != nil { cfg.NoNewPrivileges = *process.NoNewPrivileges } @@ -722,6 +728,9 @@ func (c *Container) newInitConfig(process *Process) *initConfig { if len(process.Rlimits) > 0 { cfg.Rlimits = process.Rlimits } + + // Set misc properties. 
+ if cgroups.IsCgroup2UnifiedMode() { cfg.Cgroup2Path = c.cgroupManager.Path("") } diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go index cff21e1bc66..b2cae3a8478 100644 --- a/libcontainer/init_linux.go +++ b/libcontainer/init_linux.go @@ -47,30 +47,58 @@ type network struct { TempVethPeerName string `json:"temp_veth_peer_name"` } -// initConfig is used for transferring parameters from Exec() to Init() +// initConfig is used for transferring parameters from Exec() to Init(). +// It contains: +// - original container config; +// - some [Process] properties; +// - set of properties merged from the container config ([configs.Config]) +// and the process ([Process]); +// - some properties that come from the container. +// +// When adding new fields, please make sure they go into the relevant section. type initConfig struct { - Args []string `json:"args"` - Env []string `json:"env"` - Cwd string `json:"cwd"` - Capabilities *configs.Capabilities `json:"capabilities"` - ProcessLabel string `json:"process_label"` - AppArmorProfile string `json:"apparmor_profile"` - NoNewPrivileges bool `json:"no_new_privileges"` - UID int `json:"uid"` - GID int `json:"gid"` - AdditionalGroups []int `json:"additional_groups"` - Config *configs.Config `json:"config"` - Networks []*network `json:"network"` - PassedFilesCount int `json:"passed_files_count"` - ContainerID string `json:"containerid"` - Rlimits []configs.Rlimit `json:"rlimits"` - CreateConsole bool `json:"create_console"` - ConsoleWidth uint16 `json:"console_width"` - ConsoleHeight uint16 `json:"console_height"` - RootlessEUID bool `json:"rootless_euid,omitempty"` - RootlessCgroups bool `json:"rootless_cgroups,omitempty"` - SpecState *specs.State `json:"spec_state,omitempty"` - Cgroup2Path string `json:"cgroup2_path,omitempty"` + // Config is the original container config. + Config *configs.Config `json:"config"` + + // Properties that are unique to and come from [Process]. 
+ + Args []string `json:"args"` + Env []string `json:"env"` + UID int `json:"uid"` + GID int `json:"gid"` + AdditionalGroups []int `json:"additional_groups"` + Cwd string `json:"cwd"` + CreateConsole bool `json:"create_console"` + ConsoleWidth uint16 `json:"console_width"` + ConsoleHeight uint16 `json:"console_height"` + PassedFilesCount int `json:"passed_files_count"` + + // Properties that exist both in the container config and the process, + // as merged by [Container.newInitConfig] (process properties have preference). + + AppArmorProfile string `json:"apparmor_profile"` + Capabilities *configs.Capabilities `json:"capabilities"` + NoNewPrivileges bool `json:"no_new_privileges"` + ProcessLabel string `json:"process_label"` + Rlimits []configs.Rlimit `json:"rlimits"` + + // Properties that only exist in container config. + // FIXME: they are also passed in Config above. + + RootlessEUID bool `json:"rootless_euid,omitempty"` + RootlessCgroups bool `json:"rootless_cgroups,omitempty"` + + // Miscellaneous properties, filled in by [Container.newInitConfig] + // unless documented otherwise. + + ContainerID string `json:"containerid"` + Cgroup2Path string `json:"cgroup2_path,omitempty"` + + // Networks is filled in from container config by [initProcess.createNetworkInterfaces]. + Networks []*network `json:"network"` + + // SpecState is filled in by [initProcess.Start]. + SpecState *specs.State `json:"spec_state,omitempty"` } // Init is part of "runc init" implementation. diff --git a/libcontainer/process.go b/libcontainer/process.go index 09162be9a40..5b2928be42c 100644 --- a/libcontainer/process.go +++ b/libcontainer/process.go @@ -17,8 +17,11 @@ type processOperations interface { pid() int } -// Process specifies the configuration and IO for a process inside -// a container. +// Process defines the configuration and IO for a process inside a container. +// +// Note that some Process properties are also present in container configuration +// ([configs.Config]). 
In all such cases, Process properties take precedence +// over container configuration ones. type Process struct { // The command to be run followed by any arguments. Args []string @@ -34,44 +37,54 @@ type Process struct { // in addition to those that the user belongs to. AdditionalGroups []int - // Cwd will change the processes current working directory inside the container's rootfs. + // Cwd will change the process's current working directory inside the container's rootfs. Cwd string - // Stdin is a pointer to a reader which provides the standard input stream. + // Stdin is a reader which provides the standard input stream. Stdin io.Reader - // Stdout is a pointer to a writer which receives the standard output stream. + // Stdout is a writer which receives the standard output stream. Stdout io.Writer - // Stderr is a pointer to a writer which receives the standard error stream. + // Stderr is a writer which receives the standard error stream. Stderr io.Writer - // ExtraFiles specifies additional open files to be inherited by the container + // ExtraFiles specifies additional open files to be inherited by the process. ExtraFiles []*os.File - // open handles to cloned binaries -- see dmz.CloneSelfExe for more details + // Open handles to cloned binaries -- see dmz.CloneSelfExe for more details. clonedExes []*os.File - // Initial sizings for the console + // Initial size for the console. ConsoleWidth uint16 ConsoleHeight uint16 - // Capabilities specify the capabilities to keep when executing the process inside the container - // All capabilities not specified will be dropped from the processes capability mask + // Capabilities specify the capabilities to keep when executing the process. + // All capabilities not specified will be dropped from the processes capability mask. + // + // If not nil, takes precedence over container's [configs.Config.Capabilities]. 
Capabilities *configs.Capabilities // AppArmorProfile specifies the profile to apply to the process and is - // changed at the time the process is execed + // changed at the time the process is executed. + // + // If not empty, takes precedence over container's [configs.Config.AppArmorProfile]. AppArmorProfile string - // Label specifies the label to apply to the process. It is commonly used by selinux + // Label specifies the label to apply to the process. It is commonly used by selinux. + // + // If not empty, takes precedence over container's [configs.Config.ProcessLabel]. Label string // NoNewPrivileges controls whether processes can gain additional privileges. + // + // If not nil, takes precedence over container's [configs.Config.NoNewPrivileges]. NoNewPrivileges *bool - // Rlimits specifies the resource limits, such as max open files, to set in the container - // If Rlimits are not set, the container will inherit rlimits from the parent process + // Rlimits specifies the resource limits, such as max open files, to set for the process. + // If unset, the process will inherit rlimits from the parent process. + // + // If not empty, takes precedence over container's [configs.Config.Rlimits]. Rlimits []configs.Rlimit // ConsoleSocket provides the masterfd console. From f26ec92221e68b2c81cbc0c72489a3f0135f4d6c Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Wed, 15 Jan 2025 23:28:08 -0800 Subject: [PATCH 2/6] libct: rm Rootless* properties from initConfig They are passed in initConfig twice, so it does not make sense. NB: the alternative to that would be to remove Config field from initConfig, but it results in a much bigger patch and more maintenance down the road. 
Signed-off-by: Kir Kolyshkin --- libcontainer/container_linux.go | 2 -- libcontainer/init_linux.go | 8 +------- libcontainer/rootfs_linux.go | 2 +- 3 files changed, 2 insertions(+), 10 deletions(-) diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 15090fe2ebd..3ee0cb2c2a6 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -704,8 +704,6 @@ func (c *Container) newInitConfig(process *Process) *initConfig { PassedFilesCount: len(process.ExtraFiles), ContainerID: c.ID(), NoNewPrivileges: c.config.NoNewPrivileges, - RootlessEUID: c.config.RootlessEUID, - RootlessCgroups: c.config.RootlessCgroups, AppArmorProfile: c.config.AppArmorProfile, ProcessLabel: c.config.ProcessLabel, Rlimits: c.config.Rlimits, diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go index b2cae3a8478..fba9bb32ce8 100644 --- a/libcontainer/init_linux.go +++ b/libcontainer/init_linux.go @@ -82,12 +82,6 @@ type initConfig struct { ProcessLabel string `json:"process_label"` Rlimits []configs.Rlimit `json:"rlimits"` - // Properties that only exist in container config. - // FIXME: they are also passed in Config above. - - RootlessEUID bool `json:"rootless_euid,omitempty"` - RootlessCgroups bool `json:"rootless_cgroups,omitempty"` - // Miscellaneous properties, filled in by [Container.newInitConfig] // unless documented otherwise. @@ -484,7 +478,7 @@ func setupUser(config *initConfig) error { // There's nothing we can do about /etc/group entries, so we silently // ignore setting groups here (since the user didn't explicitly ask us to // set the group). 
- allowSupGroups := !config.RootlessEUID && string(bytes.TrimSpace(setgroups)) != "deny" + allowSupGroups := !config.Config.RootlessEUID && string(bytes.TrimSpace(setgroups)) != "deny" if allowSupGroups { if err := unix.Setgroups(config.AdditionalGroups); err != nil { diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go index 012b0506713..68e16b7920b 100644 --- a/libcontainer/rootfs_linux.go +++ b/libcontainer/rootfs_linux.go @@ -106,7 +106,7 @@ func prepareRootfs(pipe *syncSocket, iConfig *initConfig) (err error) { root: config.Rootfs, label: config.MountLabel, cgroup2Path: iConfig.Cgroup2Path, - rootlessCgroups: iConfig.RootlessCgroups, + rootlessCgroups: config.RootlessCgroups, cgroupns: config.Namespaces.Contains(configs.NEWCGROUP), } for _, m := range config.Mounts { From 049a5f76cf969745fb9b7f647af536962e88251e Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Wed, 8 Jan 2025 12:25:42 -0800 Subject: [PATCH 3/6] libct/cap: allow New(nil) In runtime-spec, capabilities property is optional, but libcontainer/capabilities panics when New(nil) is called. Because of this, there's a kludge in finalizeNamespace to ensure capabilities.New is not called with nil argument, and there's a TestProcessEmptyCaps to ensure runc won't panic. Let's fix this at the source, allowing libct/cap to work with nil capabilities. (The caller is fixed by the next commit.) Signed-off-by: Kir Kolyshkin --- libcontainer/capabilities/capabilities.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/libcontainer/capabilities/capabilities.go b/libcontainer/capabilities/capabilities.go index 8ed3cac0870..8bddc0007b7 100644 --- a/libcontainer/capabilities/capabilities.go +++ b/libcontainer/capabilities/capabilities.go @@ -47,6 +47,9 @@ func KnownCapabilities() []string { // printing a warning instead. 
func New(capConfig *configs.Capabilities) (*Caps, error) { var c Caps + if capConfig == nil { + return &c, nil + } _, err := capMap() if err != nil { @@ -103,6 +106,9 @@ type Caps struct { // ApplyBoundingSet sets the capability bounding set to those specified in the whitelist. func (c *Caps) ApplyBoundingSet() error { + if c.pid == nil { + return nil + } c.pid.Clear(capability.BOUNDING) c.pid.Set(capability.BOUNDING, c.caps[capability.BOUNDING]...) return c.pid.Apply(capability.BOUNDING) @@ -110,6 +116,9 @@ func (c *Caps) ApplyBoundingSet() error { // Apply sets all the capabilities for the current process in the config. func (c *Caps) ApplyCaps() error { + if c.pid == nil { + return nil + } c.pid.Clear(capability.CAPS | capability.BOUNDS) for _, g := range []capability.CapType{ capability.EFFECTIVE, From 73849e797f993cabc4cba7ceb989fcbac57f64d8 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Tue, 7 Jan 2025 19:32:27 -0800 Subject: [PATCH 4/6] libct: simplify Caps inheritance For all other properties that are available in both Config and Process, the merging is performed by newInitConfig. Let's do the same for Capabilities for the sake of code uniformity. Also, thanks to the previous commit, we no longer have to make sure we do not call capabilities.New(nil). 
Signed-off-by: Kir Kolyshkin --- libcontainer/container_linux.go | 5 ++++- libcontainer/init_linux.go | 8 +------- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 3ee0cb2c2a6..a0d2ec2606e 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -700,7 +700,7 @@ func (c *Container) newInitConfig(process *Process) *initConfig { GID: process.GID, AdditionalGroups: process.AdditionalGroups, Cwd: process.Cwd, - Capabilities: process.Capabilities, + Capabilities: c.config.Capabilities, PassedFilesCount: len(process.ExtraFiles), ContainerID: c.ID(), NoNewPrivileges: c.config.NoNewPrivileges, @@ -714,6 +714,9 @@ func (c *Container) newInitConfig(process *Process) *initConfig { // Overwrite config properties with ones from process. + if process.Capabilities != nil { + cfg.Capabilities = process.Capabilities + } if process.NoNewPrivileges != nil { cfg.NoNewPrivileges = *process.NoNewPrivileges } diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go index fba9bb32ce8..11a9069d8e7 100644 --- a/libcontainer/init_linux.go +++ b/libcontainer/init_linux.go @@ -322,13 +322,7 @@ func finalizeNamespace(config *initConfig) error { } } - caps := &configs.Capabilities{} - if config.Capabilities != nil { - caps = config.Capabilities - } else if config.Config.Capabilities != nil { - caps = config.Config.Capabilities - } - w, err := capabilities.New(caps) + w, err := capabilities.New(config.Capabilities) if err != nil { return err } From b9114d91e2ee50fd610b9fa201375763b64df121 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Tue, 7 Jan 2025 19:48:58 -0800 Subject: [PATCH 5/6] runc exec: fix setting process.ioPriority Commit bfbd0305b added IOPriority field into both Config and Process, but forgot to add a mechanism to actually use Process.IOPriority. As a result, runc exec does not set Process.IOPriority ever. 
Fix it, and add a test case (which fails before the fix). Signed-off-by: Kir Kolyshkin --- CHANGELOG.md | 5 +++++ libcontainer/container_linux.go | 4 ++++ libcontainer/init_linux.go | 3 ++- libcontainer/process.go | 3 +++ libcontainer/setns_init_linux.go | 2 +- libcontainer/standard_init_linux.go | 2 +- tests/integration/ioprio.bats | 22 ++++++++++++++++++---- 7 files changed, 34 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2ebb6072239..a810474ce9c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 IDs before calling libcontainer; it is recommended to use Go package github.com/moby/sys/user for that. (#3999) +### Fixed + * `runc exec -p` no longer ignores specified `ioPriority` setting. + Similarly, libcontainer's `Container.Start` and `Container.Run` + methods no longer ignore `Process.IOPriority` setting. (#4585) + ## [1.2.0] - 2024-10-22 > できるときにできることをやるんだ。それが今だ。 diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index a0d2ec2606e..8a9cf078696 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -707,6 +707,7 @@ func (c *Container) newInitConfig(process *Process) *initConfig { AppArmorProfile: c.config.AppArmorProfile, ProcessLabel: c.config.ProcessLabel, Rlimits: c.config.Rlimits, + IOPriority: c.config.IOPriority, CreateConsole: process.ConsoleSocket != nil, ConsoleWidth: process.ConsoleWidth, ConsoleHeight: process.ConsoleHeight, @@ -729,6 +730,9 @@ func (c *Container) newInitConfig(process *Process) *initConfig { if len(process.Rlimits) > 0 { cfg.Rlimits = process.Rlimits } + if process.IOPriority != nil { + cfg.IOPriority = process.IOPriority + } // Set misc properties. 
diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go index 11a9069d8e7..5976b2933d3 100644 --- a/libcontainer/init_linux.go +++ b/libcontainer/init_linux.go @@ -81,6 +81,7 @@ type initConfig struct { NoNewPrivileges bool `json:"no_new_privileges"` ProcessLabel string `json:"process_label"` Rlimits []configs.Rlimit `json:"rlimits"` + IOPriority *configs.IOPriority `json:"io_priority,omitempty"` // Miscellaneous properties, filled in by [Container.newInitConfig] // unless documented otherwise. @@ -623,7 +624,7 @@ func setupScheduler(config *configs.Config) error { return nil } -func setupIOPriority(config *configs.Config) error { +func setupIOPriority(config *initConfig) error { const ioprioWhoPgrp = 1 ioprio := config.IOPriority diff --git a/libcontainer/process.go b/libcontainer/process.go index 5b2928be42c..09e57ae46c7 100644 --- a/libcontainer/process.go +++ b/libcontainer/process.go @@ -114,6 +114,9 @@ type Process struct { Scheduler *configs.Scheduler + // IOPriority is a process I/O priority. + // + // If not nil, takes precedence over container's [configs.Config.IOPriority]. IOPriority *configs.IOPriority } diff --git a/libcontainer/setns_init_linux.go b/libcontainer/setns_init_linux.go index d1885b3fdda..505af1d08a8 100644 --- a/libcontainer/setns_init_linux.go +++ b/libcontainer/setns_init_linux.go @@ -75,7 +75,7 @@ func (l *linuxSetnsInit) Init() error { return err } - if err := setupIOPriority(l.config.Config); err != nil { + if err := setupIOPriority(l.config); err != nil { return err } // Tell our parent that we're ready to exec. 
This must be done before the diff --git a/libcontainer/standard_init_linux.go b/libcontainer/standard_init_linux.go index 9517820bcad..0c83db30f79 100644 --- a/libcontainer/standard_init_linux.go +++ b/libcontainer/standard_init_linux.go @@ -159,7 +159,7 @@ func (l *linuxStandardInit) Init() error { return err } - if err := setupIOPriority(l.config.Config); err != nil { + if err := setupIOPriority(l.config); err != nil { return err } diff --git a/tests/integration/ioprio.bats b/tests/integration/ioprio.bats index a907d782f01..9faa72d61ab 100644 --- a/tests/integration/ioprio.bats +++ b/tests/integration/ioprio.bats @@ -20,11 +20,25 @@ function teardown() { # Check the init process. runc exec test_ioprio ionice -p 1 [ "$status" -eq 0 ] - [[ "$output" = *'best-effort: prio 4'* ]] + [ "${lines[0]}" = 'best-effort: prio 4' ] - # Check the process made from the exec command. + # Check an exec process, which should derive ioprio from config.json. runc exec test_ioprio ionice [ "$status" -eq 0 ] - - [[ "$output" = *'best-effort: prio 4'* ]] + [ "${lines[0]}" = 'best-effort: prio 4' ] + + # Check an exec with a priority taken from process.json, + # which should override the ioprio in config.json. + proc=' +{ + "terminal": false, + "ioPriority": { + "class": "IOPRIO_CLASS_IDLE" + }, + "args": [ "/usr/bin/ionice" ], + "cwd": "/" +}' + runc exec --process <(echo "$proc") test_ioprio + [ "$status" -eq 0 ] + [ "${lines[0]}" = 'idle' ] } From 99f9ed94dc6e12d6f921b63fc15d2d4d45d57224 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Wed, 8 Jan 2025 14:11:02 -0800 Subject: [PATCH 6/6] runc exec: fix setting process.Scheduler Commit 770728e1 added Scheduler field into both Config and Process, but forgot to add a mechanism to actually use Process.Scheduler. As a result, runc exec does not set Process.Scheduler ever. Fix it, and add a test case (which fails before the fix). 
Signed-off-by: Kir Kolyshkin --- CHANGELOG.md | 7 +++--- libcontainer/container_linux.go | 4 +++ libcontainer/init_linux.go | 5 ++-- libcontainer/process.go | 3 +++ libcontainer/setns_init_linux.go | 2 +- libcontainer/standard_init_linux.go | 2 +- tests/integration/scheduler.bats | 38 ++++++++++++++++++++++++++--- 7 files changed, 51 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a810474ce9c..aae5d9f46d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,9 +18,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 github.com/moby/sys/user for that. (#3999) ### Fixed - * `runc exec -p` no longer ignores specified `ioPriority` setting. - Similarly, libcontainer's `Container.Start` and `Container.Run` - methods no longer ignore `Process.IOPriority` setting. (#4585) + * `runc exec -p` no longer ignores specified `ioPriority` and `scheduler` + settings. Similarly, libcontainer's `Container.Start` and `Container.Run` + methods no longer ignore `Process.IOPriority` and `Process.Scheduler` + settings. (#4585) ## [1.2.0] - 2024-10-22 diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 8a9cf078696..54a0eaafe06 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -708,6 +708,7 @@ func (c *Container) newInitConfig(process *Process) *initConfig { ProcessLabel: c.config.ProcessLabel, Rlimits: c.config.Rlimits, IOPriority: c.config.IOPriority, + Scheduler: c.config.Scheduler, CreateConsole: process.ConsoleSocket != nil, ConsoleWidth: process.ConsoleWidth, ConsoleHeight: process.ConsoleHeight, @@ -733,6 +734,9 @@ func (c *Container) newInitConfig(process *Process) *initConfig { if process.IOPriority != nil { cfg.IOPriority = process.IOPriority } + if process.Scheduler != nil { + cfg.Scheduler = process.Scheduler + } // Set misc properties. 
diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go index 5976b2933d3..f78e561755f 100644 --- a/libcontainer/init_linux.go +++ b/libcontainer/init_linux.go @@ -82,6 +82,7 @@ type initConfig struct { ProcessLabel string `json:"process_label"` Rlimits []configs.Rlimit `json:"rlimits"` IOPriority *configs.IOPriority `json:"io_priority,omitempty"` + Scheduler *configs.Scheduler `json:"scheduler,omitempty"` // Miscellaneous properties, filled in by [Container.newInitConfig] // unless documented otherwise. @@ -607,7 +608,7 @@ func setupRlimits(limits []configs.Rlimit, pid int) error { return nil } -func setupScheduler(config *configs.Config) error { +func setupScheduler(config *initConfig) error { if config.Scheduler == nil { return nil } @@ -616,7 +617,7 @@ func setupScheduler(config *configs.Config) error { return err } if err := unix.SchedSetAttr(0, attr, 0); err != nil { - if errors.Is(err, unix.EPERM) && config.Cgroups.CpusetCpus != "" { + if errors.Is(err, unix.EPERM) && config.Config.Cgroups.CpusetCpus != "" { return errors.New("process scheduler can't be used together with AllowedCPUs") } return fmt.Errorf("error setting scheduler: %w", err) diff --git a/libcontainer/process.go b/libcontainer/process.go index 09e57ae46c7..0e24c548ed8 100644 --- a/libcontainer/process.go +++ b/libcontainer/process.go @@ -112,6 +112,9 @@ type Process struct { // For cgroup v2, the only key allowed is "". SubCgroupPaths map[string]string + // Scheduler represents the scheduling attributes for a process. + // + // If not nil, takes precedence over container's [configs.Config.Scheduler]. Scheduler *configs.Scheduler // IOPriority is a process I/O priority. 
diff --git a/libcontainer/setns_init_linux.go b/libcontainer/setns_init_linux.go index 505af1d08a8..0a79f197e6d 100644 --- a/libcontainer/setns_init_linux.go +++ b/libcontainer/setns_init_linux.go @@ -71,7 +71,7 @@ func (l *linuxSetnsInit) Init() error { unix.Umask(int(*l.config.Config.Umask)) } - if err := setupScheduler(l.config.Config); err != nil { + if err := setupScheduler(l.config); err != nil { return err } diff --git a/libcontainer/standard_init_linux.go b/libcontainer/standard_init_linux.go index 0c83db30f79..384750bf837 100644 --- a/libcontainer/standard_init_linux.go +++ b/libcontainer/standard_init_linux.go @@ -155,7 +155,7 @@ func (l *linuxStandardInit) Init() error { } } - if err := setupScheduler(l.config.Config); err != nil { + if err := setupScheduler(l.config); err != nil { return err } diff --git a/tests/integration/scheduler.bats b/tests/integration/scheduler.bats index b7cd96f8890..6c80d86426b 100644 --- a/tests/integration/scheduler.bats +++ b/tests/integration/scheduler.bats @@ -12,17 +12,49 @@ function teardown() { } @test "scheduler is applied" { - update_config ' .process.scheduler = {"policy": "SCHED_DEADLINE", "nice": 19, "priority": 0, "runtime": 42000, "deadline": 1000000, "period": 1000000, }' + update_config ' .process.scheduler = { + "policy": "SCHED_BATCH", + "priority": 0, + "nice": 19 + }' runc run -d --console-socket "$CONSOLE_SOCKET" test_scheduler [ "$status" -eq 0 ] + # Check init settings. runc exec test_scheduler chrt -p 1 [ "$status" -eq 0 ] + [[ "${lines[0]}" == *"scheduling policy: SCHED_BATCH" ]] + [[ "${lines[1]}" == *"priority: 0" ]] + + # Check exec settings derived from config.json. + runc exec test_scheduler sh -c 'chrt -p $$' + [ "$status" -eq 0 ] + [[ "${lines[0]}" == *"scheduling policy: SCHED_BATCH" ]] + [[ "${lines[1]}" == *"priority: 0" ]] + + # Another exec, with different scheduler settings. 
+ proc=' +{ + "terminal": false, + "args": [ "/bin/sleep", "600" ], + "cwd": "/", + "scheduler": { + "policy": "SCHED_DEADLINE", + "flags": [ "SCHED_FLAG_RESET_ON_FORK" ], + "nice": 19, + "priority": 0, + "runtime": 42000, + "deadline": 100000, + "period": 1000000 + } +}' + __runc exec -d --pid-file pid.txt --process <(echo "$proc") test_scheduler - [[ "${lines[0]}" == *"scheduling policy: SCHED_DEADLINE" ]] + run chrt -p "$(cat pid.txt)" + [[ "${lines[0]}" == *"scheduling policy: SCHED_DEADLINE|SCHED_RESET_ON_FORK" ]] [[ "${lines[1]}" == *"priority: 0" ]] - [[ "${lines[2]}" == *"runtime/deadline/period parameters: 42000/1000000/1000000" ]] + [[ "${lines[2]}" == *"runtime/deadline/period parameters: 42000/100000/1000000" ]] } # Checks that runc emits a specific error when scheduling policy is used