Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
174 changes: 119 additions & 55 deletions cmd/plugins/balloons/policy/balloons-policy.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ import (
"github.com/containers/nri-plugins/pkg/kubernetes"
logger "github.com/containers/nri-plugins/pkg/log"
"github.com/containers/nri-plugins/pkg/resmgr/cache"
cpucontrol "github.com/containers/nri-plugins/pkg/resmgr/control/cpu"
"github.com/containers/nri-plugins/pkg/resmgr/events"
libmem "github.com/containers/nri-plugins/pkg/resmgr/lib/memory"
policy "github.com/containers/nri-plugins/pkg/resmgr/policy"
Expand Down Expand Up @@ -89,9 +88,10 @@ type balloons struct {
meters *Meters // balloon metrics
meterLock sync.RWMutex // protects metrics collection against allocation

cpuAllocator cpuallocator.CPUAllocator // CPU allocator used by the policy
memAllocator *libmem.Allocator // memory allocator used by the policy
loadVirtDev map[string]*loadClassVirtDev // map LoadClasses to virtual devices
cpuAllocator cpuallocator.CPUAllocator // CPU allocator used by the policy
memAllocator *libmem.Allocator // memory allocator used by the policy
turboAllocator *CPUClassTurboAllocator // turbo budget allocator based on CPUClasses
loadVirtDev map[string]*loadClassVirtDev // map LoadClasses to virtual devices
}

// Balloon contains attributes of a balloon instance
Expand Down Expand Up @@ -791,80 +791,62 @@ func largest(sliceLen int, valueOf func(i int) int) ([]int, int) {
// resetCpuClass resets CPU configurations globally. All balloons can
// be ignored, their CPU configurations will be applied later.
func (p *balloons) resetCpuClass() error {
// Usual inputs:
// - p.allowed (cpuset.CPUset): all CPUs available for this
// policy.
// - p.IdleCpuClass (string): CPU class for allowed CPUs.
//
// Other inputs, if needed:
// - p.reserved (cpuset.CPUset): CPUs of ReservedResources
// (typically for kube-system containers).
//
// Note: p.useCpuClass(balloon) will be called before assigning
// containers on the balloon, including the reserved balloon.
//
// TODO: don't depend on cpu controller directly
if err := cpucontrol.Assign(p.cch, p.bpoptions.IdleCpuClass, p.allowed.UnsortedList()...); err != nil {
// p.useCpuClass(balloon) will be called later for every balloon,
// including the reserved balloon, to set the per-balloon CPU
// class. Here we only assign the idle class to all allowed CPUs.
if p.turboAllocator == nil {
return nil
}
idle := p.turboAllocator.ResolveClassName(p.bpoptions.IdleCpuClass)
if err := p.turboAllocator.ResetIdle(p.allowed); err != nil {
log.Warnf("failed to reset class of available cpus: %v", err)
} else {
log.Debugf("reset class of available cpus: %q (reserved: %q)", p.allowed, p.reserved)
log.Debugf("reset class of available cpus: %q to idle class %q (reserved: %q)",
p.allowed, idle, p.reserved)
}
return nil
}

// useCpuClass configures CPUs of a balloon.
// useCpuClass configures CPUs of a balloon by delegating to the
// turbo-aware CPU class allocator.
func (p *balloons) useCpuClass(bln *Balloon) error {
// Usual inputs:
// - CPUs that cpuallocator has reserved for this balloon:
// bln.Cpus (cpuset.CPUSet).
// - User-defined CPU configuration for CPUs of balloon of this type:
// bln.Def.CpuClass (string).
// - Current configuration(?): feel free to add data
// structure for this. For instance policy-global p.cpuConfs,
// or balloon-local bln.cpuConfs.
//
// Other input examples, if needed:
// - Requested CPU resources by all containers in the balloon:
// p.requestedMilliCpus(bln).
// - Free CPU resources in the balloon: p.freeMilliCpus(bln).
// - Number of assigned containers: bln.ContainerCount().
// - Container details: access p.cch with bln.ContainerIDs().
// - User-defined CPU AllocatorPriority: bln.Def.AllocatorPriority.
// - All existing balloon instances: p.balloons.
// - CPU configurations by user: bln.Def.CpuClass (for bln in p.balloons)
if len(bln.components) > 0 {
// If this is a composite balloon, CPU class is
// defined in the component balloons.
log.Debugf("apply CPU class %q on CPUs %s of composite balloon %q",
bln.Def.CpuClass, bln.Cpus, bln.PrettyName())
// Composite balloon: each component carries its own CpuClass.
log.Debugf("apply CPU classes of components of composite balloon %q on CPUs %s",
bln.PrettyName(), bln.Cpus)
for _, compBln := range bln.components {
if err := p.useCpuClass(compBln); err != nil {
log.Warnf("failed to apply CPU class %q on CPUs %s of %q in composite balloon %q: %v",
compBln.Def.CpuClass, compBln.Cpus, compBln.PrettyName(), bln.PrettyName(), err)
}

}
return nil
}
if err := cpucontrol.Assign(p.cch, bln.Def.CpuClass, bln.Cpus.UnsortedList()...); err != nil {
log.Warnf("failed to apply class %q on CPUs %q: %v", bln.Def.CpuClass, bln.Cpus, err)
} else {
log.Debugf("apply CPU class %q on CPUs %q of %q", bln.Def.CpuClass, bln.Cpus, bln.PrettyName())
if p.turboAllocator == nil {
return nil
}
className := p.turboAllocator.ResolveClassName(bln.Def.CpuClass)
log.Debugf("apply CPU class %q on CPUs %q of %q", className, bln.Cpus, bln.PrettyName())
if err := p.turboAllocator.UseClass(bln.Def.CpuClass, bln.Cpus); err != nil {
log.Warnf("failed to apply class %q on CPUs %q: %v", className, bln.Cpus, err)
}
return nil
}

// forgetCpuClass is called when CPUs of a balloon are released from duty.
func (p *balloons) forgetCpuClass(bln *Balloon) {
// Use p.IdleCpuClass for bln.Cpus.
// Usual inputs: see useCpuClass
if err := cpucontrol.Assign(p.cch, p.bpoptions.IdleCpuClass, bln.Cpus.UnsortedList()...); err != nil {
log.Warnf("failed to forget class %q of cpus %q: %v", bln.Def.CpuClass, bln.Cpus, err)
if p.turboAllocator == nil {
return
}
idle := p.turboAllocator.ResolveClassName(p.bpoptions.IdleCpuClass)
if err := p.turboAllocator.ForgetClass(bln.Cpus); err != nil {
log.Warnf("failed to forget class of cpus %q (idle class %q): %v", bln.Cpus, idle, err)
} else {
if len(bln.components) > 0 {
log.Debugf("forget classes of composite balloon %q cpus %q", bln.Def.Name, bln.Cpus)
log.Debugf("forget classes of composite balloon %q cpus %q (idle class %q)",
bln.Def.Name, bln.Cpus, idle)
} else {
log.Debugf("forget class %q of cpus %q", bln.Def.CpuClass, bln.Cpus)
log.Debugf("forget class of cpus %q (idle class %q)", bln.Cpus, idle)
}
}
}
Expand Down Expand Up @@ -1399,14 +1381,28 @@ func changesBalloons(opts0, opts1 *BalloonsOptions) bool {
}
o0 := opts0.DeepCopy()
o1 := opts1.DeepCopy()
// Ignore differences in CPU class names. Every other change
// potentially changes balloons or workloads.
// Ignore differences in BalloonsOptions that do not affect
// CPU-to-balloon or container-to-balloon mapping. Such
// differences include:
//
// 1. CPUClass related parameters
o0.IdleCpuClass = ""
o1.IdleCpuClass = ""
o0.TurboDomain = ""
o1.TurboDomain = ""
o0.CPUClasses = nil
o1.CPUClasses = nil
for i := range o0.BalloonDefs {
o0.BalloonDefs[i].CpuClass = ""
o1.BalloonDefs[i].CpuClass = ""
}
// 2. Scheduling policy parameters
o0.SchedulingClasses = nil
o1.SchedulingClasses = nil
for i := range o0.BalloonDefs {
o0.BalloonDefs[i].SchedulingClass = ""
o1.BalloonDefs[i].SchedulingClass = ""
}
Comment thread
askervin marked this conversation as resolved.
return utils.DumpJSON(o0) != utils.DumpJSON(o1)
}

Expand All @@ -1424,6 +1420,9 @@ func changesCpuClasses(opts0, opts1 *BalloonsOptions) bool {
if opts0.IdleCpuClass != opts1.IdleCpuClass {
return true
}
if opts0.TurboDomain != opts1.TurboDomain {
return true
}
if len(opts0.BalloonDefs) != len(opts1.BalloonDefs) {
return true
}
Expand All @@ -1432,6 +1431,13 @@ func changesCpuClasses(opts0, opts1 *BalloonsOptions) bool {
return true
}
}
// Detect changes in CPUClasses definitions (turbo attributes, frequencies, etc.)
if len(opts0.CPUClasses) != len(opts1.CPUClasses) {
return true
}
if utils.DumpJSON(opts0.CPUClasses) != utils.DumpJSON(opts1.CPUClasses) {
return true
Comment thread
askervin marked this conversation as resolved.
}
return false
}

Expand All @@ -1454,6 +1460,15 @@ func (p *balloons) Reconfigure(newCfg interface{}) error {
log.Infof("no configuration changes")
} else {
log.Infof("configuration changes only on CPU classes")
// Update CPUClasses definitions, the idle CPU class, and the turbo domain.
p.bpoptions.CPUClasses = newBalloonsOptions.CPUClasses
p.bpoptions.IdleCpuClass = newBalloonsOptions.IdleCpuClass
p.bpoptions.TurboDomain = newBalloonsOptions.TurboDomain
if p.turboAllocator != nil {
if err := p.turboAllocator.Reconfigure(p.bpoptions.CPUClasses, p.bpoptions.IdleCpuClass, p.bpoptions.TurboDomain); err != nil {
log.Warnf("failed to reconfigure CPU class allocator: %v", err)
}
}
// Update new CPU classes to existing balloon
// definitions. The same BalloonDef instances
// must be kept in use, because each Balloon
Expand Down Expand Up @@ -1600,6 +1615,32 @@ func (p *balloons) validateConfig(bpoptions *BalloonsOptions) error {
if len(undefinedSchedulingClasses) > 0 {
return balloonsError("schedulingClass(es) defined in balloonTypes but missing from schedulingClasses: %v", undefinedSchedulingClasses)
}
// Validate CPUClasses: every entry must have a non-empty, unique name.
cpuClassNames := map[string]struct{}{}
for _, cc := range bpoptions.CPUClasses {
if cc.Name == "" {
return balloonsError("missing or empty name in a cpuClasses entry")
}
Comment thread
askervin marked this conversation as resolved.
if _, dup := cpuClassNames[cc.Name]; dup {
return balloonsError("duplicate cpuClasses name: %q", cc.Name)
}
cpuClassNames[cc.Name] = struct{}{}
}
// Check that cpuClass references in balloon types are defined
// in cpuClasses. An undefined reference only triggers a warning,
// not an error. Using the legacy control.cpu.classes
// configuration is discouraged, and it is possibly out-of-date
// at this point because resource-manager starts controllers
// only after policies.
for _, blnDef := range bpoptions.BalloonDefs {
if blnDef.CpuClass == "" {
continue
}
_, inCPUClasses := cpuClassNames[blnDef.CpuClass]
if !inCPUClasses {
log.Warnf("cpuClass %q referenced by balloon type %q is not defined in cpuClasses",
blnDef.CpuClass, blnDef.Name)
}
}
var circularCheck func(name string, seen map[string]int) error
circularCheck = func(name string, seen map[string]int) error {
if seen[name] > 0 {
Expand Down Expand Up @@ -1671,6 +1712,29 @@ func (p *balloons) setConfig(bpoptions *BalloonsOptions) error {

setOmittedDefaults(bpoptions)

// Set bpoptions early so the turbo allocator construction below
// has access to CPUClasses.
p.bpoptions = bpoptions

Comment thread
askervin marked this conversation as resolved.
// Construct or reconfigure the turbo-aware CPU class allocator.
if p.turboAllocator == nil {
ta, err := NewCPUClassTurboAllocator(
WithSystem(p.options.System),
WithCache(p.cch),
WithCPUClasses(bpoptions.CPUClasses),
WithIdleClass(bpoptions.IdleCpuClass),
WithTurboDomain(bpoptions.TurboDomain),
)
if err != nil {
return balloonsError("failed to create CPU class turbo allocator: %w", err)
}
p.turboAllocator = ta
} else {
if err := p.turboAllocator.Reconfigure(bpoptions.CPUClasses, bpoptions.IdleCpuClass, bpoptions.TurboDomain); err != nil {
return balloonsError("failed to reconfigure CPU class turbo allocator: %w", err)
}
}

reservedBalloonDef, defaultBalloonDef, err := p.fillBuiltinBalloonDefs(bpoptions)
if err != nil {
return err
Expand Down
Loading
Loading