Skip to content

Commit 022a3b2

Browse files
committed
fix: add idempotency guard to prevent double overcommit mutation
- Add annotation 'overcommit.inditex.dev/applied' to track whether a pod has already been mutated by the webhook.
- Skip mutation on reinvocation (reinvocationPolicy=IfNeeded) if the pod was already processed by the same overcommit class.
- Store the applied CPU/memory ratios in annotations for observability.
- Resize operations always re-apply, since limits may have changed.
1 parent acdbaff commit 022a3b2

1 file changed

Lines changed: 38 additions & 6 deletions

File tree

pkg/overcommit/make_overcommit.go

Lines changed: 38 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ package overcommit
77

88
import (
99
"context"
10+
"fmt"
1011
"os"
1112

1213
"github.com/InditexTech/k8s-overcommit-operator/internal/metrics"
@@ -17,6 +18,11 @@ import (
1718
logf "sigs.k8s.io/controller-runtime/pkg/log"
1819
)
1920

21+
const (
22+
// AnnotationOvercommitApplied is set on pods after overcommit mutation to ensure idempotency.
23+
AnnotationOvercommitApplied = "overcommit.inditex.dev/applied"
24+
)
25+
2026
var podlog = logf.Log.WithName("overcommit")
2127

2228
func mutateContainers(containers []corev1.Container, pod *corev1.Pod, cpuValue float64, memoryValue float64) {
@@ -47,8 +53,17 @@ func mutateContainers(containers []corev1.Container, pod *corev1.Pod, cpuValue f
4753

4854
func Overcommit(pod *corev1.Pod, recorder record.EventRecorder, client client.Client) {
4955
ctx := context.Background()
56+
className := os.Getenv("OVERCOMMIT_CLASS_NAME")
57+
58+
metrics.K8sOvercommitOperatorPodsRequestedTotal.WithLabelValues(className).Inc()
5059

51-
metrics.K8sOvercommitOperatorPodsRequestedTotal.WithLabelValues(os.Getenv("OVERCOMMIT_CLASS_NAME")).Inc()
60+
// Idempotency: skip if this pod was already mutated by this class
61+
if pod.Annotations != nil {
62+
if applied, ok := pod.Annotations[AnnotationOvercommitApplied]; ok && applied == className {
63+
podlog.Info("Pod already mutated by this overcommit class, skipping", "pod", pod.Name, "class", className)
64+
return
65+
}
66+
}
5267

5368
cpuValue, memoryValue := checkOvercommitType(ctx, *pod, client)
5469

@@ -59,40 +74,57 @@ func Overcommit(pod *corev1.Pod, recorder record.EventRecorder, client client.Cl
5974
mutateContainers(pod.Spec.InitContainers, pod, cpuValue, memoryValue)
6075
}
6176

62-
metrics.K8sOvercommitOperatorMutatedPodsTotal.WithLabelValues(os.Getenv("OVERCOMMIT_CLASS_NAME")).Inc()
77+
// Mark the pod as mutated to prevent double-application on reinvocation
78+
setOvercommitAnnotation(pod, className, cpuValue, memoryValue)
79+
80+
metrics.K8sOvercommitOperatorMutatedPodsTotal.WithLabelValues(className).Inc()
6381

6482
recorder.Eventf(
6583
pod,
6684
corev1.EventTypeNormal,
6785
"OvercommitApplied",
6886
"Applied overcommit to Pod '%s': OvercommitClass = %s, CPU Overcommit = %.2f, Memory Overcommit = %.2f",
6987
pod.Name,
70-
os.Getenv("OVERCOMMIT_CLASS_NAME"),
88+
className,
7189
cpuValue,
7290
memoryValue,
7391
)
7492
}
7593

7694
func OvercommitOnResize(pod *corev1.Pod, recorder record.EventRecorder, client client.Client) {
7795
ctx := context.Background()
96+
className := os.Getenv("OVERCOMMIT_CLASS_NAME")
7897

79-
metrics.K8sOvercommitOperatorPodsRequestedTotal.WithLabelValues(os.Getenv("OVERCOMMIT_CLASS_NAME")).Inc()
98+
metrics.K8sOvercommitOperatorPodsRequestedTotal.WithLabelValues(className).Inc()
8099

81100
cpuValue, memoryValue := checkOvercommitType(ctx, *pod, client)
82101

83102
// On resize: regular containers only.
84103
mutateContainers(pod.Spec.Containers, pod, cpuValue, memoryValue)
85104

86-
metrics.K8sOvercommitOperatorMutatedPodsTotal.WithLabelValues(os.Getenv("OVERCOMMIT_CLASS_NAME")).Inc()
105+
// Update annotation with new values after resize
106+
setOvercommitAnnotation(pod, className, cpuValue, memoryValue)
107+
108+
metrics.K8sOvercommitOperatorMutatedPodsTotal.WithLabelValues(className).Inc()
87109

88110
recorder.Eventf(
89111
pod,
90112
corev1.EventTypeNormal,
91113
"OvercommitAppliedOnResize",
92114
"Applied overcommit on resize to Pod '%s': OvercommitClass = %s, CPU Overcommit = %.2f, Memory Overcommit = %.2f",
93115
pod.Name,
94-
os.Getenv("OVERCOMMIT_CLASS_NAME"),
116+
className,
95117
cpuValue,
96118
memoryValue,
97119
)
98120
}
121+
122+
// setOvercommitAnnotation marks the pod as having been mutated by the overcommit webhook.
123+
func setOvercommitAnnotation(pod *corev1.Pod, className string, cpuValue, memoryValue float64) {
124+
if pod.Annotations == nil {
125+
pod.Annotations = make(map[string]string)
126+
}
127+
pod.Annotations[AnnotationOvercommitApplied] = className
128+
pod.Annotations["overcommit.inditex.dev/cpu"] = fmt.Sprintf("%.4f", cpuValue)
129+
pod.Annotations["overcommit.inditex.dev/memory"] = fmt.Sprintf("%.4f", memoryValue)
130+
}

0 commit comments

Comments
 (0)