Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 89 additions & 8 deletions pkg/network/ovn_kubernetes.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ package network

import (
"context"
"crypto/sha1"
"encoding/hex"
"encoding/json"
"fmt"
"hash/fnv"
"log"
"math"
"math/big"
Expand Down Expand Up @@ -483,29 +483,110 @@ func renderOVNKubernetes(conf *operv1.NetworkSpec, bootstrapResult *bootstrap.Bo
// daemonsets need to know when those ConfigMaps change so they can
// restart with the new options. Render those ConfigMaps first and
// embed a hash of their data into the ovnkube-node daemonsets.
h := sha1.New()
//
// To avoid spurious DaemonSet rollouts during install (when the CNO
// reconciles multiple times in quick succession and template inputs
// may resolve differently), we compare the rendered ConfigMap .data
// against the deployed ConfigMap .data. If they match, we preserve
// the existing hash from the DaemonSet annotation rather than
// recomputing it. This prevents a DaemonSet generation bump (and
// thus a rolling update) when the ConfigMap content hasn't actually
// changed. See OCPBUGS-87818 for details.
h := fnv.New128a()
// renderedScriptLibData stores the rendered .data for the ovnkube-script-lib
// ConfigMap specifically, so we can compare it against the deployed version.
var renderedScriptLibData map[string]interface{}
for _, path := range cmPaths {
manifests, err := render.RenderTemplate(path, &data)
if err != nil {
return nil, progressing, errors.Wrapf(err, "failed to render ConfigMap template %q", path)
}

// Hash each rendered ConfigMap object's data
// Hash only the .data section of each rendered ConfigMap to avoid
// instability from metadata differences across reconciliation passes.
for _, m := range manifests {
bytes, err := json.Marshal(m)
cmData, found, err := uns.NestedMap(m.Object, "data")
if err != nil {
return nil, progressing, errors.Wrapf(err, "failed to read rendered ConfigMap %q data", path)
}
if !found {
return nil, progressing, errors.Errorf("rendered ConfigMap %q is missing .data", path)
}
if cmData == nil {
cmData = map[string]interface{}{}
}
// Track the rendered .data for the ovnkube-script-lib ConfigMap
// so we can compare it with the deployed version below.
if m.GetName() == "ovnkube-script-lib" {
renderedScriptLibData = cmData
}
bytes, err := json.Marshal(cmData)
if err != nil {
return nil, progressing, errors.Wrapf(err, "failed to marshal ConfigMap %q manifest", path)
return nil, progressing, errors.Wrapf(err, "failed to marshal ConfigMap %q data", path)
}
if _, err := h.Write(bytes); err != nil {
return nil, progressing, errors.Wrapf(err, "failed to hash ConfigMap %q data", path)
}
}
}
data.Data["OVNKubeConfigHash"] = hex.EncodeToString(h.Sum(nil))
newHash := hex.EncodeToString(h.Sum(nil))

// Preserve the existing DaemonSet hash annotation if the deployed ConfigMap
// content matches the rendered content. This avoids unnecessary rolling
// updates caused by template rendering instability during bootstrap.
configHash := newHash
Comment thread
mkowalski marked this conversation as resolved.
existingCM := &corev1.ConfigMap{}
if err := client.Default().CRClient().Get(context.TODO(),
types.NamespacedName{Name: "ovnkube-script-lib", Namespace: util.OVN_NAMESPACE}, existingCM); err != nil {
if !apierrors.IsNotFound(err) {
return nil, progressing, errors.Wrap(err, "failed to fetch deployed ovnkube-script-lib ConfigMap")
}
} else {
// ConfigMap exists — compare .data with the rendered content.
// ConfigMap .data is map[string]string, and the rendered unstructured
// .data values should also be strings. Use a type assertion to avoid
// masking malformed rendered output.
existingDataMatch := renderedScriptLibData != nil && len(existingCM.Data) == len(renderedScriptLibData)
if existingDataMatch {
for key, renderedVal := range renderedScriptLibData {
renderedStr, ok := renderedVal.(string)
if !ok {
// Rendered value is not a string — treat as a mismatch
// so we don't incorrectly preserve a stale hash.
existingDataMatch = false
break
}
existingVal, exists := existingCM.Data[key]
if !exists || existingVal != renderedStr {
existingDataMatch = false
break
}
}
}
if existingDataMatch {
// The deployed ConfigMap already has the same content.
// Look up the existing DaemonSet annotation to preserve the hash.
existingDS := &appsv1.DaemonSet{}
if err := client.Default().CRClient().Get(context.TODO(),
types.NamespacedName{Name: "ovnkube-node", Namespace: util.OVN_NAMESPACE}, existingDS); err != nil {
if !apierrors.IsNotFound(err) {
return nil, progressing, errors.Wrap(err, "failed to fetch existing ovnkube-node DaemonSet")
}
} else {
if ann := existingDS.Spec.Template.Annotations; ann != nil {
if existingHash, ok := ann["network.operator.openshift.io/ovnkube-script-lib-hash"]; ok && existingHash != "" {
klog.V(4).Infof("Preserving existing ovnkube-script-lib-hash %q (deployed ConfigMap data matches rendered data)", existingHash)
configHash = existingHash
}
}
}
}
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.
data.Data["OVNKubeConfigHash"] = configHash

// Compute a separate hash for no-overlay node config (outboundSNAT).
// This allows ovnkube-node to restart when outboundSNAT changes
nodeNoOverlayHash := sha1.New()
nodeNoOverlayHash := fnv.New128a()
nodeNoOverlayHashData := fmt.Sprintf("outboundSNAT=%v", data.Data["NoOverlayOutboundSNAT"])
if _, err := nodeNoOverlayHash.Write([]byte(nodeNoOverlayHashData)); err != nil {
return nil, progressing, errors.Wrap(err, "failed to hash node no-overlay config")
Expand All @@ -515,7 +596,7 @@ func renderOVNKubernetes(conf *operv1.NetworkSpec, bootstrapResult *bootstrap.Bo
// Compute a separate hash for control-plane config (bgpManagedConfig).
// This allows ovnkube-control-plane to restart when bgpManagedConfig changes,
// without restarting ovnkube-node pods.
cpHash := sha1.New()
cpHash := fnv.New128a()
cpHashData := fmt.Sprintf("asNumber=%v,topology=%v",
data.Data["NoOverlayManagedASNumber"],
data.Data["NoOverlayManagedTopology"])
Expand Down
171 changes: 171 additions & 0 deletions pkg/network/ovn_kubernetes_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5006,3 +5006,174 @@ func extractDaemonSetEnvVars(g *WithT, objs []*uns.Unstructured, dsName, contain
g.Expect(true).To(BeFalse(), "could not find DaemonSet %s with container %s", dsName, containerName)
return envVars
}

// getOVNKubeConfigHashFromObjs extracts the ovnkube-script-lib-hash from the
// rendered ovnkube-node DaemonSet's pod template annotations.
func getOVNKubeConfigHashFromObjs(objs []*uns.Unstructured) string {
for _, obj := range objs {
if obj.GetKind() == "DaemonSet" && obj.GetName() == "ovnkube-node" {
ann, found, err := uns.NestedStringMap(obj.Object, "spec", "template", "metadata", "annotations")
if err != nil || !found || ann == nil {
continue
}
if hash, ok := ann["network.operator.openshift.io/ovnkube-script-lib-hash"]; ok {
return hash
}
}
}
return ""
}

// testOVNBootstrapResult returns a standard OVN bootstrap result for hash tests.
func testOVNBootstrapResult() *bootstrap.BootstrapResult {
result := fakeBootstrapResult()
result.OVN = bootstrap.OVNBootstrapResult{
ControlPlaneReplicaCount: 3,
OVNKubernetesConfig: &bootstrap.OVNConfigBoostrapResult{
DpuHostModeLabel: OVN_NODE_SELECTOR_DEFAULT_DPU_HOST,
DpuModeLabel: OVN_NODE_SELECTOR_DEFAULT_DPU,
SmartNicModeLabel: OVN_NODE_SELECTOR_DEFAULT_SMART_NIC,
MgmtPortResourceName: "",
DpuNodeLeaseRenewInterval: DPU_NODE_LEASE_RENEW_INTERVAL_DEFAULT,
DpuNodeLeaseDuration: DPU_NODE_LEASE_DURATION_DEFAULT,
HyperShiftConfig: &bootstrap.OVNHyperShiftBootstrapResult{
Enabled: false,
},
},
}
return result
}

// TestOVNKubeConfigHashPreservedWhenDataUnchanged verifies that when the
// deployed ovnkube-script-lib ConfigMap .data matches the rendered .data,
// the existing DaemonSet hash annotation is preserved — even if it differs
// from the newly computed hash (which is the scenario this fix targets).
func TestOVNKubeConfigHashPreservedWhenDataUnchanged(t *testing.T) {
g := NewGomegaWithT(t)

crd := OVNKubernetesConfig.DeepCopy()
config := &crd.Spec
fillDefaults(config, nil)
bootstrapResult := testOVNBootstrapResult()
featureGatesCNO := getDefaultFeatureGates()

// First render: no pre-existing ConfigMap or DaemonSet in the cluster.
fakeClient := cnofake.NewFakeClient()
objs1, _, err := renderOVNKubernetes(config, bootstrapResult, manifestDirOvn, fakeClient, featureGatesCNO)
g.Expect(err).NotTo(HaveOccurred(), "first render of OVNKubernetes should succeed")
hash1 := getOVNKubeConfigHashFromObjs(objs1)
g.Expect(hash1).NotTo(BeEmpty(), "expected OVNKubeConfigHash to be set on first render")

// Extract the rendered ConfigMap, then create a DaemonSet in the cluster
// with a DIFFERENT hash annotation (simulating the scenario where the CNO
// previously computed a different hash for the same ConfigMap content).
var renderedCM *v1.ConfigMap
for _, obj := range objs1 {
if obj.GetKind() == "ConfigMap" && obj.GetName() == "ovnkube-script-lib" {
raw, err := json.Marshal(obj.Object)
g.Expect(err).NotTo(HaveOccurred(), "failed to marshal rendered ovnkube-script-lib ConfigMap")
renderedCM = &v1.ConfigMap{}
g.Expect(json.Unmarshal(raw, renderedCM)).NotTo(HaveOccurred(), "failed to unmarshal rendered ovnkube-script-lib ConfigMap")
}
}
g.Expect(renderedCM).NotTo(BeNil(), "rendered objects should contain ovnkube-script-lib ConfigMap")

// Deploy the ConfigMap with the same .data, but a DaemonSet with a
// DIFFERENT hash — this is the exact scenario during install where
// the first reconciliation produced hash A and we want to preserve it
// on the second reconciliation (which would compute hash B).
const existingHashValue = "previously-computed-hash-from-first-reconciliation"
existingDS := &appsv1.DaemonSet{
ObjectMeta: metav1.ObjectMeta{
Name: "ovnkube-node",
Namespace: "openshift-ovn-kubernetes",
},
Spec: appsv1.DaemonSetSpec{
Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"app": "ovnkube-node"}},
Template: v1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{"app": "ovnkube-node"},
Annotations: map[string]string{
"network.operator.openshift.io/ovnkube-script-lib-hash": existingHashValue,
},
},
Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "placeholder", Image: "placeholder"}},
},
},
},
}

fakeClient2 := cnofake.NewFakeClient(renderedCM, existingDS)
objs2, _, err := renderOVNKubernetes(config, bootstrapResult, manifestDirOvn, fakeClient2, featureGatesCNO)
g.Expect(err).NotTo(HaveOccurred(), "second render with pre-existing ConfigMap and DaemonSet should succeed")
hash2 := getOVNKubeConfigHashFromObjs(objs2)
g.Expect(hash2).NotTo(BeEmpty(), "OVNKubeConfigHash should be set on second render")
// The key assertion: the existing hash is preserved because the deployed
// ConfigMap .data matches the rendered .data, even though the newly
// computed hash would be different.
g.Expect(hash2).To(Equal(existingHashValue),
"hash should be preserved from existing DaemonSet annotation when deployed ConfigMap data matches rendered data")
g.Expect(hash2).NotTo(Equal(hash1),
"this test should use a different existing hash to prove the preservation path is exercised")
}

// TestOVNKubeConfigHashChangesWhenDataDiffers verifies that when the deployed
// ovnkube-script-lib ConfigMap .data differs from the rendered .data, a new
// hash is computed (legitimate rollout).
func TestOVNKubeConfigHashChangesWhenDataDiffers(t *testing.T) {
g := NewGomegaWithT(t)

crd := OVNKubernetesConfig.DeepCopy()
config := &crd.Spec
fillDefaults(config, nil)
bootstrapResult := testOVNBootstrapResult()
featureGatesCNO := getDefaultFeatureGates()

// First render to get the expected hash.
fakeClient := cnofake.NewFakeClient()
objs1, _, err := renderOVNKubernetes(config, bootstrapResult, manifestDirOvn, fakeClient, featureGatesCNO)
g.Expect(err).NotTo(HaveOccurred(), "first render of OVNKubernetes should succeed")
hash1 := getOVNKubeConfigHashFromObjs(objs1)
g.Expect(hash1).NotTo(BeEmpty(), "OVNKubeConfigHash should be set on first render")

// Create a stale ConfigMap with different .data in the cluster,
// along with a DaemonSet that has the old hash.
staleCM := &v1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{
Name: "ovnkube-script-lib",
Namespace: "openshift-ovn-kubernetes",
},
Data: map[string]string{
"ovnkube-lib.sh": "#!/bin/bash\n# stale content that does not match the rendered template",
},
}
staleDS := &appsv1.DaemonSet{
ObjectMeta: metav1.ObjectMeta{
Name: "ovnkube-node",
Namespace: "openshift-ovn-kubernetes",
},
Spec: appsv1.DaemonSetSpec{
Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"app": "ovnkube-node"}},
Template: v1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{"app": "ovnkube-node"},
Annotations: map[string]string{
"network.operator.openshift.io/ovnkube-script-lib-hash": "stale-hash-value",
},
},
Spec: v1.PodSpec{
Containers: []v1.Container{{Name: "placeholder", Image: "placeholder"}},
},
},
},
}

fakeClient2 := cnofake.NewFakeClient(staleCM, staleDS)
objs2, _, err := renderOVNKubernetes(config, bootstrapResult, manifestDirOvn, fakeClient2, featureGatesCNO)
g.Expect(err).NotTo(HaveOccurred(), "render with stale ConfigMap should succeed")
hash2 := getOVNKubeConfigHashFromObjs(objs2)
g.Expect(hash2).NotTo(BeEmpty(), "OVNKubeConfigHash should be set when deployed ConfigMap data differs")
g.Expect(hash2).NotTo(Equal("stale-hash-value"), "hash should NOT be preserved when deployed ConfigMap data differs from rendered data")
g.Expect(hash2).To(Equal(hash1), "hash should be the newly computed value when data differs")
}