From 4f18cae7c7122c8a787a0c9ba835bc13793b3d57 Mon Sep 17 00:00:00 2001 From: Piotr Rygielski <114479+vikin91@users.noreply.github.com> Date: Wed, 10 Jun 2026 11:21:37 +0200 Subject: [PATCH 1/7] feat(ocp4-virt): add virt parameters and flavor metadata Co-authored-by: Cursor --- chart/infra-server/static/flavors.yaml | 64 +++++++++++++++++++ .../static/workflow-openshift-4.yaml | 6 ++ 2 files changed, 70 insertions(+) diff --git a/chart/infra-server/static/flavors.yaml b/chart/infra-server/static/flavors.yaml index 84db1829f..98500e448 100644 --- a/chart/infra-server/static/flavors.yaml +++ b/chart/infra-server/static/flavors.yaml @@ -329,6 +329,35 @@ Defines a list of capabilities to explicitly enable. These capabilities are enabled in addition to the capabilities specified in the baseline capability set. Example: `["DeploymentConfig", "ImageRegistry"]` + - name: install-virt + description: Install OpenShift Virtualization operator with VSOCK and create a VM + value: false + kind: optional + help: | + When true, an additional n2-standard-8 worker node is added and configured + with OpenShift Virtualization (KubeVirt) including VSOCK support. A single + VM is created using the OS specified by the vm-os parameter. + + - name: vm-os + description: OS for the virtual machine (rhel9, rhel10) + value: rhel9 + kind: optional + help: | + The RHEL version for the virtual machine container disk. + Valid values: rhel9, rhel10. + The image used will be quay.io/rhacs-eng/vm-images:<vm-os>-dnf-primed-latest. + + - name: virt-node-dedicated + description: Taint the virt node so only VMs are scheduled on it + value: false + kind: optional + help: | + When true, the dedicated virt worker node is tainted with + node-role.kubernetes.io/virt:NoSchedule so that only VM workloads + (which have a matching toleration) are scheduled on it. + When false, the virt node also accepts regular ACS/OCP workloads. + Only relevant when install-virt is true. 
+ artifacts: - name: kubeconfig description: Kube config for connecting to this cluster @@ -359,6 +388,9 @@ - name: cluster-console-password description: The password to login at the openshift console + - name: vm-access + description: Credentials and access commands for the created VM + ###################### # openshift-4-demo # ###################### @@ -651,6 +683,35 @@ Defines a list of capabilities to explicitly enable. These capabilities are enabled in addition to the capabilities specified in the baseline capability set. Example: `["DeploymentConfig", "ImageRegistry"]` + - name: install-virt + description: Install OpenShift Virtualization operator with VSOCK and create a VM + value: false + kind: optional + help: | + When true, an additional n2-standard-8 worker node is added and configured + with OpenShift Virtualization (KubeVirt) including VSOCK support. A single + VM is created using the OS specified by the vm-os parameter. + + - name: vm-os + description: OS for the virtual machine (rhel9, rhel10) + value: rhel9 + kind: optional + help: | + The RHEL version for the virtual machine container disk. + Valid values: rhel9, rhel10. + The image used will be quay.io/rhacs-eng/vm-images:<vm-os>-dnf-primed-latest. + + - name: virt-node-dedicated + description: Taint the virt node so only VMs are scheduled on it + value: false + kind: optional + help: | + When true, the dedicated virt worker node is tainted with + node-role.kubernetes.io/virt:NoSchedule so that only VM workloads + (which have a matching toleration) are scheduled on it. + When false, the virt node also accepts regular ACS/OCP workloads. + Only relevant when install-virt is true. 
+ artifacts: - name: kubeconfig description: Kube config for connecting to this cluster @@ -681,6 +742,9 @@ - name: cluster-console-password description: The password to login at the openshift console + - name: vm-access + description: Credentials and access commands for the created VM + ##################### # AWS EKS # ##################### diff --git a/chart/infra-server/static/workflow-openshift-4.yaml b/chart/infra-server/static/workflow-openshift-4.yaml index 5290c3d85..b255a682b 100644 --- a/chart/infra-server/static/workflow-openshift-4.yaml +++ b/chart/infra-server/static/workflow-openshift-4.yaml @@ -32,6 +32,12 @@ spec: value: "vCurrent" - name: additional-enabled-capabilities value: "" + - name: install-virt + value: "false" + - name: vm-os + value: "rhel9" + - name: virt-node-dedicated + value: "false" volumeClaimTemplates: - metadata: name: data From 7af0e8bf13d184e3f816394cfaac3d54081ea619 Mon Sep 17 00:00:00 2001 From: Piotr Rygielski <114479+vikin91@users.noreply.github.com> Date: Wed, 10 Jun 2026 11:30:03 +0200 Subject: [PATCH 2/7] feat(ocp4-virt): add optional virtualization workflow and VM access artifact Co-authored-by: Cursor --- .../static/workflow-openshift-4.yaml | 467 ++++++++++++++++++ 1 file changed, 467 insertions(+) diff --git a/chart/infra-server/static/workflow-openshift-4.yaml b/chart/infra-server/static/workflow-openshift-4.yaml index b255a682b..7de0de3ec 100644 --- a/chart/infra-server/static/workflow-openshift-4.yaml +++ b/chart/infra-server/static/workflow-openshift-4.yaml @@ -50,6 +50,9 @@ spec: - name: credentials secret: secretName: openshift-4-gcp-service-account + - name: registry-pull-secret + secret: + secretName: infra-image-registry-pull-secret templates: - name: start @@ -57,6 +60,18 @@ spec: - - name: create template: create + - - name: add-virt-node + template: add-virt-node + when: '{{ "{{" }}workflow.parameters.install-virt{{ "}}" }} == true' + + - - name: install-virt-operator + template: install-virt-operator + 
when: '{{ "{{" }}workflow.parameters.install-virt{{ "}}" }} == true' + + - - name: create-vm + template: create-vm + when: '{{ "{{" }}workflow.parameters.install-virt{{ "}}" }} == true' + - - name: gather template: gather @@ -189,6 +204,458 @@ spec: archive: none: {} + - name: add-virt-node + activeDeadlineSeconds: 900 + script: + image: quay.io/stackrox-io/ci:automation-flavors-openshift-4-{{ .Chart.Annotations.automationFlavorsVersion }} + command: [bash] + source: | + set -euo pipefail + export KUBECONFIG=/data/auth/kubeconfig + + DEDICATED='{{ "{{" }}workflow.parameters.virt-node-dedicated{{ "}}" }}' + + echo "=== Creating dedicated virt worker MachineSet ===" + echo "Dedicated (tainted) mode: $DEDICATED" + + # Pick a worker MachineSet deterministically so multi-zone clusters behave predictably + WORKER_MS=$(oc get machinesets -n openshift-machine-api -o json | jq -r ' + .items + | map(select( + .metadata.labels["machine.openshift.io/cluster-api-machine-role"] == "worker" or + .metadata.labels["machine.openshift.io/cluster-api-machine-type"] == "worker" + )) + | sort_by(.metadata.name) + | .[0].metadata.name // empty + ') + if [ -z "$WORKER_MS" ]; then + echo "ERROR: No worker MachineSet found in openshift-machine-api" + exit 1 + fi + echo "Using $WORKER_MS as template" + + # Extract infrastructure ID and zone + INFRA_ID=$(oc get infrastructure cluster -o jsonpath='{.status.infrastructureName}') + ZONE=$(oc get machineset "$WORKER_MS" -n openshift-machine-api -o jsonpath='{.spec.template.spec.providerSpec.value.zone}') + REGION=$(oc get machineset "$WORKER_MS" -n openshift-machine-api -o jsonpath='{.spec.template.spec.providerSpec.value.region}') + PROJECT_ID=$(oc get machineset "$WORKER_MS" -n openshift-machine-api -o jsonpath='{.spec.template.spec.providerSpec.value.projectID}') + + echo "Infra ID: $INFRA_ID, Zone: $ZONE, Region: $REGION, Project: $PROJECT_ID" + + VIRT_MS_NAME="${INFRA_ID}-virt-worker-${ZONE}" + + # Check if it already exists + if oc get 
machineset "$VIRT_MS_NAME" -n openshift-machine-api &>/dev/null; then + echo "MachineSet $VIRT_MS_NAME already exists" + else + # Build jq filter — optionally add taint + JQ_FILTER=' + del(.metadata.uid, .metadata.resourceVersion, .metadata.creationTimestamp, .metadata.generation, .status) | + .metadata.name = $name | + .spec.replicas = 1 | + .spec.selector.matchLabels["machine.openshift.io/cluster-api-machineset"] = $name | + .spec.template.metadata.labels["machine.openshift.io/cluster-api-machineset"] = $name | + .spec.template.metadata.labels["node-role.kubernetes.io/virt"] = "" | + .spec.template.spec.providerSpec.value.machineType = $machineType | + .spec.template.spec.metadata.labels["node-role.kubernetes.io/virt"] = "" + ' + if [ "$DEDICATED" = "true" ]; then + JQ_FILTER="${JQ_FILTER} | .spec.template.spec.taints = [{\"key\": \"node-role.kubernetes.io/virt\", \"effect\": \"NoSchedule\"}]" + fi + + # Export the existing MachineSet and modify it + oc get machineset "$WORKER_MS" -n openshift-machine-api -o json | \ + jq --arg name "$VIRT_MS_NAME" --arg machineType "n2-standard-8" "$JQ_FILTER" | \ + oc apply -f - + echo "Created MachineSet $VIRT_MS_NAME" + fi + + # Wait for the machine to be provisioned and node to be Ready + echo "Waiting for virt node to become Ready..." + timeout=600 + elapsed=0 + while true; do + READY_NODES=$(oc get nodes -l node-role.kubernetes.io/virt -o jsonpath='{.items[?(@.status.conditions[?(@.type=="Ready")].status=="True")].metadata.name}' 2>/dev/null || echo "") + if [ -n "$READY_NODES" ]; then + echo "Virt node is Ready: $READY_NODES" + break + fi + if [ $elapsed -ge $timeout ]; then + echo "ERROR: Timeout waiting for virt node after ${timeout}s" + oc get machines -n openshift-machine-api -l machine.openshift.io/cluster-api-machineset="$VIRT_MS_NAME" + exit 1 + fi + if [ $((elapsed % 60)) -eq 0 ] && [ $elapsed -gt 0 ]; then + echo "Still waiting... 
(${elapsed}s elapsed)" + oc get machines -n openshift-machine-api -l machine.openshift.io/cluster-api-machineset="$VIRT_MS_NAME" --no-headers 2>/dev/null || true + fi + sleep 15 + elapsed=$((elapsed + 15)) + done + + echo "=== Virt worker node ready ===" + volumeMounts: + - name: data + mountPath: /data + + - name: install-virt-operator + activeDeadlineSeconds: 2400 + script: + image: quay.io/stackrox-io/ci:automation-flavors-openshift-4-{{ .Chart.Annotations.automationFlavorsVersion }} + command: [bash] + source: | + set -euo pipefail + export KUBECONFIG=/data/auth/kubeconfig + + OLM_NAMESPACE="openshift-cnv" + SUBSCRIPTION_NAME="kubevirt-hyperconverged" + HCO_NAMESPACE="$OLM_NAMESPACE" + HCO_NAME="kubevirt-hyperconverged" + + echo "=== Installing OpenShift Virtualization ===" + + # Check if already installed and healthy + if oc get hyperconverged "$HCO_NAME" -n "$HCO_NAMESPACE" &>/dev/null; then + avail=$(oc -n "$HCO_NAMESPACE" get hyperconverged "$HCO_NAME" -o jsonpath='{.status.conditions[?(@.type=="Available")].status}' 2>/dev/null || echo "Unknown") + prog=$(oc -n "$HCO_NAMESPACE" get hyperconverged "$HCO_NAME" -o jsonpath='{.status.conditions[?(@.type=="Progressing")].status}' 2>/dev/null || echo "Unknown") + degr=$(oc -n "$HCO_NAMESPACE" get hyperconverged "$HCO_NAME" -o jsonpath='{.status.conditions[?(@.type=="Degraded")].status}' 2>/dev/null || echo "Unknown") + if [ "$avail" = "True" ] && [ "$prog" = "False" ] && [ "$degr" = "False" ]; then + echo "OpenShift Virtualization already installed and healthy" + echo "Ensuring VSOCK and KVM_EMULATION are configured..." 
+ else + echo "HyperConverged exists but not healthy (Available=$avail, Progressing=$prog, Degraded=$degr)" + fi + fi + + # Create namespace, OperatorGroup, and Subscription + cat <<'EOFK8S' | oc apply -f - + apiVersion: v1 + kind: Namespace + metadata: + name: openshift-cnv + --- + apiVersion: operators.coreos.com/v1 + kind: OperatorGroup + metadata: + name: openshift-cnv + namespace: openshift-cnv + spec: + targetNamespaces: + - openshift-cnv + --- + apiVersion: operators.coreos.com/v1alpha1 + kind: Subscription + metadata: + name: kubevirt-hyperconverged + namespace: openshift-cnv + spec: + channel: stable + name: kubevirt-hyperconverged + source: redhat-operators + sourceNamespace: openshift-marketplace + installPlanApproval: Automatic + EOFK8S + echo "Applied namespace, OperatorGroup, and Subscription" + + # Wait for installedCSV + echo "Waiting for Subscription to report installedCSV..." + timeout=300 + elapsed=0 + until oc -n "$OLM_NAMESPACE" get sub "$SUBSCRIPTION_NAME" -o jsonpath='{.status.installedCSV}' 2>/dev/null | grep -q .; do + sleep 5 + elapsed=$((elapsed + 5)) + if [ $elapsed -ge $timeout ]; then + echo "ERROR: Timeout waiting for installedCSV after ${timeout}s" + exit 1 + fi + if [ $((elapsed % 30)) -eq 0 ]; then + echo "Still waiting for installedCSV... (${elapsed}s)" + fi + done + + CSV=$(oc -n "$OLM_NAMESPACE" get sub "$SUBSCRIPTION_NAME" -o jsonpath='{.status.installedCSV}') + echo "InstalledCSV: $CSV" + + # Wait for CSV Succeeded + echo "Waiting for CSV to reach Succeeded phase..." 
+ timeout=900 + elapsed=0 + while true; do + PHASE=$(oc -n "$OLM_NAMESPACE" get csv "$CSV" -o jsonpath='{.status.phase}' 2>/dev/null || echo "") + if [ "$PHASE" = "Succeeded" ]; then + echo "CSV is Succeeded" + break + fi + if [ $elapsed -ge $timeout ]; then + echo "ERROR: CSV did not reach Succeeded (current: $PHASE) after ${timeout}s" + exit 1 + fi + if [ $((elapsed % 60)) -eq 0 ] && [ $elapsed -gt 0 ]; then + echo "Still waiting for CSV (Phase: ${PHASE:-Unknown}, ${elapsed}s)..." + fi + sleep 10 + elapsed=$((elapsed + 10)) + done + + # Create HyperConverged CR with VSOCK + echo "Creating HyperConverged CR with VSOCK feature gate..." + cat </dev/null || echo "Unknown") + prog=$(oc -n "$HCO_NAMESPACE" get hyperconverged "$HCO_NAME" -o jsonpath='{.status.conditions[?(@.type=="Progressing")].status}' 2>/dev/null || echo "Unknown") + degr=$(oc -n "$HCO_NAMESPACE" get hyperconverged "$HCO_NAME" -o jsonpath='{.status.conditions[?(@.type=="Degraded")].status}' 2>/dev/null || echo "Unknown") + if [ "$avail" = "True" ] && [ "$prog" = "False" ] && [ "$degr" = "False" ]; then + echo "HyperConverged is healthy" + break + fi + if [ $elapsed -ge $timeout ]; then + echo "ERROR: HyperConverged not healthy after ${timeout}s (Available=$avail, Progressing=$prog, Degraded=$degr)" + exit 1 + fi + if [ $((elapsed % 60)) -eq 0 ] && [ $elapsed -gt 0 ]; then + echo "Still waiting (Available=$avail, Progressing=$prog, Degraded=$degr) - ${elapsed}s" + fi + sleep 15 + elapsed=$((elapsed + 15)) + done + + # Patch KVM_EMULATION + current_kvm=$(oc get subscription kubevirt-hyperconverged -n openshift-cnv -o jsonpath='{.spec.config.env[?(@.name=="KVM_EMULATION")].value}' 2>/dev/null || echo "") + if [ "$current_kvm" = "true" ]; then + echo "KVM_EMULATION already set" + else + echo "Patching subscription with KVM_EMULATION..." 
+ oc patch subscription kubevirt-hyperconverged \ + -n openshift-cnv \ + --type=merge \ + -p '{"spec":{"config":{"selector":{"matchLabels":{"name":"hyperconverged-cluster-operator"}},"env":[{"name":"KVM_EMULATION","value":"true"}]}}}' + echo "KVM_EMULATION patched" + fi + + echo "=== OpenShift Virtualization installed with VSOCK + KVM_EMULATION ===" + volumeMounts: + - name: data + mountPath: /data + + - name: create-vm + activeDeadlineSeconds: 600 + outputs: + artifacts: + - name: vm-access + path: /data/vm-access.md + archive: + none: {} + script: + image: quay.io/stackrox-io/ci:automation-flavors-openshift-4-{{ .Chart.Annotations.automationFlavorsVersion }} + command: [bash] + source: | + set -euo pipefail + export KUBECONFIG=/data/auth/kubeconfig + + VM_OS='{{ "{{" }}workflow.parameters.vm-os{{ "}}" }}' + DEDICATED='{{ "{{" }}workflow.parameters.virt-node-dedicated{{ "}}" }}' + NAMESPACE="openshift-cnv" + VM_NAME="${VM_OS}-1" + CONTAINER_IMAGE="quay.io/rhacs-eng/vm-images:${VM_OS}-dnf-primed-latest" + SSH_USER="cloud-user" + PULL_SECRET_NAME="quay-rhacs-eng-ro" + + case "$VM_OS" in + rhel9|rhel10) ;; + *) + echo "ERROR: unsupported vm-os '$VM_OS'. Valid values: rhel9, rhel10." + exit 1 + ;; + esac + + VM_PASSWORD=$(openssl rand -hex 10) + + echo "=== Creating VM: $VM_NAME (OS: $VM_OS, dedicated=$DEDICATED) ===" + + # Copy the already-provisioned Quay pull secret from the infra namespace into the target cluster + echo "Creating pull secret for quay.io/rhacs-eng in namespace $NAMESPACE from mounted infra secret..." + if [ ! 
-s /infra-secrets/quay/.dockerconfigjson ]; then + echo "ERROR: mounted registry secret /infra-secrets/quay/.dockerconfigjson is missing or empty" + exit 1 + fi + cat </dev/null; then + STATUS=$(oc get vm "$VM_NAME" -n "$NAMESPACE" -o jsonpath='{.status.printableStatus}' 2>/dev/null || echo "Unknown") + echo "VM $VM_NAME already exists (status: $STATUS)" + if [ "$STATUS" = "Running" ]; then + echo "VM is already running" + exit 0 + fi + fi + + # Build node placement YAML fragment conditionally + NODE_PLACEMENT="" + if [ "$DEDICATED" = "true" ]; then + NODE_PLACEMENT=' + nodeSelector: + node-role.kubernetes.io/virt: "" + tolerations: + - key: "node-role.kubernetes.io/virt" + operator: "Exists" + effect: "NoSchedule"' + fi + + cat </dev/null || echo "") + if [ "$PHASE" = "Running" ]; then + echo "VMI is Running" + break + fi + if [ $elapsed -ge $timeout ]; then + echo "WARNING: VMI did not reach Running phase after ${timeout}s (current: $PHASE)" + echo "VM was created but may still be starting" + break + fi + if [ $((elapsed % 30)) -eq 0 ] && [ $elapsed -gt 0 ]; then + echo "Waiting for VMI (phase: ${PHASE:-Pending}, ${elapsed}s)..." 
+ fi + sleep 10 + elapsed=$((elapsed + 10)) + done + + echo "" + echo "=== VM Created ===" + echo " Name: $VM_NAME" + echo " Namespace: $NAMESPACE" + echo " OS: $VM_OS" + echo " User: $SSH_USER" + echo " Password: stored in vm-access artifact" + echo " VSOCK: enabled" + echo "" + echo "Access via: virtctl ssh -n $NAMESPACE ${SSH_USER}@vmi/${VM_NAME}" + + # Save VM access info to the downloadable artifact + umask 077 + cat > /data/vm-access.md < Date: Wed, 10 Jun 2026 11:31:57 +0200 Subject: [PATCH 3/7] fix(ocp4-virt): use jq for node readiness and always write vm-access artifact Co-authored-by: Cursor --- chart/infra-server/static/workflow-openshift-4.yaml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/chart/infra-server/static/workflow-openshift-4.yaml b/chart/infra-server/static/workflow-openshift-4.yaml index 7de0de3ec..858c35398 100644 --- a/chart/infra-server/static/workflow-openshift-4.yaml +++ b/chart/infra-server/static/workflow-openshift-4.yaml @@ -275,7 +275,7 @@ spec: timeout=600 elapsed=0 while true; do - READY_NODES=$(oc get nodes -l node-role.kubernetes.io/virt -o jsonpath='{.items[?(@.status.conditions[?(@.type=="Ready")].status=="True")].metadata.name}' 2>/dev/null || echo "") + READY_NODES=$(oc get nodes -l node-role.kubernetes.io/virt -o json 2>/dev/null | jq -r '[.items[] | select(.status.conditions[]? 
| select(.type=="Ready" and .status=="True")) | .metadata.name] | join(" ")' 2>/dev/null || echo "") if [ -n "$READY_NODES" ]; then echo "Virt node is Ready: $READY_NODES" break @@ -516,10 +516,6 @@ spec: if oc get vm "$VM_NAME" -n "$NAMESPACE" &>/dev/null; then STATUS=$(oc get vm "$VM_NAME" -n "$NAMESPACE" -o jsonpath='{.status.printableStatus}' 2>/dev/null || echo "Unknown") echo "VM $VM_NAME already exists (status: $STATUS)" - if [ "$STATUS" = "Running" ]; then - echo "VM is already running" - exit 0 - fi fi # Build node placement YAML fragment conditionally From 1443f803014df064aab3f7580ceb0b074f3f20a0 Mon Sep 17 00:00:00 2001 From: Piotr Rygielski <114479+vikin91@users.noreply.github.com> Date: Wed, 10 Jun 2026 16:25:53 +0200 Subject: [PATCH 4/7] feat(ocp4-virt): add CAPI support to add-virt-node template The add-virt-node template now auto-detects whether the cluster uses legacy MachineSets (openshift-machine-api) or Cluster API resources (openshift-cluster-api-guests) and creates the virt worker accordingly. 
CAPI path (OCP 4.19+ on GCP): - Finds a worker GCPMachine, creates a GCPMachineTemplate with n2-standard-8, then creates a CAPI MachineSet (cluster.x-k8s.io) - Extracts bootstrap dataSecretName from existing worker Machine - Falls back to labeling via Machine.status.nodeRef if CAPI label propagation is slow - Applies NoSchedule taint manually (CAPI has no taint field) Co-authored-by: Cursor --- .../static/workflow-openshift-4.yaml | 241 +++++++++++++++--- 1 file changed, 200 insertions(+), 41 deletions(-) diff --git a/chart/infra-server/static/workflow-openshift-4.yaml b/chart/infra-server/static/workflow-openshift-4.yaml index 858c35398..e482e0b68 100644 --- a/chart/infra-server/static/workflow-openshift-4.yaml +++ b/chart/infra-server/static/workflow-openshift-4.yaml @@ -214,12 +214,21 @@ spec: export KUBECONFIG=/data/auth/kubeconfig DEDICATED='{{ "{{" }}workflow.parameters.virt-node-dedicated{{ "}}" }}' + INFRA_ID=$(oc get infrastructure cluster -o jsonpath='{.status.infrastructureName}') - echo "=== Creating dedicated virt worker MachineSet ===" + echo "=== Creating dedicated virt worker node ===" echo "Dedicated (tainted) mode: $DEDICATED" + echo "Infrastructure ID: $INFRA_ID" + + READY_VIRT=$(oc get nodes -l node-role.kubernetes.io/virt -o json 2>/dev/null | \ + jq '[.items[] | select(.status.conditions[]? 
| select(.type=="Ready" and .status=="True"))] | length' || echo "0") + if [ "${READY_VIRT:-0}" -gt 0 ]; then + echo "Virt node already exists and is Ready" + oc get nodes -l node-role.kubernetes.io/virt + exit 0 + fi - # Pick a worker MachineSet deterministically so multi-zone clusters behave predictably - WORKER_MS=$(oc get machinesets -n openshift-machine-api -o json | jq -r ' + WORKER_MS=$(oc get machinesets -n openshift-machine-api -o json 2>/dev/null | jq -r ' .items | map(select( .metadata.labels["machine.openshift.io/cluster-api-machine-role"] == "worker" or @@ -227,72 +236,222 @@ spec: )) | sort_by(.metadata.name) | .[0].metadata.name // empty - ') - if [ -z "$WORKER_MS" ]; then - echo "ERROR: No worker MachineSet found in openshift-machine-api" - exit 1 - fi - echo "Using $WORKER_MS as template" + ' 2>/dev/null || echo "") - # Extract infrastructure ID and zone - INFRA_ID=$(oc get infrastructure cluster -o jsonpath='{.status.infrastructureName}') - ZONE=$(oc get machineset "$WORKER_MS" -n openshift-machine-api -o jsonpath='{.spec.template.spec.providerSpec.value.zone}') - REGION=$(oc get machineset "$WORKER_MS" -n openshift-machine-api -o jsonpath='{.spec.template.spec.providerSpec.value.region}') - PROJECT_ID=$(oc get machineset "$WORKER_MS" -n openshift-machine-api -o jsonpath='{.spec.template.spec.providerSpec.value.projectID}') + CREATED_VIA="" - echo "Infra ID: $INFRA_ID, Zone: $ZONE, Region: $REGION, Project: $PROJECT_ID" + if [ -n "$WORKER_MS" ]; then + # ── Legacy path: clone MachineSet in openshift-machine-api ── + echo "Using legacy MachineSet path (source: $WORKER_MS)" + ZONE=$(oc get machineset "$WORKER_MS" -n openshift-machine-api \ + -o jsonpath='{.spec.template.spec.providerSpec.value.zone}') + VIRT_MS_NAME="${INFRA_ID}-virt-worker-${ZONE}" + CREATED_VIA="legacy" - VIRT_MS_NAME="${INFRA_ID}-virt-worker-${ZONE}" + if oc get machineset "$VIRT_MS_NAME" -n openshift-machine-api &>/dev/null; then + echo "MachineSet $VIRT_MS_NAME already exists" 
+ else + JQ_FILTER=' + del(.metadata.uid, .metadata.resourceVersion, .metadata.creationTimestamp, .metadata.generation, .status) | + .metadata.name = $name | + .spec.replicas = 1 | + .spec.selector.matchLabels["machine.openshift.io/cluster-api-machineset"] = $name | + .spec.template.metadata.labels["machine.openshift.io/cluster-api-machineset"] = $name | + .spec.template.metadata.labels["node-role.kubernetes.io/virt"] = "" | + .spec.template.spec.providerSpec.value.machineType = $machineType | + .spec.template.spec.metadata.labels["node-role.kubernetes.io/virt"] = "" + ' + if [ "$DEDICATED" = "true" ]; then + JQ_FILTER="${JQ_FILTER} | .spec.template.spec.taints = [{\"key\": \"node-role.kubernetes.io/virt\", \"effect\": \"NoSchedule\"}]" + fi + + oc get machineset "$WORKER_MS" -n openshift-machine-api -o json | \ + jq --arg name "$VIRT_MS_NAME" --arg machineType "n2-standard-8" "$JQ_FILTER" | \ + oc apply -f - + echo "Created MachineSet $VIRT_MS_NAME" + fi - # Check if it already exists - if oc get machineset "$VIRT_MS_NAME" -n openshift-machine-api &>/dev/null; then - echo "MachineSet $VIRT_MS_NAME already exists" else - # Build jq filter — optionally add taint - JQ_FILTER=' - del(.metadata.uid, .metadata.resourceVersion, .metadata.creationTimestamp, .metadata.generation, .status) | - .metadata.name = $name | - .spec.replicas = 1 | - .spec.selector.matchLabels["machine.openshift.io/cluster-api-machineset"] = $name | - .spec.template.metadata.labels["machine.openshift.io/cluster-api-machineset"] = $name | - .spec.template.metadata.labels["node-role.kubernetes.io/virt"] = "" | - .spec.template.spec.providerSpec.value.machineType = $machineType | - .spec.template.spec.metadata.labels["node-role.kubernetes.io/virt"] = "" - ' - if [ "$DEDICATED" = "true" ]; then - JQ_FILTER="${JQ_FILTER} | .spec.template.spec.taints = [{\"key\": \"node-role.kubernetes.io/virt\", \"effect\": \"NoSchedule\"}]" + # ── CAPI path: GCPMachineTemplate + MachineSet in 
openshift-cluster-api-guests ── + echo "No legacy MachineSets found, using Cluster API path" + CAPI_NS="openshift-cluster-api-guests" + CREATED_VIA="capi" + + CLUSTER_NAME=$(oc get clusters.cluster.x-k8s.io -n "$CAPI_NS" \ + -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || echo "") + if [ -z "$CLUSTER_NAME" ]; then + echo "ERROR: No CAPI Cluster found in $CAPI_NS" + echo "Neither legacy MachineSets nor CAPI resources are available" + exit 1 + fi + echo "CAPI Cluster: $CLUSTER_NAME" + + WORKER_GCP_MACHINE=$(oc get gcpmachines.infrastructure.cluster.x-k8s.io \ + -n "$CAPI_NS" -o json | jq -r ' + [.items[] | select(.metadata.name | test("worker"))] + | sort_by(.metadata.name) + | .[0].metadata.name // empty + ') + if [ -z "$WORKER_GCP_MACHINE" ]; then + echo "ERROR: No worker GCPMachine found in $CAPI_NS" + oc get gcpmachines.infrastructure.cluster.x-k8s.io -n "$CAPI_NS" \ + --no-headers 2>/dev/null || true + exit 1 + fi + echo "Source GCPMachine: $WORKER_GCP_MACHINE" + + GCP_SPEC=$(oc get gcpmachines.infrastructure.cluster.x-k8s.io \ + "$WORKER_GCP_MACHINE" -n "$CAPI_NS" -o json | jq '.spec') + ZONE=$(echo "$GCP_SPEC" | jq -r '.zone // .failureDomain // empty') + echo "Zone: $ZONE" + + VIRT_TEMPLATE_NAME="${INFRA_ID}-virt-worker" + VIRT_MS_NAME="${INFRA_ID}-virt-worker-${ZONE:-a}" + + WORKER_MACHINE=$(oc get machines.cluster.x-k8s.io -n "$CAPI_NS" -o json | jq -r ' + [.items[] | select(.metadata.name | test("worker"))] + | sort_by(.metadata.name) + | .[0].metadata.name // empty + ') + BOOTSTRAP_SECRET=$(oc get machines.cluster.x-k8s.io "$WORKER_MACHINE" \ + -n "$CAPI_NS" -o jsonpath='{.spec.bootstrap.dataSecretName}' 2>/dev/null || echo "") + if [ -z "$BOOTSTRAP_SECRET" ]; then + BOOTSTRAP_SECRET="${INFRA_ID}-worker" + echo "Bootstrap secret not found on Machine, using convention: $BOOTSTRAP_SECRET" + else + echo "Bootstrap secret: $BOOTSTRAP_SECRET" fi - # Export the existing MachineSet and modify it - oc get machineset "$WORKER_MS" -n openshift-machine-api -o 
json | \ - jq --arg name "$VIRT_MS_NAME" --arg machineType "n2-standard-8" "$JQ_FILTER" | \ - oc apply -f - - echo "Created MachineSet $VIRT_MS_NAME" + if oc get machinesets.cluster.x-k8s.io "$VIRT_MS_NAME" -n "$CAPI_NS" &>/dev/null; then + echo "CAPI MachineSet $VIRT_MS_NAME already exists" + else + echo "$GCP_SPEC" | jq \ + --arg name "$VIRT_TEMPLATE_NAME" --arg ns "$CAPI_NS" '{ + apiVersion: "infrastructure.cluster.x-k8s.io/v1beta1", + kind: "GCPMachineTemplate", + metadata: { name: $name, namespace: $ns }, + spec: { template: { spec: (. | del(.providerID, .providerStatus) | + if has("instanceType") then .instanceType = "n2-standard-8" + elif has("machineType") then .machineType = "n2-standard-8" + else . + {instanceType: "n2-standard-8"} + end + )}} + }' | oc apply -f - + echo "Created GCPMachineTemplate $VIRT_TEMPLATE_NAME" + + jq -n \ + --arg name "$VIRT_MS_NAME" \ + --arg ns "$CAPI_NS" \ + --arg cluster "$CLUSTER_NAME" \ + --arg tmpl "$VIRT_TEMPLATE_NAME" \ + --arg boot "$BOOTSTRAP_SECRET" \ + '{ + apiVersion: "cluster.x-k8s.io/v1beta1", + kind: "MachineSet", + metadata: { + name: $name, + namespace: $ns, + labels: { "cluster.x-k8s.io/cluster-name": $cluster } + }, + spec: { + clusterName: $cluster, + replicas: 1, + selector: { + matchLabels: { + "cluster.x-k8s.io/cluster-name": $cluster, + "cluster.x-k8s.io/set-name": $name + } + }, + template: { + metadata: { + labels: { + "cluster.x-k8s.io/cluster-name": $cluster, + "cluster.x-k8s.io/set-name": $name, + "node-role.kubernetes.io/worker": "", + "node-role.kubernetes.io/virt": "" + } + }, + spec: { + clusterName: $cluster, + bootstrap: { dataSecretName: $boot }, + infrastructureRef: { + apiVersion: "infrastructure.cluster.x-k8s.io/v1beta1", + kind: "GCPMachineTemplate", + name: $tmpl + } + } + } + } + }' | oc apply -f - + echo "Created CAPI MachineSet $VIRT_MS_NAME" + fi fi - # Wait for the machine to be provisioned and node to be Ready echo "Waiting for virt node to become Ready..." 
timeout=600 elapsed=0 while true; do - READY_NODES=$(oc get nodes -l node-role.kubernetes.io/virt -o json 2>/dev/null | jq -r '[.items[] | select(.status.conditions[]? | select(.type=="Ready" and .status=="True")) | .metadata.name] | join(" ")' 2>/dev/null || echo "") + READY_NODES=$(oc get nodes -l node-role.kubernetes.io/virt -o json 2>/dev/null | \ + jq -r '[.items[] | select(.status.conditions[]? | + select(.type=="Ready" and .status=="True")) | + .metadata.name] | join(" ")' 2>/dev/null || echo "") if [ -n "$READY_NODES" ]; then echo "Virt node is Ready: $READY_NODES" break fi + + if [ "$CREATED_VIA" = "capi" ] && [ $elapsed -ge 120 ]; then + CAPI_NODE=$(oc get machines.cluster.x-k8s.io -n "$CAPI_NS" \ + -l "cluster.x-k8s.io/set-name=$VIRT_MS_NAME" \ + -o jsonpath='{.items[0].status.nodeRef.name}' 2>/dev/null || echo "") + if [ -n "$CAPI_NODE" ]; then + NODE_READY=$(oc get node "$CAPI_NODE" \ + -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null || echo "") + if [ "$NODE_READY" = "True" ]; then + echo "CAPI node $CAPI_NODE is Ready, applying virt label" + oc label node "$CAPI_NODE" node-role.kubernetes.io/virt= --overwrite + READY_NODES="$CAPI_NODE" + break + fi + fi + fi + if [ $elapsed -ge $timeout ]; then echo "ERROR: Timeout waiting for virt node after ${timeout}s" - oc get machines -n openshift-machine-api -l machine.openshift.io/cluster-api-machineset="$VIRT_MS_NAME" + if [ "$CREATED_VIA" = "legacy" ]; then + oc get machines -n openshift-machine-api \ + -l "machine.openshift.io/cluster-api-machineset=$VIRT_MS_NAME" \ + --no-headers 2>/dev/null || true + else + oc get machines.cluster.x-k8s.io -n "$CAPI_NS" \ + -l "cluster.x-k8s.io/set-name=$VIRT_MS_NAME" \ + --no-headers 2>/dev/null || true + fi exit 1 fi if [ $((elapsed % 60)) -eq 0 ] && [ $elapsed -gt 0 ]; then echo "Still waiting... 
(${elapsed}s elapsed)" - oc get machines -n openshift-machine-api -l machine.openshift.io/cluster-api-machineset="$VIRT_MS_NAME" --no-headers 2>/dev/null || true + if [ "$CREATED_VIA" = "legacy" ]; then + oc get machines -n openshift-machine-api \ + -l "machine.openshift.io/cluster-api-machineset=$VIRT_MS_NAME" \ + --no-headers 2>/dev/null || true + else + oc get machines.cluster.x-k8s.io -n "$CAPI_NS" \ + -l "cluster.x-k8s.io/set-name=$VIRT_MS_NAME" \ + --no-headers 2>/dev/null || true + fi fi sleep 15 elapsed=$((elapsed + 15)) done + if [ "$DEDICATED" = "true" ] && [ "$CREATED_VIA" = "capi" ]; then + VIRT_NODE=$(echo "$READY_NODES" | awk '{print $1}') + echo "Applying NoSchedule taint to $VIRT_NODE (CAPI path)" + oc adm taint nodes "$VIRT_NODE" \ + node-role.kubernetes.io/virt=:NoSchedule --overwrite + fi + echo "=== Virt worker node ready ===" volumeMounts: - name: data From e4685fe992a8cf8d2d545d5ee71f29513571c040 Mon Sep 17 00:00:00 2001 From: Piotr Rygielski <114479+vikin91@users.noreply.github.com> Date: Wed, 10 Jun 2026 17:49:15 +0200 Subject: [PATCH 5/7] fix(ocp4-virt): skip credential regen on retry and fail on VMI timeout Move VM existence check before password generation so retries don't publish stale credentials that were never applied to the guest. Exit non-zero with VMI/event diagnostics when VMI fails to reach Running within the timeout. 
Co-authored-by: Cursor --- .../static/workflow-openshift-4.yaml | 30 ++++++++++++------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/chart/infra-server/static/workflow-openshift-4.yaml b/chart/infra-server/static/workflow-openshift-4.yaml index e482e0b68..9d3deeeca 100644 --- a/chart/infra-server/static/workflow-openshift-4.yaml +++ b/chart/infra-server/static/workflow-openshift-4.yaml @@ -649,10 +649,21 @@ spec: ;; esac - VM_PASSWORD=$(openssl rand -hex 10) - echo "=== Creating VM: $VM_NAME (OS: $VM_OS, dedicated=$DEDICATED) ===" + # Check if VM already exists — skip creation to avoid publishing stale credentials + if oc get vm "$VM_NAME" -n "$NAMESPACE" &>/dev/null; then + STATUS=$(oc get vm "$VM_NAME" -n "$NAMESPACE" -o jsonpath='{.status.printableStatus}' 2>/dev/null || echo "Unknown") + echo "VM $VM_NAME already exists (status: $STATUS), skipping creation" + echo "# VM Access Information (pre-existing VM)" > /data/vm-access.md + echo "" >> /data/vm-access.md + echo "VM was already provisioned in a prior run. Password was set at first boot." >> /data/vm-access.md + echo "Check cluster secrets or prior workflow artifacts for credentials." >> /data/vm-access.md + exit 0 + fi + + VM_PASSWORD=$(openssl rand -hex 10) + # Copy the already-provisioned Quay pull secret from the infra namespace into the target cluster echo "Creating pull secret for quay.io/rhacs-eng in namespace $NAMESPACE from mounted infra secret..." if [ ! 
-s /infra-secrets/quay/.dockerconfigjson ]; then @@ -671,12 +682,6 @@ spec: EOFSECRET echo "Pull secret created" - # Check if VM already exists - if oc get vm "$VM_NAME" -n "$NAMESPACE" &>/dev/null; then - STATUS=$(oc get vm "$VM_NAME" -n "$NAMESPACE" -o jsonpath='{.status.printableStatus}' 2>/dev/null || echo "Unknown") - echo "VM $VM_NAME already exists (status: $STATUS)" - fi - # Build node placement YAML fragment conditionally NODE_PLACEMENT="" if [ "$DEDICATED" = "true" ]; then @@ -758,9 +763,12 @@ spec: break fi if [ $elapsed -ge $timeout ]; then - echo "WARNING: VMI did not reach Running phase after ${timeout}s (current: $PHASE)" - echo "VM was created but may still be starting" - break + echo "ERROR: VMI did not reach Running phase after ${timeout}s (current: $PHASE)" + echo "--- VMI details ---" + oc get vmi "$VM_NAME" -n "$NAMESPACE" -o yaml 2>&1 || true + echo "--- Recent events ---" + oc get events -n "$NAMESPACE" --sort-by='.lastTimestamp' 2>&1 | tail -20 || true + exit 1 fi if [ $((elapsed % 30)) -eq 0 ] && [ $elapsed -gt 0 ]; then echo "Waiting for VMI (phase: ${PHASE:-Pending}, ${elapsed}s)..." From 0f5ac971031275d30cd4e2e90bfb534c2dbde9a1 Mon Sep 17 00:00:00 2001 From: Piotr Rygielski <114479+vikin91@users.noreply.github.com> Date: Thu, 11 Jun 2026 09:34:09 +0200 Subject: [PATCH 6/7] fix(ocp4-virt): replace CAPI path with MachineSet wait loop CAPI resources (cluster.x-k8s.io) don't exist on running OCP clusters. The installer uses CAPI only during provisioning via a local envtest control plane, then shuts it down. The running cluster uses the legacy Machine API (machine.openshift.io/v1beta1) exclusively. The real issue was timing: the Machine API operator needs a moment after cluster creation to reconcile worker MachineSets. Replace the CAPI fallback with a retry loop (up to 300s) that waits for worker MachineSets to appear in openshift-machine-api. 
Co-authored-by: Cursor --- .../static/workflow-openshift-4.yaml | 249 ++++-------------- 1 file changed, 58 insertions(+), 191 deletions(-) diff --git a/chart/infra-server/static/workflow-openshift-4.yaml b/chart/infra-server/static/workflow-openshift-4.yaml index 9d3deeeca..ed73153b2 100644 --- a/chart/infra-server/static/workflow-openshift-4.yaml +++ b/chart/infra-server/static/workflow-openshift-4.yaml @@ -228,163 +228,66 @@ spec: exit 0 fi - WORKER_MS=$(oc get machinesets -n openshift-machine-api -o json 2>/dev/null | jq -r ' - .items - | map(select( - .metadata.labels["machine.openshift.io/cluster-api-machine-role"] == "worker" or - .metadata.labels["machine.openshift.io/cluster-api-machine-type"] == "worker" - )) - | sort_by(.metadata.name) - | .[0].metadata.name // empty - ' 2>/dev/null || echo "") - - CREATED_VIA="" - - if [ -n "$WORKER_MS" ]; then - # ── Legacy path: clone MachineSet in openshift-machine-api ── - echo "Using legacy MachineSet path (source: $WORKER_MS)" - ZONE=$(oc get machineset "$WORKER_MS" -n openshift-machine-api \ - -o jsonpath='{.spec.template.spec.providerSpec.value.zone}') - VIRT_MS_NAME="${INFRA_ID}-virt-worker-${ZONE}" - CREATED_VIA="legacy" - - if oc get machineset "$VIRT_MS_NAME" -n openshift-machine-api &>/dev/null; then - echo "MachineSet $VIRT_MS_NAME already exists" - else - JQ_FILTER=' - del(.metadata.uid, .metadata.resourceVersion, .metadata.creationTimestamp, .metadata.generation, .status) | - .metadata.name = $name | - .spec.replicas = 1 | - .spec.selector.matchLabels["machine.openshift.io/cluster-api-machineset"] = $name | - .spec.template.metadata.labels["machine.openshift.io/cluster-api-machineset"] = $name | - .spec.template.metadata.labels["node-role.kubernetes.io/virt"] = "" | - .spec.template.spec.providerSpec.value.machineType = $machineType | - .spec.template.spec.metadata.labels["node-role.kubernetes.io/virt"] = "" - ' - if [ "$DEDICATED" = "true" ]; then - JQ_FILTER="${JQ_FILTER} | .spec.template.spec.taints = 
[{\"key\": \"node-role.kubernetes.io/virt\", \"effect\": \"NoSchedule\"}]" - fi - - oc get machineset "$WORKER_MS" -n openshift-machine-api -o json | \ - jq --arg name "$VIRT_MS_NAME" --arg machineType "n2-standard-8" "$JQ_FILTER" | \ - oc apply -f - - echo "Created MachineSet $VIRT_MS_NAME" - fi - - else - # ── CAPI path: GCPMachineTemplate + MachineSet in openshift-cluster-api-guests ── - echo "No legacy MachineSets found, using Cluster API path" - CAPI_NS="openshift-cluster-api-guests" - CREATED_VIA="capi" - - CLUSTER_NAME=$(oc get clusters.cluster.x-k8s.io -n "$CAPI_NS" \ - -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || echo "") - if [ -z "$CLUSTER_NAME" ]; then - echo "ERROR: No CAPI Cluster found in $CAPI_NS" - echo "Neither legacy MachineSets nor CAPI resources are available" - exit 1 - fi - echo "CAPI Cluster: $CLUSTER_NAME" - - WORKER_GCP_MACHINE=$(oc get gcpmachines.infrastructure.cluster.x-k8s.io \ - -n "$CAPI_NS" -o json | jq -r ' - [.items[] | select(.metadata.name | test("worker"))] + # On OCP 4.19+, the installer uses CAPI via a local envtest to provision + # machines, but the running cluster uses legacy machine.openshift.io + # MachineSets. The Machine API operator may need time after cluster creation + # to reconcile MachineSets, so we retry instead of failing immediately. + echo "Waiting for worker MachineSets in openshift-machine-api..." 
+ ms_timeout=300 + ms_elapsed=0 + WORKER_MS="" + while [ -z "$WORKER_MS" ]; do + WORKER_MS=$(oc get machinesets -n openshift-machine-api -o json 2>/dev/null | jq -r ' + .items + | map(select( + .metadata.labels["machine.openshift.io/cluster-api-machine-role"] == "worker" or + .metadata.labels["machine.openshift.io/cluster-api-machine-type"] == "worker" + )) | sort_by(.metadata.name) | .[0].metadata.name // empty - ') - if [ -z "$WORKER_GCP_MACHINE" ]; then - echo "ERROR: No worker GCPMachine found in $CAPI_NS" - oc get gcpmachines.infrastructure.cluster.x-k8s.io -n "$CAPI_NS" \ - --no-headers 2>/dev/null || true + ' 2>/dev/null || echo "") + if [ -n "$WORKER_MS" ]; then + break + fi + if [ $ms_elapsed -ge $ms_timeout ]; then + echo "ERROR: No worker MachineSet found in openshift-machine-api after ${ms_timeout}s" + oc get machinesets -n openshift-machine-api --no-headers 2>/dev/null || true + oc get machines -n openshift-machine-api --no-headers 2>/dev/null || true exit 1 fi - echo "Source GCPMachine: $WORKER_GCP_MACHINE" - - GCP_SPEC=$(oc get gcpmachines.infrastructure.cluster.x-k8s.io \ - "$WORKER_GCP_MACHINE" -n "$CAPI_NS" -o json | jq '.spec') - ZONE=$(echo "$GCP_SPEC" | jq -r '.zone // .failureDomain // empty') - echo "Zone: $ZONE" + if [ $((ms_elapsed % 30)) -eq 0 ]; then + echo "No worker MachineSets yet, retrying... 
(${ms_elapsed}s)" + fi + sleep 15 + ms_elapsed=$((ms_elapsed + 15)) + done + echo "Using $WORKER_MS as template" - VIRT_TEMPLATE_NAME="${INFRA_ID}-virt-worker" - VIRT_MS_NAME="${INFRA_ID}-virt-worker-${ZONE:-a}" + ZONE=$(oc get machineset "$WORKER_MS" -n openshift-machine-api \ + -o jsonpath='{.spec.template.spec.providerSpec.value.zone}') + VIRT_MS_NAME="${INFRA_ID}-virt-worker-${ZONE}" - WORKER_MACHINE=$(oc get machines.cluster.x-k8s.io -n "$CAPI_NS" -o json | jq -r ' - [.items[] | select(.metadata.name | test("worker"))] - | sort_by(.metadata.name) - | .[0].metadata.name // empty - ') - BOOTSTRAP_SECRET=$(oc get machines.cluster.x-k8s.io "$WORKER_MACHINE" \ - -n "$CAPI_NS" -o jsonpath='{.spec.bootstrap.dataSecretName}' 2>/dev/null || echo "") - if [ -z "$BOOTSTRAP_SECRET" ]; then - BOOTSTRAP_SECRET="${INFRA_ID}-worker" - echo "Bootstrap secret not found on Machine, using convention: $BOOTSTRAP_SECRET" - else - echo "Bootstrap secret: $BOOTSTRAP_SECRET" + if oc get machineset "$VIRT_MS_NAME" -n openshift-machine-api &>/dev/null; then + echo "MachineSet $VIRT_MS_NAME already exists" + else + JQ_FILTER=' + del(.metadata.uid, .metadata.resourceVersion, .metadata.creationTimestamp, .metadata.generation, .status) | + .metadata.name = $name | + .spec.replicas = 1 | + .spec.selector.matchLabels["machine.openshift.io/cluster-api-machineset"] = $name | + .spec.template.metadata.labels["machine.openshift.io/cluster-api-machineset"] = $name | + .spec.template.metadata.labels["node-role.kubernetes.io/virt"] = "" | + .spec.template.spec.providerSpec.value.machineType = $machineType | + .spec.template.spec.metadata.labels["node-role.kubernetes.io/virt"] = "" + ' + if [ "$DEDICATED" = "true" ]; then + JQ_FILTER="${JQ_FILTER} | .spec.template.spec.taints = [{\"key\": \"node-role.kubernetes.io/virt\", \"effect\": \"NoSchedule\"}]" fi - if oc get machinesets.cluster.x-k8s.io "$VIRT_MS_NAME" -n "$CAPI_NS" &>/dev/null; then - echo "CAPI MachineSet $VIRT_MS_NAME already exists" - 
else - echo "$GCP_SPEC" | jq \ - --arg name "$VIRT_TEMPLATE_NAME" --arg ns "$CAPI_NS" '{ - apiVersion: "infrastructure.cluster.x-k8s.io/v1beta1", - kind: "GCPMachineTemplate", - metadata: { name: $name, namespace: $ns }, - spec: { template: { spec: (. | del(.providerID, .providerStatus) | - if has("instanceType") then .instanceType = "n2-standard-8" - elif has("machineType") then .machineType = "n2-standard-8" - else . + {instanceType: "n2-standard-8"} - end - )}} - }' | oc apply -f - - echo "Created GCPMachineTemplate $VIRT_TEMPLATE_NAME" - - jq -n \ - --arg name "$VIRT_MS_NAME" \ - --arg ns "$CAPI_NS" \ - --arg cluster "$CLUSTER_NAME" \ - --arg tmpl "$VIRT_TEMPLATE_NAME" \ - --arg boot "$BOOTSTRAP_SECRET" \ - '{ - apiVersion: "cluster.x-k8s.io/v1beta1", - kind: "MachineSet", - metadata: { - name: $name, - namespace: $ns, - labels: { "cluster.x-k8s.io/cluster-name": $cluster } - }, - spec: { - clusterName: $cluster, - replicas: 1, - selector: { - matchLabels: { - "cluster.x-k8s.io/cluster-name": $cluster, - "cluster.x-k8s.io/set-name": $name - } - }, - template: { - metadata: { - labels: { - "cluster.x-k8s.io/cluster-name": $cluster, - "cluster.x-k8s.io/set-name": $name, - "node-role.kubernetes.io/worker": "", - "node-role.kubernetes.io/virt": "" - } - }, - spec: { - clusterName: $cluster, - bootstrap: { dataSecretName: $boot }, - infrastructureRef: { - apiVersion: "infrastructure.cluster.x-k8s.io/v1beta1", - kind: "GCPMachineTemplate", - name: $tmpl - } - } - } - } - }' | oc apply -f - - echo "Created CAPI MachineSet $VIRT_MS_NAME" - fi + oc get machineset "$WORKER_MS" -n openshift-machine-api -o json | \ + jq --arg name "$VIRT_MS_NAME" --arg machineType "n2-standard-8" "$JQ_FILTER" | \ + oc apply -f - + echo "Created MachineSet $VIRT_MS_NAME" fi echo "Waiting for virt node to become Ready..." 
@@ -399,59 +302,23 @@ spec: echo "Virt node is Ready: $READY_NODES" break fi - - if [ "$CREATED_VIA" = "capi" ] && [ $elapsed -ge 120 ]; then - CAPI_NODE=$(oc get machines.cluster.x-k8s.io -n "$CAPI_NS" \ - -l "cluster.x-k8s.io/set-name=$VIRT_MS_NAME" \ - -o jsonpath='{.items[0].status.nodeRef.name}' 2>/dev/null || echo "") - if [ -n "$CAPI_NODE" ]; then - NODE_READY=$(oc get node "$CAPI_NODE" \ - -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null || echo "") - if [ "$NODE_READY" = "True" ]; then - echo "CAPI node $CAPI_NODE is Ready, applying virt label" - oc label node "$CAPI_NODE" node-role.kubernetes.io/virt= --overwrite - READY_NODES="$CAPI_NODE" - break - fi - fi - fi - if [ $elapsed -ge $timeout ]; then echo "ERROR: Timeout waiting for virt node after ${timeout}s" - if [ "$CREATED_VIA" = "legacy" ]; then - oc get machines -n openshift-machine-api \ - -l "machine.openshift.io/cluster-api-machineset=$VIRT_MS_NAME" \ - --no-headers 2>/dev/null || true - else - oc get machines.cluster.x-k8s.io -n "$CAPI_NS" \ - -l "cluster.x-k8s.io/set-name=$VIRT_MS_NAME" \ - --no-headers 2>/dev/null || true - fi + oc get machines -n openshift-machine-api \ + -l "machine.openshift.io/cluster-api-machineset=$VIRT_MS_NAME" \ + --no-headers 2>/dev/null || true exit 1 fi if [ $((elapsed % 60)) -eq 0 ] && [ $elapsed -gt 0 ]; then echo "Still waiting... 
(${elapsed}s elapsed)" - if [ "$CREATED_VIA" = "legacy" ]; then - oc get machines -n openshift-machine-api \ - -l "machine.openshift.io/cluster-api-machineset=$VIRT_MS_NAME" \ - --no-headers 2>/dev/null || true - else - oc get machines.cluster.x-k8s.io -n "$CAPI_NS" \ - -l "cluster.x-k8s.io/set-name=$VIRT_MS_NAME" \ - --no-headers 2>/dev/null || true - fi + oc get machines -n openshift-machine-api \ + -l "machine.openshift.io/cluster-api-machineset=$VIRT_MS_NAME" \ + --no-headers 2>/dev/null || true fi sleep 15 elapsed=$((elapsed + 15)) done - if [ "$DEDICATED" = "true" ] && [ "$CREATED_VIA" = "capi" ]; then - VIRT_NODE=$(echo "$READY_NODES" | awk '{print $1}') - echo "Applying NoSchedule taint to $VIRT_NODE (CAPI path)" - oc adm taint nodes "$VIRT_NODE" \ - node-role.kubernetes.io/virt=:NoSchedule --overwrite - fi - echo "=== Virt worker node ready ===" volumeMounts: - name: data From 154a61029bc2028e28dc4d6f72e31ecfd876aedd Mon Sep 17 00:00:00 2001 From: Piotr Rygielski <114479+vikin91@users.noreply.github.com> Date: Thu, 11 Jun 2026 15:59:40 +0200 Subject: [PATCH 7/7] ci: retrigger dev cluster