From 20ffe638f5597b36ff5b63123a16e1d43acb4267 Mon Sep 17 00:00:00 2001 From: Graydon Hoare Date: Wed, 17 Jun 2026 10:55:57 -0700 Subject: [PATCH 1/2] Support --core-env=FOO=bar,BAZ=quux to pass extra env vars through to core instances --- src/App/Program.fs | 23 +++++++++++++++++++ src/FSLibrary.Tests/Tests.fs | 1 + .../MissionHistoryPubnetParallelCatchupV2.fs | 11 +++++++++ src/FSLibrary/StellarKubeSpecs.fs | 16 +++++++++---- src/FSLibrary/StellarMissionContext.fs | 2 +- .../templates/catchup_workers.yaml | 4 ++++ .../parallel_catchup_helm/values.yaml | 1 + 7 files changed, 53 insertions(+), 5 deletions(-) diff --git a/src/App/Program.fs b/src/App/Program.fs index 495797f2..8303a86a 100644 --- a/src/App/Program.fs +++ b/src/App/Program.fs @@ -116,6 +116,7 @@ type MissionOptions numPregeneratedTxs: int option, genesisTestAccountCount: int option, asanOptions: string option, + coreEnv: string option, catchupSkipKnownResultsForTesting: bool option, checkEventsAreConsistentWithEntryDiffs: bool option, enableRelaxedAutoQsetConfig: bool, @@ -516,6 +517,9 @@ type MissionOptions [] member self.asanOptions = asanOptions + [] + member self.CoreEnv = coreEnv + [] @@ -636,6 +640,23 @@ let splitLabel (lab: string) : (string * string option) = | head :: tail -> head, Some(System.String.Join(":", tail)) | _ -> failwith ("unexpected label '" + lab + "', need string of form 'key' or 'key:value'") +let splitEnvVar (envVar: string) : (string * string) = + let separatorIndex = envVar.IndexOf('=') + + if separatorIndex <= 0 then + failwithf "Could not parse environment variable '%s'. Expected NAME=VALUE." envVar + + envVar.Substring(0, separatorIndex), envVar.Substring(separatorIndex + 1) + +let splitEnvVars (envVars: string option) : (string * string) list = + match envVars with + | None -> [] + | Some value -> + value.Split(',', System.StringSplitOptions.RemoveEmptyEntries) + |> Array.map (fun envVar -> envVar.Trim()) + |> Array.map splitEnvVar + |> Array.toList + // Given a (key, value option) output form `splitLabel`, return (key, value) or // fail if value is None let requireLabelValue (label: string * string option) : (string * string) = @@ -756,6 +777,7 @@ let main argv = numPregeneratedTxs = None genesisTestAccountCount = None asanOptions = None + coreEnv = [] enableTailLogging = true catchupSkipKnownResultsForTesting = None checkEventsAreConsistentWithEntryDiffs = None @@ -931,6 +953,7 @@ let main argv = updateSorobanCosts = None genesisTestAccountCount = mission.GenesisTestAccountCount asanOptions = mission.asanOptions + coreEnv = splitEnvVars mission.CoreEnv enableRelaxedAutoQsetConfig = mission.EnableRelaxedAutoQsetConfig jobMonitorExternalHost = mission.JobMonitorExternalHost txBatchMaxSize = mission.TxBatchMaxSize diff --git a/src/FSLibrary.Tests/Tests.fs b/src/FSLibrary.Tests/Tests.fs index f7ac2f64..6dab1b01 100644 --- a/src/FSLibrary.Tests/Tests.fs +++ b/src/FSLibrary.Tests/Tests.fs @@ -124,6 +124,7 @@ let ctx : MissionContext = updateSorobanCosts = None genesisTestAccountCount = None asanOptions = None + coreEnv = [] enableRelaxedAutoQsetConfig = false jobMonitorExternalHost = None txBatchMaxSize = None diff --git a/src/FSLibrary/MissionHistoryPubnetParallelCatchupV2.fs b/src/FSLibrary/MissionHistoryPubnetParallelCatchupV2.fs index 727d309e..d998903a 100644 --- a/src/FSLibrary/MissionHistoryPubnetParallelCatchupV2.fs +++ b/src/FSLibrary/MissionHistoryPubnetParallelCatchupV2.fs @@ -110,6 +110,9 @@ let tolerateTaintToHelmIndexed (index: int) ((key: string), (effect: string opti let serviceAccountAnnotationsToHelmIndexed (index: int) (key: string, value: string) = sprintf "service_account.annotations[%d].key=%s,service_account.annotations[%d].value=%s" index key index value +let coreEnvToHelmIndexed (index: int) (name: string, value: string) = + sprintf "worker.coreEnv[%d].name=%s,worker.coreEnv[%d].value=\"%s\"" index name index value + let installProject (context: MissionContext) = LogInfo "Installing Helm chart with release name: %s" helmReleaseName @@ -202,6 +205,14 @@ let installProject (context: MissionContext) = | Some asanOpts -> setOptions.Add(sprintf "worker.asanOptions=%s" asanOpts) | None -> () + if not (List.isEmpty context.coreEnv) then + let coreEnvHelm = + context.coreEnv + |> List.mapi coreEnvToHelmIndexed + |> String.concat "," + + setOptions.Add(coreEnvHelm) + // Convert labels and taints to Helm array format if not (List.isEmpty context.requireNodeLabelsPcV2) then let requireLabelsHelm = diff --git a/src/FSLibrary/StellarKubeSpecs.fs b/src/FSLibrary/StellarKubeSpecs.fs index 82304f23..7086553c 100644 --- a/src/FSLibrary/StellarKubeSpecs.fs +++ b/src/FSLibrary/StellarKubeSpecs.fs @@ -281,6 +281,7 @@ let CoreContainerForCommand (imageName: string) (configOpt: ConfigOption) (asanOptions: string option) + (coreEnv: (string * string) list) (cr: CoreResources) (command: string array) (initCommands: ShCmd array) @@ -297,6 +298,11 @@ let CoreContainerForCommand value = defaultArg asanOptions CfgVal.asanOptionsEnvVarDefaultValue ) + let coreEnvVars = + coreEnv + |> List.map (fun (name, value) -> V1EnvVar(name = name, value = value)) + |> Array.ofList + let cfgWords = cfgFileArgs configOpt MainCoreContainer let containerName = CfgVal.stellarCoreContainerName (Array.get command 0) @@ -335,7 +341,7 @@ let CoreContainerForCommand image = imageName, command = [| "/bin/sh" |], args = [| "-x"; "-c"; allCmdsAndCleanup.ToString() |], - env = [| peerNameEnvVar; asanOptionsEnvVar |], + env = Array.concat [ [| peerNameEnvVar; asanOptionsEnvVar |]; coreEnvVars ], resources = res, securityContext = V1SecurityContext(capabilities = V1Capabilities(add = [| "NET_ADMIN" |])), volumeMounts = CoreContainerVolumeMounts peerOrJobNames configOpt @@ -662,13 +668,14 @@ type NetworkCfg with let res = self.missionContext.coreResources let asan = self.missionContext.asanOptions + let coreEnv = self.missionContext.coreEnv let containers = match self.jobCoreSetOptions with - | None -> [| CoreContainerForCommand image cfgOpt asan res command [||] [| jobName |] |] + | None -> [| CoreContainerForCommand image cfgOpt asan coreEnv res command [||] [| jobName |] |] | Some (opts) -> let initCmds = self.getInitCommands cfgOpt opts - let coreContainer = CoreContainerForCommand image cfgOpt asan res command initCmds [| jobName |] + let coreContainer = CoreContainerForCommand image cfgOpt asan coreEnv res command initCmds [| jobName |] match opts.dbType with | Postgres -> [| coreContainer; PostgresContainer self.missionContext.postgresImage |] @@ -764,10 +771,11 @@ type NetworkCfg with let res = self.missionContext.coreResources let asan = self.missionContext.asanOptions + let coreEnv = self.missionContext.coreEnv let containers = [| WithProbes - (CoreContainerForCommand imageName cfgOpt asan res runCmd initCommands peerNames) + (CoreContainerForCommand imageName cfgOpt asan coreEnv res runCmd initCommands peerNames) self.missionContext.probeTimeout HistoryContainer self.missionContext.nginxImage |] diff --git a/src/FSLibrary/StellarMissionContext.fs b/src/FSLibrary/StellarMissionContext.fs index c64b4cca..1476af83 100644 --- a/src/FSLibrary/StellarMissionContext.fs +++ b/src/FSLibrary/StellarMissionContext.fs @@ -108,8 +108,8 @@ type MissionContext = pubnetParallelCatchupLedgersPerJob: int pubnetParallelCatchupNumWorkers: int genesisTestAccountCount: int option - asanOptions: string option + coreEnv: (string * string) list // Tail logging can cause the pubnet simulation missions like SorobanLoadGeneration // and SimulatePubnet to fail on the heartbeat handler due to what looks like a diff --git a/src/MissionParallelCatchup/parallel_catchup_helm/templates/catchup_workers.yaml b/src/MissionParallelCatchup/parallel_catchup_helm/templates/catchup_workers.yaml index e861a3e3..20a392c4 100644 --- a/src/MissionParallelCatchup/parallel_catchup_helm/templates/catchup_workers.yaml +++ b/src/MissionParallelCatchup/parallel_catchup_helm/templates/catchup_workers.yaml @@ -91,6 +91,10 @@ spec: fieldPath: metadata.name - name: ASAN_OPTIONS value: {{ .Values.worker.asanOptions | quote }} + {{- range .Values.worker.coreEnv }} + - name: {{ .name }} + value: {{ .value | quote }} + {{- end }} envFrom: - configMapRef: name: {{ .Release.Name }}-worker-config diff --git a/src/MissionParallelCatchup/parallel_catchup_helm/values.yaml b/src/MissionParallelCatchup/parallel_catchup_helm/values.yaml index b47e7ab1..3fa975f4 100644 --- a/src/MissionParallelCatchup/parallel_catchup_helm/values.yaml +++ b/src/MissionParallelCatchup/parallel_catchup_helm/values.yaml @@ -19,6 +19,7 @@ worker: avoidNodeLabels: [] tolerateNodeTaints: [] asanOptions: "quarantine_size_mb=1:malloc_context_size=5:alloc_dealloc_mismatch=0" + coreEnv: [] resources: # resources below are left empty on purpose, they are read and overridden from `StellarKubeCfg.fs` requests: cpu: "" From 1b15cd6bfddf906aa5e1ae35ca8fc5147dc6f55f Mon Sep 17 00:00:00 2001 From: Graydon Hoare Date: Wed, 17 Jun 2026 10:57:18 -0700 Subject: [PATCH 2/2] Configure core headless services with aggressive DNS option publishNotReadyAddresses = true --- src/FSLibrary/StellarKubeSpecs.fs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/FSLibrary/StellarKubeSpecs.fs b/src/FSLibrary/StellarKubeSpecs.fs index 7086553c..7c249989 100644 --- a/src/FSLibrary/StellarKubeSpecs.fs +++ b/src/FSLibrary/StellarKubeSpecs.fs @@ -822,7 +822,7 @@ type NetworkCfg with // hook the per-Pod Services and Ingress up to). Getting all this to work // requires that you install the DNS server component on your k8s cluster. member self.ToService() : V1Service = - let serviceSpec = V1ServiceSpec(clusterIP = "None", selector = CfgVal.labels) + let serviceSpec = V1ServiceSpec(clusterIP = "None", selector = CfgVal.labels, publishNotReadyAddresses = true) V1Service(spec = serviceSpec, metadata = self.NamespacedMeta self.ServiceName)