Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions src/App/Program.fs
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ type MissionOptions
numPregeneratedTxs: int option,
genesisTestAccountCount: int option,
asanOptions: string option,
coreEnv: string option,
catchupSkipKnownResultsForTesting: bool option,
checkEventsAreConsistentWithEntryDiffs: bool option,
enableRelaxedAutoQsetConfig: bool,
Expand Down Expand Up @@ -516,6 +517,9 @@ type MissionOptions
[<Option("asan-options", HelpText = "Value for ASAN_OPTIONS environment variable", Required = false)>]
member self.asanOptions = asanOptions

[<Option("core-env", HelpText = "Comma-separated environment variables to set on stellar-core containers, formatted as NAME=VALUE,NAME2=VALUE2.", Required = false)>]
member self.CoreEnv = coreEnv

[<Option("catchup-skip-known-results-for-testing",
HelpText = "when this flag is provided, pubnet parallel catchup workers will run with CATCHUP_SKIP_KNOWN_RESULTS_FOR_TESTING = true, resulting in skipping application of failed transaction and signature verification",
Required = false)>]
Expand Down Expand Up @@ -636,6 +640,23 @@ let splitLabel (lab: string) : (string * string option) =
| head :: tail -> head, Some(System.String.Join(":", tail))
| _ -> failwith ("unexpected label '" + lab + "', need string of form 'key' or 'key:value'")

// Split one `NAME=VALUE` entry at its first '=' and return (name, value).
// The value may be empty and may itself contain further '=' characters.
// Fails when the entry has no '=' or when nothing precedes it (empty name).
let splitEnvVar (envVar: string) : (string * string) =
    match envVar.IndexOf('=') with
    | eqPos when eqPos > 0 ->
        let name = envVar.Substring(0, eqPos)
        let value = envVar.Substring(eqPos + 1)
        name, value
    | _ -> failwithf "Could not parse environment variable '%s'. Expected NAME=VALUE." envVar

// Parse the optional `--core-env` argument, a comma-separated list of
// NAME=VALUE entries, into a list of (name, value) pairs.
// Returns [] when the option is absent. Whitespace around each entry is
// trimmed. Fails (via `splitEnvVar`) on any non-blank entry that is not of
// the form NAME=VALUE.
let splitEnvVars (envVars: string option) : (string * string) list =
    match envVars with
    | None -> []
    | Some value ->
        value.Split(',', System.StringSplitOptions.RemoveEmptyEntries)
        |> Array.map (fun envVar -> envVar.Trim())
        // RemoveEmptyEntries runs before trimming, so an entry made only of
        // whitespace (e.g. the tail of "A=1, ") survives the split; drop it
        // here instead of failing on an empty string.
        |> Array.filter (fun envVar -> envVar <> "")
        |> Array.map splitEnvVar
        |> Array.toList

// Given a (key, value option) output from `splitLabel`, return (key, value) or
// fail if value is None
let requireLabelValue (label: string * string option) : (string * string) =
Expand Down Expand Up @@ -756,6 +777,7 @@ let main argv =
numPregeneratedTxs = None
genesisTestAccountCount = None
asanOptions = None
coreEnv = []
enableTailLogging = true
catchupSkipKnownResultsForTesting = None
checkEventsAreConsistentWithEntryDiffs = None
Expand Down Expand Up @@ -931,6 +953,7 @@ let main argv =
updateSorobanCosts = None
genesisTestAccountCount = mission.GenesisTestAccountCount
asanOptions = mission.asanOptions
coreEnv = splitEnvVars mission.CoreEnv
enableRelaxedAutoQsetConfig = mission.EnableRelaxedAutoQsetConfig
jobMonitorExternalHost = mission.JobMonitorExternalHost
txBatchMaxSize = mission.TxBatchMaxSize
Expand Down
1 change: 1 addition & 0 deletions src/FSLibrary.Tests/Tests.fs
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ let ctx : MissionContext =
updateSorobanCosts = None
genesisTestAccountCount = None
asanOptions = None
coreEnv = []
enableRelaxedAutoQsetConfig = false
jobMonitorExternalHost = None
txBatchMaxSize = None
Expand Down
11 changes: 11 additions & 0 deletions src/FSLibrary/MissionHistoryPubnetParallelCatchupV2.fs
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,9 @@ let tolerateTaintToHelmIndexed (index: int) ((key: string), (effect: string opti
// Render one service-account annotation as a pair of indexed Helm `--set`
// assignments (`...annotations[i].key=...` and `...annotations[i].value=...`),
// joined by a comma.
let serviceAccountAnnotationsToHelmIndexed (index: int) (key: string, value: string) =
    let render =
        sprintf "service_account.annotations[%d].key=%s,service_account.annotations[%d].value=%s"

    render index key index value

// Render one (name, value) environment variable as a pair of indexed Helm
// `--set` assignments for `worker.coreEnv[i]`, joined by a comma. The value is
// wrapped in literal double quotes; the name is emitted as-is.
// NOTE(review): neither name nor value is escaped here - confirm that commas
// or quotes cannot reach this function from the caller.
let coreEnvToHelmIndexed (index: int) (name: string, value: string) =
    let render =
        sprintf "worker.coreEnv[%d].name=%s,worker.coreEnv[%d].value=\"%s\""

    render index name index value

let installProject (context: MissionContext) =
LogInfo "Installing Helm chart with release name: %s" helmReleaseName

Expand Down Expand Up @@ -202,6 +205,14 @@ let installProject (context: MissionContext) =
| Some asanOpts -> setOptions.Add(sprintf "worker.asanOptions=%s" asanOpts)
| None -> ()

if not (List.isEmpty context.coreEnv) then
let coreEnvHelm =
context.coreEnv
|> List.mapi coreEnvToHelmIndexed
|> String.concat ","

setOptions.Add(coreEnvHelm)

// Convert labels and taints to Helm array format
if not (List.isEmpty context.requireNodeLabelsPcV2) then
let requireLabelsHelm =
Expand Down
18 changes: 13 additions & 5 deletions src/FSLibrary/StellarKubeSpecs.fs
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,7 @@ let CoreContainerForCommand
(imageName: string)
(configOpt: ConfigOption)
(asanOptions: string option)
(coreEnv: (string * string) list)
(cr: CoreResources)
(command: string array)
(initCommands: ShCmd array)
Expand All @@ -297,6 +298,11 @@ let CoreContainerForCommand
value = defaultArg asanOptions CfgVal.asanOptionsEnvVarDefaultValue
)

let coreEnvVars =
coreEnv
|> List.map (fun (name, value) -> V1EnvVar(name = name, value = value))
|> Array.ofList

let cfgWords = cfgFileArgs configOpt MainCoreContainer
let containerName = CfgVal.stellarCoreContainerName (Array.get command 0)

Expand Down Expand Up @@ -335,7 +341,7 @@ let CoreContainerForCommand
image = imageName,
command = [| "/bin/sh" |],
args = [| "-x"; "-c"; allCmdsAndCleanup.ToString() |],
env = [| peerNameEnvVar; asanOptionsEnvVar |],
env = Array.concat [ [| peerNameEnvVar; asanOptionsEnvVar |]; coreEnvVars ],
resources = res,
securityContext = V1SecurityContext(capabilities = V1Capabilities(add = [| "NET_ADMIN" |])),
volumeMounts = CoreContainerVolumeMounts peerOrJobNames configOpt
Expand Down Expand Up @@ -662,13 +668,14 @@ type NetworkCfg with

let res = self.missionContext.coreResources
let asan = self.missionContext.asanOptions
let coreEnv = self.missionContext.coreEnv

let containers =
match self.jobCoreSetOptions with
| None -> [| CoreContainerForCommand image cfgOpt asan res command [||] [| jobName |] |]
| None -> [| CoreContainerForCommand image cfgOpt asan coreEnv res command [||] [| jobName |] |]
| Some (opts) ->
let initCmds = self.getInitCommands cfgOpt opts
let coreContainer = CoreContainerForCommand image cfgOpt asan res command initCmds [| jobName |]
let coreContainer = CoreContainerForCommand image cfgOpt asan coreEnv res command initCmds [| jobName |]

match opts.dbType with
| Postgres -> [| coreContainer; PostgresContainer self.missionContext.postgresImage |]
Expand Down Expand Up @@ -764,10 +771,11 @@ type NetworkCfg with

let res = self.missionContext.coreResources
let asan = self.missionContext.asanOptions
let coreEnv = self.missionContext.coreEnv

let containers =
[| WithProbes
(CoreContainerForCommand imageName cfgOpt asan res runCmd initCommands peerNames)
(CoreContainerForCommand imageName cfgOpt asan coreEnv res runCmd initCommands peerNames)
self.missionContext.probeTimeout
HistoryContainer self.missionContext.nginxImage |]

Expand Down Expand Up @@ -814,7 +822,7 @@ type NetworkCfg with
// hook the per-Pod Services and Ingress up to). Getting all this to work
// requires that you install the DNS server component on your k8s cluster.
member self.ToService() : V1Service =
let serviceSpec = V1ServiceSpec(clusterIP = "None", selector = CfgVal.labels)
let serviceSpec = V1ServiceSpec(clusterIP = "None", selector = CfgVal.labels, publishNotReadyAddresses = true)

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Apply publishNotReadyAddresses to Helm workers

This only updates the NetworkCfg-generated headless service; the ParallelCatchupV2 Helm chart still defines its own headless Service with clusterIP: None and wires the StatefulSet to it in src/MissionParallelCatchup/parallel_catchup_helm/templates/catchup_workers.yaml:1-31, but that Service still lacks publishNotReadyAddresses. When running the pubnet parallel catchup V2 mission, worker pod DNS records are still withheld until readiness, so the startup DNS race this change is meant to remove remains for that mission.

Useful? React with 👍 / 👎.

V1Service(spec = serviceSpec, metadata = self.NamespacedMeta self.ServiceName)
Comment on lines 824 to 826


Expand Down
2 changes: 1 addition & 1 deletion src/FSLibrary/StellarMissionContext.fs
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,8 @@ type MissionContext =
pubnetParallelCatchupLedgersPerJob: int
pubnetParallelCatchupNumWorkers: int
genesisTestAccountCount: int option

asanOptions: string option
coreEnv: (string * string) list

// Tail logging can cause the pubnet simulation missions like SorobanLoadGeneration
// and SimulatePubnet to fail on the heartbeat handler due to what looks like a
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@ spec:
fieldPath: metadata.name
- name: ASAN_OPTIONS
value: {{ .Values.worker.asanOptions | quote }}
{{- range .Values.worker.coreEnv }}
- name: {{ .name }}
value: {{ .value | quote }}
{{- end }}
envFrom:
- configMapRef:
name: {{ .Release.Name }}-worker-config
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ worker:
avoidNodeLabels: []
tolerateNodeTaints: []
asanOptions: "quarantine_size_mb=1:malloc_context_size=5:alloc_dealloc_mismatch=0"
coreEnv: []
resources: # resources below are left empty on purpose, they are read and overridden from `StellarKubeCfg.fs`
requests:
cpu: ""
Expand Down