@@ -69,6 +69,7 @@ export class RunExecution {
6969
7070 private lastHeartbeat ?: Date ;
7171 private isShuttingDown = false ;
72+ private shutdownReason ?: string ;
7273
7374 constructor ( opts : RunExecutionOptions ) {
7475 this . id = randomBytes ( 4 ) . toString ( "hex" ) ;
@@ -81,11 +82,39 @@ export class RunExecution {
8182 this . executionAbortController = new AbortController ( ) ;
8283 }
8384
85+ /**
86+ * Cancels the current execution.
87+ */
88+ public async cancel ( ) : Promise < void > {
89+ if ( this . isShuttingDown ) {
90+ throw new Error ( "cancel called after execution shut down" ) ;
91+ }
92+
93+ this . sendDebugLog ( "cancelling attempt" , { runId : this . runFriendlyId } ) ;
94+
95+ await this . taskRunProcess ?. cancel ( ) ;
96+ }
97+
98+ /**
99+ * Kills the current execution.
100+ */
101+ public async kill ( { exitExecution = true } : { exitExecution ?: boolean } = { } ) {
102+ await this . taskRunProcess ?. kill ( "SIGKILL" ) ;
103+
104+ if ( exitExecution ) {
105+ this . shutdown ( "kill" ) ;
106+ }
107+ }
108+
84109 /**
85110 * Prepares the execution with task run environment variables.
86111 * This should be called before executing, typically after a successful run to prepare for the next one.
87112 */
88113 public prepareForExecution ( opts : RunExecutionPrepareOptions ) : this {
114+ if ( this . isShuttingDown ) {
115+ throw new Error ( "prepareForExecution called after execution shut down" ) ;
116+ }
117+
89118 if ( this . taskRunProcess ) {
90119 throw new Error ( "prepareForExecution called after process was already created" ) ;
91120 }
@@ -204,7 +233,8 @@ export class RunExecution {
204233
205234 if ( this . currentAttemptNumber && this . currentAttemptNumber !== run . attemptNumber ) {
206235 this . sendDebugLog ( "error: attempt number mismatch" , snapshotMetadata ) ;
207- await this . taskRunProcess ?. suspend ( ) ;
236+ // This is a rogue execution, a new one will already have been created elsewhere
237+ await this . exitTaskRunProcessWithoutFailingRun ( { flush : false } ) ;
208238 return ;
209239 }
210240
@@ -237,14 +267,14 @@ export class RunExecution {
237267 this . sendDebugLog ( "run was re-queued" , snapshotMetadata ) ;
238268
239269 // Pretend we've just suspended the run. This will kill the process without failing the run.
240- await this . taskRunProcess ?. suspend ( ) ;
270+ await this . exitTaskRunProcessWithoutFailingRun ( { flush : true } ) ;
241271 return ;
242272 }
243273 case "FINISHED" : {
244274 this . sendDebugLog ( "run is finished" , snapshotMetadata ) ;
245275
246276 // Pretend we've just suspended the run. This will kill the process without failing the run.
247- await this . taskRunProcess ?. suspend ( ) ;
277+ await this . exitTaskRunProcessWithoutFailingRun ( { flush : true } ) ;
248278 return ;
249279 }
250280 case "QUEUED_EXECUTING" :
@@ -255,11 +285,11 @@ export class RunExecution {
255285 return ;
256286 }
257287 case "SUSPENDED" : {
258- this . sendDebugLog ( "run was suspended, kill the process " , snapshotMetadata ) ;
288+ this . sendDebugLog ( "run was suspended" , snapshotMetadata ) ;
259289
260290 // This will kill the process and fail the execution with a SuspendedProcessError
261- await this . taskRunProcess ?. suspend ( ) ;
262-
291+ // We don't flush because we already did before suspending
292+ await this . exitTaskRunProcessWithoutFailingRun ( { flush : false } ) ;
263293 return ;
264294 }
265295 case "PENDING_EXECUTING" : {
@@ -384,6 +414,10 @@ export class RunExecution {
384414 * When this returns, the child process will have been cleaned up.
385415 */
386416 public async execute ( runOpts : RunExecutionRunOptions ) : Promise < void > {
417+ if ( this . isShuttingDown ) {
418+ throw new Error ( "execute called after execution shut down" ) ;
419+ }
420+
387421 // Setup initial state
388422 this . runFriendlyId = runOpts . runFriendlyId ;
389423
@@ -420,7 +454,7 @@ export class RunExecution {
420454 if ( startError ) {
421455 this . sendDebugLog ( "failed to start attempt" , { error : startError . message } ) ;
422456
423- this . stopServices ( ) ;
457+ this . shutdown ( "failed to start attempt" ) ;
424458 return ;
425459 }
426460
@@ -429,11 +463,12 @@ export class RunExecution {
429463 if ( executeError ) {
430464 this . sendDebugLog ( "failed to execute run" , { error : executeError . message } ) ;
431465
432- this . stopServices ( ) ;
466+ this . shutdown ( "failed to execute run" ) ;
433467 return ;
434468 }
435469
436- this . stopServices ( ) ;
470+ // This is here for safety, but it
471+ this . shutdown ( "execute call finished" ) ;
437472 }
438473
439474 private async executeRunWrapper ( {
@@ -460,10 +495,7 @@ export class RunExecution {
460495 } )
461496 ) ;
462497
463- this . sendDebugLog ( "run execution completed" , { error : executeError ?. message } ) ;
464-
465498 if ( ! executeError ) {
466- this . stopServices ( ) ;
467499 return ;
468500 }
469501
@@ -505,8 +537,6 @@ export class RunExecution {
505537 if ( completeError ) {
506538 this . sendDebugLog ( "failed to complete run" , { error : completeError . message } ) ;
507539 }
508-
509- this . stopServices ( ) ;
510540 }
511541
512542 private async executeRun ( {
@@ -527,7 +557,7 @@ export class RunExecution {
527557 ! this . taskRunProcess . isPreparedForNextAttempt
528558 ) {
529559 this . sendDebugLog ( "killing existing task run process before executing next attempt" ) ;
530- await this . kill ( ) . catch ( ( ) => { } ) ;
560+ await this . kill ( { exitExecution : false } ) . catch ( ( ) => { } ) ;
531561 }
532562
533563 // To skip this step and eagerly create the task run process, run prepareForExecution first
@@ -578,25 +608,6 @@ export class RunExecution {
578608 }
579609 }
580610
581- /**
582- * Cancels the current execution.
583- */
584- public async cancel ( ) : Promise < void > {
585- this . sendDebugLog ( "cancelling attempt" , { runId : this . runFriendlyId } ) ;
586-
587- await this . taskRunProcess ?. cancel ( ) ;
588- }
589-
590- public exit ( ) {
591- if ( this . taskRunProcess ?. isPreparedForNextRun ) {
592- this . taskRunProcess ?. forceExit ( ) ;
593- }
594- }
595-
596- public async kill ( ) {
597- await this . taskRunProcess ?. kill ( "SIGKILL" ) ;
598- }
599-
600611 private async complete ( { completion } : { completion : TaskRunExecutionResult } ) : Promise < void > {
601612 if ( ! this . runFriendlyId || ! this . snapshotManager ) {
602613 throw new Error ( "cannot complete run: missing run or snapshot manager" ) ;
@@ -639,37 +650,32 @@ export class RunExecution {
639650
640651 const { attemptStatus } = result ;
641652
642- if ( attemptStatus === "RUN_FINISHED" ) {
643- this . sendDebugLog ( "run finished" ) ;
644-
645- return ;
646- }
647-
648- if ( attemptStatus === "RUN_PENDING_CANCEL" ) {
649- this . sendDebugLog ( "run pending cancel" ) ;
650- return ;
651- }
653+ switch ( attemptStatus ) {
654+ case "RUN_FINISHED" :
655+ case "RUN_PENDING_CANCEL" :
656+ case "RETRY_QUEUED" : {
657+ return ;
658+ }
659+ case "RETRY_IMMEDIATELY" : {
660+ if ( attemptStatus !== "RETRY_IMMEDIATELY" ) {
661+ return ;
662+ }
652663
653- if ( attemptStatus === "RETRY_QUEUED" ) {
654- this . sendDebugLog ( "retry queued" ) ;
664+ if ( completion . ok ) {
665+ throw new Error ( "Should retry but completion OK." ) ;
666+ }
655667
656- return ;
657- }
668+ if ( ! completion . retry ) {
669+ throw new Error ( "Should retry but missing retry params." ) ;
670+ }
658671
659- if ( attemptStatus === "RETRY_IMMEDIATELY" ) {
660- if ( completion . ok ) {
661- throw new Error ( "Should retry but completion OK." ) ;
672+ await this . retryImmediately ( { retryOpts : completion . retry } ) ;
673+ return ;
662674 }
663-
664- if ( ! completion . retry ) {
665- throw new Error ( "Should retry but missing retry params." ) ;
675+ default : {
676+ assertExhaustive ( attemptStatus ) ;
666677 }
667-
668- await this . retryImmediately ( { retryOpts : completion . retry } ) ;
669- return ;
670678 }
671-
672- assertExhaustive ( attemptStatus ) ;
673679 }
674680
675681 private updateSnapshotAfterCompletion ( snapshotId : string , status : TaskRunExecutionStatus ) {
@@ -752,7 +758,7 @@ export class RunExecution {
752758 if ( startError ) {
753759 this . sendDebugLog ( "failed to start attempt for retry" , { error : startError . message } ) ;
754760
755- this . stopServices ( ) ;
761+ this . shutdown ( "retryImmediately: failed to start attempt" ) ;
756762 return ;
757763 }
758764
@@ -761,11 +767,9 @@ export class RunExecution {
761767 if ( executeError ) {
762768 this . sendDebugLog ( "failed to execute run for retry" , { error : executeError . message } ) ;
763769
764- this . stopServices ( ) ;
770+ this . shutdown ( "retryImmediately: failed to execute run" ) ;
765771 return ;
766772 }
767-
768- this . stopServices ( ) ;
769773 }
770774
771775 /**
@@ -797,10 +801,17 @@ export class RunExecution {
797801 this . restoreCount ++ ;
798802 }
799803
804+ private async exitTaskRunProcessWithoutFailingRun ( { flush } : { flush : boolean } ) {
805+ await this . taskRunProcess ?. suspend ( { flush } ) ;
806+
807+ // No services should be left running after this line - let's make sure of it
808+ this . shutdown ( "exitTaskRunProcessWithoutFailingRun" ) ;
809+ }
810+
800811 /**
801812 * Processes env overrides from the metadata service. Generally called when we're resuming from a suspended state.
802813 */
803- async processEnvOverrides ( reason ?: string ) {
814+ public async processEnvOverrides ( reason ?: string ) {
804815 if ( ! this . env . TRIGGER_METADATA_URL ) {
805816 this . sendDebugLog ( "no metadata url, skipping env overrides" , { reason } ) ;
806817 return ;
@@ -953,15 +964,21 @@ export class RunExecution {
953964 }
954965
955966 this . executionAbortController . abort ( ) ;
956- this . stopServices ( ) ;
967+ this . shutdown ( "abortExecution" ) ;
957968 }
958969
959- private stopServices ( ) {
970+ private shutdown ( reason : string ) {
960971 if ( this . isShuttingDown ) {
972+ this . sendDebugLog ( `[shutdown] ${ reason } (already shutting down)` , {
973+ firstShutdownReason : this . shutdownReason ,
974+ } ) ;
961975 return ;
962976 }
963977
978+ this . sendDebugLog ( `[shutdown] ${ reason } ` ) ;
979+
964980 this . isShuttingDown = true ;
981+ this . shutdownReason = reason ;
965982
966983 this . snapshotPoller ?. stop ( ) ;
967984 this . snapshotManager ?. cleanup ( ) ;
0 commit comments