From 261bebe12420fd8fd55a4911df9c41ba62462074 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amaury=20Lev=C3=A9?= Date: Mon, 25 May 2026 22:52:10 +0200 Subject: [PATCH] Prototype: phased shutdown for MTP (#5345) Introduce a two-phase shutdown model for Microsoft.Testing.Platform so test sessions get a deterministic drain window before being aborted: - Extend internal ITestApplicationCancellationTokenSource with DrainingToken (graceful cancel) and AbortingToken (forceful abort), plus an Abort() entry point. The existing CancellationToken is kept as a back-compat alias for DrainingToken so current consumers keep observing graceful cancellation without changes. - Rewrite CTRLPlusCCancellationTokenSource with a Running -> Draining -> Aborting state machine: * 1st Ctrl+C enters Draining, starts a 30s grace timer that escalates to Aborting on elapse. * 2nd Ctrl+C escalates to Aborting immediately, starts a 10s abort-timeout safety net that FailFasts via IEnvironment. * 3rd Ctrl+C is no longer intercepted - the runtime terminates the process (matches docker compose / kubectl / npm UX). - Add 6 unit tests covering initial state, single/idempotent cancel, abort, grace-period escalation, and zero-grace escalation. Passes on net9.0 and net462. CLI options (--shutdown-grace-period, --shutdown-abort-timeout), env- var propagation to controlled hosts, TerminalOutputDevice UX updates, and migration of existing token consumers are intentionally deferred to follow-up PRs and tracked in the RFC. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../CTRLPlusCCancellationTokenSource.cs | 171 ++++++++++++++++-- ...ITestApplicationCancellationTokenSource.cs | 37 ++++ .../Hosts/CommonHostTests.cs | 8 + .../CTRLPlusCCancellationTokenSourceTests.cs | 112 ++++++++++++ 4 files changed, 315 insertions(+), 13 deletions(-) create mode 100644 test/UnitTests/Microsoft.Testing.Platform.UnitTests/Services/CTRLPlusCCancellationTokenSourceTests.cs diff --git a/src/Platform/Microsoft.Testing.Platform/Services/CTRLPlusCCancellationTokenSource.cs b/src/Platform/Microsoft.Testing.Platform/Services/CTRLPlusCCancellationTokenSource.cs index 9226f4c862..20a7122389 100644 --- a/src/Platform/Microsoft.Testing.Platform/Services/CTRLPlusCCancellationTokenSource.cs +++ b/src/Platform/Microsoft.Testing.Platform/Services/CTRLPlusCCancellationTokenSource.cs @@ -6,19 +6,65 @@ namespace Microsoft.Testing.Platform.Services; +/// +/// Two-phase, Ctrl+C-aware . +/// +/// +/// Phase machine (see RFC "Phased graceful shutdown for MTP", issue #5345): +/// +/// RUNNING ──Ctrl+C / Cancel()──▶ DRAINING ──grace elapsed / 2nd Ctrl+C / Abort()──▶ ABORTING +/// ──3rd Ctrl+C──▶ (process terminated by runtime) +/// +/// +/// Transitions are idempotent and one-way. Existing consumers reading +/// automatically observe Draining (back-compat). +/// +/// internal sealed class CTRLPlusCCancellationTokenSource : ITestApplicationCancellationTokenSource, IDisposable { - private readonly CancellationTokenSource _cancellationTokenSource = new(); + // Conservative defaults inspired by .NET HostOptions.ShutdownTimeout (30s) and + // Vitest's teardownTimeout (10s). Will become CLI options in a follow-up + // (--shutdown-grace-period, --shutdown-abort-timeout). + // TODO(#5345): wire to PlatformCommandLineProvider. + internal static readonly TimeSpan DefaultGracePeriod = TimeSpan.FromSeconds(30); + internal static readonly TimeSpan DefaultAbortTimeout = TimeSpan.FromSeconds(10); + + private const int PhaseRunning = 0; + private const int PhaseDraining = 1; + private const int PhaseAborting = 2; + + private readonly CancellationTokenSource _drainingCts = new(); + private readonly CancellationTokenSource _abortingCts = new(); + private readonly TimeSpan _gracePeriod; + private readonly TimeSpan _abortTimeout; + private readonly IEnvironment _environment; private readonly ILogger? _logger; + private int _phase = PhaseRunning; + private int _ctrlCCount; + public CTRLPlusCCancellationTokenSource(IConsole? console = null, ILogger? logger = null) + : this(console, logger, DefaultGracePeriod, DefaultAbortTimeout, environment: null) { + } + + // Test-friendly overload so we can exercise the phase machine without waiting 30s. + internal CTRLPlusCCancellationTokenSource( + IConsole? console, + ILogger? logger, + TimeSpan gracePeriod, + TimeSpan abortTimeout, + IEnvironment? environment = null) + { + _gracePeriod = gracePeriod; + _abortTimeout = abortTimeout; + _environment = environment ?? new SystemEnvironment(); + _logger = logger; + if (console is not null && !IsCancelKeyPressNotSupported()) { console.CancelKeyPress += OnConsoleCancelKeyPressed; } - - _logger = logger; } [SupportedOSPlatformGuard("android")] @@ -33,27 +79,126 @@ private static bool IsCancelKeyPressNotSupported() OperatingSystem.IsWasi() || OperatingSystem.IsBrowser(); - public void CancelAfter(TimeSpan timeout) => _cancellationTokenSource.CancelAfter(timeout); + /// + public CancellationToken CancellationToken => _drainingCts.Token; + + /// + public CancellationToken DrainingToken => _drainingCts.Token; + + /// + public CancellationToken AbortingToken => _abortingCts.Token; + + internal int CurrentPhase => Volatile.Read(ref _phase); + + public void CancelAfter(TimeSpan timeout) => _drainingCts.CancelAfter(timeout); - public CancellationToken CancellationToken - => _cancellationTokenSource.Token; + /// + public void Cancel() => EnterDraining(); + + /// + public void Abort() + { + EnterDraining(); + EnterAborting(); + } + + public void Dispose() + { + _drainingCts.Dispose(); + _abortingCts.Dispose(); + } private void OnConsoleCancelKeyPressed(object? sender, ConsoleCancelEventArgs e) { - e.Cancel = true; + int count = Interlocked.Increment(ref _ctrlCCount); + + switch (count) + { + case 1: + // 1st Ctrl+C: cooperative cancel. + e.Cancel = true; + EnterDraining(); + break; + case 2: + // 2nd Ctrl+C: escalate to abort. + e.Cancel = true; + EnterAborting(); + break; + default: + // 3rd+ Ctrl+C: stop intercepting and let the runtime terminate + // the process. This matches docker compose / kubectl / npm UX: + // the user has explicitly asked us to die. + e.Cancel = false; + break; + } + } + + private void EnterDraining() + { + if (Interlocked.CompareExchange(ref _phase, PhaseDraining, PhaseRunning) != PhaseRunning) + { + return; + } + try { - _cancellationTokenSource.Cancel(); + _drainingCts.Cancel(); } catch (AggregateException ex) { - _logger?.LogWarning($"Exception during CTRLPlusCCancellationTokenSource cancel:\n{ex}"); + _logger?.LogWarning($"Exception during shutdown (Draining):\n{ex}"); + } + + // Auto-escalate to Aborting after the grace period. + if (_gracePeriod > TimeSpan.Zero && _gracePeriod != Timeout.InfiniteTimeSpan) + { + ScheduleEscalation(_gracePeriod, EnterAborting); + } + else if (_gracePeriod == TimeSpan.Zero) + { + EnterAborting(); } } - public void Dispose() - => _cancellationTokenSource.Dispose(); + private void EnterAborting() + { + if (Interlocked.Exchange(ref _phase, PhaseAborting) == PhaseAborting) + { + return; + } - public void Cancel() - => _cancellationTokenSource.Cancel(); + try + { + _abortingCts.Cancel(); + } + catch (AggregateException ex) + { + _logger?.LogWarning($"Exception during shutdown (Aborting):\n{ex}"); + } + + // After abort timeout, if the host is still alive, hard-terminate. + // FailFast is intentional: at this point we asked twice and waited; any + // remaining work has had its chance. This is the safety net that breaks + // hangs in non-cooperative frameworks (issue #5345). + if (_abortTimeout > TimeSpan.Zero && _abortTimeout != Timeout.InfiniteTimeSpan) + { + ScheduleEscalation(_abortTimeout, ForceTerminate); + } + } + + private static void ScheduleEscalation(TimeSpan delay, Action action) + { + // Fire-and-forget timer. We don't dispose: the host is shutting down anyway, + // and a short-lived CTS is cheaper than holding a Timer reference we'd need + // to manage across the phase machine. + var timerCts = new CancellationTokenSource(delay); + timerCts.Token.Register(action); + } + + private void ForceTerminate() + { + _logger?.LogWarning( + $"Shutdown grace exhausted ({_gracePeriod} + {_abortTimeout}); terminating host."); + _environment.FailFast("Test platform shutdown grace period exhausted."); + } } diff --git a/src/Platform/Microsoft.Testing.Platform/Services/ITestApplicationCancellationTokenSource.cs b/src/Platform/Microsoft.Testing.Platform/Services/ITestApplicationCancellationTokenSource.cs index 4d194e02f6..c5a18c0dcc 100644 --- a/src/Platform/Microsoft.Testing.Platform/Services/ITestApplicationCancellationTokenSource.cs +++ b/src/Platform/Microsoft.Testing.Platform/Services/ITestApplicationCancellationTokenSource.cs @@ -3,9 +3,46 @@ namespace Microsoft.Testing.Platform.Services; +/// +/// Source of the platform's lifetime cancellation tokens. Exposes a two-phase +/// shutdown model (see Issue #5345 / RFC "Phased graceful shutdown for MTP"): +/// +/// — set when the platform +/// enters the Draining phase (Ctrl+C, programmatic , +/// test session abort). Consumers should stop dispatching new work and +/// flush in-flight state. +/// — set when the platform +/// enters the Aborting phase (2nd Ctrl+C, grace period elapsed, +/// programmatic ). Consumers should bail out of +/// long-running work as fast as possible. +/// +/// is kept as the back-compat alias for +/// ; existing consumers do not need to change. +/// internal interface ITestApplicationCancellationTokenSource { + /// + /// Gets the back-compat alias for . + /// CancellationToken CancellationToken { get; } + /// + /// Gets the token that is signalled when the platform enters the Draining phase. + /// + CancellationToken DrainingToken { get; } + + /// + /// Gets the token that is signalled when the platform enters the Aborting phase. + /// + CancellationToken AbortingToken { get; } + + /// + /// Request the Draining phase. Idempotent. + /// void Cancel(); + + /// + /// Request the Aborting phase. Idempotent. Equivalent to a second Ctrl+C. + /// + void Abort(); } diff --git a/test/UnitTests/Microsoft.Testing.Platform.UnitTests/Hosts/CommonHostTests.cs b/test/UnitTests/Microsoft.Testing.Platform.UnitTests/Hosts/CommonHostTests.cs index b8d5f76a62..9a1b1aa72f 100644 --- a/test/UnitTests/Microsoft.Testing.Platform.UnitTests/Hosts/CommonHostTests.cs +++ b/test/UnitTests/Microsoft.Testing.Platform.UnitTests/Hosts/CommonHostTests.cs @@ -112,9 +112,17 @@ private sealed class TestApplicationCancellationTokenSource : ITestApplicationCa { public CancellationToken CancellationToken => CancellationToken.None; + public CancellationToken DrainingToken => CancellationToken.None; + + public CancellationToken AbortingToken => CancellationToken.None; + public void Cancel() { } + + public void Abort() + { + } } private sealed class AsyncCleanableTestHostApplicationLifetime : ITestHostApplicationLifetime, IAsyncCleanableExtension diff --git a/test/UnitTests/Microsoft.Testing.Platform.UnitTests/Services/CTRLPlusCCancellationTokenSourceTests.cs b/test/UnitTests/Microsoft.Testing.Platform.UnitTests/Services/CTRLPlusCCancellationTokenSourceTests.cs new file mode 100644 index 0000000000..55d048a047 --- /dev/null +++ b/test/UnitTests/Microsoft.Testing.Platform.UnitTests/Services/CTRLPlusCCancellationTokenSourceTests.cs @@ -0,0 +1,112 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using Microsoft.Testing.Platform.Services; + +namespace Microsoft.Testing.Platform.UnitTests; + +[TestClass] +public sealed class CTRLPlusCCancellationTokenSourceTests +{ + [TestMethod] + public void Initial_State_NeitherTokenIsCancelled() + { + using var source = new CTRLPlusCCancellationTokenSource( + console: null, + logger: null, + gracePeriod: Timeout.InfiniteTimeSpan, + abortTimeout: Timeout.InfiniteTimeSpan); + + Assert.IsFalse(source.CancellationToken.IsCancellationRequested); + Assert.IsFalse(source.DrainingToken.IsCancellationRequested); + Assert.IsFalse(source.AbortingToken.IsCancellationRequested); + } + + [TestMethod] + public void Cancel_OnlySignalsDrainingToken() + { + using var source = new CTRLPlusCCancellationTokenSource( + console: null, + logger: null, + gracePeriod: Timeout.InfiniteTimeSpan, + abortTimeout: Timeout.InfiniteTimeSpan); + + source.Cancel(); + + Assert.IsTrue(source.DrainingToken.IsCancellationRequested); + Assert.IsTrue(source.CancellationToken.IsCancellationRequested, "Legacy alias must follow DrainingToken."); + Assert.IsFalse(source.AbortingToken.IsCancellationRequested); + } + + [TestMethod] + public void Abort_SignalsBothTokens() + { + using var source = new CTRLPlusCCancellationTokenSource( + console: null, + logger: null, + gracePeriod: Timeout.InfiniteTimeSpan, + abortTimeout: Timeout.InfiniteTimeSpan); + + source.Abort(); + + Assert.IsTrue(source.DrainingToken.IsCancellationRequested); + Assert.IsTrue(source.AbortingToken.IsCancellationRequested); + } + + [TestMethod] + public void Cancel_IsIdempotent() + { + using var source = new CTRLPlusCCancellationTokenSource( + console: null, + logger: null, + gracePeriod: Timeout.InfiniteTimeSpan, + abortTimeout: Timeout.InfiniteTimeSpan); + + source.Cancel(); + source.Cancel(); + source.Cancel(); + + Assert.IsTrue(source.DrainingToken.IsCancellationRequested); + Assert.IsFalse(source.AbortingToken.IsCancellationRequested); + } + + [TestMethod] + public async Task GracePeriodElapse_EscalatesToAborting() + { + using var source = new CTRLPlusCCancellationTokenSource( + console: null, + logger: null, + gracePeriod: TimeSpan.FromMilliseconds(50), + abortTimeout: Timeout.InfiniteTimeSpan); + + source.Cancel(); + Assert.IsTrue(source.DrainingToken.IsCancellationRequested); + Assert.IsFalse(source.AbortingToken.IsCancellationRequested); + + // Wait for grace to elapse plus a margin. + using var waitCts = new CancellationTokenSource(TimeSpan.FromSeconds(5)); + while (!source.AbortingToken.IsCancellationRequested && !waitCts.IsCancellationRequested) + { + await Task.Delay(10, TestContext.CancellationToken).ConfigureAwait(false); + } + + Assert.IsTrue(source.AbortingToken.IsCancellationRequested, "Aborting must trip after the grace period."); + } + + public TestContext TestContext { get; set; } = null!; + + [TestMethod] + public void ZeroGracePeriod_ImmediatelyEscalatesToAborting() + { + using var source = new CTRLPlusCCancellationTokenSource( + console: null, + logger: null, + gracePeriod: TimeSpan.Zero, + abortTimeout: Timeout.InfiniteTimeSpan); + + source.Cancel(); + + Assert.IsTrue(source.DrainingToken.IsCancellationRequested); + Assert.IsTrue(source.AbortingToken.IsCancellationRequested); + } +}