Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,65 @@

namespace Microsoft.Testing.Platform.Services;

/// <summary>
/// Two-phase, Ctrl+C-aware <see cref="ITestApplicationCancellationTokenSource"/>.
/// </summary>
/// <remarks>
/// Phase machine (see RFC "Phased graceful shutdown for MTP", issue #5345):
/// <code>
/// RUNNING ──Ctrl+C / Cancel()──▶ DRAINING ──grace elapsed / 2nd Ctrl+C / Abort()──▶ ABORTING
/// ──3rd Ctrl+C──▶ (process terminated by runtime)
/// </code>
/// <para>
/// Transitions are idempotent and one-way. Existing consumers reading
/// <see cref="CancellationToken"/> automatically observe Draining (back-compat).
/// </para>
/// </remarks>
internal sealed class CTRLPlusCCancellationTokenSource : ITestApplicationCancellationTokenSource, IDisposable
{
// NOTE(review): this looks like the single-token field from before the two-phase change;
// the new members in this class are backed by _drainingCts / _abortingCts instead.
// Confirm whether it is still needed.
private readonly CancellationTokenSource _cancellationTokenSource = new();
// Conservative defaults inspired by .NET HostOptions.ShutdownTimeout (30s) and
// Vitest's teardownTimeout (10s). Will become CLI options in a follow-up
// (--shutdown-grace-period, --shutdown-abort-timeout).
// TODO(#5345): wire to PlatformCommandLineProvider.
internal static readonly TimeSpan DefaultGracePeriod = TimeSpan.FromSeconds(30);
internal static readonly TimeSpan DefaultAbortTimeout = TimeSpan.FromSeconds(10);

// Values stored in _phase. The phase only ever moves forward: Running -> Draining -> Aborting.
private const int PhaseRunning = 0;
private const int PhaseDraining = 1;
private const int PhaseAborting = 2;

// Backs DrainingToken and the back-compat CancellationToken alias.
private readonly CancellationTokenSource _drainingCts = new();
// Backs AbortingToken.
private readonly CancellationTokenSource _abortingCts = new();
// Delay between entering Draining and the automatic escalation to Aborting.
private readonly TimeSpan _gracePeriod;
// Delay between entering Aborting and the forced termination of the host.
private readonly TimeSpan _abortTimeout;
// Used to fail-fast the process once the abort timeout is exhausted.
private readonly IEnvironment _environment;
// Optional sink for shutdown warnings; null disables logging.
private readonly ILogger? _logger;

// Current phase (one of the Phase* constants); mutated with Interlocked, read with Volatile.Read.
private int _phase = PhaseRunning;
// Number of Ctrl+C presses observed so far; incremented atomically by the console handler.
private int _ctrlCCount;

/// <summary>
/// Initializes a new instance of the <see cref="CTRLPlusCCancellationTokenSource"/> class using
/// <see cref="DefaultGracePeriod"/>, <see cref="DefaultAbortTimeout"/> and the default environment.
/// </summary>
/// <param name="console">Console whose CancelKeyPress event drives the phase machine; <c>null</c> means no handler is registered.</param>
/// <param name="logger">Optional logger that receives shutdown warnings.</param>
public CTRLPlusCCancellationTokenSource(IConsole? console = null, ILogger? logger = null)
: this(console, logger, DefaultGracePeriod, DefaultAbortTimeout, environment: null)
{
}

/// <summary>
/// Initializes a new instance of the <see cref="CTRLPlusCCancellationTokenSource"/> class with explicit
/// timings. Test-friendly overload so the phase machine can be exercised without waiting for the defaults.
/// </summary>
/// <param name="console">Console whose CancelKeyPress event drives the phase machine; <c>null</c> means no handler is registered.</param>
/// <param name="logger">Optional logger that receives shutdown warnings.</param>
/// <param name="gracePeriod">
/// Time spent in Draining before escalating to Aborting. <see cref="TimeSpan.Zero"/> escalates
/// immediately; <see cref="Timeout.InfiniteTimeSpan"/> disables the automatic escalation.
/// </param>
/// <param name="abortTimeout">
/// Time spent in Aborting before the host is forcibly terminated. A non-positive or infinite
/// value disables the forced termination.
/// </param>
/// <param name="environment">Environment used for the forced termination; defaults to <c>SystemEnvironment</c>.</param>
internal CTRLPlusCCancellationTokenSource(
    IConsole? console,
    ILogger? logger,
    TimeSpan gracePeriod,
    TimeSpan abortTimeout,
    IEnvironment? environment = null)
{
    _gracePeriod = gracePeriod;
    _abortTimeout = abortTimeout;
    _environment = environment ?? new SystemEnvironment();
    _logger = logger;

    // Subscribe last so the handler can never observe a partially initialized instance.
    // NOTE(review): the console reference is not retained, so this subscription is never
    // removed; consider storing it and unsubscribing when the source is disposed.
    if (console is not null && !IsCancelKeyPressNotSupported())
    {
        console.CancelKeyPress += OnConsoleCancelKeyPressed;
    }
}

[SupportedOSPlatformGuard("android")]
Expand All @@ -33,27 +79,126 @@ private static bool IsCancelKeyPressNotSupported()
OperatingSystem.IsWasi() ||
OperatingSystem.IsBrowser();

/// <inheritdoc />
/// <remarks>Back-compat alias: this is the very same token as <see cref="DrainingToken"/>.</remarks>
public CancellationToken CancellationToken => DrainingToken;

/// <inheritdoc />
public CancellationToken DrainingToken => _drainingCts.Token;

/// <inheritdoc />
public CancellationToken AbortingToken => _abortingCts.Token;

/// <summary>
/// Gets the current phase as one of the <c>Phase*</c> constants, read with volatile semantics.
/// </summary>
internal int CurrentPhase => Volatile.Read(ref _phase);

/// <summary>
/// Schedules cancellation of the draining token once <paramref name="timeout"/> has elapsed.
/// </summary>
/// <param name="timeout">Delay before the draining token is cancelled.</param>
// NOTE(review): this cancels the draining token directly instead of going through
// EnterDraining(), so when the timeout fires CurrentPhase is not advanced and no
// grace-period escalation is scheduled. Confirm that timeout-driven cancellation is
// meant to stay outside the phase machine.
public void CancelAfter(TimeSpan timeout) => _drainingCts.CancelAfter(timeout);
/// <inheritdoc />
// Thin wrapper over EnterDraining(), which only acts on the Running -> Draining
// transition; calling this again afterwards is a no-op.
public void Cancel() => EnterDraining();

/// <inheritdoc />
public void Abort()
{
// Pass through Draining first so the draining token (and its CancellationToken alias)
// is cancelled before the aborting token.
EnterDraining();
// Then escalate. EnterAborting() returns early when the phase is already Aborting,
// e.g. when a zero grace period made EnterDraining() escalate on its own.
EnterAborting();
}

// 0 while the source is live, 1 once Dispose() has run. Escalation timers and the Ctrl+C
// handler consult it so that nothing touches the token sources, or fail-fasts the process,
// after the owner has finished with this instance.
private int _disposed;

// Pending escalation timers, at most one per phase transition. They are tracked so that
// Dispose() can stop them instead of letting them fire against a disposed instance.
private volatile CancellationTokenSource? _graceTimerCts;
private volatile CancellationTokenSource? _abortTimerCts;

/// <summary>
/// Stops any pending escalation timer and releases both token sources.
/// Calling it more than once is harmless.
/// </summary>
public void Dispose()
{
    if (Interlocked.Exchange(ref _disposed, 1) != 0)
    {
        return;
    }

    // Stop the timers first: an orderly shutdown must not be followed by a late
    // escalation to Aborting or by a FailFast of a process that is still alive.
    _graceTimerCts?.Dispose();
    _abortTimerCts?.Dispose();

    _drainingCts.Dispose();
    _abortingCts.Dispose();

    // NOTE(review): the CancelKeyPress subscription made in the constructor cannot be
    // removed here because the console instance is not retained; the handler ignores
    // events once _disposed is set instead.
}

/// <summary>
/// Console Ctrl+C handler. The 1st press enters Draining, the 2nd press enters Aborting,
/// and any further press is left to the runtime, which terminates the process.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data; <c>Cancel</c> is set to <c>true</c> only for the first two presses.</param>
private void OnConsoleCancelKeyPressed(object? sender, ConsoleCancelEventArgs e)
{
    if (Volatile.Read(ref _disposed) != 0)
    {
        // Nobody is listening to the tokens anymore: leave e.Cancel as received so the
        // runtime applies its regular Ctrl+C handling instead of us touching disposed state.
        return;
    }

    int pressCount = Interlocked.Increment(ref _ctrlCCount);

    // Intercept only the first two presses. From the 3rd press on we stop intercepting and
    // let the runtime terminate the process. This matches docker compose / kubectl / npm UX:
    // the user has explicitly asked us to die.
    e.Cancel = pressCount is 1 or 2;

    switch (pressCount)
    {
        case 1:
            // 1st Ctrl+C: cooperative cancel.
            EnterDraining();
            break;
        case 2:
            // 2nd Ctrl+C: escalate to abort.
            EnterAborting();
            break;
    }
}

/// <summary>
/// Performs the Running -> Draining transition: cancels the draining token and arranges the
/// automatic escalation to Aborting. Does nothing when the phase is no longer Running.
/// </summary>
private void EnterDraining()
{
    if (Interlocked.CompareExchange(ref _phase, PhaseDraining, PhaseRunning) != PhaseRunning)
    {
        return;
    }

    CancelAndLog(_drainingCts, "Draining");

    // Auto-escalate to Aborting after the grace period.
    if (_gracePeriod > TimeSpan.Zero && _gracePeriod != Timeout.InfiniteTimeSpan)
    {
        _graceTimerCts = ScheduleEscalation(_gracePeriod, EnterAborting);
    }
    else if (_gracePeriod == TimeSpan.Zero)
    {
        EnterAborting();
    }
}

/// <summary>
/// Performs the transition to Aborting: cancels the aborting token and arms the
/// forced-termination timer. Does nothing when the phase is already Aborting.
/// </summary>
private void EnterAborting()
{
    int previousPhase = Interlocked.Exchange(ref _phase, PhaseAborting);
    if (previousPhase == PhaseAborting)
    {
        return;
    }

    if (previousPhase == PhaseRunning)
    {
        // We skipped Draining (e.g. a 2nd Ctrl+C overtook the 1st one). EnterDraining() will
        // now find the phase already advanced and return early, so the draining token has to
        // be cancelled here to keep the "Aborting implies Draining" guarantee.
        CancelAndLog(_drainingCts, "Draining");
    }

    CancelAndLog(_abortingCts, "Aborting");

    // After abort timeout, if the host is still alive, hard-terminate.
    // FailFast is intentional: at this point we asked twice and waited; any
    // remaining work has had its chance. This is the safety net that breaks
    // hangs in non-cooperative frameworks (issue #5345).
    if (_abortTimeout > TimeSpan.Zero && _abortTimeout != Timeout.InfiniteTimeSpan)
    {
        _abortTimerCts = ScheduleEscalation(_abortTimeout, ForceTerminate);
    }
}

/// <summary>
/// Cancels <paramref name="cts"/>; failures raised by the token's callbacks are logged
/// as a warning instead of aborting the phase transition.
/// </summary>
/// <param name="cts">Token source to cancel.</param>
/// <param name="phaseName">Phase name used in the warning message.</param>
private void CancelAndLog(CancellationTokenSource cts, string phaseName)
{
    try
    {
        cts.Cancel();
    }
    catch (AggregateException ex)
    {
        _logger?.LogWarning($"Exception during shutdown ({phaseName}):\n{ex}");
    }
}

/// <summary>
/// Runs <paramref name="action"/> once <paramref name="delay"/> has elapsed, unless this
/// instance has been disposed in the meantime.
/// </summary>
/// <param name="delay">Positive, finite delay before the action runs.</param>
/// <param name="action">Escalation step to run.</param>
/// <returns>The timer's token source; the caller keeps it so that Dispose() can stop the timer.</returns>
private CancellationTokenSource ScheduleEscalation(TimeSpan delay, Action action)
{
    var timerCts = new CancellationTokenSource(delay);
    timerCts.Token.Register(() =>
    {
        if (Volatile.Read(ref _disposed) != 0)
        {
            return;
        }

        try
        {
            action();
        }
        catch (ObjectDisposedException)
        {
            // Lost the race with Dispose(): the token sources are gone and there is nothing
            // left to escalate. Swallowing keeps the exception off the timer thread.
        }
    });

    return timerCts;
}

/// <summary>
/// Last-resort step: logs a warning and fail-fasts the process through the environment.
/// </summary>
private void ForceTerminate()
{
    _logger?.LogWarning(
        $"Shutdown grace exhausted ({_gracePeriod} + {_abortTimeout}); terminating host.");
    _environment.FailFast("Test platform shutdown grace period exhausted.");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,46 @@

namespace Microsoft.Testing.Platform.Services;

/// <summary>
/// Source of the platform's lifetime cancellation tokens. Exposes a two-phase
/// shutdown model (see issue #5345 / RFC "Phased graceful shutdown for MTP"):
/// <list type="bullet">
/// <item><description><see cref="DrainingToken"/> — signalled when the platform
/// enters the Draining phase (Ctrl+C, programmatic <see cref="Cancel"/>,
/// test session abort). Consumers should stop dispatching new work and
/// flush in-flight state.</description></item>
/// <item><description><see cref="AbortingToken"/> — signalled when the platform
/// enters the Aborting phase (2nd Ctrl+C, grace period elapsed,
/// programmatic <see cref="Abort"/>). Consumers should bail out of
/// long-running work as fast as possible.</description></item>
/// </list>
/// <see cref="CancellationToken"/> is kept as the back-compat alias for
/// <see cref="DrainingToken"/>; existing consumers do not need to change.
/// </summary>
internal interface ITestApplicationCancellationTokenSource
{
/// <summary>
/// Gets the back-compat alias for <see cref="DrainingToken"/>.
/// </summary>
CancellationToken CancellationToken { get; }

/// <summary>
/// Gets the token that is signalled when the platform enters the Draining phase.
/// </summary>
CancellationToken DrainingToken { get; }

/// <summary>
/// Gets the token that is signalled when the platform enters the Aborting phase.
/// </summary>
CancellationToken AbortingToken { get; }

/// <summary>
/// Requests the Draining phase. Idempotent.
/// </summary>
void Cancel();

/// <summary>
/// Requests the Aborting phase. Idempotent. Equivalent to a second Ctrl+C.
/// </summary>
void Abort();
}
Original file line number Diff line number Diff line change
Expand Up @@ -112,9 +112,17 @@ private sealed class TestApplicationCancellationTokenSource : ITestApplicationCa
{
// Inert implementation: every token is CancellationToken.None, so none of them is ever signalled.
public CancellationToken CancellationToken => CancellationToken.None;

public CancellationToken DrainingToken => CancellationToken.None;

public CancellationToken AbortingToken => CancellationToken.None;

// No-op: there is no underlying token source to cancel.
public void Cancel()
{
}

// No-op: there is no underlying token source to abort.
public void Abort()
{
}
}

private sealed class AsyncCleanableTestHostApplicationLifetime : ITestHostApplicationLifetime, IAsyncCleanableExtension
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.

using Microsoft.Testing.Platform.Services;

namespace Microsoft.Testing.Platform.UnitTests;

/// <summary>
/// Exercises the Running -> Draining -> Aborting phase machine of
/// <see cref="CTRLPlusCCancellationTokenSource"/> through its public surface.
/// The abort timeout is always infinite so no test can trigger the forced termination.
/// </summary>
[TestClass]
public sealed class CTRLPlusCCancellationTokenSourceTests
{
    public TestContext TestContext { get; set; } = null!;

    [TestMethod]
    public void Initial_State_NeitherTokenIsCancelled()
    {
        using CTRLPlusCCancellationTokenSource source = CreateSource(Timeout.InfiniteTimeSpan);

        Assert.IsFalse(source.CancellationToken.IsCancellationRequested);
        Assert.IsFalse(source.DrainingToken.IsCancellationRequested);
        Assert.IsFalse(source.AbortingToken.IsCancellationRequested);
    }

    [TestMethod]
    public void Cancel_OnlySignalsDrainingToken()
    {
        using CTRLPlusCCancellationTokenSource source = CreateSource(Timeout.InfiniteTimeSpan);

        source.Cancel();

        Assert.IsTrue(source.DrainingToken.IsCancellationRequested);
        Assert.IsTrue(source.CancellationToken.IsCancellationRequested, "Legacy alias must follow DrainingToken.");
        Assert.IsFalse(source.AbortingToken.IsCancellationRequested);
    }

    [TestMethod]
    public void Abort_SignalsBothTokens()
    {
        using CTRLPlusCCancellationTokenSource source = CreateSource(Timeout.InfiniteTimeSpan);

        source.Abort();

        Assert.IsTrue(source.DrainingToken.IsCancellationRequested);
        Assert.IsTrue(source.AbortingToken.IsCancellationRequested);
    }

    [TestMethod]
    public void Cancel_IsIdempotent()
    {
        using CTRLPlusCCancellationTokenSource source = CreateSource(Timeout.InfiniteTimeSpan);

        source.Cancel();
        source.Cancel();
        source.Cancel();

        Assert.IsTrue(source.DrainingToken.IsCancellationRequested);
        Assert.IsFalse(source.AbortingToken.IsCancellationRequested);
    }

    [TestMethod]
    public async Task GracePeriodElapse_EscalatesToAborting()
    {
        using CTRLPlusCCancellationTokenSource source = CreateSource(TimeSpan.FromMilliseconds(50));

        // Wait on the token itself instead of polling: the task completes with true when
        // Aborting trips, or with false when the safety timeout (or the test run) is cancelled.
        var escalation = new TaskCompletionSource<bool>(TaskCreationOptions.RunContinuationsAsynchronously);
        using var timeoutCts = CancellationTokenSource.CreateLinkedTokenSource(TestContext.CancellationToken);
        timeoutCts.CancelAfter(TimeSpan.FromSeconds(5));
        using CancellationTokenRegistration abortedRegistration =
            source.AbortingToken.Register(() => escalation.TrySetResult(true));
        using CancellationTokenRegistration timeoutRegistration =
            timeoutCts.Token.Register(() => escalation.TrySetResult(false));

        source.Cancel();

        // No "AbortingToken is not yet cancelled" assertion here: it would race the 50 ms
        // grace timer on a loaded machine. Cancel_OnlySignalsDrainingToken covers that case
        // deterministically with an infinite grace period.
        Assert.IsTrue(source.DrainingToken.IsCancellationRequested);

        bool escalated = await escalation.Task.ConfigureAwait(false);

        Assert.IsTrue(escalated, "Aborting must trip after the grace period.");
        Assert.IsTrue(source.AbortingToken.IsCancellationRequested, "Aborting must trip after the grace period.");
    }

    [TestMethod]
    public void ZeroGracePeriod_ImmediatelyEscalatesToAborting()
    {
        using CTRLPlusCCancellationTokenSource source = CreateSource(TimeSpan.Zero);

        source.Cancel();

        Assert.IsTrue(source.DrainingToken.IsCancellationRequested);
        Assert.IsTrue(source.AbortingToken.IsCancellationRequested);
    }

    // Builds a source with no console, no logger and an infinite abort timeout, so only the
    // grace period varies between tests.
    private static CTRLPlusCCancellationTokenSource CreateSource(TimeSpan gracePeriod)
        => new(
            console: null,
            logger: null,
            gracePeriod: gracePeriod,
            abortTimeout: Timeout.InfiniteTimeSpan);
}
Loading