From ad890628b5dff6beff3d7bb34b2a53f3954c5883 Mon Sep 17 00:00:00 2001 From: Ivan Shymko Date: Tue, 28 Apr 2026 10:56:17 +0000 Subject: [PATCH 1/4] test: xfail legacy flaky test `tests/integration/test_scenarios.py::test_scenario_initial_task_types[new_task-streaming-legacy]` is flaky. Because it exercises the legacy path, which is no longer used by default, mark it as `xfail`. Example failures: - https://github.com/a2aproject/a2a-python/actions/runs/24995840443/job/73192514384 - https://github.com/a2aproject/a2a-python/actions/runs/25041293011/job/73344930927 --- tests/integration/test_scenarios.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/tests/integration/test_scenarios.py b/tests/integration/test_scenarios.py index 6070a672f..6eb507801 100644 --- a/tests/integration/test_scenarios.py +++ b/tests/integration/test_scenarios.py @@ -2,7 +2,6 @@ import collections import contextlib import logging - from typing import Any import grpc @@ -13,6 +12,7 @@ from a2a.client.client import ClientConfig from a2a.client.client_factory import ClientFactory from a2a.client.errors import A2AClientError +from a2a.helpers.proto_helpers import new_task_from_user_message from a2a.server.agent_execution import AgentExecutor, RequestContext from a2a.server.context import ServerCallContext from a2a.server.events import EventQueue @@ -47,16 +47,14 @@ TaskStatus, TaskStatusUpdateEvent, ) -from a2a.helpers.proto_helpers import new_task_from_user_message from a2a.utils import TransportProtocol from a2a.utils.errors import ( + InvalidAgentResponseError, InvalidParamsError, TaskNotCancelableError, TaskNotFoundError, - InvalidAgentResponseError, ) - logger = logging.getLogger(__name__) @@ -1933,8 +1931,21 @@ async def listen_to_end(): ) @pytest.mark.parametrize('initial_task_type', ['new_task', 'status_update']) async def test_scenario_initial_task_types( - use_legacy, streaming, initial_task_type + request, use_legacy, streaming, initial_task_type ): + if use_legacy and streaming and 
initial_task_type == 'new_task': + # There is a race condition which manifests itself in flaky CI failures. + # Given that we don't use legacy by default anymore, xfail it. + # Flakiness rate is around 1 failure per 30 runs, so use strict=False + # to avoid failing on a passing xfail. + request.node.add_marker( + pytest.mark.xfail( + reason='https://github.com/a2aproject/a2a-python/issues/869', + strict=False, + raises=ValueError, + ) + ) + started_event = asyncio.Event() continue_event = asyncio.Event() From 98e4780549d70280175afca8732e077851b93c6a Mon Sep 17 00:00:00 2001 From: Ivan Shymko Date: Tue, 28 Apr 2026 10:58:04 +0000 Subject: [PATCH 2/4] Update --- tests/integration/test_scenarios.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_scenarios.py b/tests/integration/test_scenarios.py index 6eb507801..005bc25aa 100644 --- a/tests/integration/test_scenarios.py +++ b/tests/integration/test_scenarios.py @@ -2,6 +2,7 @@ import collections import contextlib import logging + from typing import Any import grpc @@ -12,7 +13,6 @@ from a2a.client.client import ClientConfig from a2a.client.client_factory import ClientFactory from a2a.client.errors import A2AClientError -from a2a.helpers.proto_helpers import new_task_from_user_message from a2a.server.agent_execution import AgentExecutor, RequestContext from a2a.server.context import ServerCallContext from a2a.server.events import EventQueue @@ -47,14 +47,16 @@ TaskStatus, TaskStatusUpdateEvent, ) +from a2a.helpers.proto_helpers import new_task_from_user_message from a2a.utils import TransportProtocol from a2a.utils.errors import ( - InvalidAgentResponseError, InvalidParamsError, TaskNotCancelableError, TaskNotFoundError, + InvalidAgentResponseError, ) + logger = logging.getLogger(__name__) From 1d317e157e456df7d24a1e70e44b0a8bc599e6e9 Mon Sep 17 00:00:00 2001 From: Ivan Shymko Date: Tue, 28 Apr 2026 13:06:39 +0200 Subject: [PATCH 3/4] Update 
tests/integration/test_scenarios.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- tests/integration/test_scenarios.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_scenarios.py b/tests/integration/test_scenarios.py index 005bc25aa..b85f350c3 100644 --- a/tests/integration/test_scenarios.py +++ b/tests/integration/test_scenarios.py @@ -1940,7 +1940,7 @@ async def test_scenario_initial_task_types( # Given that we don't use legacy by default anymore, xfail it. # Flakiness rate is around 1 failure per 30 runs, so use strict=False # to avoid failing on a passing xfail. - request.node.add_marker( + reason='Flaky test due to race condition in legacy streaming, see https://github.com/a2aproject/a2a-python/issues/869', pytest.mark.xfail( reason='https://github.com/a2aproject/a2a-python/issues/869', strict=False, From bc4458be1ba9c8bd1a64e9e7139db02a3d3e2de5 Mon Sep 17 00:00:00 2001 From: Ivan Shymko Date: Tue, 28 Apr 2026 11:07:17 +0000 Subject: [PATCH 4/4] Revert "Update tests/integration/test_scenarios.py" This reverts commit 1d317e157e456df7d24a1e70e44b0a8bc599e6e9. --- tests/integration/test_scenarios.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_scenarios.py b/tests/integration/test_scenarios.py index b85f350c3..005bc25aa 100644 --- a/tests/integration/test_scenarios.py +++ b/tests/integration/test_scenarios.py @@ -1940,7 +1940,7 @@ async def test_scenario_initial_task_types( # Given that we don't use legacy by default anymore, xfail it. # Flakiness rate is around 1 failure per 30 runs, so use strict=False # to avoid failing on a passing xfail. - reason='Flaky test due to race condition in legacy streaming, see https://github.com/a2aproject/a2a-python/issues/869', + request.node.add_marker( pytest.mark.xfail( reason='https://github.com/a2aproject/a2a-python/issues/869', strict=False,