Skip to content

Commit ad89062

Browse files
committed
test: xfail legacy flaky test
`tests/integration/test_scenarios.py::test_scenario_initial_task_types[new_task-streaming-legacy]` is flaky. Because it exercises the legacy code path, which is no longer used by default, mark it as `xfail` instead of letting it fail CI intermittently. Example failures: - https://github.com/a2aproject/a2a-python/actions/runs/24995840443/job/73192514384 - https://github.com/a2aproject/a2a-python/actions/runs/25041293011/job/73344930927
1 parent 7af6050 commit ad89062

1 file changed

Lines changed: 16 additions & 5 deletions

File tree

tests/integration/test_scenarios.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
import collections
33
import contextlib
44
import logging
5-
65
from typing import Any
76

87
import grpc
@@ -13,6 +12,7 @@
1312
from a2a.client.client import ClientConfig
1413
from a2a.client.client_factory import ClientFactory
1514
from a2a.client.errors import A2AClientError
15+
from a2a.helpers.proto_helpers import new_task_from_user_message
1616
from a2a.server.agent_execution import AgentExecutor, RequestContext
1717
from a2a.server.context import ServerCallContext
1818
from a2a.server.events import EventQueue
@@ -47,16 +47,14 @@
4747
TaskStatus,
4848
TaskStatusUpdateEvent,
4949
)
50-
from a2a.helpers.proto_helpers import new_task_from_user_message
5150
from a2a.utils import TransportProtocol
5251
from a2a.utils.errors import (
52+
InvalidAgentResponseError,
5353
InvalidParamsError,
5454
TaskNotCancelableError,
5555
TaskNotFoundError,
56-
InvalidAgentResponseError,
5756
)
5857

59-
6058
logger = logging.getLogger(__name__)
6159

6260

@@ -1933,8 +1931,21 @@ async def listen_to_end():
19331931
)
19341932
@pytest.mark.parametrize('initial_task_type', ['new_task', 'status_update'])
19351933
async def test_scenario_initial_task_types(
1936-
use_legacy, streaming, initial_task_type
1934+
request, use_legacy, streaming, initial_task_type
19371935
):
1936+
if use_legacy and streaming and initial_task_type == 'new_task':
1937+
# There is a race condition which manifests itself in flaky CI failures.
1938+
# Given that we don't use legacy by default anymore, xfail it.
1939+
# Flakiness rate is around 1 failure per 30 runs, so use strict=False
1940+
# to avoid failing on a passing xfail.
1941+
request.node.add_marker(
1942+
pytest.mark.xfail(
1943+
reason='https://github.com/a2aproject/a2a-python/issues/869',
1944+
strict=False,
1945+
raises=ValueError,
1946+
)
1947+
)
1948+
19381949
started_event = asyncio.Event()
19391950
continue_event = asyncio.Event()
19401951

0 commit comments

Comments
 (0)