From 64f74fe9dd543c39ee0842dd7947984e6307192e Mon Sep 17 00:00:00 2001 From: Rahul Bhargava Date: Thu, 14 May 2026 08:42:10 -0400 Subject: [PATCH 1/4] add support for story list randomize sort param --- mediacloud/api.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mediacloud/api.py b/mediacloud/api.py index 6d157ce..ea0715b 100644 --- a/mediacloud/api.py +++ b/mediacloud/api.py @@ -206,11 +206,13 @@ def stories_by_source_over_interval(self, query: str, start_date: dt.date, end_d def story_list(self, query: str, start_date: dt.date, end_date: dt.date, collection_ids: Optional[List[int]] = [], source_ids: Optional[List[int]] = [], platform: Optional[str] = None, expanded: Optional[bool] = None, pagination_token: Optional[str] = None, - sort_order: Optional[str] = None, - page_size: Optional[int] = None) -> tuple[List[Story], PaginationToken]: + sort_order: Optional[str] = None, page_size: Optional[int] = None, + randomize: Optional[bool] = None) -> tuple[List[Story], PaginationToken]: params = self._prep_default_params(query, start_date, end_date, collection_ids, source_ids, platform) if expanded: params['expanded'] = 1 + if randomize: + params['randomize'] = 1 if pagination_token: params['pagination_token'] = pagination_token if sort_order: From 486cfa500e30dec332e748632d1b4477678908fc Mon Sep 17 00:00:00 2001 From: Rahul Bhargava Date: Fri, 15 May 2026 11:21:20 -0400 Subject: [PATCH 2/4] remove spurious print --- mediacloud/test/api_base_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mediacloud/test/api_base_test.py b/mediacloud/test/api_base_test.py index a0faf8f..6e668e8 100644 --- a/mediacloud/test/api_base_test.py +++ b/mediacloud/test/api_base_test.py @@ -6,6 +6,7 @@ mediacloud.api.BaseApi.BASE_API_URL = os.getenv("MC_API_BASE_URL", "https://search.mediacloud.org/api/") + class BaseApiTest(TestCase): @staticmethod @@ -32,5 +33,4 @@ def test_user_profile(): mc_api_key = os.getenv("MC_API_TOKEN") client = 
mediacloud.api.DirectoryApi(mc_api_key) _ = client.user_profile() - print(_) assert True From 026a274b9485811a7faa3dabd6152286cd9bf4f7 Mon Sep 17 00:00:00 2001 From: Rahul Bhargava Date: Fri, 15 May 2026 11:22:09 -0400 Subject: [PATCH 3/4] add support for new param to story_list, and tests --- mediacloud/api.py | 4 +-- mediacloud/test/api_search_test.py | 53 +++++++++++++++++++++++++++++- 2 files changed, 54 insertions(+), 3 deletions(-) diff --git a/mediacloud/api.py b/mediacloud/api.py index ea0715b..0ba6884 100644 --- a/mediacloud/api.py +++ b/mediacloud/api.py @@ -207,11 +207,11 @@ def story_list(self, query: str, start_date: dt.date, end_date: dt.date, collect source_ids: Optional[List[int]] = [], platform: Optional[str] = None, expanded: Optional[bool] = None, pagination_token: Optional[str] = None, sort_order: Optional[str] = None, page_size: Optional[int] = None, - randomize: Optional[bool] = None) -> tuple[List[Story], PaginationToken]: + randomized: Optional[bool] = None) -> tuple[List[Story], PaginationToken]: params = self._prep_default_params(query, start_date, end_date, collection_ids, source_ids, platform) if expanded: params['expanded'] = 1 - if randomize: + if randomized: params['randomize'] = 1 if pagination_token: params['pagination_token'] = pagination_token diff --git a/mediacloud/test/api_search_test.py b/mediacloud/test/api_search_test.py index 8395b27..015ac29 100644 --- a/mediacloud/test/api_search_test.py +++ b/mediacloud/test/api_search_test.py @@ -117,6 +117,44 @@ def test_story_list_paging(self): assert len(results2) == 1000 assert next_page_token2 is not None assert next_page_token1 != next_page_token2 + page1_ids = [s['id'] for s in results1] + page2_ids = [s['id'] for s in results2] + common_ids = set(page1_ids) & set(page2_ids) + self.assertEqual(len(common_ids), 0) + + def test_story_list_randomized(self): + # make sure ids in results differ from standard call when randomized sent in + results_sorted, _ = 
self._admin_search.story_list(query="weather", start_date=START_DATE, + end_date=END_DATE, + collection_ids=[COLLECTION_US_NATIONAL]) + results_random, _ = self._admin_search.story_list(query="weather", start_date=START_DATE, + end_date=END_DATE, randomized=True, + collection_ids=[COLLECTION_US_NATIONAL]) + self.assertEqual(len(results_sorted), len(results_random)) + # make sure the id values in the two lists are different + sorted_ids = [s['id'] for s in results_sorted] + random_ids = [s['id'] for s in results_random] + self.assertEqual(len(sorted_ids), len(random_ids)) + self.assertNotEqual(sorted_ids, random_ids) + + def test_story_list_paging_randomizedd(self): + results1, next_page_token1 = self._admin_search.story_list(query="weather", start_date=START_DATE, + end_date=END_DATE, randomized=True, + collection_ids=[COLLECTION_US_NATIONAL]) + time.sleep(31) + assert len(results1) == 1000 + assert next_page_token1 is not None + results2, next_page_token2 = self._admin_search.story_list(query="weather", start_date=START_DATE, + end_date=END_DATE, randomized=True, + collection_ids=[COLLECTION_US_NATIONAL], + pagination_token=next_page_token1) + assert len(results2) == 1000 + assert next_page_token2 is not None + assert next_page_token1 != next_page_token2 + page1_ids = [s['id'] for s in results1] + page2_ids = [s['id'] for s in results2] + common_ids = set(page1_ids) & set(page2_ids) + self.assertEqual(len(common_ids), 0) def test_random_sample(self): def _test_random_sample(sample_size: int): @@ -139,7 +177,20 @@ def _test_random_sample(sample_size: int): assert 'text' not in s.keys() _test_random_sample(934) _test_random_sample(123) - # TO DO: add admin test that passed in `expanded=True` and verifies `text` is in returned item properties + + def test_story_list_random_expanded(self): + # note - requires staff API token + page, _ = self._admin_search.story_list(query="weather", start_date=START_DATE, end_date=END_DATE, + collection_ids=[COLLECTION_US_NATIONAL], 
randomized=True) + for story in page: + assert 'text' not in story + time.sleep(25) + page, _ = self._admin_search.story_list(query="weather", start_date=START_DATE, end_date=END_DATE, + expanded=True, collection_ids=[COLLECTION_US_NATIONAL], + randomized=True) + for story in page: + assert 'text' in story + assert len(story['text']) > 0 def test_story_list_expanded(self): # note - requires staff API token From f80fff98f8f43526ec2f20140c834ec2f6190201 Mon Sep 17 00:00:00 2001 From: Rahul Bhargava Date: Fri, 15 May 2026 13:25:20 -0400 Subject: [PATCH 4/4] tweak param defaults and typing hints --- mediacloud/api.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mediacloud/api.py b/mediacloud/api.py index 0ba6884..8264f2d 100644 --- a/mediacloud/api.py +++ b/mediacloud/api.py @@ -205,9 +205,9 @@ def stories_by_source_over_interval(self, query: str, start_date: dt.date, end_d def story_list(self, query: str, start_date: dt.date, end_date: dt.date, collection_ids: Optional[List[int]] = [], source_ids: Optional[List[int]] = [], platform: Optional[str] = None, - expanded: Optional[bool] = None, pagination_token: Optional[str] = None, + expanded: bool = False, pagination_token: Optional[str] = None, sort_order: Optional[str] = None, page_size: Optional[int] = None, - randomized: Optional[bool] = None) -> tuple[List[Story], PaginationToken]: + randomized: bool = False) -> tuple[List[Story], PaginationToken]: params = self._prep_default_params(query, start_date, end_date, collection_ids, source_ids, platform) if expanded: params['expanded'] = 1