From 4c77d7086ad14d47197697ea1c6ec7267d29b247 Mon Sep 17 00:00:00 2001 From: Rahul Bhargava Date: Mon, 4 May 2026 10:23:28 -0400 Subject: [PATCH] use ratelimiter for better default behavior also linting from something --- mediacloud/api.py | 34 ++++++++++++++-------------------- pyproject.toml | 1 + 2 files changed, 15 insertions(+), 20 deletions(-) diff --git a/mediacloud/api.py b/mediacloud/api.py index 37ade20..ef85903 100644 --- a/mediacloud/api.py +++ b/mediacloud/api.py @@ -4,25 +4,15 @@ import warnings from typing import Any, Dict, List, Optional, Union -import requests +from requests_ratelimiter import LimiterSession import mediacloud import mediacloud.error -from mediacloud.types import ( - Collection, - CountOverTimePoint, - JSONObj, - LanguageCount, - OffsetPage, - PaginationToken, - Source, - SourceIntervalAttention, - SourceCount, - SourceWeekAttention, - Story, - StoryCount, - VersionInfo, -) +from mediacloud.types import (Collection, CountOverTimePoint, JSONObj, + LanguageCount, OffsetPage, PaginationToken, + Source, SourceCount, SourceIntervalAttention, + SourceWeekAttention, Story, StoryCount, + VersionInfo) logger = logging.getLogger(__name__) @@ -40,6 +30,10 @@ class BaseApi: # running queries TIMEOUT_SECS = 60 + # Default rate limit for API requests. Admins with higher rate limits can + # override this on a subclass (or on the class) before instantiating the client. 
+ RATE_LIMIT_PER_MINUTE = 2 + BASE_API_URL = "https://search.mediacloud.org/api/" USER_AGENT_STRING = f"mediacloud {VERSION}" @@ -49,8 +43,8 @@ def __init__(self, auth_token: Optional[str] = None): raise mediacloud.error.MCException("No api key set - nothing will work without this") # Specify the auth_token to use for all future requests self._auth_token = auth_token - # better performance to put all HTTP through this one object - self._session = requests.Session() + # better performance to put all HTTP through this one object; the LimiterSession also throttles requests to RATE_LIMIT_PER_MINUTE + self._session = LimiterSession(per_minute=self.RATE_LIMIT_PER_MINUTE) self._session.headers.update({'Authorization': f'Token {self._auth_token}'}) self._session.headers.update({'Accept': 'application/json'}) self._session.headers.update({"User-Agent": self.USER_AGENT_STRING}) @@ -197,8 +191,8 @@ def stories_by_source_week(self, query: str, start_date: dt.date, end_date: dt.d return results['source-week-attention'] def stories_by_source_over_interval(self, query: str, start_date: dt.date, end_date: dt.date, - collection_ids: Optional[List[int]] = [], source_ids: Optional[List[int]] = [], - platform: Optional[str] = None, interval: Optional[str] = None) -> List[SourceIntervalAttention]: + collection_ids: Optional[List[int]] = [], source_ids: Optional[List[int]] = [], + platform: Optional[str] = None, interval: Optional[str] = None) -> List[SourceIntervalAttention]: params = self._prep_default_params(query, start_date, end_date, collection_ids, source_ids, platform) if interval: params['interval'] = interval diff --git a/pyproject.toml b/pyproject.toml index e6b0e43..b0a7f0f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ classifiers = [ ] dependencies = [ "requests == 2.*", + "requests-ratelimiter >= 0.10", ] [project.optional-dependencies]