Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 32 additions & 18 deletions mediacloud/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,20 @@

import mediacloud
import mediacloud.error
from mediacloud.types import (
Collection,
CountOverTimePoint,
JSONObj,
LanguageCount,
OffsetPage,
PaginationToken,
Source,
SourceCount,
SourceWeekAttention,
Story,
StoryCount,
VersionInfo,
)

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -40,18 +54,18 @@ def __init__(self, auth_token: Optional[str] = None):
self._session.headers.update({'Accept': 'application/json'})
self._session.headers.update({"User-Agent": self.USER_AGENT_STRING})

def user_profile(self) -> JSONObj:
    """
    Fetch the 'auth/profile' endpoint.

    :return: basic info about the current user, including their roles
    """
    return self._query('auth/profile')

def version(self) -> VersionInfo:
    """
    Fetch the 'version' endpoint.

    :return: dict with (at least): GIT_REV, now (float epoch time), version
    """
    return self._query('version')

def _query(self, endpoint: str, params: Optional[Dict] = None, method: str = 'GET') -> Dict:
def _query(self, endpoint: str, params: Optional[Dict] = None, method: str = 'GET') -> JSONObj:
"""
Centralize making the actual queries here for easy maintenance and testing of HTTP comms
"""
Expand All @@ -75,12 +89,12 @@ class DirectoryApi(BaseApi):
PLATFORM_TWITTER = "twitter"
PLATFORM_REDDIT = "reddit"

def collection(self, collection_id: int) -> Collection:
    """
    Fetch a single collection from 'sources/collections/{collection_id}/'.

    :param collection_id: id of the collection, interpolated into the endpoint path
    :return: whatever the endpoint returns for that collection, passed through unchanged
    """
    return self._query(f'sources/collections/{collection_id}/', None)

def collection_list(self, platform: Optional[str] = None, name: Optional[str] = None,
limit: Optional[int] = 0, offset: Optional[int] = 0, source_id: Optional[int] = None) -> Dict:
limit: Optional[int] = 0, offset: Optional[int] = 0, source_id: Optional[int] = None) -> OffsetPage:
params: Dict[Any, Any] = dict(limit=limit, offset=offset)
if name:
params['name'] = name
Expand All @@ -90,12 +104,12 @@ def collection_list(self, platform: Optional[str] = None, name: Optional[str] =
params['source_id'] = source_id
return self._query('sources/collections/', params)

def source(self, source_id: int) -> Source:
    """
    Fetch a single source from 'sources/sources/{source_id}/'.

    :param source_id: id of the source, interpolated into the endpoint path
    :return: whatever the endpoint returns for that source, passed through unchanged
    """
    return self._query(f'sources/sources/{source_id}/', None)

def source_list(self, platform: Optional[str] = None, name: Optional[str] = None,
collection_id: Optional[int] = None,
limit: Optional[int] = 0, offset: Optional[int] = 0) -> Dict:
limit: Optional[int] = 0, offset: Optional[int] = 0) -> OffsetPage:
params: Dict[Any, Any] = dict(limit=limit, offset=offset)
if collection_id:
params['collection_id'] = collection_id
Expand All @@ -108,7 +122,7 @@ def source_list(self, platform: Optional[str] = None, name: Optional[str] = None
def feed_list(self, source_id: Optional[int] = None,
modified_since: Optional[Union[dt.datetime, int, float]] = None,
modified_before: Optional[Union[dt.datetime, int, float]] = None,
limit: Optional[int] = 0, offset: Optional[int] = 0, return_details: bool = False) -> Dict:
limit: Optional[int] = 0, offset: Optional[int] = 0, return_details: bool = False) -> JSONObj:
params: Dict[Any, Any] = dict(limit=limit, offset=offset)
if source_id:
params['source_id'] = source_id
Expand Down Expand Up @@ -156,14 +170,14 @@ def _prep_default_params(self, query: str, start_date: dt.date, end_date: dt.dat
return params

def story_count(self, query: str, start_date: dt.date, end_date: dt.date, collection_ids: Optional[List[int]] = [],
                source_ids: Optional[List[int]] = [], platform: Optional[str] = None) -> StoryCount:
    """
    Query 'search/total-count' and return the 'count' entry of the response.

    All arguments are forwarded to _prep_default_params to build the request parameters.

    :return: the value stored under 'count' in the endpoint's response
    """
    # NOTE(review): the list defaults are shared mutable objects; they are only forwarded here -
    # confirm _prep_default_params never mutates them.
    params = self._prep_default_params(query, start_date, end_date, collection_ids, source_ids, platform)
    results = self._query('search/total-count', params)
    return results['count']

def story_count_over_time(self, query: str, start_date: dt.date, end_date: dt.date,
collection_ids: Optional[List[int]] = [], source_ids: Optional[List[int]] = [],
platform: Optional[str] = None) -> List[Dict]:
platform: Optional[str] = None) -> List[CountOverTimePoint]:
params = self._prep_default_params(query, start_date, end_date, collection_ids, source_ids, platform)
results = self._query('search/count-over-time', params)
for d in results['count_over_time']['counts']:
Expand All @@ -172,7 +186,7 @@ def story_count_over_time(self, query: str, start_date: dt.date, end_date: dt.da

def stories_by_source_week(self, query: str, start_date: dt.date, end_date: dt.date,
                           collection_ids: Optional[List[int]] = [], source_ids: Optional[List[int]] = [],
                           platform: Optional[str] = None) -> List[SourceWeekAttention]:
    """
    Query 'search/count-by-source-week' and return the 'source-week-attention' entry of the response.

    All arguments are forwarded to _prep_default_params to build the request parameters.

    :return: the value stored under 'source-week-attention' in the endpoint's response
    """
    # NOTE(review): the list defaults are shared mutable objects; they are only forwarded here -
    # confirm _prep_default_params never mutates them.
    params = self._prep_default_params(query, start_date, end_date, collection_ids, source_ids, platform)
    results = self._query('search/count-by-source-week', params)
    return results['source-week-attention']
Expand All @@ -181,7 +195,7 @@ def story_list(self, query: str, start_date: dt.date, end_date: dt.date, collect
source_ids: Optional[List[int]] = [], platform: Optional[str] = None,
expanded: Optional[bool] = None, pagination_token: Optional[str] = None,
sort_order: Optional[str] = None,
page_size: Optional[int] = None) -> tuple[List[Dict], Optional[str]]:
page_size: Optional[int] = None) -> tuple[List[Story], PaginationToken]:
params = self._prep_default_params(query, start_date, end_date, collection_ids, source_ids, platform)
if expanded:
params['expanded'] = 1
Expand All @@ -195,15 +209,15 @@ def story_list(self, query: str, start_date: dt.date, end_date: dt.date, collect
self._dates_str2objects(results['stories'])
return results['stories'], results['pagination_token']

def _dates_str2objects(self, stories: List[Story]) -> None:
    """
    Translate the ES date strings on each story into python date/datetime objects, _in place_.

    The dicts already in the list are mutated (no converted copies are built) to save memory.

    :param stories: story dicts to modify. 'publish_date' becomes a dt.date built from the first
        10 characters of the string; 'indexed_date' becomes a dt.datetime. Empty or None values
        become None. A key that is absent stays absent, and a value that is not a string (for
        example one converted by an earlier call) is left untouched, so the call is repeatable.
    """
    for story in stories:
        # Story is declared total=False, so neither key is guaranteed to be present
        if 'publish_date' in story:
            raw_publish = story['publish_date']
            if not raw_publish:
                story['publish_date'] = None
            elif isinstance(raw_publish, str):
                # only the leading YYYY-MM-DD is used; any time portion is dropped
                story['publish_date'] = dt.date.fromisoformat(raw_publish[:10])
        if 'indexed_date' in story:
            raw_indexed = story['indexed_date']
            if not raw_indexed:
                story['indexed_date'] = None
            elif isinstance(raw_indexed, str):
                story['indexed_date'] = dt.datetime.fromisoformat(raw_indexed)

def story_sample(self, query: str, start_date: dt.date, end_date: dt.date, collection_ids: Optional[List[int]] = [],
source_ids: Optional[List[int]] = [], platform: Optional[str] = None,
limit: Optional[int] = None, expanded=False) -> List[Dict]:
limit: Optional[int] = None, expanded=False) -> List[Story]:
params = self._prep_default_params(query, start_date, end_date, collection_ids, source_ids, platform)
if limit:
params['limit'] = limit
Expand All @@ -215,14 +229,14 @@ def story_sample(self, query: str, start_date: dt.date, end_date: dt.date, colle
self._dates_str2objects(results['sample'])
return results['sample']

def story(self, story_id: str) -> Story:
    """
    Query 'search/story' for one story and return the 'story' entry of the response.

    :param story_id: sent to the endpoint as the 'storyId' parameter, together with
        'platform' set to self.PROVIDER
    :return: the value stored under 'story' in the endpoint's response
    """
    params = dict(storyId=story_id, platform=self.PROVIDER)
    results = self._query('search/story', params)
    return results['story']

def words(self, query: str, start_date: dt.date, end_date: dt.date, collection_ids: Optional[List[int]] = [],
source_ids: Optional[List[int]] = [], platform: Optional[str] = None,
limit: Optional[int] = None) -> List[Dict]:
limit: Optional[int] = None) -> List[JSONObj]:
params = self._prep_default_params(query, start_date, end_date, collection_ids, source_ids, platform)
if limit:
params['limit'] = limit
Expand All @@ -231,7 +245,7 @@ def words(self, query: str, start_date: dt.date, end_date: dt.date, collection_i

def sources(self, query: str, start_date: dt.date, end_date: dt.date, collection_ids: Optional[List[int]] = [],
source_ids: Optional[List[int]] = [], platform: Optional[str] = None,
limit: Optional[int] = None) -> List[Dict]:
limit: Optional[int] = None) -> List[SourceCount]:
params = self._prep_default_params(query, start_date, end_date, collection_ids, source_ids, platform)
if limit:
params['limit'] = limit
Expand All @@ -240,7 +254,7 @@ def sources(self, query: str, start_date: dt.date, end_date: dt.date, collection

def languages(self, query: str, start_date: dt.date, end_date: dt.date, collection_ids: Optional[List[int]] = [],
source_ids: Optional[List[int]] = [], platform: Optional[str] = None,
limit: Optional[int] = None) -> List[Dict]:
limit: Optional[int] = None) -> List[LanguageCount]:
params = self._prep_default_params(query, start_date, end_date, collection_ids, source_ids, platform)
if limit:
params['limit'] = limit
Expand Down
1 change: 1 addition & 0 deletions mediacloud/py.typed
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

93 changes: 93 additions & 0 deletions mediacloud/types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import datetime as dt
from typing import Any, TypeAlias, TypedDict

# A decoded JSON object: string keys mapped to values of any type.
JSONObj: TypeAlias = dict[str, Any]
# A list of JSON objects.
JSONList: TypeAlias = list[JSONObj]
# A pagination token string, or None (presumably meaning there is no further page - confirm).
PaginationToken: TypeAlias = str | None


class Story(TypedDict, total=False):
id: str
title: str
url: str
language: str
media_name: str
media_url: str
text: str
publish_date: dt.date | None
indexed_date: dt.datetime | None


class StoryCount(TypedDict, total=False):
    """
    Typed shape of a story-count dict; every key is optional (total=False).

    NOTE(review): presumably 'relevant' counts stories matching the query and 'total' all
    stories in scope - confirm against the API.
    """
    relevant: int
    total: int


class CountOverTimePoint(TypedDict, total=False):
    """
    Typed shape of one point in a count-over-time series; every key is optional (total=False).

    'date' is typed as a python date object, not a string.
    """
    date: dt.date
    count: int
    total_count: int
    ratio: float


class SourceCount(TypedDict, total=False):
    """Typed shape of a per-source count dict; every key is optional (total=False)."""
    source: str
    count: int


class LanguageCount(TypedDict, total=False):
    """Typed shape of a per-language count dict; every key is optional (total=False)."""
    language: str
    ratio: float
    value: int


class SourceWeekAttention(TypedDict, total=False):
    """
    Typed shape of one source/week attention dict; every key is optional (total=False).

    'week' is typed as a plain string (it is not a date object).
    """
    media_name: str
    week: str
    matching_stories: int
    total_stories: int
    ratio: float


class Collection(TypedDict, total=False):
    """Typed shape of a collection dict; every key is optional (total=False)."""
    id: int
    name: str
    platform: str
    notes: str
    public: bool
    featured: bool
    managed: bool
    monitored: bool


class Source(TypedDict, total=False):
    """Typed shape of a source dict; every key is optional (total=False)."""
    id: int
    name: str
    platform: str
    label: str
    homepage: str
    media_type: str
    pub_state: str
    pub_country: str
    primary_language: str


class Feed(TypedDict, total=False):
    """
    Typed shape of a feed dict; every key is optional (total=False).

    'modified_at' is typed as a plain string (it is not a datetime object).
    """
    id: int
    source_id: int
    url: str
    name: str
    modified_at: str


class OffsetPage(TypedDict, total=False):
count: int
next: str | None
previous: str | None
results: JSONList


class VersionInfo(TypedDict, total=False):
    """
    Typed shape of a version-info dict; every key is optional (total=False).

    'now' is typed as a float (NOTE(review): presumably epoch seconds - confirm against the API).
    """
    GIT_REV: str
    now: float
    version: str
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
requires = ["flit_core >=3.2,<4"]
build-backend = "flit_core.buildapi"

[tool.flit.sdist]
include = ["mediacloud/py.typed"]

[project]
name = "mediacloud"
version = "4.6.0"
Expand Down
Loading