1818)
1919
2020
# Timeout profile for requests that upload a payload.
TIMEOUT_WRITE_PERMISSIVE = httpx.Timeout(
    # Fallback applied to every phase that is not overridden below.
    timeout=60,
    connect=3,
    # No limit on the write phase: a large chunk can take arbitrarily
    # long to send, so we wait for as long as the write needs.
    write=None,
)
28+
# Timeout profile for requests where the wait is on the server's response.
TIMEOUT_READ_PERMISSIVE = httpx.Timeout(
    # Fallback applied to every phase that is not overridden below.
    timeout=60,
    connect=3,
    # Give the server enough time to process the request and for the
    # response to be read back.
    read=60,
)
35+
36+
2137class ArachnidShield (_ArachnidShield ):
2238 """A client to communicate with the Arachnid Shield API
2339 provided by the Canadian Centre for Child Protection.
@@ -30,13 +46,20 @@ def __init__(self, username: typing.Union[str, bytes], password: typing.Union[st
3046 super ().__init__ (username = username , password = password )
3147 self .__client = super ()._build_sync_http_client ()
3248
def scan_media_from_bytes(
    self,
    contents: typing.Union[bytes, io.BytesIO],
    mime_type: str,
    timeout: typing.Optional[httpx.Timeout] = TIMEOUT_WRITE_PERMISSIVE,
) -> ScannedMedia:
    """Scan raw media contents for matches against known child abuse media.

    Convenience wrapper: packs the arguments into a `ScanMediaFromBytes`
    and delegates to `scan_media_from_bytes_with_config`.

    Args:
        contents: The raw bytes that represent the media.
        mime_type: The mimetype of the media.
        timeout:
            Timeout configuration forwarded unchanged to
            `scan_media_from_bytes_with_config`.
            Defaults to `TIMEOUT_WRITE_PERMISSIVE`.

    Returns:
        The record of a successful media scan.

    Raises:
        `ArachnidShieldError` on a failed but complete interaction with
        the Arachnid Shield API, and `httpx.HTTPError` on any other connection failures.
    """
    # NOTE(review): an explicit `timeout=None` is passed through as-is; httpx
    # presumably treats a request-level `None` as "no timeout" — confirm.
    request_config = ScanMediaFromBytes(contents=contents, mime_type=mime_type)
    return self.scan_media_from_bytes_with_config(request_config, timeout=timeout)
4972
5073 def scan_media_from_file (
51- self , filepath : pathlib .Path , mime_type_override : typing .Optional [str ] = None
74+ self ,
75+ filepath : pathlib .Path ,
76+ mime_type_override : typing .Optional [str ] = None ,
77+ timeout : typing .Optional [httpx .Timeout ] = TIMEOUT_WRITE_PERMISSIVE ,
5278 ) -> ScannedMedia :
5379 """Given path to the media file to scan, and an optional
5480 value for mime_type that bypasses guessing it based of the filepath,
@@ -60,6 +86,8 @@ def scan_media_from_file(
6086 mime_type_override:
6187 If provided, will use this as the mime_type
6288 instead of guessing it from the filepath.
89+ timeout:
90+ If provided, will set a timeout configuration for the underlying http client.
6391
6492 Returns:
6593 The record of a successful media scan.
@@ -78,7 +106,7 @@ def scan_media_from_file(
78106 detail = (
79107 f"Failed to identify mime_type for { filepath } . "
80108 f"You may specify it explicitly by providing "
81- f"`force_mime_type `."
109+ f"`mime_type_override `."
82110 )
83111 )
84112 )
@@ -87,14 +115,20 @@ def scan_media_from_file(
87115 contents = f .read ()
88116
89117 config = ScanMediaFromBytes (contents = contents , mime_type = mime_type )
90- return self .scan_media_from_bytes_with_config (config )
118+ return self .scan_media_from_bytes_with_config (config , timeout = timeout )
91119
def scan_media_from_url(
    self,
    url: str,
    timeout: typing.Optional[httpx.Timeout] = TIMEOUT_READ_PERMISSIVE,
) -> ScannedMedia:
    """Scan the media hosted at an absolute url for matches against known harmful content.

    Convenience wrapper: packs the url into a `ScanMediaFromUrl`
    and delegates to `scan_media_from_url_with_config`.

    Args:
        url: The absolute URL to scan.
        timeout:
            Timeout configuration forwarded unchanged to
            `scan_media_from_url_with_config`.
            Defaults to `TIMEOUT_READ_PERMISSIVE`.

    Returns:
        The record of a successful media scan.

    Raises:
        `ArachnidShieldError` on a failed but complete interaction with
        the Arachnid Shield API, and `httpx.HTTPError` on any other connection failures.
    """
    # NOTE(review): an explicit `timeout=None` is passed through as-is; httpx
    # presumably treats a request-level `None` as "no timeout" — confirm.
    request_config = ScanMediaFromUrl(url=url)
    return self.scan_media_from_url_with_config(request_config, timeout=timeout)
107141
108- def scan_media_from_bytes_with_config (self , config : ScanMediaFromBytes ) -> ScannedMedia :
142+ def scan_media_from_bytes_with_config (
143+ self ,
144+ config : ScanMediaFromBytes ,
145+ timeout : typing .Optional [httpx .Timeout ] = TIMEOUT_WRITE_PERMISSIVE ,
146+ ) -> ScannedMedia :
109147 """Given the contents of some media, along with a mime type,
110148 scan the contents for matches against known child abuse media.
111149
112150 Args:
113151 config: The context that will be used to build the request.
152+ timeout:
153+ If provided, will set a timeout configuration for the underlying http client.
114154
115155 Returns:
116156 ScannedMedia: A record of a successful scan of the media.
@@ -125,6 +165,7 @@ def scan_media_from_bytes_with_config(self, config: ScanMediaFromBytes) -> Scann
125165 url = url ,
126166 headers = {"Content-Type" : config .mime_type },
127167 content = config .contents ,
168+ timeout = timeout ,
128169 )
129170
130171 if response .is_client_error or response .is_server_error :
@@ -134,12 +175,18 @@ def scan_media_from_bytes_with_config(self, config: ScanMediaFromBytes) -> Scann
134175 response .raise_for_status ()
135176 return ScannedMedia .from_dict (response .json ())
136177
137- def scan_media_from_url_with_config (self , config : ScanMediaFromUrl ) -> ScannedMedia :
178+ def scan_media_from_url_with_config (
179+ self ,
180+ config : ScanMediaFromUrl ,
181+ timeout : typing .Optional [httpx .Timeout ] = TIMEOUT_READ_PERMISSIVE ,
182+ ) -> ScannedMedia :
138183 """Given the absolute url that hosts the media we wish to scan,
139184 scan the contents of that url for matches against known harmful content.
140185
141186 Args:
142187 config: The context that will be used to build the request.
188+ timeout:
189+ If provided, will set a timeout configuration for the underlying http client.
143190
144191 Returns:
145192 ScannedMedia: A record of a successful scan of the media.
@@ -155,6 +202,7 @@ def scan_media_from_url_with_config(self, config: ScanMediaFromUrl) -> ScannedMe
155202 url = _url ,
156203 headers = {"Content-Type" : "application/json" },
157204 json = config .to_dict (),
205+ timeout = timeout ,
158206 )
159207
160208 if response .is_client_error or response .is_server_error :
@@ -164,11 +212,17 @@ def scan_media_from_url_with_config(self, config: ScanMediaFromUrl) -> ScannedMe
164212 response .raise_for_status ()
165213 return ScannedMedia .from_dict (response .json ())
166214
167- def scan_pdq_hashes (self , config : ScanMediaFromPdq ) -> ScannedPDQHashes :
215+ def scan_pdq_hashes (
216+ self ,
217+ config : ScanMediaFromPdq ,
218+ timeout : typing .Optional [httpx .Timeout ] = TIMEOUT_READ_PERMISSIVE ,
219+ ) -> ScannedPDQHashes :
168220 """
169221 Scan medias for CSAM based on their PDQ hashes.
170222 Args:
171223 config: The context that will be used to build the request.
224+ timeout:
225+ If provided, will set a timeout configuration for the underlying http client.
172226
173227 Returns:
174228 ScannedPDQHashes: A record of a batch of PDQ hashes that have been scanned by the Arachnid Shield API
@@ -183,6 +237,7 @@ def scan_pdq_hashes(self, config: ScanMediaFromPdq) -> ScannedPDQHashes:
183237 url = _url ,
184238 headers = {"Content-Type" : "application/json" },
185239 json = config .to_dict (),
240+ timeout = timeout ,
186241 )
187242 if response .is_client_error or response .is_server_error :
188243 error_detail = ErrorDetail .from_dict (response .json ())
@@ -203,13 +258,20 @@ def __init__(self, username: typing.Union[str, bytes], password: typing.Union[st
203258 super ().__init__ (username = username , password = password )
204259 self .__client = super ()._build_async_http_client ()
205260
async def scan_media_from_bytes(
    self,
    contents: typing.Union[bytes, io.BytesIO],
    mime_type: str,
    timeout: typing.Optional[httpx.Timeout] = TIMEOUT_WRITE_PERMISSIVE,
) -> ScannedMedia:
    """Scan raw media contents for matches against known child abuse media.

    Convenience wrapper: packs the arguments into a `ScanMediaFromBytes`
    and awaits `scan_media_from_bytes_with_config`.

    Args:
        contents: The raw bytes that represent the media.
        mime_type: The mimetype of the media.
        timeout:
            Timeout configuration forwarded unchanged to
            `scan_media_from_bytes_with_config`.
            Defaults to `TIMEOUT_WRITE_PERMISSIVE`.

    Returns:
        The record of a successful media scan.

    Raises:
        `ArachnidShieldError` on a failed but complete interaction with
        the Arachnid Shield API, and `httpx.HTTPError` on any other connection failures.
    """
    # NOTE(review): an explicit `timeout=None` is passed through as-is; httpx
    # presumably treats a request-level `None` as "no timeout" — confirm.
    request_config = ScanMediaFromBytes(contents=contents, mime_type=mime_type)
    return await self.scan_media_from_bytes_with_config(request_config, timeout=timeout)
223285
async def scan_media_from_url(
    self,
    url: str,
    timeout: typing.Optional[httpx.Timeout] = TIMEOUT_READ_PERMISSIVE,
) -> ScannedMedia:
    """Scan the media hosted at an absolute url for matches against known harmful content.

    Convenience wrapper: packs the url into a `ScanMediaFromUrl`
    and awaits `scan_media_from_url_with_config`.

    Args:
        url: The absolute URL to scan.
        timeout:
            Timeout configuration forwarded unchanged to
            `scan_media_from_url_with_config`.
            Defaults to `TIMEOUT_READ_PERMISSIVE`.

    Returns:
        The record of a successful media scan.

    Raises:
        `ArachnidShieldError` on a failed but complete interaction with
        the Arachnid Shield API, and `httpx.HTTPError` on any other connection failures.
    """
    # NOTE(review): an explicit `timeout=None` is passed through as-is; httpx
    # presumably treats a request-level `None` as "no timeout" — confirm.
    request_config = ScanMediaFromUrl(url=url)
    return await self.scan_media_from_url_with_config(request_config, timeout=timeout)
239307
240308 async def scan_media_from_file (
241- self , filepath : pathlib .Path , mime_type_override : typing .Optional [str ] = None
309+ self ,
310+ filepath : pathlib .Path ,
311+ mime_type_override : typing .Optional [str ] = None ,
312+ timeout : typing .Optional [httpx .Timeout ] = TIMEOUT_WRITE_PERMISSIVE ,
242313 ) -> ScannedMedia :
243314 """Given path to the media file to scan, and an optional
244315 value for mime_type that bypasses guessing it based of the filepath,
@@ -250,6 +321,8 @@ async def scan_media_from_file(
250321 mime_type_override:
251322 If provided, will use this as the mime_type
252323 instead of guessing it from the filepath.
324+ timeout:
325+ If provided, will set a timeout configuration for the underlying http client.
253326
254327 Returns:
255328 The record of a successful media scan.
@@ -268,7 +341,7 @@ async def scan_media_from_file(
268341 detail = (
269342 f"Failed to identify mime_type for { filepath } . "
270343 f"You may specify it explicitly by providing "
271- f"`force_mime_type `."
344+ f"`mime_type_override `."
272345 )
273346 )
274347 )
@@ -277,14 +350,20 @@ async def scan_media_from_file(
277350 contents = f .read ()
278351
279352 config = ScanMediaFromBytes (contents = contents , mime_type = mime_type )
280- return await self .scan_media_from_bytes_with_config (config )
353+ return await self .scan_media_from_bytes_with_config (config , timeout = timeout )
281354
282- async def scan_media_from_bytes_with_config (self , config : ScanMediaFromBytes ) -> ScannedMedia :
355+ async def scan_media_from_bytes_with_config (
356+ self ,
357+ config : ScanMediaFromBytes ,
358+ timeout : typing .Optional [httpx .Timeout ] = TIMEOUT_WRITE_PERMISSIVE ,
359+ ) -> ScannedMedia :
283360 """Given the contents of some media, along with a mime type,
284361 scan the contents for matches against known child abuse media.
285362
286363 Args:
287364 config: The context that will be used to build the request.
365+ timeout:
366+ If provided, will set a timeout configuration for the underlying http client.
288367
289368 Returns:
290369 ScannedMedia: A record of a successful scan of the media.
@@ -300,6 +379,7 @@ async def scan_media_from_bytes_with_config(self, config: ScanMediaFromBytes) ->
300379 url = url ,
301380 headers = {"Content-Type" : config .mime_type },
302381 content = config .contents ,
382+ timeout = timeout ,
303383 )
304384
305385 if response .is_client_error or response .is_server_error :
@@ -309,12 +389,18 @@ async def scan_media_from_bytes_with_config(self, config: ScanMediaFromBytes) ->
309389 response .raise_for_status ()
310390 return ScannedMedia .from_dict (response .json ())
311391
312- async def scan_media_from_url_with_config (self , config : ScanMediaFromUrl ) -> ScannedMedia :
392+ async def scan_media_from_url_with_config (
393+ self ,
394+ config : ScanMediaFromUrl ,
395+ timeout : typing .Optional [httpx .Timeout ] = TIMEOUT_READ_PERMISSIVE ,
396+ ) -> ScannedMedia :
313397 """Given the absolute url that hosts the media we wish to scan,
314398 scan the contents of that url for matches against known harmful content.
315399
316400 Args:
317401 config: The context that will be used to build the request.
402+ timeout:
403+ If provided, will set a timeout configuration for the underlying http client.
318404
319405 Returns:
320406 ScannedMedia: A record of a successful scan of the media.
@@ -330,6 +416,7 @@ async def scan_media_from_url_with_config(self, config: ScanMediaFromUrl) -> Sca
330416 url = _url ,
331417 headers = {"Content-Type" : "application/json" },
332418 json = config .to_dict (),
419+ timeout = timeout ,
333420 )
334421
335422 if response .is_client_error or response .is_server_error :
@@ -339,11 +426,17 @@ async def scan_media_from_url_with_config(self, config: ScanMediaFromUrl) -> Sca
339426 response .raise_for_status ()
340427 return ScannedMedia .from_dict (response .json ())
341428
342- async def scan_pdq_hashes (self , config : ScanMediaFromPdq ) -> ScannedPDQHashes :
429+ async def scan_pdq_hashes (
430+ self ,
431+ config : ScanMediaFromPdq ,
432+ timeout : typing .Optional [httpx .Timeout ] = TIMEOUT_READ_PERMISSIVE ,
433+ ) -> ScannedPDQHashes :
343434 """
344435 Scan medias for CSAM based on their PDQ hashes.
345436 Args:
346437 config: The context that will be used to build the request.
438+ timeout:
439+ If provided, will set a timeout configuration for the underlying http client.
347440
348441 Returns:
349442 ScannedPDQHashes: A record of a batch of PDQ hashes that have been scanned by the Arachnid Shield API
@@ -358,6 +451,7 @@ async def scan_pdq_hashes(self, config: ScanMediaFromPdq) -> ScannedPDQHashes:
358451 url = _url ,
359452 headers = {"Content-Type" : "application/json" },
360453 json = config .to_dict (),
454+ timeout = timeout ,
361455 )
362456 if response .is_client_error or response .is_server_error :
363457 error_detail = ErrorDetail .from_dict (response .json ())
0 commit comments