1818)
1919
2020
# Default timeout profile for the scan methods that upload media contents
# (the bytes/file variants use this as their `timeout` default).
TIMEOUT_WRITE_PERMISSIVE = httpx.Timeout(
    timeout=60,  # Baseline for every phase that is not overridden below.
    connect=3,
    # No write deadline: pushing a large chunk can take arbitrarily long,
    # so writes are allowed to run until they finish.
    write=None,
)
28+
# Default timeout profile for the scan methods that send a small JSON request
# (the URL and PDQ-hash variants use this as their `timeout` default).
TIMEOUT_READ_PERMISSIVE = httpx.Timeout(
    timeout=60,  # Baseline for every phase that is not overridden below.
    connect=3,
    # Give the server enough time to process the request and for the
    # response to be read back.
    read=60,
)
35+
36+
2137class ArachnidShield (_ArachnidShield ):
2238 """A client to communicate with the Arachnid Shield API
2339 provided by the Canadian Centre for Child Protection.
@@ -34,7 +50,7 @@ def scan_media_from_bytes(
3450 self ,
3551 contents : typing .Union [bytes , io .BytesIO ],
3652 mime_type : str ,
37- timeout : typing .Optional [httpx .Timeout ] = None ,
53+ timeout : typing .Optional [httpx .Timeout ] = TIMEOUT_WRITE_PERMISSIVE ,
3854 ) -> ScannedMedia :
3955 """Given the contents of some media, along with a mime type,
4056 scan the contents for matches against known child abuse media.
@@ -43,8 +59,7 @@ def scan_media_from_bytes(
4359 contents: The raw bytes that represent the media.
4460 mime_type: The mimetype of the media.
4561 timeout:
46- If provided, will set a timeout configuration for the underlying http client.
47- Otherwise, will disable the timeout entirely.
62+ If provided, will set a timeout configuration for the underlying http client.
4863
4964 Returns:
5065 The record of a successful media scan.
@@ -59,7 +74,7 @@ def scan_media_from_file(
5974 self ,
6075 filepath : pathlib .Path ,
6176 mime_type_override : typing .Optional [str ] = None ,
62- timeout : typing .Optional [httpx .Timeout ] = None ,
77+ timeout : typing .Optional [httpx .Timeout ] = TIMEOUT_WRITE_PERMISSIVE ,
6378 ) -> ScannedMedia :
6479 """Given path to the media file to scan, and an optional
6580 value for mime_type that bypasses guessing it based on the filepath,
@@ -72,8 +87,7 @@ def scan_media_from_file(
7287 If provided, will use this as the mime_type
7388 instead of guessing it from the filepath.
7489 timeout:
75- If provided, will set a timeout configuration for the underlying http client.
76- Otherwise, will disable the timeout entirely.
90+ If provided, will set a timeout configuration for the underlying http client.
7791
7892 Returns:
7993 The record of a successful media scan.
@@ -103,12 +117,18 @@ def scan_media_from_file(
103117 config = ScanMediaFromBytes (contents = contents , mime_type = mime_type )
104118 return self .scan_media_from_bytes_with_config (config , timeout = timeout )
105119
def scan_media_from_url(
    self,
    url: str,
    timeout: typing.Optional[httpx.Timeout] = TIMEOUT_READ_PERMISSIVE,
) -> ScannedMedia:
    """Scan the media hosted at an absolute URL for matches against known harmful content.

    This is a convenience wrapper: it wraps `url` in a `ScanMediaFromUrl`
    config and delegates to `scan_media_from_url_with_config`.

    Args:
        url: The absolute URL to scan.
        timeout:
            Timeout configuration forwarded unchanged to
            `scan_media_from_url_with_config`. Defaults to
            `TIMEOUT_READ_PERMISSIVE`.
            NOTE(review): `None` is forwarded as-is; presumably that disables
            the timeout in the underlying http client -- confirm.

    Returns:
        The record of a successful media scan.

    Raises:
        `ArachnidShieldError` on a failed but complete interaction with
        the Arachnid Shield API, and `httpx.HTTPError` on any other connection failures.
    """
    request_config = ScanMediaFromUrl(url=url)
    return self.scan_media_from_url_with_config(request_config, timeout=timeout)
121141
122142 def scan_media_from_bytes_with_config (
123143 self ,
124144 config : ScanMediaFromBytes ,
125- timeout : typing .Optional [httpx .Timeout ] = httpx . Timeout ( 5 )
145+ timeout : typing .Optional [httpx .Timeout ] = TIMEOUT_WRITE_PERMISSIVE ,
126146 ) -> ScannedMedia :
127147 """Given the contents of some media, along with a mime type,
128148 scan the contents for matches against known child abuse media.
129149
130150 Args:
131151 config: The context that will be used to build the request.
132152 timeout:
133- If provided explicitly, a configuration passed to the underlying http client.
134- It defaults to 5 seconds, and can be disabled by setting it to `None`.
153+ If provided, will set a timeout configuration for the underlying http client.
135154
136155 Returns:
137156 ScannedMedia: A record of a successful scan of the media.
@@ -156,12 +175,18 @@ def scan_media_from_bytes_with_config(
156175 response .raise_for_status ()
157176 return ScannedMedia .from_dict (response .json ())
158177
159- def scan_media_from_url_with_config (self , config : ScanMediaFromUrl ) -> ScannedMedia :
178+ def scan_media_from_url_with_config (
179+ self ,
180+ config : ScanMediaFromUrl ,
181+ timeout : typing .Optional [httpx .Timeout ] = TIMEOUT_READ_PERMISSIVE ,
182+ ) -> ScannedMedia :
160183 """Given the absolute url that hosts the media we wish to scan,
161184 scan the contents of that url for matches against known harmful content.
162185
163186 Args:
164187 config: The context that will be used to build the request.
188+ timeout:
189+ If provided, will set a timeout configuration for the underlying http client.
165190
166191 Returns:
167192 ScannedMedia: A record of a successful scan of the media.
@@ -177,6 +202,7 @@ def scan_media_from_url_with_config(self, config: ScanMediaFromUrl) -> ScannedMe
177202 url = _url ,
178203 headers = {"Content-Type" : "application/json" },
179204 json = config .to_dict (),
205+ timeout = timeout ,
180206 )
181207
182208 if response .is_client_error or response .is_server_error :
@@ -186,11 +212,17 @@ def scan_media_from_url_with_config(self, config: ScanMediaFromUrl) -> ScannedMe
186212 response .raise_for_status ()
187213 return ScannedMedia .from_dict (response .json ())
188214
189- def scan_pdq_hashes (self , config : ScanMediaFromPdq ) -> ScannedPDQHashes :
215+ def scan_pdq_hashes (
216+ self ,
217+ config : ScanMediaFromPdq ,
218+ timeout : typing .Optional [httpx .Timeout ] = TIMEOUT_READ_PERMISSIVE ,
219+ ) -> ScannedPDQHashes :
190220 """
191221 Scan medias for CSAM based on their PDQ hashes.
192222 Args:
193223 config: The context that will be used to build the request.
224+ timeout:
225+ If provided, will set a timeout configuration for the underlying http client.
194226
195227 Returns:
196228 ScannedPDQHashes: A record of a batch of PDQ hashes that have been scanned by the Arachnid Shield API
@@ -205,6 +237,7 @@ def scan_pdq_hashes(self, config: ScanMediaFromPdq) -> ScannedPDQHashes:
205237 url = _url ,
206238 headers = {"Content-Type" : "application/json" },
207239 json = config .to_dict (),
240+ timeout = timeout ,
208241 )
209242 if response .is_client_error or response .is_server_error :
210243 error_detail = ErrorDetail .from_dict (response .json ())
@@ -229,7 +262,7 @@ async def scan_media_from_bytes(
229262 self ,
230263 contents : typing .Union [bytes , io .BytesIO ],
231264 mime_type : str ,
232- timeout : typing .Optional [httpx .Timeout ] = None ,
265+ timeout : typing .Optional [httpx .Timeout ] = TIMEOUT_WRITE_PERMISSIVE ,
233266 ) -> ScannedMedia :
234267 """Given the contents of some media, along with a mime type,
235268 scan the contents for matches against known child abuse media.
@@ -238,8 +271,7 @@ async def scan_media_from_bytes(
238271 contents: The raw bytes that represent the media.
239272 mime_type: The mimetype of the media.
240273 timeout:
241- If provided, will set a timeout configuration for the underlying http client.
242- Otherwise, will disable the timeout entirely.
274+ If provided, will set a timeout configuration for the underlying http client.
243275
244276 Returns:
245277 The record of a successful media scan.
@@ -251,12 +283,18 @@ async def scan_media_from_bytes(
251283
252284 return await self .scan_media_from_bytes_with_config (ScanMediaFromBytes (contents = contents , mime_type = mime_type ), timeout = timeout )
253285
async def scan_media_from_url(
    self,
    url: str,
    timeout: typing.Optional[httpx.Timeout] = TIMEOUT_READ_PERMISSIVE,
) -> ScannedMedia:
    """Scan the media hosted at an absolute URL for matches against known harmful content.

    This is a convenience wrapper: it wraps `url` in a `ScanMediaFromUrl`
    config and awaits `scan_media_from_url_with_config`.

    Args:
        url: The absolute URL to scan.
        timeout:
            Timeout configuration forwarded unchanged to
            `scan_media_from_url_with_config`. Defaults to
            `TIMEOUT_READ_PERMISSIVE`.
            NOTE(review): `None` is forwarded as-is; presumably that disables
            the timeout in the underlying http client -- confirm.

    Returns:
        The record of a successful media scan.

    Raises:
        `ArachnidShieldError` on a failed but complete interaction with
        the Arachnid Shield API, and `httpx.HTTPError` on any other connection failures.
    """
    request_config = ScanMediaFromUrl(url=url)
    return await self.scan_media_from_url_with_config(request_config, timeout=timeout)
269307
270308 async def scan_media_from_file (
271309 self ,
272310 filepath : pathlib .Path ,
273311 mime_type_override : typing .Optional [str ] = None ,
274- timeout : typing .Optional [httpx .Timeout ] = None ,
312+ timeout : typing .Optional [httpx .Timeout ] = TIMEOUT_WRITE_PERMISSIVE ,
275313 ) -> ScannedMedia :
276314 """Given path to the media file to scan, and an optional
277315 value for mime_type that bypasses guessing it based on the filepath,
@@ -284,8 +322,7 @@ async def scan_media_from_file(
284322 If provided, will use this as the mime_type
285323 instead of guessing it from the filepath.
286324 timeout:
287- If provided, will set a timeout configuration for the underlying http client.
288- Otherwise, will disable the timeout entirely.
325+ If provided, will set a timeout configuration for the underlying http client.
289326
290327 Returns:
291328 The record of a successful media scan.
@@ -318,16 +355,15 @@ async def scan_media_from_file(
318355 async def scan_media_from_bytes_with_config (
319356 self ,
320357 config : ScanMediaFromBytes ,
321- timeout : typing .Optional [httpx .Timeout ] = httpx . Timeout ( 5 )
358+ timeout : typing .Optional [httpx .Timeout ] = TIMEOUT_WRITE_PERMISSIVE ,
322359 ) -> ScannedMedia :
323360 """Given the contents of some media, along with a mime type,
324361 scan the contents for matches against known child abuse media.
325362
326363 Args:
327364 config: The context that will be used to build the request.
328365 timeout:
329- If provided explicitly, a configuration passed to the underlying http client.
330- It defaults to 5 seconds, and can be disabled by setting it to `None`.
366+ If provided, will set a timeout configuration for the underlying http client.
331367
332368 Returns:
333369 ScannedMedia: A record of a successful scan of the media.
@@ -353,12 +389,18 @@ async def scan_media_from_bytes_with_config(
353389 response .raise_for_status ()
354390 return ScannedMedia .from_dict (response .json ())
355391
356- async def scan_media_from_url_with_config (self , config : ScanMediaFromUrl ) -> ScannedMedia :
392+ async def scan_media_from_url_with_config (
393+ self ,
394+ config : ScanMediaFromUrl ,
395+ timeout : typing .Optional [httpx .Timeout ] = TIMEOUT_READ_PERMISSIVE ,
396+ ) -> ScannedMedia :
357397 """Given the absolute url that hosts the media we wish to scan,
358398 scan the contents of that url for matches against known harmful content.
359399
360400 Args:
361401 config: The context that will be used to build the request.
402+ timeout:
403+ If provided, will set a timeout configuration for the underlying http client.
362404
363405 Returns:
364406 ScannedMedia: A record of a successful scan of the media.
@@ -374,6 +416,7 @@ async def scan_media_from_url_with_config(self, config: ScanMediaFromUrl) -> Sca
374416 url = _url ,
375417 headers = {"Content-Type" : "application/json" },
376418 json = config .to_dict (),
419+ timeout = timeout ,
377420 )
378421
379422 if response .is_client_error or response .is_server_error :
@@ -383,11 +426,17 @@ async def scan_media_from_url_with_config(self, config: ScanMediaFromUrl) -> Sca
383426 response .raise_for_status ()
384427 return ScannedMedia .from_dict (response .json ())
385428
386- async def scan_pdq_hashes (self , config : ScanMediaFromPdq ) -> ScannedPDQHashes :
429+ async def scan_pdq_hashes (
430+ self ,
431+ config : ScanMediaFromPdq ,
432+ timeout : typing .Optional [httpx .Timeout ] = TIMEOUT_READ_PERMISSIVE ,
433+ ) -> ScannedPDQHashes :
387434 """
388435 Scan medias for CSAM based on their PDQ hashes.
389436 Args:
390437 config: The context that will be used to build the request.
438+ timeout:
439+ If provided, will set a timeout configuration for the underlying http client.
391440
392441 Returns:
393442 ScannedPDQHashes: A record of a batch of PDQ hashes that have been scanned by the Arachnid Shield API
@@ -402,6 +451,7 @@ async def scan_pdq_hashes(self, config: ScanMediaFromPdq) -> ScannedPDQHashes:
402451 url = _url ,
403452 headers = {"Content-Type" : "application/json" },
404453 json = config .to_dict (),
454+ timeout = timeout ,
405455 )
406456 if response .is_client_error or response .is_server_error :
407457 error_detail = ErrorDetail .from_dict (response .json ())
0 commit comments