77import zipfile
88from collections .abc import Awaitable , Callable , Iterator
99from contextlib import contextmanager
10- from pathlib import Path , PurePosixPath
10+ from pathlib import Path , PurePosixPath , PureWindowsPath
1111from typing import Literal , cast
1212
1313from ..errors import ExecNonZeroError , WorkspaceArchiveWriteError
1414from ..files import EntryKind , FileEntry
15- from ..util .tar_utils import UnsafeTarMemberError , safe_tar_member_rel_path
15+ from ..util .tar_utils import UnsafeTarMemberError , safe_tar_member_rel_path , validate_tarfile
1616
1717
1818class UnsafeZipMemberError (ValueError ):
@@ -46,6 +46,7 @@ async def extract_tar_archive(
4646 child_entry_cache : dict [Path , dict [str , EntryKind ]] = {}
4747 try :
4848 with tarfile .open (fileobj = data , mode = "r:*" ) as archive :
49+ validate_tarfile (archive , allow_symlinks = False )
4950 for member in archive .getmembers ():
5051 rel_path = safe_tar_member_rel_path (member )
5152 if rel_path is None :
@@ -112,6 +113,7 @@ async def extract_zip_archive(
112113 try :
113114 with zipfile_compatible_stream (data ) as zip_data :
114115 with zipfile .ZipFile (zip_data ) as archive :
116+ validate_zipfile (archive )
115117 for member in archive .infolist ():
116118 rel_path = safe_zip_member_rel_path (member )
117119 if rel_path is None :
@@ -281,6 +283,12 @@ def safe_zip_member_rel_path(member: zipfile.ZipInfo) -> Path | None:
281283 if member .filename in ("" , "." , "./" ):
282284 return None
283285
286+ windows_path = PureWindowsPath (member .filename )
287+ if windows_path .drive :
288+ raise UnsafeZipMemberError (member = member .filename , reason = "windows drive path" )
289+ if "\\ " in member .filename :
290+ raise UnsafeZipMemberError (member = member .filename , reason = "windows path separator" )
291+
284292 rel = PurePosixPath (member .filename )
285293 if rel .is_absolute ():
286294 raise UnsafeZipMemberError (member = member .filename , reason = "absolute path" )
@@ -294,6 +302,36 @@ def safe_zip_member_rel_path(member: zipfile.ZipInfo) -> Path | None:
294302 return Path (* rel .parts )
295303
296304
def validate_zipfile(archive: zipfile.ZipFile) -> None:
    """Check a ZIP archive's entry layout before anything is extracted.

    Every entry is normalised with ``safe_zip_member_rel_path``; entries for
    which that helper returns ``None`` are ignored. The remaining entries are
    then checked for path collisions and for paths that would have to pass
    through an entry that is not a directory.

    Args:
        archive: An open ZIP archive whose ``infolist()`` is inspected.

    Raises:
        UnsafeZipMemberError: If two entries normalise to the same relative
            path (unless both are directory entries), or if an ancestor of an
            entry's path is itself present in the archive as a non-directory
            entry. Errors raised by ``safe_zip_member_rel_path`` propagate
            unchanged.
    """
    seen: dict[Path, zipfile.ZipInfo] = {}
    accepted: list[tuple[zipfile.ZipInfo, Path]] = []

    # Pass 1: normalise each entry and reject colliding paths.
    for info in archive.infolist():
        target = safe_zip_member_rel_path(info)
        if target is None:
            continue

        earlier = seen.get(target)
        if earlier is not None:
            # A repeated directory entry is tolerated; any other repeat is not.
            both_dirs = earlier.is_dir() and info.is_dir()
            if not both_dirs:
                raise UnsafeZipMemberError(
                    member=info.filename,
                    reason=f"duplicate archive path: {target.as_posix()} ",
                )

        seen[target] = info
        accepted.append((info, target))

    # Pass 2: runs only after the map is complete, so an offending ancestor is
    # detected regardless of where it appears in the archive's entry order.
    root = Path()
    for info, target in accepted:
        for ancestor in target.parents:
            if ancestor == root:
                # Reached the top of the relative path; nothing above to check.
                break
            owner = seen.get(ancestor)
            if owner is None or owner.is_dir():
                continue
            raise UnsafeZipMemberError(
                member=info.filename,
                reason=f"archive path descends through non-directory: {ancestor.as_posix()} ",
            )
334+
297335class _ZipFileStreamAdapter (io .IOBase ):
298336 # Python 3.10's zipfile._SharedFile reads `file.seekable` directly, so this
299337 # adapter keeps ZIP-compatible random-access streams working across versions.
0 commit comments