Skip to content

Commit 2eb8713

Browse files
authored
fix: tighten tar and zip member validation (#3028)
1 parent 9a207b6 commit 2eb8713

4 files changed

Lines changed: 189 additions & 4 deletions

File tree

src/agents/sandbox/session/archive_extraction.py

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,12 @@
77
import zipfile
88
from collections.abc import Awaitable, Callable, Iterator
99
from contextlib import contextmanager
10-
from pathlib import Path, PurePosixPath
10+
from pathlib import Path, PurePosixPath, PureWindowsPath
1111
from typing import Literal, cast
1212

1313
from ..errors import ExecNonZeroError, WorkspaceArchiveWriteError
1414
from ..files import EntryKind, FileEntry
15-
from ..util.tar_utils import UnsafeTarMemberError, safe_tar_member_rel_path
15+
from ..util.tar_utils import UnsafeTarMemberError, safe_tar_member_rel_path, validate_tarfile
1616

1717

1818
class UnsafeZipMemberError(ValueError):
@@ -46,6 +46,7 @@ async def extract_tar_archive(
4646
child_entry_cache: dict[Path, dict[str, EntryKind]] = {}
4747
try:
4848
with tarfile.open(fileobj=data, mode="r:*") as archive:
49+
validate_tarfile(archive, allow_symlinks=False)
4950
for member in archive.getmembers():
5051
rel_path = safe_tar_member_rel_path(member)
5152
if rel_path is None:
@@ -112,6 +113,7 @@ async def extract_zip_archive(
112113
try:
113114
with zipfile_compatible_stream(data) as zip_data:
114115
with zipfile.ZipFile(zip_data) as archive:
116+
validate_zipfile(archive)
115117
for member in archive.infolist():
116118
rel_path = safe_zip_member_rel_path(member)
117119
if rel_path is None:
@@ -281,6 +283,12 @@ def safe_zip_member_rel_path(member: zipfile.ZipInfo) -> Path | None:
281283
if member.filename in ("", ".", "./"):
282284
return None
283285

286+
windows_path = PureWindowsPath(member.filename)
287+
if windows_path.drive:
288+
raise UnsafeZipMemberError(member=member.filename, reason="windows drive path")
289+
if "\\" in member.filename:
290+
raise UnsafeZipMemberError(member=member.filename, reason="windows path separator")
291+
284292
rel = PurePosixPath(member.filename)
285293
if rel.is_absolute():
286294
raise UnsafeZipMemberError(member=member.filename, reason="absolute path")
@@ -294,6 +302,36 @@ def safe_zip_member_rel_path(member: zipfile.ZipInfo) -> Path | None:
294302
return Path(*rel.parts)
295303

296304

305+
def validate_zipfile(archive: zipfile.ZipFile) -> None:
306+
members_by_rel_path: dict[Path, zipfile.ZipInfo] = {}
307+
members: list[tuple[zipfile.ZipInfo, Path]] = []
308+
309+
for member in archive.infolist():
310+
rel_path = safe_zip_member_rel_path(member)
311+
if rel_path is None:
312+
continue
313+
314+
previous = members_by_rel_path.get(rel_path)
315+
if previous is not None and not (previous.is_dir() and member.is_dir()):
316+
raise UnsafeZipMemberError(
317+
member=member.filename,
318+
reason=f"duplicate archive path: {rel_path.as_posix()}",
319+
)
320+
members_by_rel_path[rel_path] = member
321+
members.append((member, rel_path))
322+
323+
for member, rel_path in members:
324+
for parent in rel_path.parents:
325+
if parent == Path():
326+
break
327+
parent_member = members_by_rel_path.get(parent)
328+
if parent_member is not None and not parent_member.is_dir():
329+
raise UnsafeZipMemberError(
330+
member=member.filename,
331+
reason=f"archive path descends through non-directory: {parent.as_posix()}",
332+
)
333+
334+
297335
class _ZipFileStreamAdapter(io.IOBase):
298336
# Python 3.10's zipfile._SharedFile reads `file.seekable` directly, so this
299337
# adapter keeps ZIP-compatible random-access streams working across versions.

src/agents/sandbox/util/tar_utils.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import tarfile
88
import tempfile
99
from collections.abc import Iterable
10-
from pathlib import Path, PurePosixPath
10+
from pathlib import Path, PurePosixPath, PureWindowsPath
1111

1212

1313
class UnsafeTarMemberError(ValueError):
@@ -27,6 +27,14 @@ def _validate_archive_root_member(member: tarfile.TarInfo) -> None:
2727
raise UnsafeTarMemberError(member=member.name, reason="archive root member must be directory")
2828

2929

30+
def _raise_if_windows_member_path(member_name: str) -> None:
31+
windows_path = PureWindowsPath(member_name)
32+
if windows_path.drive:
33+
raise UnsafeTarMemberError(member=member_name, reason="windows drive path")
34+
if "\\" in member_name:
35+
raise UnsafeTarMemberError(member=member_name, reason="windows path separator")
36+
37+
3038
def safe_tar_member_rel_path(
3139
member: tarfile.TarInfo,
3240
*,
@@ -37,6 +45,7 @@ def safe_tar_member_rel_path(
3745
if member.name in ("", ".", "./"):
3846
_validate_archive_root_member(member)
3947
return None
48+
_raise_if_windows_member_path(member.name)
4049
rel = PurePosixPath(member.name)
4150
if rel.is_absolute():
4251
raise UnsafeTarMemberError(member=member.name, reason="absolute path")
@@ -189,6 +198,7 @@ def validate_tarfile(
189198
reject_symlink_rel_paths: Iterable[str | Path] = (),
190199
skip_rel_paths: Iterable[str | Path] = (),
191200
root_name: str | None = None,
201+
allow_symlinks: bool = True,
192202
) -> None:
193203
"""Validate a workspace tar before handing it to a local or remote extractor.
194204
@@ -212,7 +222,7 @@ def validate_tarfile(
212222
root_name=root_name,
213223
):
214224
continue
215-
rel_path = safe_tar_member_rel_path(member, allow_symlinks=True)
225+
rel_path = safe_tar_member_rel_path(member, allow_symlinks=allow_symlinks)
216226
if rel_path is None:
217227
continue
218228

@@ -242,6 +252,12 @@ def validate_tarfile(
242252
member=member.name,
243253
reason=f"archive path descends through symlink: {parent.as_posix()}",
244254
)
255+
parent_member = members_by_rel_path.get(parent)
256+
if parent_member is not None and not parent_member.isdir():
257+
raise UnsafeTarMemberError(
258+
member=member.name,
259+
reason=f"archive path descends through non-directory: {parent.as_posix()}",
260+
)
245261

246262

247263
def validate_tar_bytes(

tests/sandbox/test_extract.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,28 @@ def _zip_bytes(*, members: dict[str, bytes]) -> io.BytesIO:
134134
return buf
135135

136136

137+
async def _assert_extract_rejects_member(
138+
tmp_path: Path,
139+
archive_name: str,
140+
data: io.IOBase,
141+
*,
142+
expected_member: str,
143+
expected_reason: str,
144+
) -> Path:
145+
session = _build_session(tmp_path)
146+
await session.start()
147+
try:
148+
workspace = Path(session.state.manifest.root)
149+
with pytest.raises(WorkspaceArchiveWriteError) as exc_info:
150+
await session.extract(archive_name, data)
151+
152+
assert exc_info.value.context["member"] == expected_member
153+
assert exc_info.value.context["reason"] == expected_reason
154+
return workspace
155+
finally:
156+
await session.shutdown()
157+
158+
137159
@pytest.mark.asyncio
138160
async def test_extract_tar_writes_archive_and_unpacks_contents(tmp_path: Path) -> None:
139161
session = _build_session(tmp_path)
@@ -300,6 +322,86 @@ async def test_extract_zip_rejects_symlinked_parent_paths(tmp_path: Path) -> Non
300322
await session.shutdown()
301323

302324

325+
@pytest.mark.asyncio
326+
async def test_extract_tar_rejects_windows_drive_member_paths(tmp_path: Path) -> None:
327+
await _assert_extract_rejects_member(
328+
tmp_path,
329+
"bundle.tar",
330+
_tar_bytes(members={"C:/tmp/evil.txt": b"evil"}),
331+
expected_member="C:/tmp/evil.txt",
332+
expected_reason="windows drive path",
333+
)
334+
335+
336+
@pytest.mark.asyncio
337+
async def test_extract_zip_rejects_windows_drive_member_paths(tmp_path: Path) -> None:
338+
await _assert_extract_rejects_member(
339+
tmp_path,
340+
"bundle.zip",
341+
_zip_bytes(members={r"C:\tmp\evil.txt": b"evil"}),
342+
expected_member=r"C:\tmp\evil.txt",
343+
expected_reason="windows drive path",
344+
)
345+
346+
347+
@pytest.mark.asyncio
348+
async def test_extract_tar_rejects_windows_separator_member_paths(tmp_path: Path) -> None:
349+
await _assert_extract_rejects_member(
350+
tmp_path,
351+
"bundle.tar",
352+
_tar_bytes(members={r"..\evil.txt": b"evil"}),
353+
expected_member=r"..\evil.txt",
354+
expected_reason="windows path separator",
355+
)
356+
357+
358+
@pytest.mark.asyncio
359+
async def test_extract_zip_rejects_windows_separator_member_paths(tmp_path: Path) -> None:
360+
await _assert_extract_rejects_member(
361+
tmp_path,
362+
"bundle.zip",
363+
_zip_bytes(members={r"\evil.txt": b"evil"}),
364+
expected_member=r"\evil.txt",
365+
expected_reason="windows path separator",
366+
)
367+
368+
369+
@pytest.mark.asyncio
370+
async def test_extract_tar_rejects_member_under_non_directory_member(tmp_path: Path) -> None:
371+
workspace = await _assert_extract_rejects_member(
372+
tmp_path,
373+
"bundle.tar",
374+
_tar_bytes(
375+
members={
376+
"nested/hello.txt": b"hello from tar",
377+
"nested": b"not a directory",
378+
}
379+
),
380+
expected_member="nested/hello.txt",
381+
expected_reason="archive path descends through non-directory: nested",
382+
)
383+
384+
assert not (workspace / "nested").exists()
385+
386+
387+
@pytest.mark.asyncio
388+
async def test_extract_zip_rejects_member_under_non_directory_member(tmp_path: Path) -> None:
389+
workspace = await _assert_extract_rejects_member(
390+
tmp_path,
391+
"bundle.zip",
392+
_zip_bytes(
393+
members={
394+
"nested/hello.txt": b"hello from zip",
395+
"nested": b"not a directory",
396+
}
397+
),
398+
expected_member="nested/hello.txt",
399+
expected_reason="archive path descends through non-directory: nested",
400+
)
401+
402+
assert not (workspace / "nested").exists()
403+
404+
303405
@pytest.mark.asyncio
304406
async def test_unix_local_persist_workspace_excludes_resolved_mount_path(tmp_path: Path) -> None:
305407
workspace_root = tmp_path / "workspace"

tests/sandbox/test_tar_utils.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,35 @@ def test_validate_tar_bytes_rejects_root_symlink() -> None:
112112
validate_tar_bytes(raw)
113113

114114

115+
@pytest.mark.parametrize("member_name", ["C:/tmp/evil.txt", r"C:\tmp\evil.txt"])
116+
def test_validate_tar_bytes_rejects_windows_drive_member_paths(member_name: str) -> None:
117+
raw = _tar_bytes(_file(member_name, b"evil"))
118+
119+
with pytest.raises(UnsafeTarMemberError, match="windows drive path"):
120+
validate_tar_bytes(raw)
121+
122+
123+
@pytest.mark.parametrize("member_name", [r"..\evil.txt", r"\evil.txt", r"nested\evil.txt"])
124+
def test_validate_tar_bytes_rejects_windows_separator_member_paths(member_name: str) -> None:
125+
raw = _tar_bytes(_file(member_name, b"evil"))
126+
127+
with pytest.raises(UnsafeTarMemberError, match="windows path separator"):
128+
validate_tar_bytes(raw)
129+
130+
131+
def test_validate_tar_bytes_rejects_member_under_non_directory_member() -> None:
132+
raw = _tar_bytes(
133+
_file("nested/hello.txt", b"hello"),
134+
_file("nested", b"not a directory"),
135+
)
136+
137+
with pytest.raises(
138+
UnsafeTarMemberError,
139+
match="archive path descends through non-directory: nested",
140+
):
141+
validate_tar_bytes(raw)
142+
143+
115144
def test_strip_tar_member_prefix_returns_workspace_relative_archive() -> None:
116145
raw = _tar_bytes(
117146
_dir("workspace"),

0 commit comments

Comments
 (0)