Skip to content

Commit 7e9e4b3

Browse files
committed
Added add-files option to CLI.
1 parent 2364490 commit 7e9e4b3

2 files changed

Lines changed: 152 additions & 0 deletions

File tree

pyiceberg/cli/console.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,59 @@ def files(ctx: Context, identifier: str, history: bool) -> None:
179179
output.files(catalog_table, history)
180180

181181

182+
@run.command("add-files")
@click.argument("identifier")
@click.argument("file_paths", nargs=-1)
@click.option("--branch", default=None, help="Branch to add files to (default: main).")
@click.option(
    "--no-check-duplicates",
    is_flag=True,
    help="Skip check for files already referenced by the table.",
)
@click.option(
    "--property",
    "-p",
    "properties",
    multiple=True,
    help="Snapshot property key=value (repeatable).",
)
@click.pass_context
@catch_exception()
def add_files(
    ctx: Context,
    identifier: str,
    file_paths: tuple[str, ...],
    branch: str | None,
    no_check_duplicates: bool,
    properties: tuple[str, ...],
) -> None:
    """Add one or more data files to the table by path."""
    # NOTE: the docstring above doubles as the command's --help text, so it is kept short.
    # `nargs=-1` accepts zero arguments, so the "at least one" rule is enforced here.
    if not file_paths:
        raise click.UsageError("At least one file path is required.")

    catalog, output = _catalog_and_output(ctx)

    # Parse the repeated --property values. Only the first "=" separates key from
    # value, so values may themselves contain "=". A later duplicate key wins.
    snapshot_properties: dict[str, str] = {}
    for prop in properties:
        key, separator, value = prop.partition("=")
        if not separator:
            raise click.UsageError(f"Property must be in key=value form, got: {prop!r}")
        snapshot_properties[key] = value

    table = catalog.load_table(identifier)
    table.add_files(
        # click hands over a tuple; pass a plain list copy to the table API.
        file_paths=list(file_paths),
        # An omitted --branch falls back to the main branch.
        branch=branch or MAIN_BRANCH,
        snapshot_properties=snapshot_properties,
        # The CLI flag is a negative opt-out, the API parameter a positive opt-in.
        check_duplicate_files=not no_check_duplicates,
    )
    output.text(f"Added {len(file_paths)} file(s) to {identifier}")
233+
234+
182235
@run.command("delete-files")
183236
@click.argument("identifier")
184237
@click.argument("file_paths", nargs=-1)

tests/cli/test_console.py

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1031,6 +1031,105 @@ def test_log_level_cli_overrides_env(mocker: MockFixture) -> None:
10311031
assert call_kwargs["level"] == logging.ERROR
10321032

10331033

1034+
def test_add_files_invokes_table_api(catalog: InMemoryCatalog, mocker: MockFixture) -> None:
    """Check that add-files passes paths, branch and snapshot properties to the loaded table's add_files."""
    catalog.create_namespace(TEST_TABLE_NAMESPACE)
    catalog.create_table(
        identifier=TEST_TABLE_IDENTIFIER,
        schema=TEST_TABLE_SCHEMA,
        partition_spec=TEST_TABLE_PARTITION_SPEC,
    )
    # Swap the loaded table for a mock so the add_files call can be inspected.
    mock_table = MagicMock()
    mocker.patch.object(catalog, "load_table", return_value=mock_table)

    runner = CliRunner()
    result = runner.invoke(
        run,
        [
            "add-files",
            "default.my_table",
            "s3://bucket/path/file.parquet",
            "--branch",
            "main",
            "--property",
            "k1=v1",
            "-p",
            "k2=v2",
        ],
    )

    # Report result.output only: with a default CliRunner it already includes stderr,
    # while reading result.stderr raises ValueError on Click < 8.2 and would mask
    # the real failure.
    assert result.exit_code == 0, f"exit_code={result.exit_code} output: {result.output!r}"
    mock_table.add_files.assert_called_once()
    call_kwargs = mock_table.add_files.call_args[1]
    assert call_kwargs["file_paths"] == ["s3://bucket/path/file.parquet"]
    assert call_kwargs["branch"] == "main"
    assert call_kwargs["snapshot_properties"] == {"k1": "v1", "k2": "v2"}
    assert call_kwargs["check_duplicate_files"] is True
1069+
1070+
1071+
def test_add_files_requires_at_least_one_path(catalog: InMemoryCatalog) -> None:
    """Check that add-files without any file path exits non-zero with an explanatory message."""
    catalog.create_namespace(TEST_TABLE_NAMESPACE)
    catalog.create_table(
        identifier=TEST_TABLE_IDENTIFIER,
        schema=TEST_TABLE_SCHEMA,
        partition_spec=TEST_TABLE_PARTITION_SPEC,
    )
    runner = CliRunner()
    result = runner.invoke(run, ["add-files", "default.my_table"])
    assert result.exit_code != 0
    # Use result.output only: with a default CliRunner it already includes stderr,
    # while reading result.stderr raises ValueError on Click < 8.2.
    out = (result.output or "").lower()
    assert "file path" in out or "required" in out
1083+
1084+
1085+
def test_add_files_invalid_property_format(catalog: InMemoryCatalog, mocker: MockFixture) -> None:
    """Check that a --property value without "=" is rejected with a non-zero exit code."""
    catalog.create_namespace(TEST_TABLE_NAMESPACE)
    catalog.create_table(
        identifier=TEST_TABLE_IDENTIFIER,
        schema=TEST_TABLE_SCHEMA,
        partition_spec=TEST_TABLE_PARTITION_SPEC,
    )
    runner = CliRunner()
    result = runner.invoke(
        run,
        ["add-files", "default.my_table", "s3://bucket/file.parquet", "--property", "invalid_no_equals"],
    )
    assert result.exit_code != 0
    # Use result.output only: with a default CliRunner it already includes stderr,
    # while reading result.stderr raises ValueError on Click < 8.2.
    out = result.output or ""
    assert "key=value" in out or "invalid_no_equals" in out
1100+
1101+
1102+
def test_add_files_table_does_not_exist(catalog: InMemoryCatalog) -> None:
    """Check that add-files on a table that was never created fails and names the table."""
    catalog.create_namespace(TEST_TABLE_NAMESPACE)
    runner = CliRunner()
    result = runner.invoke(run, ["add-files", "default.doesnotexist", "s3://bucket/file.parquet"])
    assert result.exit_code != 0
    # Use result.output only: with a default CliRunner it already includes stderr,
    # while reading result.stderr raises ValueError on Click < 8.2.
    out = result.output or ""
    assert "default.doesnotexist" in out
    # "does not exist" also matches the longer "Table does not exist" wording.
    assert "does not exist" in out
1109+
1110+
1111+
def test_add_files_no_check_duplicates_flag(catalog: InMemoryCatalog, mocker: MockFixture) -> None:
    """Check that --no-check-duplicates reaches the table as check_duplicate_files=False."""
    catalog.create_namespace(TEST_TABLE_NAMESPACE)
    catalog.create_table(
        identifier=TEST_TABLE_IDENTIFIER,
        schema=TEST_TABLE_SCHEMA,
        partition_spec=TEST_TABLE_PARTITION_SPEC,
    )
    # Hand the CLI a mock table so the add_files call can be inspected.
    table_double = MagicMock()
    mocker.patch.object(catalog, "load_table", return_value=table_double)

    cli_args = ["add-files", "default.my_table", "s3://bucket/file.parquet", "--no-check-duplicates"]
    result = CliRunner().invoke(run, cli_args)

    assert result.exit_code == 0
    table_double.add_files.assert_called_once()
    _, passed_kwargs = table_double.add_files.call_args
    assert passed_kwargs["check_duplicate_files"] is False
1131+
1132+
10341133
def test_delete_files_invalid_property_format(catalog: InMemoryCatalog, mocker: MockFixture) -> None:
10351134
catalog.create_namespace(TEST_TABLE_NAMESPACE)
10361135
catalog.create_table(

0 commit comments

Comments
 (0)