|
15 | 15 | # specific language governing permissions and limitations |
16 | 16 | # under the License. |
17 | 17 | # pylint: disable=redefined-outer-name,arguments-renamed,fixme |
| 18 | +import importlib |
18 | 19 | from tempfile import TemporaryDirectory |
19 | 20 |
|
20 | 21 | import fastavro |
@@ -1039,3 +1040,65 @@ def test_clear_manifest_cache() -> None: |
1039 | 1040 | cache_after = manifest_module._manifest_cache |
1040 | 1041 | assert cache_after is not None, "Cache should still be enabled after clear" |
1041 | 1042 | assert len(cache_after) == 0, "Cache should be empty after clear" |
| 1043 | + |
| 1044 | + |
def test_manifest_cache_can_be_disabled_with_zero_size(monkeypatch: pytest.MonkeyPatch) -> None:
    """Test that setting manifest-cache-size to 0 disables caching.

    The test sets ``PYICEBERG_MANIFEST_CACHE_SIZE=0`` and reloads
    ``manifest_module`` so the module re-evaluates its cache configuration.
    It then writes a one-entry manifest and a manifest list into a temporary
    directory, reads the list twice through ``_manifests`` and checks that:

    * the cache stays empty, and
    * the two reads return distinct ``ManifestFile`` objects.

    The module is always reloaded again without the override so that later
    tests see the default configuration.
    """
    monkeypatch.setenv("PYICEBERG_MANIFEST_CACHE_SIZE", "0")

    try:
        # Reload inside the ``try``: if the reload itself fails part-way, the
        # ``finally`` below still restores the module for the remaining tests.
        importlib.reload(manifest_module)

        assert manifest_module._manifest_cache_size == 0
        assert len(manifest_module._manifest_cache) == 0

        io = PyArrowFileIO()

        with TemporaryDirectory() as tmp_dir:
            schema = Schema(NestedField(field_id=1, name="id", field_type=IntegerType(), required=True))
            spec = UNPARTITIONED_PARTITION_SPEC

            # Write a manifest containing a single ADDED data file entry.
            # Names are resolved through ``manifest_module`` at call time,
            # presumably so the freshly reloaded definitions are the ones used.
            manifest_path = f"{tmp_dir}/manifest.avro"
            with manifest_module.write_manifest(
                format_version=2,
                spec=spec,
                schema=schema,
                output_file=io.new_output(manifest_path),
                snapshot_id=1,
                avro_compression="zstandard",
            ) as writer:
                data_file = manifest_module.DataFile.from_args(
                    content=manifest_module.DataFileContent.DATA,
                    file_path=f"{tmp_dir}/data.parquet",
                    file_format=manifest_module.FileFormat.PARQUET,
                    partition=Record(),
                    record_count=100,
                    file_size_in_bytes=1000,
                )
                writer.add_entry(
                    manifest_module.ManifestEntry.from_args(
                        status=manifest_module.ManifestEntryStatus.ADDED,
                        snapshot_id=1,
                        data_file=data_file,
                    )
                )
            manifest_file = writer.to_manifest_file()

            # Write a manifest list that references the manifest above.
            list_path = f"{tmp_dir}/manifest-list.avro"
            with manifest_module.write_manifest_list(
                format_version=2,
                output_file=io.new_output(list_path),
                snapshot_id=1,
                parent_snapshot_id=None,
                sequence_number=1,
                avro_compression="zstandard",
            ) as list_writer:
                list_writer.add_manifests([manifest_file])

            # Read the same manifest list twice; with caching disabled nothing
            # should be stored and each read should yield a new object.
            manifests_first_call = manifest_module._manifests(io, list_path)
            manifests_second_call = manifest_module._manifests(io, list_path)

            assert len(manifest_module._manifest_cache) == 0
            assert manifests_first_call[0] is not manifests_second_call[0]
    finally:
        # Drop the override before reloading so the module goes back to its
        # default cache configuration.
        monkeypatch.delenv("PYICEBERG_MANIFEST_CACHE_SIZE", raising=False)
        importlib.reload(manifest_module)
0 commit comments