From 81f488576a7c7e63feabe1fe78adb834853bff97 Mon Sep 17 00:00:00 2001 From: sha174n Date: Thu, 11 Jun 2026 15:00:56 +0100 Subject: [PATCH 1/5] fix(dashboard): route example-export data through the dataset query path The example-export data path now fetches rows through the dataset's own query builder and applies the dataset access check before reading any rows, so exported sample data is produced the same way as the chart-data path (column projection, per-row filters) and reflects the requester's dataset access. The row cap is applied at the SQL level via row_limit instead of after a full table read. A requester without access to a dataset yields no data file rather than the raw underlying table. Adds unit coverage for the access-denied and query-path branches. Co-Authored-By: Claude Opus 4.8 (1M context) --- superset/commands/dashboard/export_example.py | 61 ++++++++------ .../commands/dashboard/export_example_test.py | 84 +++++++++++++++++++ 2 files changed, 118 insertions(+), 27 deletions(-) diff --git a/superset/commands/dashboard/export_example.py b/superset/commands/dashboard/export_example.py index 7924fe0ad4d1..9056160a0f90 100644 --- a/superset/commands/dashboard/export_example.py +++ b/superset/commands/dashboard/export_example.py @@ -32,11 +32,13 @@ from superset.commands.base import BaseCommand from superset.commands.dashboard.exceptions import DashboardNotFoundError from superset.daos.dashboard import DashboardDAO +from superset.exceptions import SupersetSecurityException if TYPE_CHECKING: from superset.connectors.sqla.models import SqlaTable from superset.models.dashboard import Dashboard from superset.models.slice import Slice + from superset.superset_typing import QueryObjectDict from superset.sql.parse import SQLStatement, Table @@ -235,8 +237,6 @@ def export_dataset_data( sample_rows: int | None = None, ) -> bytes | None: """Export dataset data to Parquet format. Returns bytes or None on failure.""" - import pandas as pd # pylint: disable=import-outside-toplevel - from superset import db # pylint: disable=import-outside-toplevel # Ensure dataset is attached to session and relationships are loaded @@ -251,35 +251,42 @@ def export_dataset_data( logger.warning("Dataset %s has no database", dataset.table_name) return None + # Only export rows the requester is entitled to read. The dataset's own + # access check is applied here, and the rows below are fetched through the + # dataset's query builder (the same path the chart-data API uses) so that + # per-row filters are applied consistently. A requester without access to + # the dataset yields no data file rather than the raw underlying table. try: - logger.info("Exporting data for %s to Parquet...", dataset.table_name) + dataset.raise_for_access() + except SupersetSecurityException: + logger.info( + "Skipping data export for dataset %s: requester not entitled", + dataset.table_name, + ) + return None - # Check if this is a virtual dataset (SQL-based) - if dataset.sql: - sql = dataset.sql - else: - # For physical tables, build SELECT query from columns - columns = [col.column_name for col in dataset.columns if not col.expression] - - if not columns: - logger.warning("No columns to export for %s", dataset.table_name) - return None - - # Build simple SELECT query (quote identifiers to handle spaces/keywords) - column_list = ", ".join(f'"{c}"' for c in columns) - quoted_table = f'"{dataset.table_name}"' - if dataset.schema: - table_ref = f'"{dataset.schema}".{quoted_table}' - else: - table_ref = quoted_table - sql = f"SELECT {column_list} FROM {table_ref}" # noqa: S608 + columns = [col.column_name for col in dataset.columns if not col.expression] + if not columns: + logger.warning("No columns to export for %s", dataset.table_name) + return None - with dataset.database.get_sqla_engine() as engine: - df = pd.read_sql(sql, engine) + try: + logger.info("Exporting data for %s to Parquet...", dataset.table_name) - if sample_rows and len(df) > sample_rows: - df = df.head(sample_rows) - logger.info("Sampled to %d rows", sample_rows) + # Fetch through the dataset's query builder so that the projection, + # per-row filters, and (for virtual datasets) the wrapping query are + # produced the same way as the chart-data path. The row cap is applied + # at the SQL level via row_limit rather than after a full table read. + query_obj: QueryObjectDict = { + "columns": columns, + "metrics": [], + "orderby": [], + "is_timeseries": False, + "filter": [], + "extras": {}, + "row_limit": sample_rows, + } + df = dataset.query(query_obj).df # Write to bytes buffer buf = BytesIO() diff --git a/tests/unit_tests/commands/dashboard/export_example_test.py b/tests/unit_tests/commands/dashboard/export_example_test.py index 8aeab21de022..6b5731a22e77 100644 --- a/tests/unit_tests/commands/dashboard/export_example_test.py +++ b/tests/unit_tests/commands/dashboard/export_example_test.py @@ -16,6 +16,8 @@ # under the License. from __future__ import annotations +from io import BytesIO +from typing import Any from unittest.mock import MagicMock, patch from uuid import uuid4 @@ -27,10 +29,13 @@ _make_bytes_generator, _make_yaml_generator, export_chart, + export_dataset_data, export_dataset_yaml, ExportExampleCommand, sanitize_filename, ) +from superset.errors import ErrorLevel, SupersetError, SupersetErrorType +from superset.exceptions import SupersetSecurityException def test_sanitize_filename_basic(): @@ -321,3 +326,82 @@ def test_export_example_command_no_data(): assert "dataset.yaml" in files assert "data.parquet" not in files assert "dashboard.yaml" in files + + +def _make_data_export_dataset(rows: list[dict[str, Any]]) -> MagicMock: + """A dataset mock whose query() returns ``rows`` as a DataFrame.""" + import pandas as pd + + dataset = MagicMock() + dataset.table_name = "private_table" + dataset.database = MagicMock() # truthy + col_uid = MagicMock(column_name="uid", expression=None) + col_data = MagicMock(column_name="data", expression=None) + dataset.columns = [col_uid, col_data] + result = MagicMock() + result.df = pd.DataFrame(rows) + dataset.query.return_value = result + return dataset + + +def test_export_dataset_data_skips_when_access_denied(): + """ + A requester without access to the dataset must get no data file, and the + underlying rows must never be fetched (no fallback raw read). + """ + dataset = _make_data_export_dataset([{"uid": 1, "data": "secret"}]) + dataset.raise_for_access.side_effect = SupersetSecurityException( + SupersetError( + message="denied", + error_type=SupersetErrorType.DATASOURCE_SECURITY_ACCESS_ERROR, + level=ErrorLevel.ERROR, + ) + ) + + with patch("superset.db") as mock_db: + mock_db.session.merge.side_effect = lambda d: d + assert export_dataset_data(dataset) is None + + dataset.raise_for_access.assert_called_once() + dataset.query.assert_not_called() + + +def test_export_dataset_data_fetches_through_query_path(): + """ + Rows are fetched through the dataset's own query builder (which applies the + per-row filters), and exactly those rows are what gets written to Parquet. + """ + import pandas as pd + + own_rows = [{"uid": 6, "data": "attacker-own-row"}] + dataset = _make_data_export_dataset(own_rows) + + with patch("superset.db") as mock_db: + mock_db.session.merge.side_effect = lambda d: d + payload = export_dataset_data(dataset) + + assert payload is not None + dataset.raise_for_access.assert_called_once() + dataset.query.assert_called_once() + + # The fetch goes through query() with a column projection, not a raw read. + query_obj = dataset.query.call_args.args[0] + assert query_obj["columns"] == ["uid", "data"] + assert query_obj["is_timeseries"] is False + assert "row_limit" in query_obj + + # Only the rows query() returned are exported. + exported = pd.read_parquet(BytesIO(payload)) + assert exported.to_dict("records") == own_rows + + +def test_export_dataset_data_applies_row_limit_at_query_level(): + """sample_rows is passed as a SQL-level row_limit, not applied post-fetch.""" + dataset = _make_data_export_dataset([{"uid": 1, "data": "a"}]) + + with patch("superset.db") as mock_db: + mock_db.session.merge.side_effect = lambda d: d + export_dataset_data(dataset, sample_rows=5) + + query_obj = dataset.query.call_args.args[0] + assert query_obj["row_limit"] == 5 From 6a7d4483a09b55786f58d769bcbe879d26076b65 Mon Sep 17 00:00:00 2001 From: sha174n Date: Thu, 11 Jun 2026 16:07:40 +0100 Subject: [PATCH 2/5] test(dashboard): cover row-level filtering of example-export data Adds an end-to-end regression test: a Gamma user restricted by a row-level filter on birth_names exports the births dashboard and must only receive the rows the filter allows. Verified to fail against the pre-change data path (full table leaked) and pass after it. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../integration_tests/dashboards/api_tests.py | 58 +++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/tests/integration_tests/dashboards/api_tests.py b/tests/integration_tests/dashboards/api_tests.py index bdc42ed9287c..abec7a87b56b 100644 --- a/tests/integration_tests/dashboards/api_tests.py +++ b/tests/integration_tests/dashboards/api_tests.py @@ -2961,6 +2961,64 @@ def test_export_as_example_content_disposition(self): assert "Content-Disposition" in rv.headers assert "_example.zip" in rv.headers["Content-Disposition"] + @pytest.mark.usefixtures("load_birth_names_dashboard_with_slices") + def test_export_as_example_data_respects_row_level_filters(self): + """ + Dashboard API: export_as_example with export_data must apply the + dataset's row-level filters, so a restricted user only receives the + rows they are entitled to (not the full underlying table). + """ + import pandas as pd + + from superset.connectors.sqla.models import RowLevelSecurityFilter + + table = self.get_table(name="birth_names") + gamma = security_manager.find_role("Gamma") + # birth_names access so the row-level filter (not the access check) is + # what scopes the result, and permission to call the endpoint. + for perm, view in ( + ("datasource_access", table.perm), + ("can_export_as_example", "Dashboard"), + ("can_export", "Dashboard"), + ): + pvm = security_manager.find_permission_view_menu(perm, view) + if pvm and pvm not in gamma.permissions: + gamma.permissions.append(pvm) + rls = RowLevelSecurityFilter( + name="export_example_girls_only", + filter_type="Regular", + clause="gender = 'girl'", + group_key="gender", + ) + rls.tables.append(table) + rls.roles.append(gamma) + db.session.add(rls) + db.session.commit() + try: + self.login(GAMMA_USERNAME) + dashboard_id = get_dashboards_ids(["births"])[0] + uri = ( + f"api/v1/dashboard/{dashboard_id}/export_as_example/" + "?export_data=true&sample_rows=500" + ) + rv = self.client.get(uri) + assert rv.status_code == 200 + + with ZipFile(BytesIO(rv.data)) as zf: + parquet_files = [n for n in zf.namelist() if n.endswith(".parquet")] + assert parquet_files, f"expected a data file: {zf.namelist()}" + for name in parquet_files: + df = pd.read_parquet(BytesIO(zf.read(name))) + if "gender" in df.columns: + leaked = set(df["gender"].unique()) - {"girl"} + assert not leaked, ( + f"row-level filter not applied to export; " + f"leaked genders: {leaked}" + ) + finally: + db.session.delete(rls) + db.session.commit() + @patch("superset.commands.database.importers.v1.utils.add_permissions") def test_import_dashboard(self, mock_add_permissions): """ From 69f0f0e6226ba678b26d02b8bc51522b1fcef5d0 Mon Sep 17 00:00:00 2001 From: sha174n Date: Thu, 11 Jun 2026 16:12:18 +0100 Subject: [PATCH 3/5] fix(dashboard): omit example-export data file when the query reports failure The dataset query path signals failures via QueryResult.status (returning an empty frame) rather than raising, so a failed query would have written an empty Parquet. Treat a FAILED status as no data and omit the file, matching the prior raise-and-skip behavior. Adds unit coverage for the failed-query branch and restores the shared test role's permissions after the row-level-filter integration test. Co-Authored-By: Claude Opus 4.8 (1M context) --- superset/commands/dashboard/export_example.py | 11 ++++++++++- tests/integration_tests/dashboards/api_tests.py | 16 +++++++++++----- .../commands/dashboard/export_example_test.py | 17 ++++++++++++++++- 3 files changed, 37 insertions(+), 7 deletions(-) diff --git a/superset/commands/dashboard/export_example.py b/superset/commands/dashboard/export_example.py index 9056160a0f90..477e6a70ebf0 100644 --- a/superset/commands/dashboard/export_example.py +++ b/superset/commands/dashboard/export_example.py @@ -31,6 +31,7 @@ from superset.commands.base import BaseCommand from superset.commands.dashboard.exceptions import DashboardNotFoundError +from superset.common.db_query_status import QueryStatus from superset.daos.dashboard import DashboardDAO from superset.exceptions import SupersetSecurityException @@ -286,7 +287,15 @@ def export_dataset_data( "extras": {}, "row_limit": sample_rows, } - df = dataset.query(query_obj).df + result = dataset.query(query_obj) + if result.status == QueryStatus.FAILED: + # The query path reports failures via status rather than raising; + # omit the data file instead of writing an empty/partial Parquet. + logger.warning( + "Query failed while exporting data for %s", dataset.table_name + ) + return None + df = result.df # Write to bytes buffer buf = BytesIO() diff --git a/tests/integration_tests/dashboards/api_tests.py b/tests/integration_tests/dashboards/api_tests.py index abec7a87b56b..fe5ae5ea69a9 100644 --- a/tests/integration_tests/dashboards/api_tests.py +++ b/tests/integration_tests/dashboards/api_tests.py @@ -2975,7 +2975,9 @@ def test_export_as_example_data_respects_row_level_filters(self): table = self.get_table(name="birth_names") gamma = security_manager.find_role("Gamma") # birth_names access so the row-level filter (not the access check) is - # what scopes the result, and permission to call the endpoint. + # what scopes the result, and permission to call the endpoint. Track + # what we grant so the shared test role is restored afterwards. + granted = [] for perm, view in ( ("datasource_access", table.perm), ("can_export_as_example", "Dashboard"), @@ -2984,6 +2986,7 @@ def test_export_as_example_data_respects_row_level_filters(self): pvm = security_manager.find_permission_view_menu(perm, view) if pvm and pvm not in gamma.permissions: gamma.permissions.append(pvm) + granted.append(pvm) rls = RowLevelSecurityFilter( name="export_example_girls_only", filter_type="Regular", @@ -3010,13 +3013,16 @@ def test_export_as_example_data_respects_row_level_filters(self): for name in parquet_files: df = pd.read_parquet(BytesIO(zf.read(name))) if "gender" in df.columns: - leaked = set(df["gender"].unique()) - {"girl"} - assert not leaked, ( - f"row-level filter not applied to export; " - f"leaked genders: {leaked}" + unexpected = set(df["gender"].unique()) - {"girl"} + assert not unexpected, ( + f"export returned rows outside the configured " + f"row-level filter: {unexpected}" ) finally: db.session.delete(rls) + for pvm in granted: + if pvm in gamma.permissions: + gamma.permissions.remove(pvm) db.session.commit() @patch("superset.commands.database.importers.v1.utils.add_permissions") diff --git a/tests/unit_tests/commands/dashboard/export_example_test.py b/tests/unit_tests/commands/dashboard/export_example_test.py index 6b5731a22e77..f2d439de33cd 100644 --- a/tests/unit_tests/commands/dashboard/export_example_test.py +++ b/tests/unit_tests/commands/dashboard/export_example_test.py @@ -34,6 +34,7 @@ ExportExampleCommand, sanitize_filename, ) +from superset.common.db_query_status import QueryStatus from superset.errors import ErrorLevel, SupersetError, SupersetErrorType from superset.exceptions import SupersetSecurityException @@ -339,6 +340,7 @@ def _make_data_export_dataset(rows: list[dict[str, Any]]) -> MagicMock: col_data = MagicMock(column_name="data", expression=None) dataset.columns = [col_uid, col_data] result = MagicMock() + result.status = QueryStatus.SUCCESS result.df = pd.DataFrame(rows) dataset.query.return_value = result return dataset @@ -373,7 +375,7 @@ def test_export_dataset_data_fetches_through_query_path(): """ import pandas as pd - own_rows = [{"uid": 6, "data": "attacker-own-row"}] + own_rows = [{"uid": 6, "data": "row-for-user-6"}] dataset = _make_data_export_dataset(own_rows) with patch("superset.db") as mock_db: @@ -405,3 +407,16 @@ def test_export_dataset_data_applies_row_limit_at_query_level(): query_obj = dataset.query.call_args.args[0] assert query_obj["row_limit"] == 5 + + +def test_export_dataset_data_skips_on_failed_query(): + """ + The query path signals failure via status rather than raising, so a failed + query must yield no data file instead of an empty/partial Parquet. + """ + dataset = _make_data_export_dataset([{"uid": 1, "data": "a"}]) + dataset.query.return_value.status = QueryStatus.FAILED + + with patch("superset.db") as mock_db: + mock_db.session.merge.side_effect = lambda d: d + assert export_dataset_data(dataset) is None From 4bee77cc59ba0ce042fb0de0394dbd1d4d73180e Mon Sep 17 00:00:00 2001 From: sha174n Date: Fri, 12 Jun 2026 00:13:52 +0100 Subject: [PATCH 4/5] test(dashboard): extract patched_db fixture in export-example tests Co-Authored-By: Claude Opus 4.8 --- .../commands/dashboard/export_example_test.py | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/tests/unit_tests/commands/dashboard/export_example_test.py b/tests/unit_tests/commands/dashboard/export_example_test.py index f2d439de33cd..1703da33b224 100644 --- a/tests/unit_tests/commands/dashboard/export_example_test.py +++ b/tests/unit_tests/commands/dashboard/export_example_test.py @@ -346,7 +346,15 @@ def _make_data_export_dataset(rows: list[dict[str, Any]]) -> MagicMock: return dataset -def test_export_dataset_data_skips_when_access_denied(): +@pytest.fixture +def patched_db(): + """Patch ``superset.db`` so merge() returns the dataset unchanged.""" + with patch("superset.db") as mock_db: + mock_db.session.merge.side_effect = lambda d: d + yield mock_db + + +def test_export_dataset_data_skips_when_access_denied(patched_db): """ A requester without access to the dataset must get no data file, and the underlying rows must never be fetched (no fallback raw read). @@ -360,15 +368,13 @@ def test_export_dataset_data_skips_when_access_denied(): ) ) - with patch("superset.db") as mock_db: - mock_db.session.merge.side_effect = lambda d: d - assert export_dataset_data(dataset) is None + assert export_dataset_data(dataset) is None dataset.raise_for_access.assert_called_once() dataset.query.assert_not_called() -def test_export_dataset_data_fetches_through_query_path(): +def test_export_dataset_data_fetches_through_query_path(patched_db): """ Rows are fetched through the dataset's own query builder (which applies the per-row filters), and exactly those rows are what gets written to Parquet. @@ -378,9 +384,7 @@ def test_export_dataset_data_fetches_through_query_path(): own_rows = [{"uid": 6, "data": "row-for-user-6"}] dataset = _make_data_export_dataset(own_rows) - with patch("superset.db") as mock_db: - mock_db.session.merge.side_effect = lambda d: d - payload = export_dataset_data(dataset) + payload = export_dataset_data(dataset) assert payload is not None dataset.raise_for_access.assert_called_once() @@ -397,19 +401,17 @@ def test_export_dataset_data_fetches_through_query_path(): assert exported.to_dict("records") == own_rows -def test_export_dataset_data_applies_row_limit_at_query_level(): +def test_export_dataset_data_applies_row_limit_at_query_level(patched_db): """sample_rows is passed as a SQL-level row_limit, not applied post-fetch.""" dataset = _make_data_export_dataset([{"uid": 1, "data": "a"}]) - with patch("superset.db") as mock_db: - mock_db.session.merge.side_effect = lambda d: d - export_dataset_data(dataset, sample_rows=5) + export_dataset_data(dataset, sample_rows=5) query_obj = dataset.query.call_args.args[0] assert query_obj["row_limit"] == 5 -def test_export_dataset_data_skips_on_failed_query(): +def test_export_dataset_data_skips_on_failed_query(patched_db): """ The query path signals failure via status rather than raising, so a failed query must yield no data file instead of an empty/partial Parquet. @@ -417,6 +419,4 @@ def test_export_dataset_data_skips_on_failed_query(): dataset = _make_data_export_dataset([{"uid": 1, "data": "a"}]) dataset.query.return_value.status = QueryStatus.FAILED - with patch("superset.db") as mock_db: - mock_db.session.merge.side_effect = lambda d: d - assert export_dataset_data(dataset) is None + assert export_dataset_data(dataset) is None From 67f493bc089d421d9ec3a36a75a954d8decd97dc Mon Sep 17 00:00:00 2001 From: sha174n Date: Fri, 12 Jun 2026 02:12:15 +0100 Subject: [PATCH 5/5] test(dashboard): add type annotations to export-example test signatures Co-Authored-By: Claude Opus 4.8 --- .../integration_tests/dashboards/api_tests.py | 2 +- .../commands/dashboard/export_example_test.py | 19 +++++++++++-------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/tests/integration_tests/dashboards/api_tests.py b/tests/integration_tests/dashboards/api_tests.py index fe5ae5ea69a9..ef5104b91841 100644 --- a/tests/integration_tests/dashboards/api_tests.py +++ b/tests/integration_tests/dashboards/api_tests.py @@ -2962,7 +2962,7 @@ def test_export_as_example_content_disposition(self): assert "_example.zip" in rv.headers["Content-Disposition"] @pytest.mark.usefixtures("load_birth_names_dashboard_with_slices") - def test_export_as_example_data_respects_row_level_filters(self): + def test_export_as_example_data_respects_row_level_filters(self) -> None: """ Dashboard API: export_as_example with export_data must apply the dataset's row-level filters, so a restricted user only receives the diff --git a/tests/unit_tests/commands/dashboard/export_example_test.py b/tests/unit_tests/commands/dashboard/export_example_test.py index 1703da33b224..edc5c172ef83 100644 --- a/tests/unit_tests/commands/dashboard/export_example_test.py +++ b/tests/unit_tests/commands/dashboard/export_example_test.py @@ -16,6 +16,7 @@ # under the License. from __future__ import annotations +from collections.abc import Iterator from io import BytesIO from typing import Any from unittest.mock import MagicMock, patch @@ -336,9 +337,9 @@ def _make_data_export_dataset(rows: list[dict[str, Any]]) -> MagicMock: dataset = MagicMock() dataset.table_name = "private_table" dataset.database = MagicMock() # truthy - col_uid = MagicMock(column_name="uid", expression=None) - col_data = MagicMock(column_name="data", expression=None) - dataset.columns = [col_uid, col_data] + dataset.columns = [ + MagicMock(column_name=name, expression=None) for name in ("uid", "data") + ] result = MagicMock() result.status = QueryStatus.SUCCESS result.df = pd.DataFrame(rows) @@ -347,14 +348,14 @@ def _make_data_export_dataset(rows: list[dict[str, Any]]) -> MagicMock: @pytest.fixture -def patched_db(): +def patched_db() -> Iterator[MagicMock]: """Patch ``superset.db`` so merge() returns the dataset unchanged.""" with patch("superset.db") as mock_db: mock_db.session.merge.side_effect = lambda d: d yield mock_db -def test_export_dataset_data_skips_when_access_denied(patched_db): +def test_export_dataset_data_skips_when_access_denied(patched_db: MagicMock) -> None: """ A requester without access to the dataset must get no data file, and the underlying rows must never be fetched (no fallback raw read). @@ -374,7 +375,7 @@ def test_export_dataset_data_skips_when_access_denied(patched_db): dataset.query.assert_not_called() -def test_export_dataset_data_fetches_through_query_path(patched_db): +def test_export_dataset_data_fetches_through_query_path(patched_db: MagicMock) -> None: """ Rows are fetched through the dataset's own query builder (which applies the per-row filters), and exactly those rows are what gets written to Parquet. @@ -401,7 +402,9 @@ def test_export_dataset_data_fetches_through_query_path(patched_db): assert exported.to_dict("records") == own_rows -def test_export_dataset_data_applies_row_limit_at_query_level(patched_db): +def test_export_dataset_data_applies_row_limit_at_query_level( + patched_db: MagicMock, +) -> None: """sample_rows is passed as a SQL-level row_limit, not applied post-fetch.""" dataset = _make_data_export_dataset([{"uid": 1, "data": "a"}]) @@ -411,7 +414,7 @@ def test_export_dataset_data_applies_row_limit_at_query_level(patched_db): assert query_obj["row_limit"] == 5 -def test_export_dataset_data_skips_on_failed_query(patched_db): +def test_export_dataset_data_skips_on_failed_query(patched_db: MagicMock) -> None: """ The query path signals failure via status rather than raising, so a failed query must yield no data file instead of an empty/partial Parquet.