From e8b587e083d6ae6f5254266ebdae64b492f8a537 Mon Sep 17 00:00:00 2001 From: kraysent Date: Tue, 2 Jun 2026 14:22:47 +0200 Subject: [PATCH 1/2] #374: add more metadata to /v1/tables endpoint --- app/data/model/table.py | 1 + app/data/repositories/layer0/tables.py | 3 ++ app/domain/adminapi/table_upload.py | 40 ++++++++++++++++++----- app/presentation/adminapi/interface.py | 26 ++++++++------- tests/regression/upload_simple_table.py | 2 ++ tests/unit/data/layer0_repository_test.py | 1 + 6 files changed, 52 insertions(+), 21 deletions(-) diff --git a/app/data/model/table.py b/app/data/model/table.py index 3fa8b158..dde034bf 100644 --- a/app/data/model/table.py +++ b/app/data/model/table.py @@ -50,6 +50,7 @@ class Layer0TableListItem: description: str num_fields: int modification_dt: datetime.datetime + bibcode: str @dataclass diff --git a/app/data/repositories/layer0/tables.py b/app/data/repositories/layer0/tables.py index 22666eab..9da6829e 100644 --- a/app/data/repositories/layer0/tables.py +++ b/app/data/repositories/layer0/tables.py @@ -533,6 +533,7 @@ def search_tables( t.table_name, t.modification_dt, COALESCE(ti.param->>'description', '') AS description, + b.code AS bibcode, ( SELECT COUNT(*)::int FROM meta.column_info c @@ -541,6 +542,7 @@ def search_tables( AND c.column_name != %s ) AS num_fields FROM layer0.tables t + JOIN common.bib b ON b.id = t.bib LEFT JOIN meta.table_info ti ON ti.schema_name = %s AND ti.table_name = t.table_name WHERE t.table_name ILIKE %s OR COALESCE(ti.param->>'description', '') ILIKE %s @@ -563,6 +565,7 @@ def search_tables( description=row["description"] or "", num_fields=int(row["num_fields"]), modification_dt=row["modification_dt"], + bibcode=row["bibcode"], ) for row in rows ] diff --git a/app/domain/adminapi/table_upload.py b/app/domain/adminapi/table_upload.py index 1c16c9ff..82c39bd8 100644 --- a/app/domain/adminapi/table_upload.py +++ b/app/domain/adminapi/table_upload.py @@ -178,17 +178,39 @@ def add_data(self, r: adminapi.AddDataRequest) -> adminapi.AddDataResponse: def get_table_list(self, r: adminapi.GetTableListRequest) -> adminapi.GetTableListResponse: items = self.layer0_repo.search_tables(r.query, r.page_size, r.page) cached_tables = self.table_stats_cache.get().tables + missing_names = [item.table_name for item in items if item.table_name not in cached_tables] + fallback_progress = self.layer0_repo.get_table_progress(missing_names) if missing_names else {} return adminapi.GetTableListResponse( - tables=[ - adminapi.TableListItem( - name=item.table_name, - description=item.description, - num_entries=cached_tables[item.table_name].total_records if item.table_name in cached_tables else 0, - num_fields=item.num_fields, - modification_dt=item.modification_dt, + tables=[self._table_list_item(item, cached_tables, fallback_progress) for item in items] + ) + + def _table_list_item( + self, + item: model.Layer0TableListItem, + cached_tables: dict[str, adminapi.TableProgress], + fallback_progress: dict[str, model.TableProgress], + ) -> adminapi.TableListItem: + progress = cached_tables.get(item.table_name) + if progress is None: + table_progress = fallback_progress.get(item.table_name) + if table_progress is None: + table_progress = model.TableProgress( + total_records=0, + unprocessed=0, + pending_triage=0, + resolved_unsubmitted=0, + submitted=0, + catalogs={}, ) - for item in items - ] + progress = table_stats.table_progress_to_presentation(table_progress) + return adminapi.TableListItem( + name=item.table_name, + description=item.description, + num_entries=progress.total_records, + num_fields=item.num_fields, + modification_dt=item.modification_dt, + bibcode=item.bibcode, + progress=progress, ) def get_table(self, r: adminapi.GetTableRequest) -> adminapi.GetTableResponse: diff --git a/app/presentation/adminapi/interface.py b/app/presentation/adminapi/interface.py index ca5e62b5..38d9880f 100644 --- a/app/presentation/adminapi/interface.py +++ b/app/presentation/adminapi/interface.py @@ -53,18 +53,6 @@ class GetTableListRequest(pydantic.BaseModel): page: int = 0 -class TableListItem(pydantic.BaseModel): - name: str - description: str - num_entries: int - num_fields: int - modification_dt: datetime.datetime - - -class GetTableListResponse(pydantic.BaseModel): - tables: list[TableListItem] - - class CatalogProgress(pydantic.BaseModel): structured: int in_layer2: int @@ -80,6 +68,20 @@ class TableProgress(pydantic.BaseModel): catalogs: dict[str, CatalogProgress] +class TableListItem(pydantic.BaseModel): + name: str + description: str + num_entries: int + num_fields: int + modification_dt: datetime.datetime + bibcode: str + progress: TableProgress + + +class GetTableListResponse(pydantic.BaseModel): + tables: list[TableListItem] + + class TableStatsSnapshot(pydantic.BaseModel): tables: dict[str, TableProgress] computed_at: datetime.datetime diff --git a/tests/regression/upload_simple_table.py b/tests/regression/upload_simple_table.py index aabc746a..f4907b19 100644 --- a/tests/regression/upload_simple_table.py +++ b/tests/regression/upload_simple_table.py @@ -390,6 +390,8 @@ def check_table_list(session: requests.Session, table_name: str): assert "description" in item assert "num_entries" in item assert "num_fields" in item + assert "bibcode" in item + assert "progress" in item @lib.test_logging_decorator diff --git a/tests/unit/data/layer0_repository_test.py b/tests/unit/data/layer0_repository_test.py index 10aef494..62df88b5 100644 --- a/tests/unit/data/layer0_repository_test.py +++ b/tests/unit/data/layer0_repository_test.py @@ -69,6 +69,7 @@ def test_search_tables_calls_query_with_expected_structure(self): "description": "A test table", "num_fields": 6, "modification_dt": datetime.datetime(2025, 1, 1, tzinfo=datetime.UTC), + "bibcode": "2024PDU....4601628D", } ] From 8caf4d8164eed8b43b59ef9c2143aeb30109996a Mon Sep 17 00:00:00 2001 From: kraysent Date: Tue, 2 Jun 2026 14:31:08 +0200 Subject: [PATCH 2/2] decrease cache frequency & fix query --- app/commands/adminapi/command.py | 2 +- app/domain/adminapi/table_upload.py | 53 ++++++++++++----------------- 2 files changed, 22 insertions(+), 33 deletions(-) diff --git a/app/commands/adminapi/command.py b/app/commands/adminapi/command.py index d937f7ac..dbf19137 100644 --- a/app/commands/adminapi/command.py +++ b/app/commands/adminapi/command.py @@ -53,7 +53,7 @@ def prepare(self): self.table_stats_cache = cache.BackgroundCache( "table_stats", refresh, - refresh_frequency=timedelta(minutes=2), + refresh_frequency=timedelta(minutes=5), refresh_timeout=timedelta(minutes=5), ) self._table_stats_thread = threading.Thread(target=self.table_stats_cache.run, daemon=True) diff --git a/app/domain/adminapi/table_upload.py b/app/domain/adminapi/table_upload.py index 82c39bd8..a87e1793 100644 --- a/app/domain/adminapi/table_upload.py +++ b/app/domain/adminapi/table_upload.py @@ -178,40 +178,29 @@ def add_data(self, r: adminapi.AddDataRequest) -> adminapi.AddDataResponse: def get_table_list(self, r: adminapi.GetTableListRequest) -> adminapi.GetTableListResponse: items = self.layer0_repo.search_tables(r.query, r.page_size, r.page) cached_tables = self.table_stats_cache.get().tables - missing_names = [item.table_name for item in items if item.table_name not in cached_tables] - fallback_progress = self.layer0_repo.get_table_progress(missing_names) if missing_names else {} - return adminapi.GetTableListResponse( - tables=[self._table_list_item(item, cached_tables, fallback_progress) for item in items] + empty_progress = adminapi.TableProgress( + total_records=0, + unprocessed=0, + pending_triage=0, + resolved_unsubmitted=0, + submitted=0, + catalogs={}, ) - - def _table_list_item( - self, - item: model.Layer0TableListItem, - cached_tables: dict[str, adminapi.TableProgress], - fallback_progress: dict[str, model.TableProgress], - ) -> adminapi.TableListItem: - progress = cached_tables.get(item.table_name) - if progress is None: - table_progress = fallback_progress.get(item.table_name) - if table_progress is None: - table_progress = model.TableProgress( - total_records=0, - unprocessed=0, - pending_triage=0, - resolved_unsubmitted=0, - submitted=0, - catalogs={}, + tables: list[adminapi.TableListItem] = [] + for item in items: + progress = cached_tables.get(item.table_name) or empty_progress + tables.append( + adminapi.TableListItem( + name=item.table_name, + description=item.description, + num_entries=progress.total_records, + num_fields=item.num_fields, + modification_dt=item.modification_dt, + bibcode=item.bibcode, + progress=progress, ) - progress = table_stats.table_progress_to_presentation(table_progress) - return adminapi.TableListItem( - name=item.table_name, - description=item.description, - num_entries=progress.total_records, - num_fields=item.num_fields, - modification_dt=item.modification_dt, - bibcode=item.bibcode, - progress=progress, - ) + ) + return adminapi.GetTableListResponse(tables=tables) def get_table(self, r: adminapi.GetTableRequest) -> adminapi.GetTableResponse: meta = self.layer0_repo.fetch_metadata_by_name(r.table_name)