Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 7 additions & 8 deletions superset/commands/sql_lab/estimate.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,14 +102,13 @@ def _apply_sql_security(self, sql: str) -> str:
db_engine_spec.engine,
set(),
)
if disallowed_tables and parsed_script.check_tables_present(disallowed_tables):
found_tables = set()
for statement in parsed_script.statements:
present = {table.table.lower() for table in statement.tables}
for table in disallowed_tables:
if table.lower() in present:
found_tables.add(table)
raise SupersetDisallowedSQLTableException(found_tables or disallowed_tables)
if disallowed_tables:
# Honors schema-qualified denylist entries (e.g.
# ``information_schema.tables``) and reports only the tables
# actually referenced by the query.
found_tables = parsed_script.get_disallowed_tables(disallowed_tables)
if found_tables:
raise SupersetDisallowedSQLTableException(found_tables)

if parsed_script.has_mutation() and not self._database.allow_dml:
raise SupersetDMLNotAllowedException()
Expand Down
37 changes: 37 additions & 0 deletions superset/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -1830,6 +1830,20 @@ def engine_context_manager( # pylint: disable=unused-argument
"pg_read_file",
"pg_ls_dir",
"pg_read_binary_file",
# PostgreSQL large-object functions: writers can plant arbitrary
# bytes on the server filesystem (lo_export, lo_from_bytea, lowrite,
# lo_put, lo_create, lo_import), readers can pull bytes back out
# (lo_get, loread), and lo_unlink deletes large objects outright.
# Defense-in-depth on top of is_mutating()'s function-name check.
"lo_from_bytea",
"lo_export",
"lo_import",
"lo_put",
"lo_create",
"lowrite",
"lo_get",
"loread",
Comment thread
sha174n marked this conversation as resolved.
"lo_unlink",
# XML functions that can execute SQL
"database_to_xml",
"database_to_xmlschema",
Expand Down Expand Up @@ -1920,6 +1934,29 @@ def engine_context_manager( # pylint: disable=unused-argument
"pg_stat_replication",
"pg_stat_wal_receiver",
"pg_user",
# The SQL-standard `information_schema` views expose table /
# column / privilege / view-definition metadata across the entire
# database role the connection user can see. Entries are
# schema-qualified so `check_tables_present` only matches when the
# query actually references `information_schema.<view>`, not any
# user table that happens to share a name.
"information_schema.tables",
"information_schema.columns",
"information_schema.schemata",
"information_schema.views",
"information_schema.routines",
"information_schema.role_table_grants",
"information_schema.role_column_grants",
"information_schema.role_routine_grants",
"information_schema.table_privileges",
"information_schema.column_privileges",
"information_schema.usage_privileges",
"information_schema.key_column_usage",
"information_schema.table_constraints",
"information_schema.referential_constraints",
"information_schema.view_table_usage",
"information_schema.applicable_roles",
"information_schema.enabled_roles",
Comment on lines +1937 to +1959

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggestion: The new PostgreSQL denylist entries are only schema-qualified (information_schema.<view>), but table matching logic cannot resolve runtime default schema/search_path. This allows bypassing the blocklist via unqualified references like SELECT * FROM tables when information_schema is in scope (for example via configured/default schema or session search path), exposing metadata that this change is meant to protect. Add corresponding bare names for sensitive information_schema views (or enforce schema-resolution-aware checks) so unqualified access is also blocked. [security]

Severity Level: Critical 🚨
- ❌ SQL Lab can read sensitive information_schema views unblocked.
- ⚠️ Cost estimate path reuses same denylist and misses access.
Steps of Reproduction ✅
1. Configure a PostgreSQL database in Superset and open SQL Lab so queries execute via
`execute_sql_statements` in `superset/sql_lab.py:391-80`. Select the same PostgreSQL
database and choose the `information_schema` schema from the schema dropdown so that the
`schema` argument passed into the database engine is `"information_schema"`.

2. When SQL Lab opens a connection for that database, `Database.get_sqla_engine` in
`superset/models/core.py:480-59` calls `PostgresEngineSpec.get_prequeries` at
`superset/db_engine_specs/postgres.py:717-29`, which returns `['set search_path =
"information_schema"']`. This prequery is executed on each new DBAPI connection (see the
`run_prequeries` connect hook at `superset/models/core.py:488-32`), so at runtime
unqualified table names like `tables` resolve to `information_schema.tables`.

3. Submit the SQL text `SELECT * FROM tables;` in SQL Lab with the `information_schema`
schema selected. `execute_sql_statements` builds a `SQLScript` at `superset/sql_lab.py:31`
and then loads the PostgreSQL denylist from `DISALLOWED_SQL_TABLES["postgresql"]` defined
in `superset/config.py:1915-46`, which includes only schema-qualified entries such as
`"information_schema.tables"` and `"information_schema.columns"`. It then calls
`parsed_script.get_disallowed_tables(disallowed_tables)` at `superset/sql_lab.py:42-50`.

4. Inside `SQLStatement.get_disallowed_tables` and `check_tables_present` in
`superset/sql/parse.py:988-137`, the parser iterates `self.tables`, which is built by
`_extract_tables_from_statement` using sqlglot `exp.Table` nodes at
`superset/sql/parse.py:760-25` and `extract_tables_from_statement` at
`superset/sql/parse.py:92-115`. For the query `SELECT * FROM tables`, each table node has
`table='tables'` and no `schema`, so `present_bare = {'tables'}` and `present_qualified`
does not contain `"information_schema.tables"`. Because the denylist entries for these
views are only schema-qualified (e.g. `"information_schema.tables"` at
`superset/config.py:1929`), the matching logic in `get_disallowed_tables` (`needle`
containing a dot must be in `present_qualified`) fails to match, `found_tables` is empty,
and `SupersetDisallowedSQLTableException` is never raised in `execute_sql_statements`. The
query then executes against `information_schema.tables` due to the `search_path` prequery,
allowing the user to retrieve metadata from views that were intended to be blocked by the
schema-qualified denylist entries.

Fix in Cursor | Fix in VSCode Claude

(Use Cmd/Ctrl + Click for best experience)

Prompt for AI Agent 🤖
This is a comment left during a code review.

**Path:** superset/config.py
**Line:** 1937:1959
**Comment:**
	*Security: The new PostgreSQL denylist entries are only schema-qualified (`information_schema.<view>`), but table matching logic cannot resolve runtime default schema/search_path. This allows bypassing the blocklist via unqualified references like `SELECT * FROM tables` when `information_schema` is in scope (for example via configured/default schema or session search path), exposing metadata that this change is meant to protect. Add corresponding bare names for sensitive `information_schema` views (or enforce schema-resolution-aware checks) so unqualified access is also blocked.

Validate the correctness of the flagged issue. If correct, How can I resolve this? If you propose a fix, implement it and please make it concise.
Once fix is implemented, also check other comments on the same PR, and ask user if the user wants to fix the rest of the comments as well. if said yes, then fetch all the comments validate the correctness and implement a minimal fix
👍 | 👎

},
"mysql": {
"mysql.user",
Expand Down
39 changes: 13 additions & 26 deletions superset/models/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1221,24 +1221,16 @@ def _process_sql_expression( # pylint: disable=too-many-arguments
disallowed_functions
):
raise SupersetDisallowedSQLFunctionException(disallowed_functions)
if disallowed_tables and parsed.check_tables_present(disallowed_tables):
if disallowed_tables:
# Report only the tables actually found in the expression,
# mirroring the canonical execution-time gate in
# `superset.sql_lab._validate_query` so the user-facing
# error doesn't echo the operator's full denylist.
present_tables = {
table.table.lower()
for statement in parsed.statements
for table in statement.tables
}
found_tables = {
table
for table in disallowed_tables
if table.lower() in present_tables
}
raise SupersetDisallowedSQLTableException(
found_tables or disallowed_tables
)
# error doesn't echo the operator's full denylist. Honors
# schema-qualified denylist entries (e.g.
# ``information_schema.tables``).
found_tables = parsed.get_disallowed_tables(disallowed_tables)
if found_tables:
raise SupersetDisallowedSQLTableException(found_tables)
return expression

def _process_select_expression(
Expand Down Expand Up @@ -1465,19 +1457,14 @@ def _raise_for_disallowed_sql(self, sql: str) -> None:
disallowed_functions
):
raise SupersetDisallowedSQLFunctionException(disallowed_functions)
if disallowed_tables and parsed_script.check_tables_present(disallowed_tables):
if disallowed_tables:
# Report only the tables actually found in the query, mirroring the
# canonical execution-time gate so the user-facing error doesn't
# echo the operator's full denylist.
present_tables = {
table.table.lower()
for statement in parsed_script.statements
for table in statement.tables
}
found_tables = {
table for table in disallowed_tables if table.lower() in present_tables
}
raise SupersetDisallowedSQLTableException(found_tables or disallowed_tables)
# echo the operator's full denylist. Honors schema-qualified
# denylist entries (e.g. ``information_schema.tables``).
found_tables = parsed_script.get_disallowed_tables(disallowed_tables)
if found_tables:
raise SupersetDisallowedSQLTableException(found_tables)

def query(self, query_obj: QueryObjectDict) -> QueryResult:
"""
Expand Down
12 changes: 3 additions & 9 deletions superset/sql/execution/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -717,15 +717,9 @@ def _check_disallowed_tables(self, script: SQLScript) -> set[str] | None:
if not engine_disallowed:
return None

# Single-pass AST-based table detection
found: set[str] = set()
for statement in script.statements:
present = {table.table.lower() for table in statement.tables}
for table in engine_disallowed:
if table.lower() in present:
found.add(table)

return found or None
# Honors schema-qualified denylist entries (e.g.
# ``information_schema.tables``) as well as bare names.
return script.get_disallowed_tables(engine_disallowed) or None

def _apply_rls_to_script(
self, script: SQLScript, catalog: str | None, schema: str | None
Expand Down
Loading
Loading