From f748de342327eb161cbb4d3d174e0bb23a1be227 Mon Sep 17 00:00:00 2001 From: Claude Code Date: Mon, 1 Jun 2026 17:33:40 -0700 Subject: [PATCH 1/9] feat(encrypt): make encryption engine selectable (AES-GCM support) [DRAFT/SIP] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit App-encrypted fields (DB passwords, SSH tunnel creds, OAuth tokens) use sqlalchemy_utils EncryptedType, which defaults to AES-CBC — unauthenticated encryption vulnerable to bit-flipping/tamper of ciphertext at rest. AES-GCM is authenticated and fails closed on tampering. Phase 1 (this change, backward compatible): add a configurable encryption engine via SQLALCHEMY_ENCRYPTED_FIELD_ENGINE ("aes" | "aes-gcm"), defaulting to "aes" so existing deployments are unchanged. The default SQLAlchemyUtilsAdapter now honors it; an explicit engine kwarg still wins. New installs can opt into AES-GCM with one line instead of writing a custom adapter. Includes docs/sip/authenticated-encryption-at-rest.md describing the full proposal: Phase 2 (a CBC->GCM re-encryption migrator built on SecretsMigrator) and Phase 3 (flip default for fresh installs). The instance-wide re-encryption migration is intentionally NOT included here — it is the subject of the SIP. DRAFT: flipping the engine on a populated DB without the Phase 2 migrator makes existing secrets undecryptable; this PR only adds the safe opt-in plumbing for discussion. Co-Authored-By: Claude Opus 4.8 --- docs/sip/authenticated-encryption-at-rest.md | 124 +++++++++++++++++++ superset/config.py | 10 ++ superset/utils/encrypt.py | 19 +++ tests/unit_tests/utils/encrypt_test.py | 57 +++++++++ 4 files changed, 210 insertions(+) create mode 100644 docs/sip/authenticated-encryption-at-rest.md create mode 100644 tests/unit_tests/utils/encrypt_test.py diff --git a/docs/sip/authenticated-encryption-at-rest.md b/docs/sip/authenticated-encryption-at-rest.md new file mode 100644 index 000000000000..956319e88daf --- /dev/null +++ b/docs/sip/authenticated-encryption-at-rest.md @@ -0,0 +1,124 @@ + + +# SIP: Authenticated encryption (AES-GCM) for app-encrypted fields + +## [DRAFT — proposal for discussion] + +This document is a draft proposal accompanying the code in this PR. It is +intended to seed the formal SIP discussion; the code change here is the +backward-compatible first step (engine selection), **not** the full migration. + +## Motivation + +Superset app-encrypts a number of sensitive fields before persisting them to +the metadata database, including: + +- database connection passwords and `encrypted_extra` (`superset/models/core.py`), +- SSH tunnel credentials — password, private key, private-key password + (`superset/databases/ssh_tunnel/models.py`), +- OAuth2 tokens and other secrets stored via `EncryptedType`. + +These fields are encrypted with `sqlalchemy_utils.EncryptedType`, which +**defaults to `AesEngine` (AES-CBC)**. AES-CBC provides confidentiality but is +**unauthenticated**: it has no integrity tag. An attacker with write access to +the ciphertext (e.g. direct metadata-DB access, a backup, or a compromised +replica) can perform **bit-flipping / chosen-ciphertext manipulation** to +silently alter the decrypted plaintext of a secret without detection. + +`AesGcmEngine` (AES-GCM) is authenticated encryption: tampering causes +decryption to fail loudly rather than yielding attacker-influenced plaintext. +Using authenticated encryption for secrets at rest is an ASVS L1 expectation +(11.3.2 / cryptography best practice). + +`config.py` already documents that operators *can* switch to GCM by writing a +custom `AbstractEncryptedFieldAdapter`, but: + +1. it is opt-in, undocumented as a security recommendation, and easy to miss; +2. there is **no migration path** — flipping the engine on a populated database + makes every existing secret undecryptable, because GCM ciphertext is not + format-compatible with CBC. + +## Proposed change + +A three-part change, delivered incrementally so existing deployments are never +broken: + +### Phase 1 — engine selection (this PR) + +- Add a `SQLALCHEMY_ENCRYPTED_FIELD_ENGINE` config (`"aes"` | `"aes-gcm"`), + **defaulting to `"aes"`** (no behavior change for existing installs). +- Teach the default `SQLAlchemyUtilsAdapter` to honor it (an explicit `engine` + kwarg still wins, so the migrator can pin an engine). +- This lets **new** deployments choose AES-GCM from day one with a one-line + config, instead of writing a custom adapter. + +### Phase 2 — CBC→GCM re-encryption migrator + +Extend the existing `SecretsMigrator` (today used for `SECRET_KEY` rotation) to +support an **engine migration** mode that: + +1. discovers every `EncryptedType` column (it already does this via + `discover_encrypted_fields()`), +2. decrypts each value with the **source** engine (AES-CBC) under the current + `SECRET_KEY`, +3. re-encrypts with the **target** engine (AES-GCM), +4. runs transactionally per the existing all-or-nothing semantics. + +Exposed as a CLI command (e.g. `superset re-encrypt-secrets --engine aes-gcm`), +runnable by operators with a DB backup in hand. + +### Phase 3 — flip the default for new installs + +Once the migrator and docs are in place, change the default to `"aes-gcm"` for +**fresh** installs only (e.g. keyed off an empty metadata DB / documented in +`UPDATING.md`), keeping existing installs on `"aes"` until they run Phase 2. + +## New or changed public interfaces + +- New config: `SQLALCHEMY_ENCRYPTED_FIELD_ENGINE: Literal["aes", "aes-gcm"]`. +- New (Phase 2) CLI: `superset re-encrypt-secrets --engine `. +- No schema changes; ciphertext format changes per migrated column. + +## Migration plan and compatibility + +- **Backward compatible by default.** Phase 1 changes nothing unless the + operator opts in. +- Switching an existing deployment to `"aes-gcm"` **without** running the Phase + 2 migrator will make existing secrets undecryptable — this is called out in + the config comment and must be in `UPDATING.md`. +- Recommended operator runbook: take a metadata-DB backup → run + `re-encrypt-secrets --engine aes-gcm` → set + `SQLALCHEMY_ENCRYPTED_FIELD_ENGINE = "aes-gcm"` → restart. +- `AesEngine` allows queryability over encrypted fields; AES-GCM does not. + Any code that filters/queries on an encrypted column directly must be audited + before Phase 3 (none is expected, but it must be verified). + +## Rejected alternatives + +- **Flip the default immediately.** Rejected: bricks every existing + deployment's secrets with no migration path. +- **Document-only (custom adapter).** Status quo; high friction and no + migration tooling — most operators will never do it. + +## Open questions + +- Should Phase 2 support GCM→CBC rollback for operators who need queryability? +- Should the migrator support concurrent SECRET_KEY rotation + engine change in + a single pass (it already handles previous-key decryption)? diff --git a/superset/config.py b/superset/config.py index b493dd4ac3d1..e13539fee345 100644 --- a/superset/config.py +++ b/superset/config.py @@ -295,6 +295,16 @@ def _try_json_readsha(filepath: str, length: int) -> str | None: SQLAlchemyUtilsAdapter ) +# Encryption engine used by the default SQLAlchemyUtilsAdapter for app-encrypted +# fields. Options: +# "aes" - AES-CBC (historical default; unauthenticated, queryable) +# "aes-gcm" - AES-GCM (authenticated encryption; recommended for NEW installs) +# WARNING: changing this on a database that already holds encrypted secrets +# (database passwords, SSH tunnel credentials, OAuth tokens, ...) will make +# those values undecryptable unless they are re-encrypted first. See the +# authenticated-encryption SIP/migration before switching an existing install. +SQLALCHEMY_ENCRYPTED_FIELD_ENGINE: Literal["aes", "aes-gcm"] = "aes" + # Extends the default SQLGlot dialects with additional dialects SQLGLOT_DIALECTS_EXTENSIONS: DialectExtensions | Callable[[], DialectExtensions] = {} diff --git a/superset/utils/encrypt.py b/superset/utils/encrypt.py index 963e72f98587..bb5d44389ec2 100644 --- a/superset/utils/encrypt.py +++ b/superset/utils/encrypt.py @@ -24,12 +24,25 @@ from sqlalchemy import Table, text, TypeDecorator from sqlalchemy.engine import Connection, Dialect, Row from sqlalchemy_utils import EncryptedType as SqlaEncryptedType +from sqlalchemy_utils.types.encrypted.encrypted_type import AesEngine, AesGcmEngine class EncryptedType(SqlaEncryptedType): cache_ok = True +# Named encryption engines selectable via the ``SQLALCHEMY_ENCRYPTED_FIELD_ENGINE`` +# config. "aes" (AES-CBC) is the historical default; "aes-gcm" is authenticated +# encryption (recommended for new deployments). NOTE: switching an existing +# deployment from "aes" to "aes-gcm" requires re-encrypting all stored secrets +# first — see the SIP referenced in the docs. Changing this on a populated +# database without that migration will make existing secrets undecryptable. +ENCRYPTION_ENGINES = { + "aes": AesEngine, + "aes-gcm": AesGcmEngine, +} + + ENC_ADAPTER_TAG_ATTR_NAME = "__created_by_enc_field_adapter__" logger = logging.getLogger(__name__) @@ -65,6 +78,12 @@ def create( **kwargs: Optional[dict[str, Any]], ) -> TypeDecorator: if app_config: + # Select the encryption engine from config, defaulting to the + # historical AES-CBC engine for backward compatibility. An explicit + # ``engine`` kwarg (e.g. from the migrator) always takes precedence. + if "engine" not in kwargs: + engine_name = app_config.get("SQLALCHEMY_ENCRYPTED_FIELD_ENGINE", "aes") + kwargs["engine"] = ENCRYPTION_ENGINES.get(engine_name, AesEngine) return EncryptedType(*args, lambda: app_config["SECRET_KEY"], **kwargs) raise Exception( # pylint: disable=broad-exception-raised diff --git a/tests/unit_tests/utils/encrypt_test.py b/tests/unit_tests/utils/encrypt_test.py new file mode 100644 index 000000000000..7403ed15c8f2 --- /dev/null +++ b/tests/unit_tests/utils/encrypt_test.py @@ -0,0 +1,57 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from sqlalchemy import String +from sqlalchemy_utils.types.encrypted.encrypted_type import AesEngine, AesGcmEngine + +from superset.utils.encrypt import SQLAlchemyUtilsAdapter + +SECRET = {"SECRET_KEY": "x" * 32} + + +def test_default_engine_is_aes_cbc() -> None: + """Without config, the adapter keeps the historical AES-CBC engine.""" + field = SQLAlchemyUtilsAdapter().create(SECRET, String(128)) + assert isinstance(field.engine, AesEngine) + + +def test_aes_gcm_engine_selected_by_config() -> None: + """SQLALCHEMY_ENCRYPTED_FIELD_ENGINE='aes-gcm' selects authenticated AES-GCM.""" + field = SQLAlchemyUtilsAdapter().create( + {**SECRET, "SQLALCHEMY_ENCRYPTED_FIELD_ENGINE": "aes-gcm"}, + String(128), + ) + assert isinstance(field.engine, AesGcmEngine) + + +def test_unknown_engine_falls_back_to_aes_cbc() -> None: + """An unrecognized engine name falls back to the safe historical default.""" + field = SQLAlchemyUtilsAdapter().create( + {**SECRET, "SQLALCHEMY_ENCRYPTED_FIELD_ENGINE": "bogus"}, + String(128), + ) + assert isinstance(field.engine, AesEngine) + + +def test_explicit_engine_kwarg_takes_precedence() -> None: + """An explicit engine kwarg overrides the config (used by the migrator).""" + field = SQLAlchemyUtilsAdapter().create( + {**SECRET, "SQLALCHEMY_ENCRYPTED_FIELD_ENGINE": "aes-gcm"}, + String(128), + engine=AesEngine, + ) + assert isinstance(field.engine, AesEngine) From 782c1db8d8a2a6bf3813a6c3a5927a7b2e5ade5f Mon Sep 17 00:00:00 2001 From: Claude Code Date: Mon, 1 Jun 2026 20:16:25 -0700 Subject: [PATCH 2/9] =?UTF-8?q?feat(encrypt):=20add=20CBC=E2=86=92GCM=20re?= =?UTF-8?q?-encryption=20migrator=20+=20CLI=20engine=20flag?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 2 of the authenticated-encryption-at-rest proposal. Teaches the existing SecretsMigrator an engine-migration mode (decrypt with the source engine under the current SECRET_KEY, re-encrypt with a target engine) and exposes it via `superset re-encrypt-secrets --engine aes-gcm`. The run is transactional and idempotent per column, so existing installs can move from AES-CBC to authenticated AES-GCM without bricking stored secrets. Default engine is unchanged ("aes"); behavior is opt-in. Adds UPDATING.md runbook and updates the SIP to reflect Phases 1–2 shipping here. Co-Authored-By: Claude Opus 4.8 --- UPDATING.md | 23 +++ docs/sip/authenticated-encryption-at-rest.md | 35 +++-- superset/cli/update.py | 30 +++- superset/utils/encrypt.py | 155 ++++++++++++++----- tests/unit_tests/utils/encrypt_test.py | 121 ++++++++++++++- 5 files changed, 308 insertions(+), 56 deletions(-) diff --git a/UPDATING.md b/UPDATING.md index 9e04218886ff..864c1ce93cc7 100644 --- a/UPDATING.md +++ b/UPDATING.md @@ -59,6 +59,29 @@ Both default to empty (no behavior change). They apply to both the `LOCAL_EXTENS The Dynamic Group By chart customization now orders its display values according to the "Sort display control values" toggle: ascending (A–Z), descending (Z–A), or the dataset's source order when the toggle is unset. Previously the dropdown always sorted alphabetically. Existing dashboards where the toggle was never set will show options in source order instead of A–Z; open the customization and enable the toggle to restore alphabetical ordering. +### Selectable encryption engine for app-encrypted fields (AES-GCM) + +App-encrypted fields (database passwords, SSH tunnel credentials, OAuth tokens, etc.) can now use authenticated **AES-GCM** encryption instead of the historical unauthenticated **AES-CBC**. A new config selects the engine for the default adapter: + +```python +# "aes" (AES-CBC, historical default) | "aes-gcm" (authenticated, recommended for new installs) +SQLALCHEMY_ENCRYPTED_FIELD_ENGINE = "aes" +``` + +**No action required / no behavior change:** the default remains `"aes"`, so existing installs are unaffected. + +**Opting in on an existing install:** flipping the engine on a populated database without re-encrypting first will make stored secrets undecryptable, because the two ciphertext formats are not compatible. A migrator is provided. Recommended runbook: + +1. Take a metadata-DB backup. +2. Re-encrypt existing secrets into the new engine (the `SECRET_KEY` is unchanged): + ```bash + superset re-encrypt-secrets --engine aes-gcm + ``` +3. Set `SQLALCHEMY_ENCRYPTED_FIELD_ENGINE = "aes-gcm"` in your config. +4. Restart Superset. + +The migration is transactional (all-or-nothing) and idempotent — it can be safely re-run or resumed. Note that AES-GCM, unlike AES-CBC, does not support querying directly over encrypted columns; audit any code that filters on an encrypted column before switching. See the SIP at `docs/sip/authenticated-encryption-at-rest.md` for details. + ### Granular Export Controls A new feature flag `GRANULAR_EXPORT_CONTROLS` introduces three fine-grained permissions that replace the legacy `can_csv` permission: diff --git a/docs/sip/authenticated-encryption-at-rest.md b/docs/sip/authenticated-encryption-at-rest.md index 956319e88daf..ea6cc8daf798 100644 --- a/docs/sip/authenticated-encryption-at-rest.md +++ b/docs/sip/authenticated-encryption-at-rest.md @@ -22,8 +22,10 @@ under the License. ## [DRAFT — proposal for discussion] This document is a draft proposal accompanying the code in this PR. It is -intended to seed the formal SIP discussion; the code change here is the -backward-compatible first step (engine selection), **not** the full migration. +intended to seed the formal SIP discussion. The code here ships the +backward-compatible engine selection **and** the re-encryption migrator +(Phases 1–2 below); both are opt-in and change nothing for existing installs by +default. Flipping the default for fresh installs (Phase 3) remains future work. ## Motivation @@ -69,20 +71,23 @@ broken: - This lets **new** deployments choose AES-GCM from day one with a one-line config, instead of writing a custom adapter. -### Phase 2 — CBC→GCM re-encryption migrator +### Phase 2 — CBC→GCM re-encryption migrator (this PR) -Extend the existing `SecretsMigrator` (today used for `SECRET_KEY` rotation) to -support an **engine migration** mode that: +The existing `SecretsMigrator` (previously only used for `SECRET_KEY` rotation) +gains an **engine migration** mode that: -1. discovers every `EncryptedType` column (it already does this via - `discover_encrypted_fields()`), +1. discovers every `EncryptedType` column (via `discover_encrypted_fields()`), 2. decrypts each value with the **source** engine (AES-CBC) under the current `SECRET_KEY`, 3. re-encrypts with the **target** engine (AES-GCM), -4. runs transactionally per the existing all-or-nothing semantics. +4. runs transactionally per the existing all-or-nothing semantics, and is + idempotent per column (already-migrated values are skipped), so a run can be + safely repeated or resumed. -Exposed as a CLI command (e.g. `superset re-encrypt-secrets --engine aes-gcm`), -runnable by operators with a DB backup in hand. +Exposed via a new `--engine` option on the existing CLI command: +`superset re-encrypt-secrets --engine aes-gcm`, runnable by operators with a DB +backup in hand. The `SECRET_KEY` is unchanged; an engine change and a key +rotation can also be combined (pass `--previous_secret_key` as well). ### Phase 3 — flip the default for new installs @@ -119,6 +124,10 @@ Once the migrator and docs are in place, change the default to `"aes-gcm"` for ## Open questions -- Should Phase 2 support GCM→CBC rollback for operators who need queryability? -- Should the migrator support concurrent SECRET_KEY rotation + engine change in - a single pass (it already handles previous-key decryption)? +- GCM→CBC rollback (for operators who need queryability) already works via the + same command (`re-encrypt-secrets --engine aes`), since the migrator is + engine-symmetric. Should rollback be documented as a supported path or + discouraged? +- The migrator already supports a concurrent `SECRET_KEY` rotation + engine + change in a single pass (pass `--previous_secret_key` alongside `--engine`). + Is that combination worth calling out in the operator docs, or kept advanced? diff --git a/superset/cli/update.py b/superset/cli/update.py index e35e394c3250..9ebc7977e187 100755 --- a/superset/cli/update.py +++ b/superset/cli/update.py @@ -31,7 +31,7 @@ import superset.utils.database as database_utils from superset.utils.decorators import transaction -from superset.utils.encrypt import SecretsMigrator +from superset.utils.encrypt import ENCRYPTION_ENGINES, SecretsMigrator logger = logging.getLogger(__name__) @@ -110,17 +110,37 @@ def update_api_docs() -> None: help="An optional previous secret key, if PREVIOUS_SECRET_KEY " "is not set on the config", ) -def re_encrypt_secrets(previous_secret_key: Optional[str] = None) -> None: +@click.option( + "--engine", + "-e", + "target_engine_name", + required=False, + type=click.Choice(sorted(ENCRYPTION_ENGINES)), + help="Re-encrypt all app-encrypted fields with this encryption engine " + "(e.g. 'aes-gcm' for authenticated encryption). The SECRET_KEY is " + "unchanged. Take a metadata-DB backup first, then set " + "SQLALCHEMY_ENCRYPTED_FIELD_ENGINE to the same value and restart.", +) +def re_encrypt_secrets( + previous_secret_key: Optional[str] = None, + target_engine_name: Optional[str] = None, +) -> None: previous_secret_key = previous_secret_key or current_app.config.get( "PREVIOUS_SECRET_KEY" ) - if previous_secret_key is None: + target_engine = ( + ENCRYPTION_ENGINES[target_engine_name] if target_engine_name else None + ) + if previous_secret_key is None and target_engine is None: click.secho( - "No previous secret key provided; nothing to re-encrypt.", + "No previous secret key or target engine provided; nothing to re-encrypt.", fg="yellow", ) return - secrets_migrator = SecretsMigrator(previous_secret_key=previous_secret_key) + secrets_migrator = SecretsMigrator( + previous_secret_key=previous_secret_key, + target_engine=target_engine, + ) try: stats = secrets_migrator.run() except Exception as exc: # pylint: disable=broad-except diff --git a/superset/utils/encrypt.py b/superset/utils/encrypt.py index bb5d44389ec2..00a4b5da8afa 100644 --- a/superset/utils/encrypt.py +++ b/superset/utils/encrypt.py @@ -19,7 +19,7 @@ from dataclasses import dataclass from typing import Any, Optional -from flask import Flask +from flask import current_app, Flask from flask_babel import lazy_gettext as _ from sqlalchemy import Table, text, TypeDecorator from sqlalchemy.engine import Connection, Dialect, Row @@ -120,10 +120,40 @@ def created_by_enc_field_factory(field: TypeDecorator) -> bool: class SecretsMigrator: - def __init__(self, previous_secret_key: str) -> None: + """Re-encrypts every app-encrypted column in the ORM. + + Two modes, which can also be combined: + + - **Key rotation** — pass ``previous_secret_key``. Values are decrypted + under the previous key and re-encrypted under the current ``SECRET_KEY`` + using the currently-configured engine. + - **Engine migration** — pass ``target_engine`` (e.g. ``AesGcmEngine``). + Values are decrypted under the current ``SECRET_KEY`` with the source + engine and re-encrypted with the target engine. This is how an existing + install moves from AES-CBC to authenticated AES-GCM without bricking + stored secrets; the ``SECRET_KEY`` itself is unchanged. + + Both modes share the same all-or-nothing transaction and per-column + idempotency: a value already readable in the target form is left untouched, + so a run can be safely repeated or resumed. + """ + + def __init__( + self, + previous_secret_key: Optional[str] = None, + target_engine: Optional[type[Any]] = None, + ) -> None: from superset import db # pylint: disable=import-outside-toplevel self._db = db + self._secret_key = current_app.config["SECRET_KEY"] + self._target_engine = target_engine + # In engine-migration mode the SECRET_KEY does not change: the source + # ciphertext is decrypted under the current key (with the source + # engine), so default the "previous" key to the current one when the + # caller only asked for an engine change. + if target_engine is not None and not previous_secret_key: + previous_secret_key = self._secret_key self._previous_secret_key = previous_secret_key self._dialect: Dialect = db.engine.url.get_dialect() @@ -196,6 +226,42 @@ def _select_columns_from_table( cols = ",".join(pk_columns + column_names) return conn.execute(f"SELECT {cols} FROM {table_name}") # noqa: S608 + def _target_type(self, encrypted_type: EncryptedType) -> EncryptedType: + """The EncryptedType to re-encrypt a value *into*. + + For a key rotation this is the column's own configured type (current + engine + lazily-resolved current key). For an engine migration it is a + type pinned to the requested target engine under the current key. + """ + if self._target_engine is None: + return encrypted_type + return EncryptedType( + type_in=encrypted_type.underlying_type, + key=self._secret_key, + engine=self._target_engine, + ) + + def _source_decryptors(self, encrypted_type: EncryptedType) -> list[EncryptedType]: + """Candidate decryptors, tried in order, to recover a value's plaintext. + + 1. The column's configured type — current key + currently-configured + engine. During an engine migration this reads the source ciphertext + while the config still points at the source engine. + 2. The previous key with the historical AES-CBC engine — covers + ``SECRET_KEY`` rotation, and (since the previous key defaults to the + current one in engine-migration mode) reading AES-CBC source data + after the config has already been flipped to the target engine. + """ + decryptors = [encrypted_type] + if self._previous_secret_key: + decryptors.append( + EncryptedType( + type_in=encrypted_type.underlying_type, + key=self._previous_secret_key, + ) + ) + return decryptors + def _re_encrypt_row( self, conn: Connection, @@ -206,24 +272,28 @@ def _re_encrypt_row( stats: ReEncryptStats, ) -> None: """ - Re encrypts all columns in a Row + Re-encrypts all columns in a Row into the target form. - Re-encryption is idempotent per column: we first ask whether the - current key can already decrypt the value, and skip if so. Only if - the current key fails do we fall back to decrypting with the - previous key and re-encrypting. Checking the current key first - keeps ``run()`` idempotent regardless of what ``previous_secret_key`` - the caller supplies — even re-running with the same (unchanged) - ``SECRET_KEY`` will not rewrite rows. + The "target form" is current-key + currently-configured engine for a + ``SECRET_KEY`` rotation, or current-key + the requested engine for an + engine migration (see ``_target_type``). + + Re-encryption is idempotent per column: a value that can already be + read in the target form is left untouched. This keeps ``run()`` + idempotent regardless of what ``previous_secret_key`` the caller + supplies, and lets an interrupted engine migration be resumed. + Otherwise the plaintext is recovered from the first candidate source + decryptor that can read it (see ``_source_decryptors``) and re-encrypted + into the target form. NULL values are never encrypted, so they are reported separately (neither re-encrypted nor "skipped because already current"). Per-column outcomes are accumulated onto ``stats`` so the caller can - report a summary. Columns whose ciphertext is unreadable under both - keys are counted as failures and logged; the exception is not - propagated, so processing continues. The caller is responsible for - raising once all rows have been scanned. + report a summary. Columns whose ciphertext is unreadable under any + candidate key/engine are counted as failures and logged; the exception + is not propagated, so processing continues. The caller is responsible + for raising once all rows have been scanned. If no columns need re-encryption, no UPDATE is issued. @@ -242,38 +312,48 @@ def _re_encrypt_row( stats.null += 1 continue - # Fast path: if the current key can already read the value, - # leave it untouched. A failure here simply means we need to try - # the previous key below — not a condition worth logging. + target_type = self._target_type(encrypted_type) + + # Fast path: if the value can already be read in the target form, + # leave it untouched. A failure here simply means we need to try the + # source decryptors below — not a condition worth logging. try: - encrypted_type.process_result_value(raw_value, self._dialect) + target_type.process_result_value(raw_value, self._dialect) except Exception: # noqa: BLE001, S110 # pylint: disable=broad-except pass else: stats.skipped += 1 continue - # Current key cannot decrypt — try the previous key. - previous_encrypted_type = EncryptedType( - type_in=encrypted_type.underlying_type, key=self._previous_secret_key - ) - try: - unencrypted_value = previous_encrypted_type.process_result_value( - raw_value, self._dialect - ) - except Exception as prev_ex: # noqa: BLE001 # pylint: disable=broad-except + # Recover the plaintext from the first source decryptor that can + # read the value (current engine/key, then previous key). + unencrypted_value = None + decrypted = False + last_error: Optional[Exception] = None + for decryptor in self._source_decryptors(encrypted_type): + try: + unencrypted_value = decryptor.process_result_value( + raw_value, self._dialect + ) + except Exception as ex: # noqa: BLE001 # pylint: disable=broad-except + last_error = ex + continue + decrypted = True + break + + if not decrypted: logger.error( - "Column [%s.%s] cannot be decrypted under the previous" - " or current secret key (%s: %s)", + "Column [%s.%s] cannot be decrypted under any known" + " key/engine (%s: %s)", table_name, column_name, - type(prev_ex).__name__, - prev_ex, + type(last_error).__name__ if last_error else "None", + last_error, ) stats.failed += 1 continue - re_encrypted_columns[column_name] = encrypted_type.process_bind_param( + re_encrypted_columns[column_name] = target_type.process_bind_param( unencrypted_value, self._dialect, ) @@ -295,12 +375,13 @@ def _re_encrypt_row( def run(self) -> ReEncryptStats: """ - Re-encrypt every encrypted column in the ORM under the current - ``SECRET_KEY``. + Re-encrypt every encrypted column in the ORM into the target form + (current ``SECRET_KEY`` and, for an engine migration, the target + engine). - Returns per-value counts of re-encrypted, skipped (already under the - current key), and failed (undecryptable) outcomes. If any failures - occurred the transaction is rolled back by raising after the + Returns per-value counts of re-encrypted, skipped (already in the + target form), null, and failed (undecryptable) outcomes. If any + failures occurred the transaction is rolled back by raising after the summary is logged, so partial re-encryption never commits. """ encrypted_meta_info = self.discover_encrypted_fields() diff --git a/tests/unit_tests/utils/encrypt_test.py b/tests/unit_tests/utils/encrypt_test.py index 7403ed15c8f2..a8ae6f4acc6d 100644 --- a/tests/unit_tests/utils/encrypt_test.py +++ b/tests/unit_tests/utils/encrypt_test.py @@ -15,12 +15,43 @@ # specific language governing permissions and limitations # under the License. +from unittest.mock import MagicMock + from sqlalchemy import String +from sqlalchemy.engine import make_url from sqlalchemy_utils.types.encrypted.encrypted_type import AesEngine, AesGcmEngine -from superset.utils.encrypt import SQLAlchemyUtilsAdapter +from superset.utils.encrypt import ( + EncryptedType, + ReEncryptStats, + SecretsMigrator, + SQLAlchemyUtilsAdapter, +) SECRET = {"SECRET_KEY": "x" * 32} +SECRET_KEY = "k" * 32 +DIALECT = make_url("sqlite://").get_dialect() + + +def _encrypted_type(engine: type) -> EncryptedType: + """A standalone EncryptedType for a given engine under SECRET_KEY.""" + return EncryptedType(String(1024), key=lambda: SECRET_KEY, engine=engine) + + +def _engine_migrator(target_engine: type) -> SecretsMigrator: + """Build a SecretsMigrator in engine-migration mode without an app context. + + ``__init__`` reads ``current_app`` and the DB dialect, so — like the + existing row-level tests that override ``_dialect`` — we set the few + attributes ``_re_encrypt_row`` actually uses directly. + """ + migrator = SecretsMigrator.__new__(SecretsMigrator) + migrator._secret_key = SECRET_KEY # noqa: SLF001 + migrator._target_engine = target_engine # noqa: SLF001 + # Engine migration keeps the SECRET_KEY; previous key defaults to current. + migrator._previous_secret_key = SECRET_KEY # noqa: SLF001 + migrator._dialect = DIALECT # noqa: SLF001 + return migrator def test_default_engine_is_aes_cbc() -> None: @@ -55,3 +86,91 @@ def test_explicit_engine_kwarg_takes_precedence() -> None: engine=AesEngine, ) assert isinstance(field.engine, AesEngine) + + +def test_engine_migration_cbc_to_gcm_re_encrypts() -> None: + """CBC source value is re-encrypted into GCM under the same SECRET_KEY. + + Mirrors the recommended runbook: the migrator runs while the config still + points at AES-CBC (the column type), re-encrypting into AES-GCM. + """ + cbc = _encrypted_type(AesEngine) + ciphertext = cbc.process_bind_param("hunter2", DIALECT) + + migrator = _engine_migrator(AesGcmEngine) + conn = MagicMock() + row = {"id": 1, "password": ciphertext} + stats = ReEncryptStats() + + migrator._re_encrypt_row( # noqa: SLF001 + conn, row, "dbs", {"password": cbc}, ["id"], stats + ) + + assert stats == ReEncryptStats(re_encrypted=1) + assert conn.execute.call_count == 1 + new_value = conn.execute.call_args.kwargs["password"] + # The stored value changed and now decrypts as GCM back to the plaintext. + assert new_value != ciphertext + gcm = _encrypted_type(AesGcmEngine) + assert gcm.process_result_value(new_value, DIALECT) == "hunter2" + + +def test_engine_migration_idempotent_for_already_target() -> None: + """A value already in the target (GCM) form is skipped — runs are resumable. + + The column is still configured as CBC (config not yet flipped), but the + value has already been migrated to GCM, so it must be left untouched. + """ + gcm_value = _encrypted_type(AesGcmEngine).process_bind_param("hunter2", DIALECT) + cbc_column = _encrypted_type(AesEngine) + + migrator = _engine_migrator(AesGcmEngine) + conn = MagicMock() + row = {"id": 1, "password": gcm_value} + stats = ReEncryptStats() + + migrator._re_encrypt_row( # noqa: SLF001 + conn, row, "dbs", {"password": cbc_column}, ["id"], stats + ) + + assert stats == ReEncryptStats(skipped=1) + assert conn.execute.call_count == 0 + + +def test_engine_migration_reads_cbc_after_config_already_flipped() -> None: + """CBC source is still migrated when the config was flipped to GCM first. + + If an operator sets the config engine to GCM before running the migrator, + the column type can no longer read the CBC value; the previous-key (== the + current key) AES-CBC decryptor recovers it and it is re-encrypted as GCM. + """ + cbc_value = _encrypted_type(AesEngine).process_bind_param("hunter2", DIALECT) + gcm_column = _encrypted_type(AesGcmEngine) + + migrator = _engine_migrator(AesGcmEngine) + conn = MagicMock() + row = {"id": 1, "password": cbc_value} + stats = ReEncryptStats() + + migrator._re_encrypt_row( # noqa: SLF001 + conn, row, "dbs", {"password": gcm_column}, ["id"], stats + ) + + assert stats == ReEncryptStats(re_encrypted=1) + new_value = conn.execute.call_args.kwargs["password"] + assert gcm_column.process_result_value(new_value, DIALECT) == "hunter2" + + +def test_engine_migration_unreadable_value_counts_as_failure() -> None: + """A value no engine/key can read is a failure, not a silent pass-through.""" + migrator = _engine_migrator(AesGcmEngine) + conn = MagicMock() + row = {"id": 1, "password": b"not-valid-ciphertext"} + stats = ReEncryptStats() + + migrator._re_encrypt_row( # noqa: SLF001 + conn, row, "dbs", {"password": _encrypted_type(AesEngine)}, ["id"], stats + ) + + assert stats == ReEncryptStats(failed=1) + assert conn.execute.call_count == 0 From 05d87edf27e9e4331f2c77fdd3944e766c0f49c2 Mon Sep 17 00:00:00 2001 From: Evan Date: Tue, 2 Jun 2026 09:43:19 -0700 Subject: [PATCH 3/9] fix(encrypt): previous-key decryptor honors column engine; case-insensitive --engine; warn on unknown engine Address bot review on #40654: - SecretsMigrator now tries the previous key under both the column's configured engine and the historical AES-CBC engine, so SECRET_KEY rotation works for AES-GCM deployments (not just CBC) while still reading CBC source data after an engine-flipped-first migration. - re-encrypt-secrets --engine accepts case-insensitive engine names. - Adapter logs a warning when SQLALCHEMY_ENCRYPTED_FIELD_ENGINE is unrecognized before falling back to AES-CBC. - config.py points operators at the SQLALCHEMY_ENCRYPTED_FIELD_ENGINE knob as the preferred way to switch engines. - Add unit test covering AES-GCM SECRET_KEY rotation. Co-Authored-By: Claude Opus 4.8 --- superset/cli/update.py | 2 +- superset/config.py | 5 +++- superset/utils/encrypt.py | 29 ++++++++++++++---- tests/unit_tests/utils/encrypt_test.py | 41 ++++++++++++++++++++++++++ 4 files changed, 70 insertions(+), 7 deletions(-) diff --git a/superset/cli/update.py b/superset/cli/update.py index 9ebc7977e187..d9c589e8bb6d 100755 --- a/superset/cli/update.py +++ b/superset/cli/update.py @@ -115,7 +115,7 @@ def update_api_docs() -> None: "-e", "target_engine_name", required=False, - type=click.Choice(sorted(ENCRYPTION_ENGINES)), + type=click.Choice(sorted(ENCRYPTION_ENGINES), case_sensitive=False), help="Re-encrypt all app-encrypted fields with this encryption engine " "(e.g. 'aes-gcm' for authenticated encryption). The SECRET_KEY is " "unchanged. Take a metadata-DB backup first, then set " diff --git a/superset/config.py b/superset/config.py index e13539fee345..6d4d40c663ca 100644 --- a/superset/config.py +++ b/superset/config.py @@ -270,7 +270,10 @@ def _try_json_readsha(filepath: str, length: int) -> str | None: # as key material. Do note that AesEngine allows for queryability over the # encrypted fields. # -# To change the default engine you need to define your own adapter: +# To switch the engine used by the default adapter, prefer the +# ``SQLALCHEMY_ENCRYPTED_FIELD_ENGINE`` knob below (e.g. "aes-gcm"). Defining a +# custom adapter, as shown next, is only needed for behaviour the built-in +# engines do not cover: # # e.g.: # diff --git a/superset/utils/encrypt.py b/superset/utils/encrypt.py index 00a4b5da8afa..964ba703d55a 100644 --- a/superset/utils/encrypt.py +++ b/superset/utils/encrypt.py @@ -83,6 +83,13 @@ def create( # ``engine`` kwarg (e.g. from the migrator) always takes precedence. if "engine" not in kwargs: engine_name = app_config.get("SQLALCHEMY_ENCRYPTED_FIELD_ENGINE", "aes") + if engine_name not in ENCRYPTION_ENGINES: + logger.warning( + "Unrecognized SQLALCHEMY_ENCRYPTED_FIELD_ENGINE %r;" + " falling back to AES-CBC. Valid engines: %s", + engine_name, + ", ".join(sorted(ENCRYPTION_ENGINES)), + ) kwargs["engine"] = ENCRYPTION_ENGINES.get(engine_name, AesEngine) return EncryptedType(*args, lambda: app_config["SECRET_KEY"], **kwargs) @@ -247,18 +254,30 @@ def _source_decryptors(self, encrypted_type: EncryptedType) -> list[EncryptedTyp 1. The column's configured type — current key + currently-configured engine. During an engine migration this reads the source ciphertext while the config still points at the source engine. - 2. The previous key with the historical AES-CBC engine — covers - ``SECRET_KEY`` rotation, and (since the previous key defaults to the - current one in engine-migration mode) reading AES-CBC source data - after the config has already been flipped to the target engine. + 2. The previous key under each supported engine. Trying the column's + configured engine covers ``SECRET_KEY`` rotation for any engine + (including AES-GCM); also trying the historical AES-CBC engine + covers an engine migration whose config was flipped to the target + engine *before* the migrator ran (the source data is still CBC under + the current key, which the previous key defaults to in + engine-migration mode). """ decryptors = [encrypted_type] if self._previous_secret_key: - decryptors.append( + # Try the column's own engine first, then any remaining supported + # engines (notably the historical AES-CBC), de-duplicated so we + # never build the same decryptor twice. + engines: list[type[Any]] = [type(encrypted_type.engine)] + for engine in ENCRYPTION_ENGINES.values(): + if engine not in engines: + engines.append(engine) + decryptors.extend( EncryptedType( type_in=encrypted_type.underlying_type, key=self._previous_secret_key, + engine=engine, ) + for engine in engines ) return decryptors diff --git a/tests/unit_tests/utils/encrypt_test.py b/tests/unit_tests/utils/encrypt_test.py index a8ae6f4acc6d..5cfb44e29990 100644 --- a/tests/unit_tests/utils/encrypt_test.py +++ b/tests/unit_tests/utils/encrypt_test.py @@ -161,6 +161,47 @@ def test_engine_migration_reads_cbc_after_config_already_flipped() -> None: assert gcm_column.process_result_value(new_value, DIALECT) == "hunter2" +def _key_rotation_migrator(previous_secret_key: str) -> SecretsMigrator: + """Build a SecretsMigrator in key-rotation mode without an app context. + + Like ``_engine_migrator`` but with no target engine: values are decrypted + under ``previous_secret_key`` and re-encrypted under the current key using + the column's own engine. + """ + migrator = SecretsMigrator.__new__(SecretsMigrator) + migrator._secret_key = SECRET_KEY # noqa: SLF001 + migrator._target_engine = None # noqa: SLF001 + migrator._previous_secret_key = previous_secret_key # noqa: SLF001 + migrator._dialect = DIALECT # noqa: SLF001 + return migrator + + +def test_key_rotation_for_aes_gcm_column() -> None: + """SECRET_KEY rotation works for an AES-GCM column. + + The previous-key fallback must use the column's AES-GCM engine, otherwise + GCM ciphertext written under the old key cannot be decrypted and the + rotation rolls back. + """ + old_key = "o" * 32 + gcm_old = EncryptedType(String(1024), key=lambda: old_key, engine=AesGcmEngine) + old_value = gcm_old.process_bind_param("hunter2", DIALECT) + gcm_column = _encrypted_type(AesGcmEngine) + + migrator = _key_rotation_migrator(previous_secret_key=old_key) + conn = MagicMock() + row = {"id": 1, "password": old_value} + stats = ReEncryptStats() + + migrator._re_encrypt_row( # noqa: SLF001 + conn, row, "dbs", {"password": gcm_column}, ["id"], stats + ) + + assert stats == ReEncryptStats(re_encrypted=1) + new_value = conn.execute.call_args.kwargs["password"] + assert gcm_column.process_result_value(new_value, DIALECT) == "hunter2" + + def test_engine_migration_unreadable_value_counts_as_failure() -> None: """A value no engine/key can read is a failure, not a silent pass-through.""" migrator = _engine_migrator(AesGcmEngine) From 731a476acc93fea59ddafdf3356035b6a151c8eb Mon Sep 17 00:00:00 2001 From: Evan Date: Fri, 5 Jun 2026 11:44:14 -0700 Subject: [PATCH 4/9] fix(encrypt): avoid logging config-sourced engine value (CodeQL) CodeQL's clear-text-logging query flags interpolating any app-config value into a log because the config also holds SECRET_KEY. The unrecognized-engine warning now lists only the valid engine names, which is enough to diagnose a typo. Co-Authored-By: Claude Opus 4.8 --- superset/utils/encrypt.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/superset/utils/encrypt.py b/superset/utils/encrypt.py index 964ba703d55a..b2d5c959d1dc 100644 --- a/superset/utils/encrypt.py +++ b/superset/utils/encrypt.py @@ -84,10 +84,14 @@ def create( if "engine" not in kwargs: engine_name = app_config.get("SQLALCHEMY_ENCRYPTED_FIELD_ENGINE", "aes") if engine_name not in ENCRYPTION_ENGINES: + # Do not log the configured value itself: it originates from + # the app config (which also holds the SECRET_KEY), and + # static analysis flags interpolating any config-sourced + # value into a log as potential clear-text secret logging. + # The set of valid engines is enough to diagnose the typo. logger.warning( - "Unrecognized SQLALCHEMY_ENCRYPTED_FIELD_ENGINE %r;" + "Unrecognized SQLALCHEMY_ENCRYPTED_FIELD_ENGINE;" " falling back to AES-CBC. Valid engines: %s", - engine_name, ", ".join(sorted(ENCRYPTION_ENGINES)), ) kwargs["engine"] = ENCRYPTION_ENGINES.get(engine_name, AesEngine) From 0fe86ea0c3a87a0050ab28f951be8fb6c341ecf4 Mon Sep 17 00:00:00 2001 From: Evan Date: Fri, 5 Jun 2026 16:37:26 -0700 Subject: [PATCH 5/9] test(encrypt): add CLI coverage for re-encrypt-secrets --engine option Co-Authored-By: Claude Opus 4.8 --- tests/integration_tests/cli_tests.py | 69 ++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/tests/integration_tests/cli_tests.py b/tests/integration_tests/cli_tests.py index c41e88decc3f..ea2e831cbd9b 100644 --- a/tests/integration_tests/cli_tests.py +++ b/tests/integration_tests/cli_tests.py @@ -29,6 +29,7 @@ import superset.cli.importexport import superset.cli.thumbnails import superset.cli.update +import superset.utils.encrypt from superset import db from superset.models.dashboard import Dashboard from tests.integration_tests.fixtures.birth_names_dashboard import ( @@ -364,3 +365,71 @@ def test_re_encrypt_secrets_failure_exits_nonzero(app_context): # The failure path must be handled by the CLI, not leaked as an # uncaught exception. assert response.exception is None or isinstance(response.exception, SystemExit) + + +def test_re_encrypt_secrets_engine_option_invokes_migrator(app_context): + """ + When --engine is provided, the CLI must resolve the engine name to the + correct engine class and pass it to SecretsMigrator as target_engine. + """ + from sqlalchemy_utils.types.encrypted.encrypted_type import AesGcmEngine + + current_app.config.pop("PREVIOUS_SECRET_KEY", None) + runner = current_app.test_cli_runner() + with mock.patch.object( + superset.cli.update, + "SecretsMigrator", + ) as migrator_mock: + migrator_mock.return_value.run.return_value = ( + superset.utils.encrypt.ReEncryptStats() + ) + response = runner.invoke( + superset.cli.update.re_encrypt_secrets, + ["--engine", "aes-gcm"], + ) + + assert response.exit_code == 0 + call_kwargs = migrator_mock.call_args.kwargs + assert call_kwargs.get("target_engine") is AesGcmEngine + assert call_kwargs.get("previous_secret_key") is None + + +def test_re_encrypt_secrets_engine_option_case_insensitive(app_context): + """ + The --engine option must be case-insensitive per + click.Choice(..., case_sensitive=False). + """ + from sqlalchemy_utils.types.encrypted.encrypted_type import AesGcmEngine + + current_app.config.pop("PREVIOUS_SECRET_KEY", None) + runner = current_app.test_cli_runner() + with mock.patch.object( + superset.cli.update, + "SecretsMigrator", + ) as migrator_mock: + migrator_mock.return_value.run.return_value = ( + superset.utils.encrypt.ReEncryptStats() + ) + response = runner.invoke( + superset.cli.update.re_encrypt_secrets, + ["--engine", "AES-GCM"], + ) + + assert response.exit_code == 0 + assert migrator_mock.call_args.kwargs.get("target_engine") is AesGcmEngine + + +def test_re_encrypt_secrets_engine_option_invalid_raises_usage(app_context): + """ + An unrecognized engine name must produce a click usage error, not a + traceback or silent failure. + """ + runner = current_app.test_cli_runner() + response = runner.invoke( + superset.cli.update.re_encrypt_secrets, + ["--engine", "nonexistent-engine"], + ) + + assert response.exit_code != 0 + assert "Invalid value" in response.output or "Usage:" in response.output + assert "aes" in response.output or "aes-gcm" in response.output From 6671c901035beec60e68c861456313ef3880f13f Mon Sep 17 00:00:00 2001 From: Claude Code Date: Tue, 9 Jun 2026 11:50:36 -0700 Subject: [PATCH 6/9] test(encrypt): cover GCM->CBC rollback re-encryption MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a regression test for the reverse engine migration (--engine aes): an AES-GCM value must be re-encrypted back to AES-CBC, not skipped. This guards the idempotency fast-path, which decrypts in the target form first — since the rollback target (AES-CBC) is unauthenticated, the test confirms it does not mis-read GCM ciphertext and wrongly skip the value. Co-Authored-By: Claude Opus 4.8 --- tests/unit_tests/utils/encrypt_test.py | 29 ++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tests/unit_tests/utils/encrypt_test.py b/tests/unit_tests/utils/encrypt_test.py index 5cfb44e29990..807987b47092 100644 --- a/tests/unit_tests/utils/encrypt_test.py +++ b/tests/unit_tests/utils/encrypt_test.py @@ -161,6 +161,35 @@ def test_engine_migration_reads_cbc_after_config_already_flipped() -> None: assert gcm_column.process_result_value(new_value, DIALECT) == "hunter2" +def test_engine_migration_gcm_to_cbc_rolls_back() -> None: + """GCM source value is rolled back to CBC under the same SECRET_KEY. + + The reverse of the forward migration (``--engine aes``). The idempotency + fast-path decrypts in the *target* form first; since the target here is + unauthenticated AES-CBC, this guards against it mis-reading the AES-GCM + ciphertext and wrongly skipping the value instead of re-encrypting it. + """ + gcm_value = _encrypted_type(AesGcmEngine).process_bind_param("hunter2", DIALECT) + gcm_column = _encrypted_type(AesGcmEngine) + + migrator = _engine_migrator(AesEngine) + conn = MagicMock() + row = {"id": 1, "password": gcm_value} + stats = ReEncryptStats() + + migrator._re_encrypt_row( # noqa: SLF001 + conn, row, "dbs", {"password": gcm_column}, ["id"], stats + ) + + assert stats == ReEncryptStats(re_encrypted=1) + new_value = conn.execute.call_args.kwargs["password"] + assert new_value != gcm_value + # The rolled-back value now decrypts as AES-CBC back to the plaintext. + assert _encrypted_type(AesEngine).process_result_value(new_value, DIALECT) == ( + "hunter2" + ) + + def _key_rotation_migrator(previous_secret_key: str) -> SecretsMigrator: """Build a SecretsMigrator in key-rotation mode without an app context. From 10c49b61f95829599d75222a076f79c387e2ca19 Mon Sep 17 00:00:00 2001 From: Evan Date: Wed, 10 Jun 2026 14:52:31 -0700 Subject: [PATCH 7/9] fix(encrypt): fail-closed engine selection + authenticated-first rollback classification MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses review feedback on the selectable encryption engine PR. Posture: authenticated checks gate decisions; unauthenticated paths never silently win. - Unknown engine name fails closed. An unrecognized SQLALCHEMY_ENCRYPTED_FIELD_ENGINE now raises at field-construction (startup) instead of silently degrading to unauthenticated AES-CBC. The value is normalized (trim/lower/underscores→hyphens) to match the CLI's case-insensitive click.Choice, and the offending value is kept out of the error message (same clear-text-logging rationale as the CodeQL fix). Absent key still defaults to "aes" (no behavior change for existing installs). - Rollback (GCM→CBC) no longer mis-skips. The "already in target form" fast path trusted any successful target decrypt — but unauthenticated CBC can coincidentally decrypt a GCM blob, counting it skipped and leaving GCM ciphertext that bricks on the next config flip while the run reports success. When the target engine is unauthenticated, the value is now first probed under any authenticated engine (current + previous key); an authenticated decrypt wins. Generalized on "is the target authenticated", not engine names. - De-dup the redundant previous-key decryptor when prev_key == SECRET_KEY (the column's own engine is already decryptors[0]). - Runbook: add the post-restart re-run step (sweeps secrets written CBC during the migration window) and a quiet-window note (multi-worker cutover is not zero-downtime). Tests: flip the unknown-engine test to assert the fail-closed raise (and that the value isn't leaked); add a normalization test; add the rollback ordering-invariant test (authenticated probe wins over a forced-spurious CBC skip); add combined key-rotation + engine-migration coverage at both the migrator and CLI levels; hoist the function-local AesGcmEngine imports. Co-Authored-By: Claude Opus 4.8 --- UPDATING.md | 7 ++ superset/utils/encrypt.py | 148 ++++++++++++++++++++----- tests/integration_tests/cli_tests.py | 32 +++++- tests/unit_tests/utils/encrypt_test.py | 109 ++++++++++++++++-- 4 files changed, 260 insertions(+), 36 deletions(-) diff --git a/UPDATING.md b/UPDATING.md index 864c1ce93cc7..81faf8a10ff5 100644 --- a/UPDATING.md +++ b/UPDATING.md @@ -79,6 +79,13 @@ SQLALCHEMY_ENCRYPTED_FIELD_ENGINE = "aes" ``` 3. Set `SQLALCHEMY_ENCRYPTED_FIELD_ENGINE = "aes-gcm"` in your config. 4. Restart Superset. +5. Re-run the migrator once more after the restart: + ```bash + superset re-encrypt-secrets --engine aes-gcm + ``` + A live instance keeps writing *new* secrets as AES-CBC during the window between step 2 and the restart in step 4; this second pass sweeps those up (it is idempotent, so already-migrated values are skipped). + +Schedule the cutover in a quiet window. Runtime reads use only the single configured engine, so in a multi-worker deployment there is an unavoidable brief decrypt-outage between the migration commit and the last worker restarting with the new config — each migrator run is transactional, but the fleet-wide cutover is not zero-downtime. The migration is transactional (all-or-nothing) and idempotent — it can be safely re-run or resumed. Note that AES-GCM, unlike AES-CBC, does not support querying directly over encrypted columns; audit any code that filters on an encrypted column before switching. See the SIP at `docs/sip/authenticated-encryption-at-rest.md` for details. diff --git a/superset/utils/encrypt.py b/superset/utils/encrypt.py index b2d5c959d1dc..ba60e2424d2d 100644 --- a/superset/utils/encrypt.py +++ b/superset/utils/encrypt.py @@ -24,7 +24,11 @@ from sqlalchemy import Table, text, TypeDecorator from sqlalchemy.engine import Connection, Dialect, Row from sqlalchemy_utils import EncryptedType as SqlaEncryptedType -from sqlalchemy_utils.types.encrypted.encrypted_type import AesEngine, AesGcmEngine +from sqlalchemy_utils.types.encrypted.encrypted_type import ( + AesEngine, + AesGcmEngine, + EncryptionDecryptionBaseEngine, +) class EncryptedType(SqlaEncryptedType): @@ -37,11 +41,53 @@ class EncryptedType(SqlaEncryptedType): # deployment from "aes" to "aes-gcm" requires re-encrypting all stored secrets # first — see the SIP referenced in the docs. Changing this on a populated # database without that migration will make existing secrets undecryptable. -ENCRYPTION_ENGINES = { +ENCRYPTION_ENGINES: dict[str, type[EncryptionDecryptionBaseEngine]] = { "aes": AesEngine, "aes-gcm": AesGcmEngine, } +# The historical fallback engine when the config does not name one. +DEFAULT_ENCRYPTION_ENGINE_NAME = "aes" + +# Engines whose ciphertext is authenticated: a successful decrypt is +# cryptographic proof the value is genuinely in that form. AES-GCM carries an +# authentication tag; AES-CBC does not, so a CBC "success" can be coincidental. +# Classification logic (the migrator's idempotency fast path) must let an +# authenticated decrypt win over an unauthenticated one, never the reverse. +AUTHENTICATED_ENGINES: frozenset[type[EncryptionDecryptionBaseEngine]] = frozenset( + {AesGcmEngine} +) + + +def _is_authenticated_engine(engine: type[EncryptionDecryptionBaseEngine]) -> bool: + return engine in AUTHENTICATED_ENGINES + + +def resolve_encryption_engine(engine_name: str) -> type[EncryptionDecryptionBaseEngine]: + """Resolve a configured engine name to its engine class, fail-closed. + + The value is normalized (trimmed, lower-cased, underscores → hyphens) so it + matches the case-insensitive CLI ``click.Choice``. An unrecognized name + raises so a misconfiguration fails at field-construction (startup) rather + than silently degrading to unauthenticated AES-CBC — which would let an + operator who typo'd ``"aes_gcm"`` believe they had authenticated encryption, + and, after a GCM migration, write new secrets as CBC into a GCM database. + + The offending value is deliberately kept out of the error message: it comes + from the app config (which also holds ``SECRET_KEY``), and static analysis + flags interpolating config-sourced values into logs/errors as potential + clear-text secret exposure. The set of valid engine names is enough to + diagnose a typo. + """ + normalized = engine_name.strip().lower().replace("_", "-") + try: + return ENCRYPTION_ENGINES[normalized] + except KeyError: + raise ValueError( + "Unrecognized SQLALCHEMY_ENCRYPTED_FIELD_ENGINE. Valid engines: " + + ", ".join(sorted(ENCRYPTION_ENGINES)) + ) from None + ENC_ADAPTER_TAG_ATTR_NAME = "__created_by_enc_field_adapter__" logger = logging.getLogger(__name__) @@ -79,22 +125,20 @@ def create( ) -> TypeDecorator: if app_config: # Select the encryption engine from config, defaulting to the - # historical AES-CBC engine for backward compatibility. An explicit - # ``engine`` kwarg (e.g. from the migrator) always takes precedence. + # historical AES-CBC engine for backward compatibility when the key + # is absent. A *present but unrecognized* value fails closed (see + # ``resolve_encryption_engine``) rather than silently degrading to + # AES-CBC. An explicit ``engine`` kwarg (e.g. from the migrator) + # always takes precedence. if "engine" not in kwargs: - engine_name = app_config.get("SQLALCHEMY_ENCRYPTED_FIELD_ENGINE", "aes") - if engine_name not in ENCRYPTION_ENGINES: - # Do not log the configured value itself: it originates from - # the app config (which also holds the SECRET_KEY), and - # static analysis flags interpolating any config-sourced - # value into a log as potential clear-text secret logging. - # The set of valid engines is enough to diagnose the typo. - logger.warning( - "Unrecognized SQLALCHEMY_ENCRYPTED_FIELD_ENGINE;" - " falling back to AES-CBC. Valid engines: %s", - ", ".join(sorted(ENCRYPTION_ENGINES)), - ) - kwargs["engine"] = ENCRYPTION_ENGINES.get(engine_name, AesEngine) + engine_name = ( + app_config.get("SQLALCHEMY_ENCRYPTED_FIELD_ENGINE") + or DEFAULT_ENCRYPTION_ENGINE_NAME + ) + # ``**kwargs`` is loosely annotated as ``Optional[dict]`` here, so + # route the resolved engine class through an ``Any`` local. + engine_cls: Any = resolve_encryption_engine(engine_name) + kwargs["engine"] = engine_cls return EncryptedType(*args, lambda: app_config["SECRET_KEY"], **kwargs) raise Exception( # pylint: disable=broad-exception-raised @@ -271,10 +315,17 @@ def _source_decryptors(self, encrypted_type: EncryptedType) -> list[EncryptedTyp # Try the column's own engine first, then any remaining supported # engines (notably the historical AES-CBC), de-duplicated so we # never build the same decryptor twice. - engines: list[type[Any]] = [type(encrypted_type.engine)] + engines: list[type[EncryptionDecryptionBaseEngine]] = [ + type(encrypted_type.engine) + ] for engine in ENCRYPTION_ENGINES.values(): if engine not in engines: engines.append(engine) + # When the previous key equals the current key (engine-migration + # mode, or a no-op rotation) the column's own engine under that key + # is already ``decryptors[0]``; don't append it again as a fallback. + if self._previous_secret_key == self._secret_key: + engines = engines[1:] decryptors.extend( EncryptedType( type_in=encrypted_type.underlying_type, @@ -285,6 +336,34 @@ def _source_decryptors(self, encrypted_type: EncryptedType) -> list[EncryptedTyp ) return decryptors + def _decrypts_under_authenticated_engine( + self, encrypted_type: EncryptedType, raw_value: bytes + ) -> bool: + """Whether ``raw_value`` decrypts under any authenticated engine. + + A successful authenticated (AES-GCM) decrypt is cryptographic proof the + value is genuinely in that engine's form — the authentication tag makes + a coincidental success negligibly unlikely. Tried under both the current + and previous keys so it holds during a combined key-rotation + engine + migration. Used to stop an *unauthenticated* target decrypt from + coincidentally classifying an authenticated value as "already migrated". + """ + keys = {self._secret_key} + if self._previous_secret_key: + keys.add(self._previous_secret_key) + for engine in AUTHENTICATED_ENGINES: + for key in keys: + try: + EncryptedType( + type_in=encrypted_type.underlying_type, + key=key, + engine=engine, + ).process_result_value(raw_value, self._dialect) + except Exception: # noqa: BLE001, S112 # pylint: disable=broad-except + continue + return True + return False + def _re_encrypt_row( self, conn: Connection, @@ -340,13 +419,32 @@ def _re_encrypt_row( # Fast path: if the value can already be read in the target form, # leave it untouched. A failure here simply means we need to try the # source decryptors below — not a condition worth logging. - try: - target_type.process_result_value(raw_value, self._dialect) - except Exception: # noqa: BLE001, S110 # pylint: disable=broad-except - pass - else: - stats.skipped += 1 - continue + # + # Caveat for an *unauthenticated* target (AES-CBC, e.g. an + # ``--engine aes`` rollback): CBC decryption has no authentication + # tag, so an authenticated (AES-GCM) ciphertext can coincidentally + # "decrypt" under CBC and be wrongly classified as already migrated — + # leaving it as GCM, which then bricks once the config is flipped to + # CBC, with the run still reporting success. So when the target is + # unauthenticated, first rule out that the value is provably in an + # authenticated form; an authenticated decrypt must win over an + # unauthenticated one. (A forward migration to AES-GCM is unaffected: + # an authenticated target read is itself proof.) + target_is_authenticated = _is_authenticated_engine(type(target_type.engine)) + provably_authenticated = ( + not target_is_authenticated + and self._decrypts_under_authenticated_engine(encrypted_type, raw_value) + ) + if not provably_authenticated: + try: + target_type.process_result_value(raw_value, self._dialect) + except ( # noqa: S110 # pylint: disable=broad-except + Exception # noqa: BLE001 + ): + pass + else: + stats.skipped += 1 + continue # Recover the plaintext from the first source decryptor that can # read the value (current engine/key, then previous key). diff --git a/tests/integration_tests/cli_tests.py b/tests/integration_tests/cli_tests.py index ea2e831cbd9b..ee96bc0a3848 100644 --- a/tests/integration_tests/cli_tests.py +++ b/tests/integration_tests/cli_tests.py @@ -25,6 +25,7 @@ import yaml # noqa: F401 from flask import current_app from freezegun import freeze_time +from sqlalchemy_utils.types.encrypted.encrypted_type import AesGcmEngine import superset.cli.importexport import superset.cli.thumbnails @@ -372,8 +373,6 @@ def test_re_encrypt_secrets_engine_option_invokes_migrator(app_context): When --engine is provided, the CLI must resolve the engine name to the correct engine class and pass it to SecretsMigrator as target_engine. """ - from sqlalchemy_utils.types.encrypted.encrypted_type import AesGcmEngine - current_app.config.pop("PREVIOUS_SECRET_KEY", None) runner = current_app.test_cli_runner() with mock.patch.object( @@ -399,8 +398,6 @@ def test_re_encrypt_secrets_engine_option_case_insensitive(app_context): The --engine option must be case-insensitive per click.Choice(..., case_sensitive=False). """ - from sqlalchemy_utils.types.encrypted.encrypted_type import AesGcmEngine - current_app.config.pop("PREVIOUS_SECRET_KEY", None) runner = current_app.test_cli_runner() with mock.patch.object( @@ -419,6 +416,33 @@ def test_re_encrypt_secrets_engine_option_case_insensitive(app_context): assert migrator_mock.call_args.kwargs.get("target_engine") is AesGcmEngine +def test_re_encrypt_secrets_combined_key_rotation_and_engine(app_context): + """ + --previous_secret_key and --engine combine in a single run: the migrator + must receive both the previous key (for decryption) and the target engine + (for re-encryption). This is the mode most likely to regress, since the + single-option tests each pin only the other's variable. + """ + current_app.config.pop("PREVIOUS_SECRET_KEY", None) + runner = current_app.test_cli_runner() + with mock.patch.object( + superset.cli.update, + "SecretsMigrator", + ) as migrator_mock: + migrator_mock.return_value.run.return_value = ( + superset.utils.encrypt.ReEncryptStats() + ) + response = runner.invoke( + superset.cli.update.re_encrypt_secrets, + ["--previous_secret_key", "old-key", "--engine", "aes-gcm"], + ) + + assert response.exit_code == 0 + call_kwargs = migrator_mock.call_args.kwargs + assert call_kwargs.get("target_engine") is AesGcmEngine + assert call_kwargs.get("previous_secret_key") == "old-key" + + def test_re_encrypt_secrets_engine_option_invalid_raises_usage(app_context): """ An unrecognized engine name must produce a click usage error, not a diff --git a/tests/unit_tests/utils/encrypt_test.py b/tests/unit_tests/utils/encrypt_test.py index 807987b47092..2a191c6922cb 100644 --- a/tests/unit_tests/utils/encrypt_test.py +++ b/tests/unit_tests/utils/encrypt_test.py @@ -15,8 +15,10 @@ # specific language governing permissions and limitations # under the License. +from unittest import mock from unittest.mock import MagicMock +import pytest from sqlalchemy import String from sqlalchemy.engine import make_url from sqlalchemy_utils.types.encrypted.encrypted_type import AesEngine, AesGcmEngine @@ -69,13 +71,36 @@ def test_aes_gcm_engine_selected_by_config() -> None: assert isinstance(field.engine, AesGcmEngine) -def test_unknown_engine_falls_back_to_aes_cbc() -> None: - """An unrecognized engine name falls back to the safe historical default.""" - field = SQLAlchemyUtilsAdapter().create( - {**SECRET, "SQLALCHEMY_ENCRYPTED_FIELD_ENGINE": "bogus"}, - String(128), - ) - assert isinstance(field.engine, AesEngine) +def test_unknown_engine_raises_fail_closed() -> None: + """An unrecognized engine name fails closed at field construction. + + Silently falling back to unauthenticated AES-CBC would let an operator who + typo'd the engine believe they had authenticated encryption — and, after a + GCM migration, write new secrets as CBC into a GCM database. The error must + not leak the configured value (it shares the config namespace as SECRET_KEY) + but must list the valid engines so a typo is diagnosable. + """ + with pytest.raises( + ValueError, match="Unrecognized SQLALCHEMY_ENCRYPTED_FIELD_ENGINE" + ) as exc_info: + SQLAlchemyUtilsAdapter().create( + {**SECRET, "SQLALCHEMY_ENCRYPTED_FIELD_ENGINE": "bogus"}, + String(128), + ) + message = str(exc_info.value) + assert "bogus" not in message + assert "aes" in message + assert "aes-gcm" in message + + +def test_engine_name_is_normalized() -> None: + """Engine names are case/separator-normalized to match the CLI's Choice.""" + for name in ("AES-GCM", "aes_gcm", " Aes-Gcm "): + field = SQLAlchemyUtilsAdapter().create( + {**SECRET, "SQLALCHEMY_ENCRYPTED_FIELD_ENGINE": name}, + String(128), + ) + assert isinstance(field.engine, AesGcmEngine) def test_explicit_engine_kwarg_takes_precedence() -> None: @@ -190,6 +215,76 @@ def test_engine_migration_gcm_to_cbc_rolls_back() -> None: ) +def test_rollback_authenticated_probe_wins_over_spurious_cbc_skip() -> None: + """Rolling back to unauthenticated CBC must re-encrypt a provably-GCM value, + never skip it — even if the unauthenticated target decrypt coincidentally + succeeds. The authenticated (GCM) interpretation must win. + + The coincidental CBC-decrypt-of-a-GCM-blob can't be crafted deterministically + (it's a ~2^-128 event), so this pins the *ordering invariant* instead: force + the target (CBC) read to "succeed", and assert the value is still re-encrypted + because the authenticated probe is consulted first and wins. Without the + guard this row would be wrongly counted as ``skipped``. + """ + gcm_value = _encrypted_type(AesGcmEngine).process_bind_param("hunter2", DIALECT) + cbc_column = _encrypted_type(AesEngine) + + migrator = _engine_migrator(AesEngine) # target = unauthenticated CBC (rollback) + + # Simulate the spurious case: the unauthenticated CBC target read "succeeds" + # even though the value is really GCM. + spurious_target = MagicMock() + spurious_target.engine = AesEngine() + spurious_target.underlying_type = cbc_column.underlying_type + spurious_target.process_result_value.return_value = "garbage" + spurious_target.process_bind_param.return_value = b"new-cbc-ciphertext" + + conn = MagicMock() + row = {"id": 1, "password": gcm_value} + stats = ReEncryptStats() + + with mock.patch.object(migrator, "_target_type", return_value=spurious_target): + migrator._re_encrypt_row( # noqa: SLF001 + conn, row, "dbs", {"password": cbc_column}, ["id"], stats + ) + + # Re-encrypted, NOT skipped: the GCM authenticator beat the spurious CBC read. + assert stats == ReEncryptStats(re_encrypted=1) + assert spurious_target.process_bind_param.call_count == 1 + + +def test_combined_key_rotation_and_engine_migration() -> None: + """Old-key AES-CBC value → current-key AES-GCM in a single run. + + Exercises the combined mode (``--previous_secret_key`` + ``--engine``): the + source ciphertext is CBC under the *previous* key, and must be recovered and + re-encrypted as GCM under the *current* key. This is the mode most likely to + regress, since each single-mode test pins only the other's variable. + """ + old_key = "o" * 32 + cbc_old = EncryptedType(String(1024), key=lambda: old_key, engine=AesEngine) + old_value = cbc_old.process_bind_param("hunter2", DIALECT) + cbc_column = _encrypted_type(AesEngine) + + migrator = _engine_migrator(AesGcmEngine) + migrator._previous_secret_key = old_key # noqa: SLF001 # rotate key too + + conn = MagicMock() + row = {"id": 1, "password": old_value} + stats = ReEncryptStats() + + migrator._re_encrypt_row( # noqa: SLF001 + conn, row, "dbs", {"password": cbc_column}, ["id"], stats + ) + + assert stats == ReEncryptStats(re_encrypted=1) + new_value = conn.execute.call_args.kwargs["password"] + # The migrated value decrypts as GCM under the *current* key. + assert _encrypted_type(AesGcmEngine).process_result_value(new_value, DIALECT) == ( + "hunter2" + ) + + def _key_rotation_migrator(previous_secret_key: str) -> SecretsMigrator: """Build a SecretsMigrator in key-rotation mode without an app context. From ad613d472aaab62d95c8ddaee5ffdeb0175a113f Mon Sep 17 00:00:00 2001 From: Evan Date: Sat, 13 Jun 2026 04:18:19 -0700 Subject: [PATCH 8/9] fix(encrypt): fail closed on empty engine value; sync SIP runbook; typed CLI tests Co-Authored-By: Claude Opus 4.8 --- docs/sip/authenticated-encryption-at-rest.md | 5 ++++- superset/utils/encrypt.py | 10 +++++++--- tests/integration_tests/cli_tests.py | 8 ++++---- tests/unit_tests/utils/encrypt_test.py | 17 +++++++++++++++++ 4 files changed, 32 insertions(+), 8 deletions(-) diff --git a/docs/sip/authenticated-encryption-at-rest.md b/docs/sip/authenticated-encryption-at-rest.md index ea6cc8daf798..c95caf25255c 100644 --- a/docs/sip/authenticated-encryption-at-rest.md +++ b/docs/sip/authenticated-encryption-at-rest.md @@ -110,7 +110,10 @@ Once the migrator and docs are in place, change the default to `"aes-gcm"` for the config comment and must be in `UPDATING.md`. - Recommended operator runbook: take a metadata-DB backup → run `re-encrypt-secrets --engine aes-gcm` → set - `SQLALCHEMY_ENCRYPTED_FIELD_ENGINE = "aes-gcm"` → restart. + `SQLALCHEMY_ENCRYPTED_FIELD_ENGINE = "aes-gcm"` → restart → re-run + `re-encrypt-secrets --engine aes-gcm` once more to sweep up any secrets a live + instance wrote as AES-CBC during the cutover window. The canonical, more + detailed version of this runbook lives in `UPDATING.md`; this is a summary. - `AesEngine` allows queryability over encrypted fields; AES-GCM does not. Any code that filters/queries on an encrypted column directly must be audited before Phase 3 (none is expected, but it must be verified). diff --git a/superset/utils/encrypt.py b/superset/utils/encrypt.py index ba60e2424d2d..4043b7a99f4b 100644 --- a/superset/utils/encrypt.py +++ b/superset/utils/encrypt.py @@ -131,9 +131,13 @@ def create( # AES-CBC. An explicit ``engine`` kwarg (e.g. from the migrator) # always takes precedence. if "engine" not in kwargs: - engine_name = ( - app_config.get("SQLALCHEMY_ENCRYPTED_FIELD_ENGINE") - or DEFAULT_ENCRYPTION_ENGINE_NAME + # Only an *absent* key defaults to AES-CBC; a present value + # (even an empty string) is routed through the fail-closed + # resolver so a blanked-out config does not silently degrade to + # unauthenticated encryption. + engine_name = app_config.get( + "SQLALCHEMY_ENCRYPTED_FIELD_ENGINE", + DEFAULT_ENCRYPTION_ENGINE_NAME, ) # ``**kwargs`` is loosely annotated as ``Optional[dict]`` here, so # route the resolved engine class through an ``Any`` local. diff --git a/tests/integration_tests/cli_tests.py b/tests/integration_tests/cli_tests.py index ee96bc0a3848..e54d6152cd6d 100644 --- a/tests/integration_tests/cli_tests.py +++ b/tests/integration_tests/cli_tests.py @@ -368,7 +368,7 @@ def test_re_encrypt_secrets_failure_exits_nonzero(app_context): assert response.exception is None or isinstance(response.exception, SystemExit) -def test_re_encrypt_secrets_engine_option_invokes_migrator(app_context): +def test_re_encrypt_secrets_engine_option_invokes_migrator(app_context) -> None: """ When --engine is provided, the CLI must resolve the engine name to the correct engine class and pass it to SecretsMigrator as target_engine. @@ -393,7 +393,7 @@ def test_re_encrypt_secrets_engine_option_invokes_migrator(app_context): assert call_kwargs.get("previous_secret_key") is None -def test_re_encrypt_secrets_engine_option_case_insensitive(app_context): +def test_re_encrypt_secrets_engine_option_case_insensitive(app_context) -> None: """ The --engine option must be case-insensitive per click.Choice(..., case_sensitive=False). @@ -416,7 +416,7 @@ def test_re_encrypt_secrets_engine_option_case_insensitive(app_context): assert migrator_mock.call_args.kwargs.get("target_engine") is AesGcmEngine -def test_re_encrypt_secrets_combined_key_rotation_and_engine(app_context): +def test_re_encrypt_secrets_combined_key_rotation_and_engine(app_context) -> None: """ --previous_secret_key and --engine combine in a single run: the migrator must receive both the previous key (for decryption) and the target engine @@ -443,7 +443,7 @@ def test_re_encrypt_secrets_combined_key_rotation_and_engine(app_context): assert call_kwargs.get("previous_secret_key") == "old-key" -def test_re_encrypt_secrets_engine_option_invalid_raises_usage(app_context): +def test_re_encrypt_secrets_engine_option_invalid_raises_usage(app_context) -> None: """ An unrecognized engine name must produce a click usage error, not a traceback or silent failure. diff --git a/tests/unit_tests/utils/encrypt_test.py b/tests/unit_tests/utils/encrypt_test.py index 2a191c6922cb..ab1a67fb02f1 100644 --- a/tests/unit_tests/utils/encrypt_test.py +++ b/tests/unit_tests/utils/encrypt_test.py @@ -93,6 +93,23 @@ def test_unknown_engine_raises_fail_closed() -> None: assert "aes-gcm" in message +def test_empty_engine_value_raises_fail_closed() -> None: + """A present-but-empty engine value fails closed instead of defaulting. + + Only an *absent* key falls back to AES-CBC. An empty string (e.g. a + blanked-out env var) must not silently degrade to unauthenticated CBC after + a GCM migration — it routes through the same fail-closed resolver as any + other unrecognized value. + """ + with pytest.raises( + ValueError, match="Unrecognized SQLALCHEMY_ENCRYPTED_FIELD_ENGINE" + ): + SQLAlchemyUtilsAdapter().create( + {**SECRET, "SQLALCHEMY_ENCRYPTED_FIELD_ENGINE": ""}, + String(128), + ) + + def test_engine_name_is_normalized() -> None: """Engine names are case/separator-normalized to match the CLI's Choice.""" for name in ("AES-GCM", "aes_gcm", " Aes-Gcm "): From e3f6631f127395c7eb7518468713524c33ffa43c Mon Sep 17 00:00:00 2001 From: Evan Date: Sat, 13 Jun 2026 05:28:51 -0700 Subject: [PATCH 9/9] fix(encrypt): fail closed on non-string engine; add docstrings + typed test params Co-Authored-By: Claude Opus 4.8 --- superset/cli/update.py | 8 ++++++++ superset/utils/encrypt.py | 21 ++++++++++++++++++++- tests/integration_tests/cli_tests.py | 17 +++++++++++++---- tests/unit_tests/utils/encrypt_test.py | 15 +++++++++++++++ 4 files changed, 56 insertions(+), 5 deletions(-) diff --git a/superset/cli/update.py b/superset/cli/update.py index d9c589e8bb6d..4dbe96ff0786 100755 --- a/superset/cli/update.py +++ b/superset/cli/update.py @@ -125,6 +125,14 @@ def re_encrypt_secrets( previous_secret_key: Optional[str] = None, target_engine_name: Optional[str] = None, ) -> None: + """Re-encrypt every app-encrypted field via :class:`SecretsMigrator`. + + Supports key rotation (``previous_secret_key``, falling back to the + ``PREVIOUS_SECRET_KEY`` config) and engine migration (``target_engine_name``, + a case-insensitive ``ENCRYPTION_ENGINES`` key such as ``aes-gcm``); the two + can combine. With neither provided the command is a no-op. Exits non-zero on + failure. + """ previous_secret_key = previous_secret_key or current_app.config.get( "PREVIOUS_SECRET_KEY" ) diff --git a/superset/utils/encrypt.py b/superset/utils/encrypt.py index 4043b7a99f4b..b788566228a1 100644 --- a/superset/utils/encrypt.py +++ b/superset/utils/encrypt.py @@ -60,10 +60,13 @@ class EncryptedType(SqlaEncryptedType): def _is_authenticated_engine(engine: type[EncryptionDecryptionBaseEngine]) -> bool: + """Return whether ``engine`` produces authenticated ciphertext (e.g. AES-GCM).""" return engine in AUTHENTICATED_ENGINES -def resolve_encryption_engine(engine_name: str) -> type[EncryptionDecryptionBaseEngine]: +def resolve_encryption_engine( + engine_name: Any, +) -> type[EncryptionDecryptionBaseEngine]: """Resolve a configured engine name to its engine class, fail-closed. The value is normalized (trimmed, lower-cased, underscores → hyphens) so it @@ -79,6 +82,14 @@ def resolve_encryption_engine(engine_name: str) -> type[EncryptionDecryptionBase clear-text secret exposure. The set of valid engine names is enough to diagnose a typo. """ + # A non-string config value (e.g. ``None`` from a custom override) must take + # the same fail-closed path rather than blowing up with an ``AttributeError`` + # during field construction. + if not isinstance(engine_name, str): + raise ValueError( + "Unrecognized SQLALCHEMY_ENCRYPTED_FIELD_ENGINE. Valid engines: " + + ", ".join(sorted(ENCRYPTION_ENGINES)) + ) normalized = engine_name.strip().lower().replace("_", "-") try: return ENCRYPTION_ENGINES[normalized] @@ -202,6 +213,14 @@ def __init__( previous_secret_key: Optional[str] = None, target_engine: Optional[type[Any]] = None, ) -> None: + """Configure a migration run. + + ``previous_secret_key`` enables key rotation (decrypt under the old key, + re-encrypt under the current ``SECRET_KEY``). ``target_engine`` enables + engine migration (e.g. ``AesGcmEngine``); in that mode the ``SECRET_KEY`` + is unchanged, so an absent ``previous_secret_key`` defaults to the current + one. Passing both combines key rotation and engine migration in one run. + """ from superset import db # pylint: disable=import-outside-toplevel self._db = db diff --git a/tests/integration_tests/cli_tests.py b/tests/integration_tests/cli_tests.py index e54d6152cd6d..eee4ee21ccde 100644 --- a/tests/integration_tests/cli_tests.py +++ b/tests/integration_tests/cli_tests.py @@ -24,6 +24,7 @@ import pytest import yaml # noqa: F401 from flask import current_app +from flask.ctx import AppContext from freezegun import freeze_time from sqlalchemy_utils.types.encrypted.encrypted_type import AesGcmEngine @@ -368,7 +369,9 @@ def test_re_encrypt_secrets_failure_exits_nonzero(app_context): assert response.exception is None or isinstance(response.exception, SystemExit) -def test_re_encrypt_secrets_engine_option_invokes_migrator(app_context) -> None: +def test_re_encrypt_secrets_engine_option_invokes_migrator( + app_context: AppContext, +) -> None: """ When --engine is provided, the CLI must resolve the engine name to the correct engine class and pass it to SecretsMigrator as target_engine. @@ -393,7 +396,9 @@ def test_re_encrypt_secrets_engine_option_invokes_migrator(app_context) -> None: assert call_kwargs.get("previous_secret_key") is None -def test_re_encrypt_secrets_engine_option_case_insensitive(app_context) -> None: +def test_re_encrypt_secrets_engine_option_case_insensitive( + app_context: AppContext, +) -> None: """ The --engine option must be case-insensitive per click.Choice(..., case_sensitive=False). @@ -416,7 +421,9 @@ def test_re_encrypt_secrets_engine_option_case_insensitive(app_context) -> None: assert migrator_mock.call_args.kwargs.get("target_engine") is AesGcmEngine -def test_re_encrypt_secrets_combined_key_rotation_and_engine(app_context) -> None: +def test_re_encrypt_secrets_combined_key_rotation_and_engine( + app_context: AppContext, +) -> None: """ --previous_secret_key and --engine combine in a single run: the migrator must receive both the previous key (for decryption) and the target engine @@ -443,7 +450,9 @@ def test_re_encrypt_secrets_combined_key_rotation_and_engine(app_context) -> Non assert call_kwargs.get("previous_secret_key") == "old-key" -def test_re_encrypt_secrets_engine_option_invalid_raises_usage(app_context) -> None: +def test_re_encrypt_secrets_engine_option_invalid_raises_usage( + app_context: AppContext, +) -> None: """ An unrecognized engine name must produce a click usage error, not a traceback or silent failure. diff --git a/tests/unit_tests/utils/encrypt_test.py b/tests/unit_tests/utils/encrypt_test.py index ab1a67fb02f1..46ff1a20d4f2 100644 --- a/tests/unit_tests/utils/encrypt_test.py +++ b/tests/unit_tests/utils/encrypt_test.py @@ -26,6 +26,7 @@ from superset.utils.encrypt import ( EncryptedType, ReEncryptStats, + resolve_encryption_engine, SecretsMigrator, SQLAlchemyUtilsAdapter, ) @@ -110,6 +111,20 @@ def test_empty_engine_value_raises_fail_closed() -> None: ) +def test_non_string_engine_value_raises_fail_closed() -> None: + """A non-string engine value (e.g. ``None``) fails closed, not with an + ``AttributeError``. + + A custom config override could set the engine to a non-string. That must + take the same controlled ``ValueError`` path as any unrecognized value + rather than raising ``AttributeError`` when the resolver normalizes it. + """ + with pytest.raises( + ValueError, match="Unrecognized SQLALCHEMY_ENCRYPTED_FIELD_ENGINE" + ): + resolve_encryption_engine(None) + + def test_engine_name_is_normalized() -> None: """Engine names are case/separator-normalized to match the CLI's Choice.""" for name in ("AES-GCM", "aes_gcm", " Aes-Gcm "):