diff --git a/superset/models/helpers.py b/superset/models/helpers.py
index 0f6963087240..457b9db704f2 100644
--- a/superset/models/helpers.py
+++ b/superset/models/helpers.py
@@ -1154,6 +1154,27 @@ def get_extra_cache_keys(self, query_obj: QueryObjectDict) -> list[Hashable]:
     def get_template_processor(self, **kwargs: Any) -> BaseTemplateProcessor:
         raise NotImplementedError()
 
+    def get_dataset_timezone(self) -> str | None:
+        """
+        Get the timezone configured for this dataset from the extra JSON field.
+
+        ``extra_dict`` is provided by concrete datasources (e.g. ``SqlaTable``)
+        rather than this mixin, so read it defensively: subclasses without it
+        simply have no configured timezone.
+
+        :returns: The configured IANA timezone name (e.g. "Europe/Berlin",
+            "America/New_York"), or None when it is missing, blank, or not a
+            string. The name itself is not validated here.
+        """
+        extra = getattr(self, "extra_dict", None)
+        if not isinstance(extra, dict):
+            return None
+        configured = extra.get("timezone")
+        if not isinstance(configured, str):
+            return None
+        # Treat blank values as "not configured".
+        return configured.strip() or None
+
     def get_fetch_values_predicate(
         self,
         template_processor: Optional[  # pylint: disable=unused-argument
@@ -1598,11 +1619,13 @@ def _get_timestamp_format(column: str | None) -> str | None:
             and (col.get("is_dttm") if isinstance(col, dict) else col.is_dttm)
         )
 
+        dataset_timezone = self.get_dataset_timezone()
         dttm_cols = [
             DateColumn(
                 timestamp_format=_get_timestamp_format(label),
                 offset=self.offset,
                 time_shift=query_object.time_shift,
+                timezone=dataset_timezone,
                 col_label=label,
             )
             for label in labels
@@ -1615,6 +1638,7 @@ def _get_timestamp_format(column: str | None) -> str | None:
                     timestamp_format=_get_timestamp_format(query_object.granularity),
                     offset=self.offset,
                     time_shift=query_object.time_shift,
+                    timezone=dataset_timezone,
                 )
             )
 
@@ -2762,19 +2786,50 @@ def get_time_filter(  # pylint: disable=too-many-arguments
             )
         )
 
+        # Time filter boundaries are wall-clock times in the dataset's configured
+        # timezone; convert them to naive UTC to query UTC-stored data.
+        adjusted_start, adjusted_end = start_dttm, end_dttm
+        dataset_timezone = self.get_dataset_timezone()
+
+        if dataset_timezone and (start_dttm or end_dttm):
+            try:
+                tz = pytz.timezone(dataset_timezone)
+            except pytz.UnknownTimeZoneError:
+                # Best effort: query with the unadjusted boundaries.
+                logger.warning(
+                    "Invalid timezone '%s' in dataset extra",
+                    dataset_timezone,
+                )
+            else:
+                # Naive datetimes are interpreted in the dataset timezone. pytz's
+                # localize() raises ValueError on aware datetimes, so those are
+                # converted to UTC directly.
+                if start_dttm:
+                    if start_dttm.tzinfo is None:
+                        adjusted_start = tz.localize(start_dttm)
+                    adjusted_start = adjusted_start.astimezone(pytz.UTC).replace(
+                        tzinfo=None
+                    )
+                if end_dttm:
+                    if end_dttm.tzinfo is None:
+                        adjusted_end = tz.localize(end_dttm)
+                    adjusted_end = adjusted_end.astimezone(pytz.UTC).replace(
+                        tzinfo=None
+                    )
+
         l = []  # noqa: E741
-        if start_dttm:
+        if adjusted_start:
             l.append(
                 col
                 >= self.db_engine_spec.get_text_clause(
-                    self.dttm_sql_literal(start_dttm, time_col)
+                    self.dttm_sql_literal(adjusted_start, time_col)
                 )
             )
-        if end_dttm:
+        if adjusted_end:
             l.append(
                 col
                 < self.db_engine_spec.get_text_clause(
-                    self.dttm_sql_literal(end_dttm, time_col)
+                    self.dttm_sql_literal(adjusted_end, time_col)
                 )
             )
         if not l:
diff --git a/superset/utils/core.py b/superset/utils/core.py
index 045450b868e1..97165dca09f2 100644
--- a/superset/utils/core.py
+++ b/superset/utils/core.py
@@ -65,6 +65,7 @@
 import markdown as md
 import nh3
 import pandas as pd
+import pytz
 import sqlalchemy as sa
 from cryptography.hazmat.backends import default_backend
 from cryptography.x509 import Certificate, load_pem_x509_certificate
@@ -1935,6 +1936,7 @@ class DateColumn:
     timestamp_format: str | None = None
     offset: int | None = None
     time_shift: str | None = None
+    timezone: str | None = None  # IANA timezone name
 
     def __hash__(self) -> int:
         return hash(self.col_label)
@@ -1948,11 +1950,13 @@ def get_legacy_time_column(
         timestamp_format: str | None,
         offset: int | None,
         time_shift: str | None,
+        timezone: str | None = None,
     ) -> DateColumn:
         return cls(
             timestamp_format=timestamp_format,
             offset=offset,
             time_shift=time_shift,
+            timezone=timezone,
             col_label=DTTM_ALIAS,
         )
 
@@ -2037,8 +2041,29 @@ def normalize_dttm_col(
 
         _process_datetime_column(df, _col)
 
-        if _col.offset:
+        # A configured dataset timezone takes precedence over the fixed hour
+        # offset because it is DST-aware; an unknown zone name falls back to it.
+        tz = None
+        if _col.timezone:
+            try:
+                tz = pytz.timezone(_col.timezone)
+            except pytz.UnknownTimeZoneError:
+                logging.getLogger(__name__).warning(
+                    "Unknown timezone '%s', falling back to offset", _col.timezone
+                )
+
+        if tz is not None:
+            series = df[_col.col_label]
+            if pd.api.types.is_datetime64_any_dtype(series):
+                # Naive values are treated as UTC; tz_localize() raises
+                # TypeError on tz-aware data, which is converted directly.
+                if series.dt.tz is None:
+                    series = series.dt.tz_localize("UTC")
+                # Drop tzinfo again so the local wall-clock time is displayed.
+                df[_col.col_label] = series.dt.tz_convert(tz).dt.tz_localize(None)
+        elif _col.offset:
             df[_col.col_label] += timedelta(hours=_col.offset)
+
         if _col.time_shift is not None:
             df[_col.col_label] += parse_human_timedelta(_col.time_shift)
 
diff --git a/superset/viz.py b/superset/viz.py
index a751c99d70a1..c91c7ee6a163 100644
--- a/superset/viz.py
+++ b/superset/viz.py
@@ -324,6 +324,7 @@ def get_df(self, query_obj: QueryObjectDict | None = None) -> pd.DataFrame:
                             timestamp_format=timestamp_format,
                             offset=self.datasource.offset,
                             time_shift=self.form_data.get("time_shift"),
+                            timezone=self.datasource.get_dataset_timezone(),
                         )
                     ]
                 ),
diff --git a/tests/unit_tests/utils/test_core.py b/tests/unit_tests/utils/test_core.py
index 2b852796ebf1..c5486843a504 100644
--- a/tests/unit_tests/utils/test_core.py
+++ b/tests/unit_tests/utils/test_core.py
@@ -341,6 +341,78 @@ def test_normalize_dttm_col_with_offset_and_time_shift() -> None:
     assert df["date_col"][2].strftime("%Y-%m-%d %H:%M:%S") == "2022-01-01 04:00:00"
 
 
+def test_normalize_dttm_col_with_timezone() -> None:
+    """UTC-stored values are converted to the dataset's configured timezone."""
+    # Winter date: Europe/Berlin is UTC+1, so 00:00 UTC renders as 01:00 local.
+    df = pd.DataFrame({"date_col": ["2020-01-01 00:00:00"]})
+    dttm_cols = (
+        DateColumn(
+            col_label="date_col",
+            timestamp_format="%Y-%m-%d %H:%M:%S",
+            timezone="Europe/Berlin",
+        ),
+    )
+
+    normalize_dttm_col(df, dttm_cols)
+
+    assert is_datetime64_dtype(df["date_col"])
+    # tz-naive after conversion (display value), shifted by the zone offset.
+    assert df["date_col"][0].tzinfo is None
+    assert df["date_col"][0].strftime("%Y-%m-%d %H:%M:%S") == "2020-01-01 01:00:00"
+
+
+def test_normalize_dttm_col_timezone_handles_dst() -> None:
+    """The timezone path respects DST, unlike a fixed hour offset."""
+    # Summer date: Europe/Berlin is UTC+2 (CEST), so 00:00 UTC renders as 02:00.
+    df = pd.DataFrame({"date_col": ["2020-07-01 00:00:00"]})
+    dttm_cols = (
+        DateColumn(
+            col_label="date_col",
+            timestamp_format="%Y-%m-%d %H:%M:%S",
+            timezone="Europe/Berlin",
+        ),
+    )
+
+    normalize_dttm_col(df, dttm_cols)
+
+    assert df["date_col"][0].strftime("%Y-%m-%d %H:%M:%S") == "2020-07-01 02:00:00"
+
+
+def test_normalize_dttm_col_timezone_takes_precedence_over_offset() -> None:
+    """When both timezone and offset are set, the timezone conversion wins."""
+    df = pd.DataFrame({"date_col": ["2020-01-01 00:00:00"]})
+    dttm_cols = (
+        DateColumn(
+            col_label="date_col",
+            timestamp_format="%Y-%m-%d %H:%M:%S",
+            timezone="Europe/Berlin",
+            offset=10,
+        ),
+    )
+
+    normalize_dttm_col(df, dttm_cols)
+
+    # +1h from the Berlin (winter) conversion, NOT +10h from the offset.
+    assert df["date_col"][0].strftime("%Y-%m-%d %H:%M:%S") == "2020-01-01 01:00:00"
+
+
+def test_normalize_dttm_col_invalid_timezone_falls_back_to_offset() -> None:
+    """An unknown timezone falls back to the plain hour offset."""
+    df = pd.DataFrame({"date_col": ["2020-01-01 00:00:00"]})
+    dttm_cols = (
+        DateColumn(
+            col_label="date_col",
+            timestamp_format="%Y-%m-%d %H:%M:%S",
+            timezone="Not/AZone",
+            offset=3,
+        ),
+    )
+
+    normalize_dttm_col(df, dttm_cols)
+
+    assert df["date_col"][0].strftime("%Y-%m-%d %H:%M:%S") == "2020-01-01 03:00:00"
+
+
 def test_normalize_dttm_col_invalid_date_coerced() -> None:
     """Test that invalid dates are coerced to NaT."""
     df = pd.DataFrame({"date_col": ["2020-01-01", "invalid_date", "2022-01-01"]})