From 5e3504af0e97a0751e9b1ccdb23605200e34454b Mon Sep 17 00:00:00 2001 From: Jayakar Katika Date: Fri, 9 Jan 2026 17:37:51 +0200 Subject: [PATCH 1/4] fix: Apply timezone offset to convert local time boundaries to UTC --- superset/models/helpers.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/superset/models/helpers.py b/superset/models/helpers.py index a4fb9e3fea15..fc81d39eea8c 100644 --- a/superset/models/helpers.py +++ b/superset/models/helpers.py @@ -2387,6 +2387,17 @@ def get_time_filter( # pylint: disable=too-many-arguments ) ) + # Apply timezone offset to convert local time boundaries to UTC + # The offset is in hours (e.g., 6 for UTC+6) + # If offset is set, we subtract it from the datetime to convert to UTC + # Example: User selects "Last day" which gives midnight local time + # If offset=6 (UTC+6), we subtract 6 hours to get UTC time + offset_hours = getattr(self, "offset", 0) or 0 + if offset_hours and start_dttm: + start_dttm = start_dttm - timedelta(hours=offset_hours) + if offset_hours and end_dttm: + end_dttm = end_dttm - timedelta(hours=offset_hours) + l = [] # noqa: E741 if start_dttm: l.append( From 903fa87a795a29957eaa2e33d2c032d1b48e2f27 Mon Sep 17 00:00:00 2001 From: Jayakar Katika Date: Thu, 22 Jan 2026 12:27:35 +0200 Subject: [PATCH 2/4] fix: use dataset's extra field's timezone to offset query - This keeps the backwards compatibility --- superset/models/helpers.py | 55 ++++++++++++++++++++++++++++---------- superset/utils/core.py | 27 +++++++++++++++++-- superset/viz.py | 1 + 3 files changed, 67 insertions(+), 16 deletions(-) diff --git a/superset/models/helpers.py b/superset/models/helpers.py index fc81d39eea8c..d68ee75bb207 100644 --- a/superset/models/helpers.py +++ b/superset/models/helpers.py @@ -892,6 +892,16 @@ def get_extra_cache_keys(self, query_obj: QueryObjectDict) -> list[Hashable]: def get_template_processor(self, **kwargs: Any) -> BaseTemplateProcessor: raise NotImplementedError() + def get_dataset_timezone(self) -> str | None: + """ + Get the timezone configured for this dataset from the extra JSON field. + + Returns an IANA timezone name (e.g., "Europe/Berlin", "America/New_York") + or None if not configured. + + """ + return self.extra_dict.get("timezone") + def get_fetch_values_predicate( self, template_processor: Optional[ # pylint: disable=unused-argument @@ -1257,11 +1267,13 @@ def _get_timestamp_format(column: str | None) -> str | None: and (col.get("is_dttm") if isinstance(col, dict) else col.is_dttm) ) + dataset_timezone = self.get_dataset_timezone() dttm_cols = [ DateColumn( timestamp_format=_get_timestamp_format(label), offset=self.offset, time_shift=query_object.time_shift, + timezone=dataset_timezone, col_label=label, ) for label in labels @@ -1274,6 +1286,7 @@ def _get_timestamp_format(column: str | None) -> str | None: timestamp_format=_get_timestamp_format(query_object.granularity), offset=self.offset, time_shift=query_object.time_shift, + timezone=dataset_timezone, ) ) @@ -2387,30 +2400,44 @@ def get_time_filter( # pylint: disable=too-many-arguments ) ) - # Apply timezone offset to convert local time boundaries to UTC - # The offset is in hours (e.g., 6 for UTC+6) - # If offset is set, we subtract it from the datetime to convert to UTC - # Example: User selects "Last day" which gives midnight local time - # If offset=6 (UTC+6), we subtract 6 hours to get UTC time - offset_hours = getattr(self, "offset", 0) or 0 - if offset_hours and start_dttm: - start_dttm = start_dttm - timedelta(hours=offset_hours) - if offset_hours and end_dttm: - end_dttm = end_dttm - timedelta(hours=offset_hours) + # Apply timezone conversion for time filter boundaries + # This converts user's local time boundaries to UTC for querying UTC-stored data + adjusted_start, adjusted_end = start_dttm, end_dttm + dataset_timezone = self.get_dataset_timezone() + + if dataset_timezone and (start_dttm or end_dttm): + try: + tz = pytz.timezone(dataset_timezone) + utc = pytz.UTC + + # The datetimes from the UI are naive (no timezone info) + # We interpret them as being in the dataset's configured timezone + # and convert them to UTC for comparison with UTC-stored data + if start_dttm: + local_start = tz.localize(start_dttm) + adjusted_start = local_start.astimezone(utc).replace(tzinfo=None) + if end_dttm: + local_end = tz.localize(end_dttm) + adjusted_end = local_end.astimezone(utc).replace(tzinfo=None) + except pytz.UnknownTimeZoneError: + logger.warning( + "Invalid timezone '%s' in dataset extra", + dataset_timezone, + ) l = [] # noqa: E741 - if start_dttm: + if adjusted_start: l.append( col >= self.db_engine_spec.get_text_clause( - self.dttm_sql_literal(start_dttm, time_col) + self.dttm_sql_literal(adjusted_start, time_col) ) ) - if end_dttm: + if adjusted_end: l.append( col < self.db_engine_spec.get_text_clause( - self.dttm_sql_literal(end_dttm, time_col) + self.dttm_sql_literal(adjusted_end, time_col) ) ) return and_(*l) diff --git a/superset/utils/core.py b/superset/utils/core.py index f089aadf70ed..8b8be07cf5d7 100644 --- a/superset/utils/core.py +++ b/superset/utils/core.py @@ -25,6 +25,7 @@ import logging import os import platform +import pytz import re import signal import smtplib @@ -1849,7 +1850,7 @@ class DateColumn: timestamp_format: str | None = None offset: int | None = None time_shift: str | None = None - + timezone: str | None = None # IANA timezone name def __hash__(self) -> int: return hash(self.col_label) @@ -1862,11 +1863,13 @@ def get_legacy_time_column( timestamp_format: str | None, offset: int | None, time_shift: str | None, + timezone: str | None = None, ) -> DateColumn: return cls( timestamp_format=timestamp_format, offset=offset, time_shift=time_shift, + timezone=timezone, col_label=DTTM_ALIAS, ) @@ -1951,8 +1954,28 @@ def normalize_dttm_col( _process_datetime_column(df, _col) - if _col.offset: + if _col.timezone: + try: + tz = pytz.timezone(_col.timezone) + # Data is stored in UTC, convert to the dataset's configured timezone + # First make the datetime UTC-aware, then convert to target timezone + series = df[_col.col_label] + if not series.empty and series.notna().any(): + # Convert UTC to target timezone + df[_col.col_label] = ( + series.dt.tz_localize("UTC") + .dt.tz_convert(tz) + .dt.tz_localize(None) # Remove timezone info for display + ) + except pytz.UnknownTimeZoneError: + logging.warning( + "Unknown timezone '%s', falling back to offset", _col.timezone + ) + if _col.offset: + df[_col.col_label] += timedelta(hours=_col.offset) + elif _col.offset: df[_col.col_label] += timedelta(hours=_col.offset) + if _col.time_shift is not None: df[_col.col_label] += parse_human_timedelta(_col.time_shift) diff --git a/superset/viz.py b/superset/viz.py index b641009602ff..c74f2cf7ddb7 100644 --- a/superset/viz.py +++ b/superset/viz.py @@ -297,6 +297,7 @@ def get_df(self, query_obj: QueryObjectDict | None = None) -> pd.DataFrame: timestamp_format=timestamp_format, offset=self.datasource.offset, time_shift=self.form_data.get("time_shift"), + timezone=self.datasource.get_dataset_timezone(), ) ] ), From 2d56e8949d089d7e55e8f18d06cb0d9570060191 Mon Sep 17 00:00:00 2001 From: Jayakar Katika Date: Mon, 26 Jan 2026 09:34:52 +0200 Subject: [PATCH 3/4] fix: ruff format and lint --- superset/utils/core.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/superset/utils/core.py b/superset/utils/core.py index 8b8be07cf5d7..2c06486673ad 100644 --- a/superset/utils/core.py +++ b/superset/utils/core.py @@ -25,7 +25,6 @@ import logging import os import platform -import pytz import re import signal import smtplib @@ -66,6 +65,7 @@ import markdown as md import nh3 import pandas as pd +import pytz import sqlalchemy as sa from cryptography.hazmat.backends import default_backend from cryptography.x509 import Certificate, load_pem_x509_certificate @@ -1851,6 +1851,7 @@ class DateColumn: offset: int | None = None time_shift: str | None = None timezone: str | None = None # IANA timezone name + def __hash__(self) -> int: return hash(self.col_label) From 9f9e4ee3f92a73d33993369ffddc0e121bf06f3d Mon Sep 17 00:00:00 2001 From: Evan Date: Sat, 13 Jun 2026 09:00:48 -0700 Subject: [PATCH 4/4] test(utils): cover dataset-timezone normalization; fix extra_dict access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds unit tests for normalize_dttm_col's new timezone path (UTC→dataset tz, DST handling, precedence over offset, invalid-tz fallback). Also reads extra_dict defensively in get_dataset_timezone — it's provided by concrete datasources (SqlaTable), not ExploreMixin, so the bare self.extra_dict tripped mypy and would AttributeError on other ExploreMixin subclasses. Co-Authored-By: Claude Fable 5 --- superset/models/helpers.py | 6 ++- tests/unit_tests/utils/test_core.py | 72 +++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 1 deletion(-) diff --git a/superset/models/helpers.py b/superset/models/helpers.py index f98915b74b36..457b9db704f2 100644 --- a/superset/models/helpers.py +++ b/superset/models/helpers.py @@ -1161,8 +1161,12 @@ def get_dataset_timezone(self) -> str | None: Returns an IANA timezone name (e.g., "Europe/Berlin", "America/New_York") or None if not configured. + ``extra_dict`` is provided by concrete datasources (e.g. ``SqlaTable``) + rather than this mixin, so read it defensively: subclasses without it + simply have no configured timezone. """ - return self.extra_dict.get("timezone") + extra = getattr(self, "extra_dict", None) or {} + return extra.get("timezone") def get_fetch_values_predicate( self, diff --git a/tests/unit_tests/utils/test_core.py b/tests/unit_tests/utils/test_core.py index 2b852796ebf1..c5486843a504 100644 --- a/tests/unit_tests/utils/test_core.py +++ b/tests/unit_tests/utils/test_core.py @@ -341,6 +341,78 @@ def test_normalize_dttm_col_with_offset_and_time_shift() -> None: assert df["date_col"][2].strftime("%Y-%m-%d %H:%M:%S") == "2022-01-01 04:00:00" +def test_normalize_dttm_col_with_timezone() -> None: + """UTC-stored values are converted to the dataset's configured timezone.""" + # Winter date: Europe/Berlin is UTC+1, so 00:00 UTC renders as 01:00 local. + df = pd.DataFrame({"date_col": ["2020-01-01 00:00:00"]}) + dttm_cols = ( + DateColumn( + col_label="date_col", + timestamp_format="%Y-%m-%d %H:%M:%S", + timezone="Europe/Berlin", + ), + ) + + normalize_dttm_col(df, dttm_cols) + + assert is_datetime64_dtype(df["date_col"]) + # tz-naive after conversion (display value), shifted by the zone offset. + assert df["date_col"][0].tzinfo is None + assert df["date_col"][0].strftime("%Y-%m-%d %H:%M:%S") == "2020-01-01 01:00:00" + + +def test_normalize_dttm_col_timezone_handles_dst() -> None: + """The timezone path respects DST, unlike a fixed hour offset.""" + # Summer date: Europe/Berlin is UTC+2 (CEST), so 00:00 UTC renders as 02:00. + df = pd.DataFrame({"date_col": ["2020-07-01 00:00:00"]}) + dttm_cols = ( + DateColumn( + col_label="date_col", + timestamp_format="%Y-%m-%d %H:%M:%S", + timezone="Europe/Berlin", + ), + ) + + normalize_dttm_col(df, dttm_cols) + + assert df["date_col"][0].strftime("%Y-%m-%d %H:%M:%S") == "2020-07-01 02:00:00" + + +def test_normalize_dttm_col_timezone_takes_precedence_over_offset() -> None: + """When both timezone and offset are set, the timezone conversion wins.""" + df = pd.DataFrame({"date_col": ["2020-01-01 00:00:00"]}) + dttm_cols = ( + DateColumn( + col_label="date_col", + timestamp_format="%Y-%m-%d %H:%M:%S", + timezone="Europe/Berlin", + offset=10, + ), + ) + + normalize_dttm_col(df, dttm_cols) + + # +1h from the Berlin (winter) conversion, NOT +10h from the offset. + assert df["date_col"][0].strftime("%Y-%m-%d %H:%M:%S") == "2020-01-01 01:00:00" + + +def test_normalize_dttm_col_invalid_timezone_falls_back_to_offset() -> None: + """An unknown timezone falls back to the plain hour offset.""" + df = pd.DataFrame({"date_col": ["2020-01-01 00:00:00"]}) + dttm_cols = ( + DateColumn( + col_label="date_col", + timestamp_format="%Y-%m-%d %H:%M:%S", + timezone="Not/AZone", + offset=3, + ), + ) + + normalize_dttm_col(df, dttm_cols) + + assert df["date_col"][0].strftime("%Y-%m-%d %H:%M:%S") == "2020-01-01 03:00:00" + + def test_normalize_dttm_col_invalid_date_coerced() -> None: """Test that invalid dates are coerced to NaT.""" df = pd.DataFrame({"date_col": ["2020-01-01", "invalid_date", "2022-01-01"]})