Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion sentry_sdk/integrations/redis/_async_common.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
from typing import TYPE_CHECKING

import sentry_sdk
from sentry_sdk.consts import OP
from sentry_sdk.consts import OP, SPANDATA
from sentry_sdk.integrations.redis.consts import SPAN_ORIGIN
from sentry_sdk.integrations.redis.modules.caches import (
_compile_cache_span_properties,
_set_cache_data,
)
from sentry_sdk.integrations.redis.modules.queries import _compile_db_span_properties
from sentry_sdk.integrations.redis.utils import (
_get_safe_command,
_set_client_data,
_set_pipeline_data,
)
Expand Down Expand Up @@ -109,6 +110,12 @@ async def _sentry_execute_command(
integration,
)

additional_cache_span_attributes = {}
with capture_internal_exceptions():
additional_cache_span_attributes[SPANDATA.DB_QUERY_TEXT] = (
_get_safe_command(name, args)
)

cache_span: "Optional[Union[Span, StreamedSpan]]" = None
if cache_properties["is_cache_key"] and cache_properties["op"] is not None:
if span_streaming:
Expand All @@ -117,6 +124,7 @@ async def _sentry_execute_command(
attributes={
"sentry.op": cache_properties["op"],
"sentry.origin": SPAN_ORIGIN,
**additional_cache_span_attributes,
},
)
else:
Expand All @@ -129,13 +137,20 @@ async def _sentry_execute_command(

db_properties = _compile_db_span_properties(integration, name, args)

additional_db_span_attributes = {}
with capture_internal_exceptions():
additional_db_span_attributes[SPANDATA.DB_QUERY_TEXT] = _get_safe_command(
name, args
)

db_span: "Union[Span, StreamedSpan]"
if span_streaming:
db_span = sentry_sdk.traces.start_span(
name=db_properties["description"],
attributes={
"sentry.op": db_properties["op"],
"sentry.origin": SPAN_ORIGIN,
**additional_db_span_attributes,
},
)
else:
Expand Down
17 changes: 16 additions & 1 deletion sentry_sdk/integrations/redis/_sync_common.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
from typing import TYPE_CHECKING

import sentry_sdk
from sentry_sdk.consts import OP
from sentry_sdk.consts import OP, SPANDATA
from sentry_sdk.integrations.redis.consts import SPAN_ORIGIN
from sentry_sdk.integrations.redis.modules.caches import (
_compile_cache_span_properties,
_set_cache_data,
)
from sentry_sdk.integrations.redis.modules.queries import _compile_db_span_properties
from sentry_sdk.integrations.redis.utils import (
_get_safe_command,
_set_client_data,
_set_pipeline_data,
)
Expand Down Expand Up @@ -108,6 +109,12 @@ def sentry_patched_execute_command(
integration,
)

additional_cache_span_attributes = {}
with capture_internal_exceptions():
additional_cache_span_attributes[SPANDATA.DB_QUERY_TEXT] = (
_get_safe_command(name, args)
)

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cache spans use db.query.text

Medium Severity

For span streaming, cache spans merge additional_cache_span_attributes using SPANDATA.DB_QUERY_TEXT and _get_safe_command, but this change was meant to populate cache.key on cache spans. That mislabels cache.get / cache.put spans with a database query attribute and does not add cache.key at span creation (only later via _set_cache_data).

Additional Locations (1)
Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit c758f7f. Configure here.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this comment still needs to be addressed @ericapisani

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change was to address your earlier comment re: setting the full redis command without truncation as an attribute.

As part of this, I reversed the earlier change I had made introducing the cache.key attribute, so this bot comment is coming up because I forgot to update the title/description of the PR

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cool another case of the bots being confused 😞. Approved now!


cache_span: "Optional[Union[Span, StreamedSpan]]" = None
if cache_properties["is_cache_key"] and cache_properties["op"] is not None:
if span_streaming:
Expand All @@ -116,6 +123,7 @@ def sentry_patched_execute_command(
attributes={
"sentry.op": cache_properties["op"],
"sentry.origin": SPAN_ORIGIN,

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: The cache span is initialized with SPANDATA.CACHE_KEY set to the description string, not the key tuple, causing an incorrect attribute type in exception paths.
Severity: LOW

Suggested Fix

Modify the span creation to use the correct property from the start. Instead of setting SPANDATA.CACHE_KEY to cache_properties["description"], set it to cache_properties["key"]. This ensures the attribute has the correct type (a tuple of strings) from the moment of initialization, even in exception paths.

Prompt for AI Agent
Review the code at the location below. A potential bug has been identified by an AI
agent. Verify if this is a real issue. If it is, propose a fix; if not, explain why it's
not valid.

Location: sentry_sdk/integrations/redis/_sync_common.py#L118

Potential issue: The code in `_sync_common.py` and `_async_common.py` incorrectly
initializes the `SPANDATA.CACHE_KEY` attribute on the cache span with
`cache_properties["description"]`, which is a string. The correct value should be
`cache_properties["key"]`, a tuple of strings. While a subsequent call to
`_set_cache_data` overwrites this with the correct value in the normal execution flow,
this correction does not happen if an exception is raised during the Redis command
execution. In such an exception scenario, the span would be captured with the wrong
attribute type (a string instead of a list of strings), leading to inconsistent
telemetry data.

Also affects:

  • sentry_sdk/integrations/redis/_async_common.py:119~119

Did we get this right? 👍 / 👎 to inform future reviews.

**additional_cache_span_attributes,
},
)
else:
Expand All @@ -128,13 +136,20 @@ def sentry_patched_execute_command(

db_properties = _compile_db_span_properties(integration, name, args)

additional_db_span_attributes = {}
with capture_internal_exceptions():
additional_db_span_attributes[SPANDATA.DB_QUERY_TEXT] = _get_safe_command(
name, args
)

db_span: "Union[Span, StreamedSpan]"
if span_streaming:
db_span = sentry_sdk.traces.start_span(
name=db_properties["description"],
attributes={
"sentry.op": db_properties["op"],
"sentry.origin": SPAN_ORIGIN,
**additional_db_span_attributes,
},
)
else:
Expand Down
31 changes: 31 additions & 0 deletions tests/integrations/redis/test_redis.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@

assert parent_span["name"] == "custom parent"
assert redis_span["name"] == "GET [Filtered]"
assert redis_span["attributes"][SPANDATA.DB_QUERY_TEXT] == "GET [Filtered]"
assert redis_span["attributes"]["sentry.op"] == "db.redis"
else:
events = capture_events()
Expand Down Expand Up @@ -177,8 +178,10 @@

assert parent["name"] == "custom parent"
assert set1["name"] == "SET 'somekey1' [Filtered]"
assert set1["attributes"][SPANDATA.DB_QUERY_TEXT] == "SET 'somekey1' [Filtered]"
assert set1["attributes"]["sentry.op"] == "db.redis"
assert set2["name"] == "SET 'somekey2' [Filtered]"
assert set2["attributes"][SPANDATA.DB_QUERY_TEXT] == "SET 'somekey2' [Filtered]"
assert get["name"] == "GET 'somekey2'"
assert delete["name"] == "DEL 'somekey1' [Filtered]"
else:
Expand Down Expand Up @@ -223,8 +226,16 @@

assert parent["name"] == "custom parent"
assert set1["name"] == "SET 'somekey1' 'my secret string1'"
assert (
set1["attributes"][SPANDATA.DB_QUERY_TEXT]
== "SET 'somekey1' 'my secret string1'"
)
assert set1["attributes"]["sentry.op"] == "db.redis"
assert set2["name"] == "SET 'somekey2' 'my secret string2'"
assert (
set2["attributes"][SPANDATA.DB_QUERY_TEXT]
== "SET 'somekey2' 'my secret string2'"
)
assert get["name"] == "GET 'somekey2'"
assert delete["name"] == "DEL 'somekey1' 'somekey2'"
else:
Expand Down Expand Up @@ -271,8 +282,16 @@

assert parent["name"] == "custom parent"
assert set1["name"] == f"SET 'somekey1' '{long_string}'"
assert (
set1["attributes"][SPANDATA.DB_QUERY_TEXT]
== f"SET 'somekey1' '{long_string}'"
)
assert set1["attributes"]["sentry.op"] == "db.redis"
assert set2["name"] == f"SET 'somekey2' '{short_string}'"
assert (
set2["attributes"][SPANDATA.DB_QUERY_TEXT]
== f"SET 'somekey2' '{short_string}'"
)
else:
events = capture_events()
with start_transaction():
Expand Down Expand Up @@ -317,8 +336,16 @@

assert parent["name"] == "custom parent"
assert set1["name"] == expected_long
assert (
set1["attributes"][SPANDATA.DB_QUERY_TEXT]
== f"SET 'somekey1' '{long_string}'"
)
assert set1["attributes"]["sentry.op"] == "db.redis"
assert set2["name"] == expected_short

Check warning on line 344 in tests/integrations/redis/test_redis.py

View check run for this annotation

@sentry/warden / warden: code-review

`DB_QUERY_TEXT` span attribute bypasses user-configured `max_data_size` limit

In the span-streaming path, `db.redis` spans now set `SPANDATA.DB_QUERY_TEXT` to the raw output of `_get_safe_command(name, args)`. Unlike the span description/name (computed by `_get_db_span_description`), this value is never truncated to `integration.max_data_size`. With `send_default_pii=True`, large Redis values (e.g. a 100,000-char SET payload) are stored verbatim in the attribute, producing very large span attributes that bypass the user's configured data-size cap and risk exceeding Sentry's ingest payload limits. The same pattern applies to the cache-key attribute in both `_sync_common.py` and `_async_common.py`.
Comment on lines +339 to 344

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DB_QUERY_TEXT span attribute bypasses user-configured max_data_size limit

In the span-streaming path, db.redis spans now set SPANDATA.DB_QUERY_TEXT to the raw output of _get_safe_command(name, args). Unlike the span description/name (computed by _get_db_span_description), this value is never truncated to integration.max_data_size. With send_default_pii=True, large Redis values (e.g. a 100,000-char SET payload) are stored verbatim in the attribute, producing very large span attributes that bypass the user's configured data-size cap and risk exceeding Sentry's ingest payload limits. The same pattern applies to the cache-key attribute in both _sync_common.py and _async_common.py.

Evidence
  • _get_db_span_description in modules/queries.py truncates the span name to integration.max_data_size (description[: max_data_size - 3] + '...'), and _get_cache_span_description in modules/caches.py does the same.
  • _get_safe_command in redis/utils.py only limits arg count (_MAX_NUM_ARGS) and never truncates individual arg length or applies max_data_size.
  • _sync_common.py:141 (and the equivalent in _async_common.py) assigns additional_db_span_attributes[SPANDATA.DB_QUERY_TEXT] = _get_safe_command(name, args) and passes it straight into the streamed span attributes.
  • The new test test_data_truncation_custom (test_redis.py:339-342) configures max_data_size=30 yet asserts set1["attributes"][SPANDATA.DB_QUERY_TEXT] == f"SET 'somekey1' '{long_string}'" where long_string = 'a' * 100000, confirming the ~100k-char value is stored untruncated while the span name is truncated to 30.

Identified by Warden code-review · JGL-PYX

assert (
set2["attributes"][SPANDATA.DB_QUERY_TEXT]
== f"SET 'somekey2' '{short_string}'"
)
else:
events = capture_events()
with start_transaction():
Expand Down Expand Up @@ -401,6 +428,7 @@
assert redis_span["name"] == "GET 'foobar'"
attrs = redis_span["attributes"]
assert attrs["sentry.op"] == "db.redis"
assert attrs[SPANDATA.DB_QUERY_TEXT] == "GET 'foobar'"
assert attrs[SPANDATA.DB_SYSTEM_NAME] == "redis"
assert attrs[SPANDATA.DB_DRIVER_NAME] == "redis-py"
assert attrs[SPANDATA.DB_NAMESPACE] == "1"
Expand Down Expand Up @@ -508,6 +536,9 @@
assert parent_span["name"] == "custom parent"
assert parent_span["attributes"]["sentry.origin"] == "manual"
assert set_span["attributes"]["sentry.origin"] == "auto.db.redis"
assert (
set_span["attributes"][SPANDATA.DB_QUERY_TEXT] == "SET 'somekey' [Filtered]"
)
assert pipeline_span["attributes"]["sentry.origin"] == "auto.db.redis"
else:
events = capture_events()
Expand Down
15 changes: 15 additions & 0 deletions tests/integrations/redis/test_redis_cache_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,21 +83,34 @@ def test_cache_basic(sentry_init, capture_events, capture_items, span_streaming)
assert payloads[1]["attributes"]["sentry.op"] == "db.redis"
assert payloads[1]["attributes"][SPANDATA.DB_OPERATION_NAME] == "GET"
assert payloads[2]["attributes"]["sentry.op"] == "cache.get"
assert payloads[2]["attributes"][SPANDATA.DB_QUERY_TEXT] == "GET 'mycachekey'"

# set: db then cache.put
assert payloads[3]["attributes"]["sentry.op"] == "db.redis"
assert payloads[3]["attributes"][SPANDATA.DB_OPERATION_NAME] == "SET"
assert payloads[4]["attributes"]["sentry.op"] == "cache.put"
assert (
payloads[4]["attributes"][SPANDATA.DB_QUERY_TEXT]
== "SET 'mycachekey1' [Filtered]"
)

# setex: db then cache.put
assert payloads[5]["attributes"]["sentry.op"] == "db.redis"
assert payloads[5]["attributes"][SPANDATA.DB_OPERATION_NAME] == "SETEX"
assert payloads[6]["attributes"]["sentry.op"] == "cache.put"
assert (
payloads[6]["attributes"][SPANDATA.DB_QUERY_TEXT]
== "SETEX 'mycachekey2' [Filtered] [Filtered]"
)

# mget: db then cache.get
assert payloads[7]["attributes"]["sentry.op"] == "db.redis"
assert payloads[7]["attributes"][SPANDATA.DB_OPERATION_NAME] == "MGET"
assert payloads[8]["attributes"]["sentry.op"] == "cache.get"
assert (
payloads[8]["attributes"][SPANDATA.DB_QUERY_TEXT]
== "MGET 'mycachekey1' [Filtered]"
)

assert payloads[9]["name"] == "custom parent"
else:
Expand Down Expand Up @@ -169,12 +182,14 @@ def test_cache_keys(sentry_init, capture_events, capture_items, span_streaming):
assert payloads[1]["name"] == "GET 'blub'"
assert payloads[2]["attributes"]["sentry.op"] == "cache.get"
assert payloads[2]["name"] == "blub"
assert payloads[2]["attributes"][SPANDATA.DB_QUERY_TEXT] == "GET 'blub'"

# blubkeything: db then cache.get
assert payloads[3]["attributes"]["sentry.op"] == "db.redis"
assert payloads[3]["name"] == "GET 'blubkeything'"
assert payloads[4]["attributes"]["sentry.op"] == "cache.get"
assert payloads[4]["name"] == "blubkeything"
Comment thread
sentry-warden[bot] marked this conversation as resolved.
assert payloads[4]["attributes"][SPANDATA.DB_QUERY_TEXT] == "GET 'blubkeything'"

# bl: db only (no prefix match)
assert payloads[5]["attributes"]["sentry.op"] == "db.redis"
Expand Down
4 changes: 4 additions & 0 deletions tests/integrations/redis/test_redis_cache_module_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
)

import sentry_sdk
from sentry_sdk.consts import SPANDATA
from sentry_sdk.integrations.redis import RedisIntegration
from sentry_sdk.utils import parse_version

Expand Down Expand Up @@ -83,6 +84,7 @@ async def test_cache_basic(sentry_init, capture_events, capture_items, span_stre
assert parent_span["name"] == "custom parent"
assert db_span["attributes"]["sentry.op"] == "db.redis"
assert cache_span["attributes"]["sentry.op"] == "cache.get"
assert cache_span["attributes"][SPANDATA.CACHE_KEY] == ["myasynccachekey"]
else:
events = capture_events()
with sentry_sdk.start_transaction():
Expand Down Expand Up @@ -132,12 +134,14 @@ async def test_cache_keys(sentry_init, capture_events, capture_items, span_strea
assert payloads[1]["name"] == "GET 'ablub'"
assert payloads[2]["attributes"]["sentry.op"] == "cache.get"
assert payloads[2]["name"] == "ablub"
assert payloads[2]["attributes"][SPANDATA.CACHE_KEY] == ["ablub"]

# ablubkeything: db then cache.get
assert payloads[3]["attributes"]["sentry.op"] == "db.redis"
assert payloads[3]["name"] == "GET 'ablubkeything'"
assert payloads[4]["attributes"]["sentry.op"] == "cache.get"
assert payloads[4]["name"] == "ablubkeything"
assert payloads[4]["attributes"][SPANDATA.CACHE_KEY] == ["ablubkeything"]

# abl: db only (no prefix match)
assert payloads[5]["attributes"]["sentry.op"] == "db.redis"
Expand Down
Loading