Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 55 additions & 0 deletions sentry_sdk/ai/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from sys import getsizeof
from typing import TYPE_CHECKING

from sentry_sdk._types import SENSITIVE_DATA_SUBSTITUTE

if TYPE_CHECKING:
from typing import Any, Callable, Dict, List, Optional, Tuple

Expand Down Expand Up @@ -141,6 +143,57 @@ def _find_truncation_index(messages: "List[Dict[str, Any]]", max_bytes: int) ->
return 0


def redact_blob_message_parts(
    messages: "List[Dict[str, Any]]",
) -> "List[Dict[str, Any]]":
    """
    Return the messages with the "content" of every blob part replaced by
    SENSITIVE_DATA_SUBSTITUTE.

    The input is never modified: callers frequently pass (shallow copies of)
    the very payload that is about to be sent to the AI provider, so redacting
    in place would strip the binary data from the real request. Only the
    containers on the path to a blob part (the outer list, the message dict,
    its "content" list and the blob part dict) are copied; everything else is
    shared with the input. If no blob part is found, the original list object
    is returned unchanged.

    Messages that are not dicts, and "content" values that are not lists, are
    passed through untouched.

    e.g:
    {
        "role": "user",
        "content": [
            {
                "text": "How many ponies do you see in the image?",
                "type": "text"
            },
            {
                "type": "blob",
                "modality": "image",
                "mime_type": "image/jpeg",
                "content": "data:image/jpeg;base64,..."
            }
        ]
    }
    becomes:
    {
        "role": "user",
        "content": [
            {
                "text": "How many ponies do you see in the image?",
                "type": "text"
            },
            {
                "type": "blob",
                "modality": "image",
                "mime_type": "image/jpeg",
                "content": "[Filtered]"
            }
        ]
    }
    """
    # Created lazily so the common "no blobs" case allocates nothing.
    redacted_messages: "Optional[List[Dict[str, Any]]]" = None

    for message_index, message in enumerate(messages):
        if not isinstance(message, dict):
            continue

        content = message.get("content")
        if not isinstance(content, list):
            continue

        redacted_content = None
        for part_index, part in enumerate(content):
            if not (isinstance(part, dict) and part.get("type") == "blob"):
                continue

            if redacted_content is None:
                redacted_content = list(content)

            # Copy the part before overwriting so the caller's dict keeps
            # its original blob payload.
            redacted_part = dict(part)
            redacted_part["content"] = SENSITIVE_DATA_SUBSTITUTE
            redacted_content[part_index] = redacted_part

        if redacted_content is None:
            continue

        if redacted_messages is None:
            redacted_messages = list(messages)

        redacted_message = dict(message)
        redacted_message["content"] = redacted_content
        redacted_messages[message_index] = redacted_message

    if redacted_messages is None:
        return messages
    return redacted_messages
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Redaction mutates outbound AI request messages

redact_blob_message_parts mutates messages (and nested content items) in place and is called from truncate_and_annotate_messages. Several integrations build messages via shallow copies (or reuse original dicts), so this can replace blob content in the original request payload before the provider call, potentially stripping images/binary inputs and changing model behavior.

Additional Locations (1)

Fix in Cursor Fix in Web

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the concern is valid: we shouldn't modify anything the user still holds a reference to, @constantinius.



def truncate_messages_by_size(
messages: "List[Dict[str, Any]]",
max_bytes: int = MAX_GEN_AI_MESSAGE_BYTES,
Expand Down Expand Up @@ -186,6 +239,8 @@ def truncate_and_annotate_messages(
if not messages:
return None

messages = redact_blob_message_parts(messages)

truncated_messages, removed_count = truncate_messages_by_size(messages, max_bytes)
if removed_count > 0:
scope._gen_ai_original_message_count[span.span_id] = len(messages)
Expand Down
106 changes: 105 additions & 1 deletion tests/test_ai_monitoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import pytest

import sentry_sdk
from sentry_sdk._types import AnnotatedValue
from sentry_sdk._types import AnnotatedValue, SENSITIVE_DATA_SUBSTITUTE
from sentry_sdk.ai.monitoring import ai_track
from sentry_sdk.ai.utils import (
MAX_GEN_AI_MESSAGE_BYTES,
Expand All @@ -13,6 +13,7 @@
truncate_and_annotate_messages,
truncate_messages_by_size,
_find_truncation_index,
redact_blob_message_parts,
)
from sentry_sdk.serializer import serialize
from sentry_sdk.utils import safe_serialize
Expand Down Expand Up @@ -542,3 +543,106 @@ def __init__(self):
assert isinstance(messages_value, AnnotatedValue)
assert messages_value.metadata["len"] == stored_original_length
assert len(messages_value.value) == len(truncated_messages)


class TestRedactBlobMessageParts:
    """
    Tests for redact_blob_message_parts.

    All assertions are made against the returned value rather than the input
    list, so they describe the redaction contract itself and do not depend on
    whether the function redacts in place or returns a redacted copy.
    """

    def test_redacts_single_blob_content(self):
        """Test that blob content is redacted in a message with single blob part"""
        messages = [
            {
                "role": "user",
                "content": [
                    {
                        "text": "How many ponies do you see in the image?",
                        "type": "text",
                    },
                    {
                        "type": "blob",
                        "modality": "image",
                        "mime_type": "image/jpeg",
                        "content": "data:image/jpeg;base64,/9j/4AAQSkZJRg==",
                    },
                ],
            }
        ]

        result = redact_blob_message_parts(messages)

        assert len(result) == 1
        assert result[0]["role"] == "user"

        text_part, blob_part = result[0]["content"]
        # Non-blob parts come through unchanged.
        assert text_part["text"] == "How many ponies do you see in the image?"
        assert text_part["type"] == "text"
        # Blob metadata is kept; only the payload is substituted.
        assert blob_part["type"] == "blob"
        assert blob_part["modality"] == "image"
        assert blob_part["mime_type"] == "image/jpeg"
        assert blob_part["content"] == SENSITIVE_DATA_SUBSTITUTE

    def test_redacts_multiple_blob_parts(self):
        """Test that multiple blob parts in a single message are all redacted"""
        messages = [
            {
                "role": "user",
                "content": [
                    {"text": "Compare these images", "type": "text"},
                    {
                        "type": "blob",
                        "modality": "image",
                        "mime_type": "image/jpeg",
                        "content": "data:image/jpeg;base64,first_image",
                    },
                    {
                        "type": "blob",
                        "modality": "image",
                        "mime_type": "image/png",
                        "content": "data:image/png;base64,second_image",
                    },
                ],
            }
        ]

        result = redact_blob_message_parts(messages)

        assert len(result) == 1
        parts = result[0]["content"]
        assert len(parts) == 3
        assert parts[0]["text"] == "Compare these images"
        assert parts[1]["content"] == SENSITIVE_DATA_SUBSTITUTE
        assert parts[1]["mime_type"] == "image/jpeg"
        assert parts[2]["content"] == SENSITIVE_DATA_SUBSTITUTE
        assert parts[2]["mime_type"] == "image/png"

    def test_redacts_blobs_in_multiple_messages(self):
        """Test that blob parts are redacted across multiple messages"""
        messages = [
            {
                "role": "user",
                "content": [
                    {"text": "First message", "type": "text"},
                    {
                        "type": "blob",
                        "modality": "image",
                        "content": "data:image/jpeg;base64,first",
                    },
                ],
            },
            {
                "role": "assistant",
                "content": "I see the image.",
            },
            {
                "role": "user",
                "content": [
                    {"text": "Second message", "type": "text"},
                    {
                        "type": "blob",
                        "modality": "image",
                        "content": "data:image/jpeg;base64,second",
                    },
                ],
            },
        ]

        result = redact_blob_message_parts(messages)

        assert len(result) == 3
        assert result[0]["content"][0]["text"] == "First message"
        assert result[0]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE
        assert result[1]["content"] == "I see the image."  # Unchanged
        assert result[2]["content"][0]["text"] == "Second message"
        assert result[2]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE
Loading