sentry_sdk/integrations/anthropic.py (15 additions & 2 deletions)
@@ -108,6 +108,11 @@ def _get_token_usage(result: "Messages") -> "tuple[int, int, int, int]":
     ):
         cache_write_input_tokens = usage.cache_creation_input_tokens
 
+    # Anthropic's input_tokens excludes cached/cache_write tokens.
+    # Normalize to total input tokens so downstream cost calculations
+    # (input_tokens - cached) don't produce negative values.
+    input_tokens += cache_read_input_tokens + cache_write_input_tokens
Comment on lines +111 to +114

Contributor:
You're already adding cache read and write outside of this afterwards, so this can be removed.

Member (author):
No, I don't think we can remove this: there are two codepaths below, one for non-streaming and one for streaming. The streaming one does not use this function to retrieve the token usage, so it needs to normalize separately.

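To make the normalization concrete, here is a minimal sketch (illustrative numbers only, not the SDK's actual cost code) of why a downstream `input_tokens - cached` calculation goes negative without it:

```python
# Anthropic reports cached tokens separately, so the API's input_tokens
# alone is not the total input.
input_tokens = 20  # non-cached input tokens, as reported by the API
cache_read = 80    # tokens served from the prompt cache

# A downstream consumer that assumes input_tokens is the TOTAL input
# computes the uncached share as:
uncached = input_tokens - cache_read
assert uncached == -60  # negative: the bug this PR fixes

# After normalizing input_tokens to the total (20 + 80 = 100):
total_input = input_tokens + cache_read
assert total_input - cache_read == 20  # non-negative, as expected
```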

+
     return (
         input_tokens,
         output_tokens,
@@ -466,11 +471,15 @@ def new_iterator() -> "Iterator[MessageStreamEvent]":
                 )
             yield event
 
+        # Anthropic's input_tokens excludes cached/cache_write tokens.
+        # Normalize to total input tokens for correct cost calculations.
+        total_input = usage.input_tokens + (usage.cache_read_input_tokens or 0) + (usage.cache_write_input_tokens or 0)
+
         _set_output_data(
             span=span,
             integration=integration,
             model=model,
-            input_tokens=usage.input_tokens,
+            input_tokens=total_input,
             output_tokens=usage.output_tokens,
             cache_read_input_tokens=usage.cache_read_input_tokens,
             cache_write_input_tokens=usage.cache_write_input_tokens,
@@ -496,11 +505,15 @@ async def new_iterator_async() -> "AsyncIterator[MessageStreamEvent]":
                 )
             yield event
 
+        # Anthropic's input_tokens excludes cached/cache_write tokens.
+        # Normalize to total input tokens for correct cost calculations.
+        total_input = usage.input_tokens + (usage.cache_read_input_tokens or 0) + (usage.cache_write_input_tokens or 0)
+
         _set_output_data(
             span=span,
             integration=integration,
             model=model,
-            input_tokens=usage.input_tokens,
+            input_tokens=total_input,
             output_tokens=usage.output_tokens,
             cache_read_input_tokens=usage.cache_read_input_tokens,
             cache_write_input_tokens=usage.cache_write_input_tokens,
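A note on the `or 0` guards in the streaming paths above: a minimal sketch, assuming a hypothetical usage-like object whose cache fields default to None when prompt caching is not used (the exact defaults on the SDK's accumulated usage object are an assumption here):

```python
from types import SimpleNamespace

# Hypothetical usage object: cache fields are None when no caching occurred.
usage = SimpleNamespace(
    input_tokens=100,
    cache_read_input_tokens=None,
    cache_write_input_tokens=None,
)

# Without the guards, `100 + None` would raise a TypeError; with them,
# the no-caching case passes through unchanged.
total_input = (
    usage.input_tokens
    + (usage.cache_read_input_tokens or 0)
    + (usage.cache_write_input_tokens or 0)
)
assert total_input == 100
```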
tests/integrations/anthropic/test_anthropic.py (150 additions & 0 deletions)
@@ -2265,6 +2265,156 @@ def test_cache_tokens_nonstreaming(sentry_init, capture_events):
    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20


def test_input_tokens_include_cached_nonstreaming(sentry_init, capture_events):
    """
    Test that gen_ai.usage.input_tokens includes cached tokens.

    Anthropic's usage.input_tokens excludes cached/cache_write tokens,
    but gen_ai.usage.input_tokens should be the TOTAL input tokens
    (including cached + cache_write) so that downstream cost calculations
    don't produce negative values.

    See: negative gen_ai.cost.input_tokens bug when cache_read > input_tokens.
    """
    sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0)
    events = capture_events()
    client = Anthropic(api_key="z")

    # Simulate an Anthropic response where input_tokens=100 EXCLUDES cached
    # tokens; cache_read=80 and cache_write=20 are reported separately.
    # Total input tokens processed = 100 + 80 + 20 = 200.
    client.messages._post = mock.Mock(
        return_value=Message(
            id="id",
            model="claude-3-5-sonnet-20241022",
            role="assistant",
            content=[TextBlock(type="text", text="Response")],
            type="message",
            usage=Usage(
                input_tokens=100,
                output_tokens=50,
                cache_read_input_tokens=80,
                cache_creation_input_tokens=20,
            ),
        )
    )

    with start_transaction(name="anthropic"):
        client.messages.create(
            max_tokens=1024,
            messages=[{"role": "user", "content": "Hello"}],
            model="claude-3-5-sonnet-20241022",
        )

    (span,) = events[0]["spans"]

    # input_tokens should be the total: 100 (non-cached) + 80 (cache_read) + 20 (cache_write) = 200
    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200

    # total_tokens should include the full input count
    assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 250  # 200 + 50

    # Cache fields should still be reported correctly
    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80
    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20


def test_input_tokens_include_cached_streaming(sentry_init, capture_events):
    """
    Test that gen_ai.usage.input_tokens includes cached tokens for streaming responses.

    Same bug as non-streaming: Anthropic's input_tokens excludes cached tokens,
    leading to negative cost calculations when cache_read > input_tokens.
    """
    client = Anthropic(api_key="z")
    returned_stream = Stream(cast_to=None, response=None, client=client)
    returned_stream._iterator = [
        MessageStartEvent(
            type="message_start",
            message=Message(
                id="id",
                model="claude-3-5-sonnet-20241022",
                role="assistant",
                content=[],
                type="message",
                usage=Usage(
                    input_tokens=100,
                    output_tokens=0,
                    cache_read_input_tokens=80,
                    cache_creation_input_tokens=20,
                ),
            ),
        ),
        MessageDeltaEvent(
            type="message_delta",
            delta=Delta(stop_reason="end_turn"),
            usage=MessageDeltaUsage(output_tokens=50),
        ),
    ]

    sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0)
    events = capture_events()
    client.messages._post = mock.Mock(return_value=returned_stream)

    with start_transaction(name="anthropic"):
        for _ in client.messages.create(
            max_tokens=1024,
            messages=[{"role": "user", "content": "Hello"}],
            model="claude-3-5-sonnet-20241022",
            stream=True,
        ):
            pass

    (span,) = events[0]["spans"]

    # input_tokens should be the total: 100 + 80 + 20 = 200
    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200

    # total_tokens should include the full input count
    assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 250  # 200 + 50

    # Cache fields should still be reported correctly
    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80
    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20


def test_input_tokens_unchanged_without_caching(sentry_init, capture_events):
    """
    Test that input_tokens is unchanged when there are no cached tokens.
    Ensures the fix doesn't break the non-caching case.
    """
    sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0)
    events = capture_events()
    client = Anthropic(api_key="z")

    client.messages._post = mock.Mock(
        return_value=Message(
            id="id",
            model="claude-3-5-sonnet-20241022",
            role="assistant",
            content=[TextBlock(type="text", text="Response")],
            type="message",
            usage=Usage(
                input_tokens=100,
                output_tokens=50,
            ),
        )
    )

    with start_transaction(name="anthropic"):
        client.messages.create(
            max_tokens=1024,
            messages=[{"role": "user", "content": "Hello"}],
            model="claude-3-5-sonnet-20241022",
        )

    (span,) = events[0]["spans"]

    # Without caching, input_tokens should remain as-is
    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 100
    assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 150  # 100 + 50


def test_cache_tokens_streaming(sentry_init, capture_events):
    """Test cache tokens are tracked for streaming responses."""
    client = Anthropic(api_key="z")