Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,7 @@ def __init__(self, *, proactive_compression: Union[bool, "ProactiveCompressionCo
threshold = None

if threshold is not None and (threshold <= 0 or threshold > 1):
raise ValueError(
f"compression_threshold must be between 0 (exclusive) and 1 (inclusive), got {threshold}"
)
raise ValueError(f"compression_threshold must be between 0 (exclusive) and 1 (inclusive), got {threshold}")

self.removed_message_count = 0
self._compression_threshold = threshold
Expand Down
23 changes: 21 additions & 2 deletions src/strands/models/bedrock.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,14 +372,22 @@ def _get_additional_request_fields(self, tool_choice: ToolChoice | None) -> dict
return {"additionalModelRequestFields": additional_fields}

def _inject_cache_point(self, messages: list[dict[str, Any]]) -> None:
"""Inject a cache point at the end of the last user message.
"""Inject cache points into user messages.

When anchor_first_message is enabled in cache_config, injects two cache points:
1. A stable prefix on the first user message (covers system prompt + first user message + tool descriptions)
2. A moving tail on the last user message (advances with the conversation)

The stable prefix acts as a fallback when the moving tail is invalidated (e.g. by context pruning
or summarization). When anchor_first_message is disabled (default), only the moving tail is injected.

Args:
messages: List of messages to inject cache point into (modified in place).
"""
if not messages:
return

first_user_idx: int | None = None
last_user_idx: int | None = None
for msg_idx, msg in enumerate(messages):
content = msg.get("content", [])
Expand All @@ -392,11 +400,22 @@ def _inject_cache_point(self, messages: list[dict[str, Any]]) -> None:
block_idx,
)
if msg.get("role") == "user":
if first_user_idx is None:
first_user_idx = msg_idx
last_user_idx = msg_idx

cache_config = self.config.get("cache_config")
anchor = cache_config and cache_config.anchor_first_message

# Stable prefix on first user message
if anchor and first_user_idx is not None and messages[first_user_idx].get("content"):
messages[first_user_idx]["content"].append({"cachePoint": {"type": "default"}})
logger.debug("msg_idx=<%s> | added stable cache point to first user message", first_user_idx)

# Moving tail on last user message
if last_user_idx is not None and messages[last_user_idx].get("content"):
messages[last_user_idx]["content"].append({"cachePoint": {"type": "default"}})
logger.debug("msg_idx=<%s> | added cache point to last user message", last_user_idx)
logger.debug("msg_idx=<%s> | added moving tail cache point to last user message", last_user_idx)

def _find_last_user_text_message_index(self, messages: Messages) -> int | None:
"""Find the index of the last user message containing text or image content.
Expand Down
5 changes: 5 additions & 0 deletions src/strands/models/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,9 +134,14 @@ class CacheConfig:
strategy: Caching strategy to use.
- "auto": Automatically detect model support and inject cachePoint to maximize cache coverage
- "anthropic": Inject cachePoint in Anthropic-compatible format without model support check
anchor_first_message: When True, inject a stable cache prefix on the first user message in addition
to the moving tail on the last user message. The stable prefix covers system prompt + first user
message + tool descriptions and acts as a fallback when the moving tail is invalidated (e.g. by
context pruning or summarization). Defaults to False.
"""

strategy: Literal["auto", "anthropic"] = "auto"
anchor_first_message: bool = False


class Model(abc.ABC):
Expand Down
4 changes: 1 addition & 3 deletions tests/strands/agent/test_conversation_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -983,9 +983,7 @@ def reduce_context(self, agent, e=None, **kwargs):
def test_proactive_compression_true_default_threshold_behavior():
"""proactive_compression=True uses 0.7 — triggered at 0.7+ but not below."""
manager = _MinimalManager(proactive_compression=True)
agent = _make_mock_agent(
messages=[{"role": "user", "content": [{"text": "msg"}]}], context_window_limit=1000
)
agent = _make_mock_agent(messages=[{"role": "user", "content": [{"text": "msg"}]}], context_window_limit=1000)
registry = HookRegistry()
manager.register_hooks(registry)

Expand Down
94 changes: 94 additions & 0 deletions tests/strands/models/test_bedrock.py
Original file line number Diff line number Diff line change
Expand Up @@ -3100,6 +3100,100 @@ def test_inject_cache_point_auto_strategy_resolves_to_anthropic_for_claude(bedro
assert len(formatted[1]["content"]) == 1


def test_inject_cache_point_dual_prefix_multi_turn(bedrock_client):
    """Verify anchor_first_message puts a cache point on the first and on the last user message."""
    conversation = [
        {"role": "user", "content": [{"text": "Hello"}]},
        {"role": "assistant", "content": [{"text": "Hi there!"}]},
        {"role": "user", "content": [{"text": "How are you?"}]},
    ]
    anchored_config = CacheConfig(strategy="auto", anchor_first_message=True)
    model = BedrockModel(
        model_id="us.anthropic.claude-sonnet-4-20250514-v1:0",
        cache_config=anchored_config,
    )

    model._inject_cache_point(conversation)

    first_user_blocks = conversation[0]["content"]
    assistant_blocks = conversation[1]["content"]
    last_user_blocks = conversation[2]["content"]

    # Stable prefix: original text block plus one trailing cache point.
    assert len(first_user_blocks) == 2
    stable_prefix = first_user_blocks[-1]
    assert "cachePoint" in stable_prefix
    assert stable_prefix["cachePoint"]["type"] == "default"

    # The assistant turn must not be touched.
    assert len(assistant_blocks) == 1

    # Moving tail: original text block plus one trailing cache point.
    assert len(last_user_blocks) == 2
    moving_tail = last_user_blocks[-1]
    assert "cachePoint" in moving_tail
    assert moving_tail["cachePoint"]["type"] == "default"


def test_inject_cache_point_dual_prefix_single_user_message(bedrock_client):
    """Verify a lone user message receives both cache points when anchor_first_message is enabled."""
    default_cache_point = {"cachePoint": {"type": "default"}}
    conversation = [
        {"role": "user", "content": [{"text": "Hello"}]},
    ]
    model = BedrockModel(
        model_id="us.anthropic.claude-sonnet-4-20250514-v1:0",
        cache_config=CacheConfig(strategy="auto", anchor_first_message=True),
    )

    model._inject_cache_point(conversation)

    # The only user message is both "first" and "last", so the stable prefix
    # and the moving tail are appended one after the other.
    blocks = conversation[0]["content"]
    assert len(blocks) == 3
    assert blocks[1] == default_cache_point
    assert blocks[2] == default_cache_point


def test_inject_cache_point_dual_prefix_strips_existing(bedrock_client):
    """Verify pre-existing cache points are removed before the stable prefix and moving tail are added."""
    default_cache_point = {"cachePoint": {"type": "default"}}
    # Every message starts out already carrying a cache point.
    conversation = [
        {"role": "user", "content": [{"text": "Hello"}, {"cachePoint": {"type": "default"}}]},
        {"role": "assistant", "content": [{"text": "Hi"}, {"cachePoint": {"type": "default"}}]},
        {"role": "user", "content": [{"text": "Follow up"}, {"cachePoint": {"type": "default"}}]},
    ]
    model = BedrockModel(
        model_id="us.anthropic.claude-sonnet-4-20250514-v1:0",
        cache_config=CacheConfig(strategy="auto", anchor_first_message=True),
    )

    model._inject_cache_point(conversation)

    first_user_blocks = conversation[0]["content"]
    assistant_blocks = conversation[1]["content"]
    last_user_blocks = conversation[2]["content"]

    # First user message: text + stable prefix only.
    assert len(first_user_blocks) == 2
    assert first_user_blocks[-1] == default_cache_point
    # Assistant message: stale cache point removed, only the text remains.
    assert len(assistant_blocks) == 1
    # Last user message: text + moving tail only.
    assert len(last_user_blocks) == 2
    assert last_user_blocks[-1] == default_cache_point


def test_inject_cache_point_without_anchor_no_stable_prefix(bedrock_client):
    """Verify the default config (no anchor_first_message) only adds the moving tail cache point."""
    conversation = [
        {"role": "user", "content": [{"text": "Hello"}]},
        {"role": "assistant", "content": [{"text": "Hi there!"}]},
        {"role": "user", "content": [{"text": "How are you?"}]},
    ]
    default_config = CacheConfig(strategy="auto")
    model = BedrockModel(
        model_id="us.anthropic.claude-sonnet-4-20250514-v1:0",
        cache_config=default_config,
    )

    model._inject_cache_point(conversation)

    first_user_blocks = conversation[0]["content"]
    last_user_blocks = conversation[2]["content"]

    # No stable prefix: the first user message keeps just its text block.
    assert len(first_user_blocks) == 1

    # Moving tail: the last user message gains one trailing cache point.
    assert len(last_user_blocks) == 2
    assert "cachePoint" in last_user_blocks[-1]


def test_find_last_user_text_message_index_no_user_messages(bedrock_client):
"""Test _find_last_user_text_message_index returns None when no user text messages exist."""
model = BedrockModel(model_id="test-model")
Expand Down