diff --git a/src/strands/agent/conversation_manager/conversation_manager.py b/src/strands/agent/conversation_manager/conversation_manager.py index 7e2283883..60ac77a73 100644 --- a/src/strands/agent/conversation_manager/conversation_manager.py +++ b/src/strands/agent/conversation_manager/conversation_manager.py @@ -89,9 +89,7 @@ def __init__(self, *, proactive_compression: Union[bool, "ProactiveCompressionCo threshold = None if threshold is not None and (threshold <= 0 or threshold > 1): - raise ValueError( - f"compression_threshold must be between 0 (exclusive) and 1 (inclusive), got {threshold}" - ) + raise ValueError(f"compression_threshold must be between 0 (exclusive) and 1 (inclusive), got {threshold}") self.removed_message_count = 0 self._compression_threshold = threshold diff --git a/src/strands/models/bedrock.py b/src/strands/models/bedrock.py index c74a63a3b..bd2c92c65 100644 --- a/src/strands/models/bedrock.py +++ b/src/strands/models/bedrock.py @@ -372,7 +372,14 @@ def _get_additional_request_fields(self, tool_choice: ToolChoice | None) -> dict return {"additionalModelRequestFields": additional_fields} def _inject_cache_point(self, messages: list[dict[str, Any]]) -> None: - """Inject a cache point at the end of the last user message. + """Inject cache points into user messages. + + When anchor_first_message is enabled in cache_config, injects two cache points: + 1. A stable prefix on the first user message (covers system prompt + first user message + tool descriptions) + 2. A moving tail on the last user message (advances with the conversation) + + The stable prefix acts as a fallback when the moving tail is invalidated (e.g. by context pruning + or summarization). When anchor_first_message is disabled (default), only the moving tail is injected. Args: messages: List of messages to inject cache point into (modified in place). @@ -380,6 +387,7 @@ def _inject_cache_point(self, messages: list[dict[str, Any]]) -> None: if not messages: return + first_user_idx: int | None = None last_user_idx: int | None = None for msg_idx, msg in enumerate(messages): content = msg.get("content", []) @@ -392,11 +400,22 @@ def _inject_cache_point(self, messages: list[dict[str, Any]]) -> None: block_idx, ) if msg.get("role") == "user": + if first_user_idx is None: + first_user_idx = msg_idx last_user_idx = msg_idx + cache_config = self.config.get("cache_config") + anchor = cache_config and cache_config.anchor_first_message + + # Stable prefix on first user message + if anchor and first_user_idx is not None and messages[first_user_idx].get("content"): + messages[first_user_idx]["content"].append({"cachePoint": {"type": "default"}}) + logger.debug("msg_idx=<%s> | added stable cache point to first user message", first_user_idx) + + # Moving tail on last user message if last_user_idx is not None and messages[last_user_idx].get("content"): messages[last_user_idx]["content"].append({"cachePoint": {"type": "default"}}) - logger.debug("msg_idx=<%s> | added cache point to last user message", last_user_idx) + logger.debug("msg_idx=<%s> | added moving tail cache point to last user message", last_user_idx) def _find_last_user_text_message_index(self, messages: Messages) -> int | None: """Find the index of the last user message containing text or image content. diff --git a/src/strands/models/model.py b/src/strands/models/model.py index dd2f9eed2..c879a40ad 100644 --- a/src/strands/models/model.py +++ b/src/strands/models/model.py @@ -134,9 +134,14 @@ class CacheConfig: strategy: Caching strategy to use. - "auto": Automatically detect model support and inject cachePoint to maximize cache coverage - "anthropic": Inject cachePoint in Anthropic-compatible format without model support check + anchor_first_message: When True, inject a stable cache prefix on the first user message in addition + to the moving tail on the last user message. The stable prefix covers system prompt + first user + message + tool descriptions and acts as a fallback when the moving tail is invalidated (e.g. by + context pruning or summarization). Defaults to False. """ strategy: Literal["auto", "anthropic"] = "auto" + anchor_first_message: bool = False class Model(abc.ABC): diff --git a/tests/strands/agent/test_conversation_manager.py b/tests/strands/agent/test_conversation_manager.py index df748241e..3fd67753e 100644 --- a/tests/strands/agent/test_conversation_manager.py +++ b/tests/strands/agent/test_conversation_manager.py @@ -983,9 +983,7 @@ def reduce_context(self, agent, e=None, **kwargs): def test_proactive_compression_true_default_threshold_behavior(): """proactive_compression=True uses 0.7 — triggered at 0.7+ but not below.""" manager = _MinimalManager(proactive_compression=True) - agent = _make_mock_agent( - messages=[{"role": "user", "content": [{"text": "msg"}]}], context_window_limit=1000 - ) + agent = _make_mock_agent(messages=[{"role": "user", "content": [{"text": "msg"}]}], context_window_limit=1000) registry = HookRegistry() manager.register_hooks(registry) diff --git a/tests/strands/models/test_bedrock.py b/tests/strands/models/test_bedrock.py index 2f1f7d1f1..9432ee8cd 100644 --- a/tests/strands/models/test_bedrock.py +++ b/tests/strands/models/test_bedrock.py @@ -3100,6 +3100,100 @@ def test_inject_cache_point_auto_strategy_resolves_to_anthropic_for_claude(bedro assert len(formatted[1]["content"]) == 1 +def test_inject_cache_point_dual_prefix_multi_turn(bedrock_client): + """Test that anchor_first_message adds cache points to both first and last user messages.""" + model = BedrockModel( + model_id="us.anthropic.claude-sonnet-4-20250514-v1:0", + cache_config=CacheConfig(strategy="auto", anchor_first_message=True), + ) + + cleaned_messages = [ + {"role": "user", "content": [{"text": "Hello"}]}, + {"role": "assistant", "content": [{"text": "Hi there!"}]}, + {"role": "user", "content": [{"text": "How are you?"}]}, + ] + + model._inject_cache_point(cleaned_messages) + + # First user message should have stable prefix cache point + assert len(cleaned_messages[0]["content"]) == 2 + assert "cachePoint" in cleaned_messages[0]["content"][-1] + assert cleaned_messages[0]["content"][-1]["cachePoint"]["type"] == "default" + + # Assistant message should be unchanged + assert len(cleaned_messages[1]["content"]) == 1 + + # Last user message should have moving tail cache point + assert len(cleaned_messages[2]["content"]) == 2 + assert "cachePoint" in cleaned_messages[2]["content"][-1] + assert cleaned_messages[2]["content"][-1]["cachePoint"]["type"] == "default" + + +def test_inject_cache_point_dual_prefix_single_user_message(bedrock_client): + """Test that anchor_first_message with a single user message adds both cache points.""" + model = BedrockModel( + model_id="us.anthropic.claude-sonnet-4-20250514-v1:0", + cache_config=CacheConfig(strategy="auto", anchor_first_message=True), + ) + + cleaned_messages = [ + {"role": "user", "content": [{"text": "Hello"}]}, + ] + + model._inject_cache_point(cleaned_messages) + + # Single user message gets both stable prefix and moving tail + assert len(cleaned_messages[0]["content"]) == 3 + assert cleaned_messages[0]["content"][1] == {"cachePoint": {"type": "default"}} + assert cleaned_messages[0]["content"][2] == {"cachePoint": {"type": "default"}} + + +def test_inject_cache_point_dual_prefix_strips_existing(bedrock_client): + """Test that anchor_first_message strips existing cache points before adding dual prefixes.""" + model = BedrockModel( + model_id="us.anthropic.claude-sonnet-4-20250514-v1:0", + cache_config=CacheConfig(strategy="auto", anchor_first_message=True), + ) + + cleaned_messages = [ + {"role": "user", "content": [{"text": "Hello"}, {"cachePoint": {"type": "default"}}]}, + {"role": "assistant", "content": [{"text": "Hi"}, {"cachePoint": {"type": "default"}}]}, + {"role": "user", "content": [{"text": "Follow up"}, {"cachePoint": {"type": "default"}}]}, + ] + + model._inject_cache_point(cleaned_messages) + + # Old cache points stripped, new ones added at correct positions + assert len(cleaned_messages[0]["content"]) == 2 # text + stable prefix + assert cleaned_messages[0]["content"][-1] == {"cachePoint": {"type": "default"}} + assert len(cleaned_messages[1]["content"]) == 1 # assistant: only text + assert len(cleaned_messages[2]["content"]) == 2 # text + moving tail + assert cleaned_messages[2]["content"][-1] == {"cachePoint": {"type": "default"}} + + +def test_inject_cache_point_without_anchor_no_stable_prefix(bedrock_client): + """Test that without anchor_first_message, only moving tail is added (backward compatible).""" + model = BedrockModel( + model_id="us.anthropic.claude-sonnet-4-20250514-v1:0", + cache_config=CacheConfig(strategy="auto"), + ) + + cleaned_messages = [ + {"role": "user", "content": [{"text": "Hello"}]}, + {"role": "assistant", "content": [{"text": "Hi there!"}]}, + {"role": "user", "content": [{"text": "How are you?"}]}, + ] + + model._inject_cache_point(cleaned_messages) + + # First user message should NOT have a cache point + assert len(cleaned_messages[0]["content"]) == 1 + + # Last user message should have moving tail + assert len(cleaned_messages[2]["content"]) == 2 + assert "cachePoint" in cleaned_messages[2]["content"][-1] + + def test_find_last_user_text_message_index_no_user_messages(bedrock_client): """Test _find_last_user_text_message_index returns None when no user text messages exist.""" model = BedrockModel(model_id="test-model")