From 54f5660a70f528d38ff63775ff9fd504900dd2b0 Mon Sep 17 00:00:00 2001 From: Dinindu Suriyamudali Date: Sun, 10 May 2026 22:28:20 +1200 Subject: [PATCH 1/2] feat(cache): add stable prefix caching strategy for improved hit rates (#2271) Introduces `anchor_first_message` to `CacheConfig` to inject a stable cache point on the first user message. This stable prefix acts as a fallback when the moving tail cache point (on the last user message) is invalidated by context pruning or summarization, enhancing cache resilience and hit rates. --- .../conversation_manager.py | 4 +- src/strands/models/bedrock.py | 26 +++++- src/strands/models/model.py | 5 + .../agent/test_conversation_manager.py | 4 +- tests/strands/models/test_bedrock.py | 93 +++++++++++++++++++ 5 files changed, 123 insertions(+), 9 deletions(-) diff --git a/src/strands/agent/conversation_manager/conversation_manager.py b/src/strands/agent/conversation_manager/conversation_manager.py index 7e2283883..60ac77a73 100644 --- a/src/strands/agent/conversation_manager/conversation_manager.py +++ b/src/strands/agent/conversation_manager/conversation_manager.py @@ -89,9 +89,7 @@ def __init__(self, *, proactive_compression: Union[bool, "ProactiveCompressionCo threshold = None if threshold is not None and (threshold <= 0 or threshold > 1): - raise ValueError( - f"compression_threshold must be between 0 (exclusive) and 1 (inclusive), got {threshold}" - ) + raise ValueError(f"compression_threshold must be between 0 (exclusive) and 1 (inclusive), got {threshold}") self.removed_message_count = 0 self._compression_threshold = threshold diff --git a/src/strands/models/bedrock.py b/src/strands/models/bedrock.py index c74a63a3b..c7cda5401 100644 --- a/src/strands/models/bedrock.py +++ b/src/strands/models/bedrock.py @@ -372,7 +372,14 @@ def _get_additional_request_fields(self, tool_choice: ToolChoice | None) -> dict return {"additionalModelRequestFields": additional_fields} def _inject_cache_point(self, messages: list[dict[str, Any]]) -> None: - """Inject a cache point at the end of the last user message. + """Inject cache points into user messages. + + When anchor_first_message is enabled in cache_config, injects two cache points: + 1. A stable prefix on the first user message (covers system prompt + first user message + tool descriptions) + 2. A moving tail on the last user message (advances with the conversation) + + The stable prefix acts as a fallback when the moving tail is invalidated (e.g. by context pruning + or summarization). When anchor_first_message is disabled (default), only the moving tail is injected. Args: messages: List of messages to inject cache point into (modified in place). @@ -380,6 +387,7 @@ def _inject_cache_point(self, messages: list[dict[str, Any]]) -> None: if not messages: return + first_user_idx: int | None = None last_user_idx: int | None = None for msg_idx, msg in enumerate(messages): content = msg.get("content", []) @@ -392,11 +400,23 @@ def _inject_cache_point(self, messages: list[dict[str, Any]]) -> None: block_idx, ) if msg.get("role") == "user": + if first_user_idx is None: + first_user_idx = msg_idx last_user_idx = msg_idx + cache_config = self.config.get("cache_config") + anchor = cache_config and cache_config.anchor_first_message + + # Stable prefix on first user message + if anchor and first_user_idx is not None and messages[first_user_idx].get("content"): + messages[first_user_idx]["content"].append({"cachePoint": {"type": "default"}}) + logger.debug("msg_idx=<%s> | added stable cache point to first user message", first_user_idx) + + # Moving tail on last user message (skip if same as first to avoid duplicate) if last_user_idx is not None and messages[last_user_idx].get("content"): - messages[last_user_idx]["content"].append({"cachePoint": {"type": "default"}}) - logger.debug("msg_idx=<%s> | added cache point to last user message", last_user_idx) + if not (anchor and last_user_idx == first_user_idx): + messages[last_user_idx]["content"].append({"cachePoint": {"type": "default"}}) + logger.debug("msg_idx=<%s> | added moving tail cache point to last user message", last_user_idx) def _find_last_user_text_message_index(self, messages: Messages) -> int | None: """Find the index of the last user message containing text or image content. diff --git a/src/strands/models/model.py b/src/strands/models/model.py index dd2f9eed2..c879a40ad 100644 --- a/src/strands/models/model.py +++ b/src/strands/models/model.py @@ -134,9 +134,14 @@ class CacheConfig: strategy: Caching strategy to use. - "auto": Automatically detect model support and inject cachePoint to maximize cache coverage - "anthropic": Inject cachePoint in Anthropic-compatible format without model support check + anchor_first_message: When True, inject a stable cache prefix on the first user message in addition + to the moving tail on the last user message. The stable prefix covers system prompt + first user + message + tool descriptions and acts as a fallback when the moving tail is invalidated (e.g. by + context pruning or summarization). Defaults to False. """ strategy: Literal["auto", "anthropic"] = "auto" + anchor_first_message: bool = False class Model(abc.ABC): diff --git a/tests/strands/agent/test_conversation_manager.py b/tests/strands/agent/test_conversation_manager.py index df748241e..3fd67753e 100644 --- a/tests/strands/agent/test_conversation_manager.py +++ b/tests/strands/agent/test_conversation_manager.py @@ -983,9 +983,7 @@ def reduce_context(self, agent, e=None, **kwargs): def test_proactive_compression_true_default_threshold_behavior(): """proactive_compression=True uses 0.7 — triggered at 0.7+ but not below.""" manager = _MinimalManager(proactive_compression=True) - agent = _make_mock_agent( - messages=[{"role": "user", "content": [{"text": "msg"}]}], context_window_limit=1000 - ) + agent = _make_mock_agent(messages=[{"role": "user", "content": [{"text": "msg"}]}], context_window_limit=1000) registry = HookRegistry() manager.register_hooks(registry) diff --git a/tests/strands/models/test_bedrock.py b/tests/strands/models/test_bedrock.py index 2f1f7d1f1..b5ae09fd9 100644 --- a/tests/strands/models/test_bedrock.py +++ b/tests/strands/models/test_bedrock.py @@ -3100,6 +3100,99 @@ def test_inject_cache_point_auto_strategy_resolves_to_anthropic_for_claude(bedro assert len(formatted[1]["content"]) == 1 +def test_inject_cache_point_dual_prefix_multi_turn(bedrock_client): + """Test that anchor_first_message adds cache points to both first and last user messages.""" + model = BedrockModel( + model_id="us.anthropic.claude-sonnet-4-20250514-v1:0", + cache_config=CacheConfig(strategy="auto", anchor_first_message=True), + ) + + cleaned_messages = [ + {"role": "user", "content": [{"text": "Hello"}]}, + {"role": "assistant", "content": [{"text": "Hi there!"}]}, + {"role": "user", "content": [{"text": "How are you?"}]}, + ] + + model._inject_cache_point(cleaned_messages) + + # First user message should have stable prefix cache point + assert len(cleaned_messages[0]["content"]) == 2 + assert "cachePoint" in cleaned_messages[0]["content"][-1] + assert cleaned_messages[0]["content"][-1]["cachePoint"]["type"] == "default" + + # Assistant message should be unchanged + assert len(cleaned_messages[1]["content"]) == 1 + + # Last user message should have moving tail cache point + assert len(cleaned_messages[2]["content"]) == 2 + assert "cachePoint" in cleaned_messages[2]["content"][-1] + assert cleaned_messages[2]["content"][-1]["cachePoint"]["type"] == "default" + + +def test_inject_cache_point_dual_prefix_single_user_message(bedrock_client): + """Test that anchor_first_message with a single user message only adds one cache point.""" + model = BedrockModel( + model_id="us.anthropic.claude-sonnet-4-20250514-v1:0", + cache_config=CacheConfig(strategy="auto", anchor_first_message=True), + ) + + cleaned_messages = [ + {"role": "user", "content": [{"text": "Hello"}]}, + ] + + model._inject_cache_point(cleaned_messages) + + # Single user message: only one cache point (no duplicate) + assert len(cleaned_messages[0]["content"]) == 2 + assert "cachePoint" in cleaned_messages[0]["content"][-1] + + +def test_inject_cache_point_dual_prefix_strips_existing(bedrock_client): + """Test that anchor_first_message strips existing cache points before adding dual prefixes.""" + model = BedrockModel( + model_id="us.anthropic.claude-sonnet-4-20250514-v1:0", + cache_config=CacheConfig(strategy="auto", anchor_first_message=True), + ) + + cleaned_messages = [ + {"role": "user", "content": [{"text": "Hello"}, {"cachePoint": {"type": "default"}}]}, + {"role": "assistant", "content": [{"text": "Hi"}, {"cachePoint": {"type": "default"}}]}, + {"role": "user", "content": [{"text": "Follow up"}, {"cachePoint": {"type": "default"}}]}, + ] + + model._inject_cache_point(cleaned_messages) + + # Old cache points stripped, new ones added at correct positions + assert len(cleaned_messages[0]["content"]) == 2 # text + stable prefix + assert cleaned_messages[0]["content"][-1] == {"cachePoint": {"type": "default"}} + assert len(cleaned_messages[1]["content"]) == 1 # assistant: only text + assert len(cleaned_messages[2]["content"]) == 2 # text + moving tail + assert cleaned_messages[2]["content"][-1] == {"cachePoint": {"type": "default"}} + + +def test_inject_cache_point_without_anchor_no_stable_prefix(bedrock_client): + """Test that without anchor_first_message, only moving tail is added (backward compatible).""" + model = BedrockModel( + model_id="us.anthropic.claude-sonnet-4-20250514-v1:0", + cache_config=CacheConfig(strategy="auto"), + ) + + cleaned_messages = [ + {"role": "user", "content": [{"text": "Hello"}]}, + {"role": "assistant", "content": [{"text": "Hi there!"}]}, + {"role": "user", "content": [{"text": "How are you?"}]}, + ] + + model._inject_cache_point(cleaned_messages) + + # First user message should NOT have a cache point + assert len(cleaned_messages[0]["content"]) == 1 + + # Last user message should have moving tail + assert len(cleaned_messages[2]["content"]) == 2 + assert "cachePoint" in cleaned_messages[2]["content"][-1] + + def test_find_last_user_text_message_index_no_user_messages(bedrock_client): """Test _find_last_user_text_message_index returns None when no user text messages exist.""" model = BedrockModel(model_id="test-model") From d7a19e07957a45948e0170819c7d21bec25a9d46 Mon Sep 17 00:00:00 2001 From: Dinindu Suriyamudali Date: Sun, 10 May 2026 23:05:31 +1200 Subject: [PATCH 2/2] feat(cache): inject moving tail alongside stable prefix on single message (#2271) Removes the exclusion that prevented the moving tail cache point from being added to the last user message if it was the same as the first user message and `anchor_first_message` was active. This ensures both caching strategies are applied concurrently, improving cache resilience in single-turn conversations. --- src/strands/models/bedrock.py | 7 +++---- tests/strands/models/test_bedrock.py | 9 +++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/strands/models/bedrock.py b/src/strands/models/bedrock.py index c7cda5401..bd2c92c65 100644 --- a/src/strands/models/bedrock.py +++ b/src/strands/models/bedrock.py @@ -412,11 +412,10 @@ def _inject_cache_point(self, messages: list[dict[str, Any]]) -> None: messages[first_user_idx]["content"].append({"cachePoint": {"type": "default"}}) logger.debug("msg_idx=<%s> | added stable cache point to first user message", first_user_idx) - # Moving tail on last user message (skip if same as first to avoid duplicate) + # Moving tail on last user message if last_user_idx is not None and messages[last_user_idx].get("content"): - if not (anchor and last_user_idx == first_user_idx): - messages[last_user_idx]["content"].append({"cachePoint": {"type": "default"}}) - logger.debug("msg_idx=<%s> | added moving tail cache point to last user message", last_user_idx) + messages[last_user_idx]["content"].append({"cachePoint": {"type": "default"}}) + logger.debug("msg_idx=<%s> | added moving tail cache point to last user message", last_user_idx) def _find_last_user_text_message_index(self, messages: Messages) -> int | None: """Find the index of the last user message containing text or image content. diff --git a/tests/strands/models/test_bedrock.py b/tests/strands/models/test_bedrock.py index b5ae09fd9..9432ee8cd 100644 --- a/tests/strands/models/test_bedrock.py +++ b/tests/strands/models/test_bedrock.py @@ -3130,7 +3130,7 @@ def test_inject_cache_point_dual_prefix_multi_turn(bedrock_client): def test_inject_cache_point_dual_prefix_single_user_message(bedrock_client): - """Test that anchor_first_message with a single user message only adds one cache point.""" + """Test that anchor_first_message with a single user message adds both cache points.""" model = BedrockModel( model_id="us.anthropic.claude-sonnet-4-20250514-v1:0", cache_config=CacheConfig(strategy="auto", anchor_first_message=True), @@ -3142,9 +3142,10 @@ def test_inject_cache_point_dual_prefix_single_user_message(bedrock_client): model._inject_cache_point(cleaned_messages) - # Single user message: only one cache point (no duplicate) - assert len(cleaned_messages[0]["content"]) == 2 - assert "cachePoint" in cleaned_messages[0]["content"][-1] + # Single user message gets both stable prefix and moving tail + assert len(cleaned_messages[0]["content"]) == 3 + assert cleaned_messages[0]["content"][1] == {"cachePoint": {"type": "default"}} + assert cleaned_messages[0]["content"][2] == {"cachePoint": {"type": "default"}} def test_inject_cache_point_dual_prefix_strips_existing(bedrock_client):