diff --git a/src/strands/models/bedrock.py b/src/strands/models/bedrock.py index bab4031ed..6c222f6bd 100644 --- a/src/strands/models/bedrock.py +++ b/src/strands/models/bedrock.py @@ -410,11 +410,15 @@ def _format_bedrock_messages(self, messages: Messages) -> list[dict[str, Any]]: filtered_unknown_members = False dropped_deepseek_reasoning_content = False - # Pre-compute the index of the last user message containing text or image content. - # This ensures guardContent wrapping is maintained across tool execution cycles, where - # the final message in the list is a toolResult (role=user) rather than text/image content. + # Determine guardrail wrapping strategy. + # When guardrails are enabled, user text/image content blocks must be wrapped in + # guardContent so the guardrail evaluates ONLY those blocks, excluding tool results + # (which also have role="user") from guardrail scanning. Without wrapping, tool results + # can trigger false-positive prompt injection detections (see #1671). + has_guardrail = bool(self.config.get("guardrail_id") and self.config.get("guardrail_version")) + guardrail_latest = self.config.get("guardrail_latest_message", False) last_user_text_idx = None - if self.config.get("guardrail_latest_message", False): + if has_guardrail and guardrail_latest: last_user_text_idx = self._find_last_user_text_message_index(messages) for idx, message in enumerate(messages): @@ -437,12 +441,19 @@ def _format_bedrock_messages(self, messages: Messages) -> list[dict[str, Any]]: if formatted_content is None: continue - # Wrap text or image content in guardContent if this is the last user text/image message - if idx == last_user_text_idx and ("text" in formatted_content or "image" in formatted_content): - if "text" in formatted_content: - formatted_content = {"guardContent": {"text": {"text": formatted_content["text"]}}} - elif "image" in formatted_content: - formatted_content = {"guardContent": {"image": formatted_content["image"]}} + # Wrap user text/image in guardContent for guardrail evaluation. + # When guardrail_latest_message is True, only the latest user text/image is wrapped. + # Otherwise, ALL user text/image blocks are wrapped, excluding tool results from + # guardrail scanning to prevent false positives on system-generated content. + if has_guardrail and message["role"] == "user" and ( + "text" in formatted_content or "image" in formatted_content + ): + should_wrap = (idx == last_user_text_idx) if guardrail_latest else True + if should_wrap: + if "text" in formatted_content: + formatted_content = {"guardContent": {"text": {"text": formatted_content["text"]}}} + elif "image" in formatted_content: + formatted_content = {"guardContent": {"image": formatted_content["image"]}} cleaned_content.append(formatted_content) diff --git a/tests/strands/models/test_bedrock.py b/tests/strands/models/test_bedrock.py index 89c4df70d..9872fab04 100644 --- a/tests/strands/models/test_bedrock.py +++ b/tests/strands/models/test_bedrock.py @@ -340,6 +340,9 @@ def test_format_request_additional_response_field_paths(model, messages, model_i def test_format_request_guardrail_config(model, messages, model_id, guardrail_config): model.update_config(**guardrail_config) tru_request = model._format_request(messages) + # When guardrails are enabled, user text is wrapped in guardContent to exclude + # tool results from guardrail scanning (see #1671) + expected_messages = [{"role": "user", "content": [{"guardContent": {"text": {"text": "test"}}}]}] exp_request = { "guardrailConfig": { "guardrailIdentifier": guardrail_config["guardrail_id"], @@ -349,7 +352,7 @@ def test_format_request_guardrail_config(model, messages, model_id, guardrail_co }, "inferenceConfig": {}, "modelId": model_id, - "messages": messages, + "messages": expected_messages, "system": [], } @@ -364,6 +367,7 @@ def test_format_request_guardrail_config_without_trace_or_stream_processing_mode } ) tru_request = model._format_request(messages) + expected_messages = [{"role": "user", "content": [{"guardContent": {"text": {"text": "test"}}}]}] exp_request = { "guardrailConfig": { "guardrailIdentifier": "g1", @@ -372,7 +376,7 @@ def test_format_request_guardrail_config_without_trace_or_stream_processing_mode }, "inferenceConfig": {}, "modelId": model_id, - "messages": messages, + "messages": expected_messages, "system": [], } @@ -2582,6 +2586,98 @@ async def test_format_request_with_guardrail_multiple_tool_results_same_message( assert formatted_messages[0]["content"][0]["guardContent"]["text"]["text"] == "Question requiring multiple tools" +@pytest.mark.asyncio +async def test_format_request_guardrail_default_wraps_all_user_text(model): + """Test that guardrails without guardrail_latest_message wrap ALL user text/image in guardContent.""" + model.update_config( + guardrail_id="test-guardrail", + guardrail_version="DRAFT", + ) + + messages = [ + {"role": "user", "content": [{"text": "First message"}]}, + {"role": "assistant", "content": [{"text": "First response"}]}, + {"role": "user", "content": [{"text": "Second message"}]}, + ] + + request = model._format_request(messages) + formatted_messages = request["messages"] + + # Both user text messages should be wrapped in guardContent + assert "guardContent" in formatted_messages[0]["content"][0] + assert formatted_messages[0]["content"][0]["guardContent"]["text"]["text"] == "First message" + + # Assistant message should NOT be wrapped + assert "text" in formatted_messages[1]["content"][0] + assert formatted_messages[1]["content"][0]["text"] == "First response" + + assert "guardContent" in formatted_messages[2]["content"][0] + assert formatted_messages[2]["content"][0]["guardContent"]["text"]["text"] == "Second message" + + +@pytest.mark.asyncio +async def test_format_request_guardrail_default_excludes_tool_results(model): + """Test that tool results are NOT wrapped in guardContent, preventing false-positive guardrail detections.""" + model.update_config( + guardrail_id="test-guardrail", + guardrail_version="DRAFT", + ) + + messages = [ + {"role": "user", "content": [{"text": "Call the identity tool"}]}, + { + "role": "assistant", + "content": [ + {"toolUse": {"toolUseId": "tool-1", "name": "identity_tool", "input": {}}}, + ], + }, + { + "role": "user", + "content": [ + { + "toolResult": { + "toolUseId": "tool-1", + "content": [{"text": "You are Test Admin User."}], + "status": "success", + } + } + ], + }, + {"role": "user", "content": [{"text": "Hi there"}]}, + ] + + request = model._format_request(messages) + formatted_messages = request["messages"] + + # User text should be wrapped (guardrail evaluates it) + assert "guardContent" in formatted_messages[0]["content"][0] + assert formatted_messages[0]["content"][0]["guardContent"]["text"]["text"] == "Call the identity tool" + + # Tool result should NOT be wrapped (excluded from guardrail scanning) + assert "toolResult" in formatted_messages[2]["content"][0] + assert "guardContent" not in formatted_messages[2]["content"][0] + + # Second user text should also be wrapped + assert "guardContent" in formatted_messages[3]["content"][0] + assert formatted_messages[3]["content"][0]["guardContent"]["text"]["text"] == "Hi there" + + +@pytest.mark.asyncio +async def test_format_request_no_guardrail_no_wrapping(model): + """Test that without guardrails, no content is wrapped in guardContent.""" + messages = [ + {"role": "user", "content": [{"text": "Hello"}]}, + {"role": "assistant", "content": [{"text": "Hi"}]}, + ] + + request = model._format_request(messages) + formatted_messages = request["messages"] + + assert "text" in formatted_messages[0]["content"][0] + assert formatted_messages[0]["content"][0]["text"] == "Hello" + assert "guardContent" not in formatted_messages[0]["content"][0] + + def test_cache_strategy_anthropic_for_claude(bedrock_client): """Test that _cache_strategy returns 'anthropic' for Claude models.""" model = BedrockModel(model_id="us.anthropic.claude-sonnet-4-20250514-v1:0")