From ce2e12f38ab2a173e15e372e4fbfb99e6a08847c Mon Sep 17 00:00:00 2001
From: giulio-leone
Date: Fri, 13 Mar 2026 01:23:58 +0100
Subject: [PATCH] fix: guardrail redact targets last user message, not messages[-1]
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When long-term memory (LTM) session managers like
AgentCoreMemorySessionManager append an assistant message containing
user context after the user turn, the guardrail redaction logic
incorrectly redacted the LTM context instead of the actual user input.

Root cause: the redact handler used `self.messages[-1]` which assumes
the last message is the user's input. With LTM enabled, the message
list looks like:

  [0] user: 'Tell me something bad'  ← should be redacted
  [1] assistant: '...'               ← was being redacted

The fix replaces `self.messages[-1]` with a reverse search for the last
message with `role == 'user'`, matching the pattern already used by
`_find_last_user_text_message_index()` in the Bedrock model for
guardrail_latest_message wrapping.

Closes #1639
---
 src/strands/agent/agent.py        | 18 ++++++++++-----
 tests/strands/agent/test_agent.py | 37 +++++++++++++++++++++++++++++++
 2 files changed, 50 insertions(+), 5 deletions(-)

diff --git a/src/strands/agent/agent.py b/src/strands/agent/agent.py
index f378a886a..d92d6a2ba 100644
--- a/src/strands/agent/agent.py
+++ b/src/strands/agent/agent.py
@@ -846,12 +846,20 @@ async def _run_loop(
                     and event.chunk.get("redactContent")
                     and event.chunk["redactContent"].get("redactUserContentMessage")
                 ):
-                    self.messages[-1]["content"] = self._redact_user_content(
-                        self.messages[-1]["content"],
-                        str(event.chunk["redactContent"]["redactUserContentMessage"]),
+                    # Find the last user message — not necessarily messages[-1],
+                    # because session managers (e.g. AgentCoreMemorySessionManager)
+                    # may append non-user messages (LTM context) after the user turn.
+                    last_user_msg = next(
+                        (m for m in reversed(self.messages) if m["role"] == "user"),
+                        None,
                     )
-                    if self._session_manager:
-                        self._session_manager.redact_latest_message(self.messages[-1], self)
+                    if last_user_msg is not None:
+                        last_user_msg["content"] = self._redact_user_content(
+                            last_user_msg["content"],
+                            str(event.chunk["redactContent"]["redactUserContentMessage"]),
+                        )
+                        if self._session_manager:
+                            self._session_manager.redact_latest_message(last_user_msg, self)
                 yield event
 
             # Capture the result from the final event if available
diff --git a/tests/strands/agent/test_agent.py b/tests/strands/agent/test_agent.py
index 967a0dafb..3e8a0fdd4 100644
--- a/tests/strands/agent/test_agent.py
+++ b/tests/strands/agent/test_agent.py
@@ -1580,7 +1580,44 @@ def test_agent_restored_from_session_management_with_redacted_input():
     assert agent.messages[0] == agent_2.messages[0]
 
 
+def test_agent_redacts_user_message_not_ltm_context():
+    """Test that guardrail redacts the last *user* message, not a trailing LTM assistant message.
+
+    Reproduces: https://github.com/strands-agents/sdk-python/issues/1639
+    When long-term memory (LTM) session managers append an assistant message with
+    user context after the user turn, the redact logic must still target the user
+    message rather than the trailing assistant LTM message.
+    """
+    mocked_model = MockedModelProvider(
+        [{"redactedUserContent": "BLOCKED!", "redactedAssistantContent": "INPUT BLOCKED!"}]
+    )
+
+    agent = Agent(
+        model=mocked_model,
+        system_prompt="You are a helpful assistant.",
+        callback_handler=None,
+    )
+
+    # Simulate LTM session manager appending context after user message:
+    # messages[0] = user input, messages[1] = assistant LTM context
+    agent.messages.append({"role": "user", "content": [{"text": "Tell me something bad"}]})
+    agent.messages.append(
+        {"role": "assistant", "content": [{"text": "Preference: likes cats"}]}
+    )
+
+    # Invoke with no new prompt so the history still ends with the LTM message when
+    # the guardrail fires. Passing a prompt would append a fresh user message as
+    # messages[-1], and the old `messages[-1]` logic would then pass this test too.
+    agent()
+
+    # The actual user input (index 0) must be the message that gets redacted.
+    assert agent.messages[0] == {"role": "user", "content": [{"text": "BLOCKED!"}]}
+
+    # The trailing LTM context (index 1) must be left untouched.
+    assert agent.messages[1] == {"role": "assistant", "content": [{"text": "Preference: likes cats"}]}
+
+
 def test_agent_restored_from_session_management_with_correct_index():
     mock_model_provider = MockedModelProvider(
         [{"role": "assistant", "content": [{"text": "hello!"}]}, {"role": "assistant", "content": [{"text": "world!"}]}]
     )