Skip to content

Commit ce2e12f

Browse files
giulio-leone
authored and committed
fix: guardrail redact targets last user message, not messages[-1]
When long-term memory (LTM) session managers like AgentCoreMemorySessionManager append an assistant message containing user context after the user turn, the guardrail redaction logic incorrectly redacted the LTM context instead of the actual user input. Root cause: the redact handler used `self.messages[-1]` which assumes the last message is the user's input. With LTM enabled, the message list looks like: [0] user: 'Tell me something bad' ← should be redacted [1] assistant: '<user_context>...</user_context>' ← was being redacted The fix replaces `self.messages[-1]` with a reverse search for the last message with `role == 'user'`, matching the pattern already used by `_find_last_user_text_message_index()` in the Bedrock model for guardrail_latest_message wrapping. Closes #1639
1 parent fca208b commit ce2e12f

2 files changed

Lines changed: 56 additions & 6 deletions

File tree

src/strands/agent/agent.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -846,12 +846,20 @@ async def _run_loop(
846846
and event.chunk.get("redactContent")
847847
and event.chunk["redactContent"].get("redactUserContentMessage")
848848
):
849-
self.messages[-1]["content"] = self._redact_user_content(
850-
self.messages[-1]["content"],
851-
str(event.chunk["redactContent"]["redactUserContentMessage"]),
849+
# Find the last user message — not necessarily messages[-1],
850+
# because session managers (e.g. AgentCoreMemorySessionManager)
851+
# may append non-user messages (LTM context) after the user turn.
852+
last_user_msg = next(
853+
(m for m in reversed(self.messages) if m["role"] == "user"),
854+
None,
852855
)
853-
if self._session_manager:
854-
self._session_manager.redact_latest_message(self.messages[-1], self)
856+
if last_user_msg is not None:
857+
last_user_msg["content"] = self._redact_user_content(
858+
last_user_msg["content"],
859+
str(event.chunk["redactContent"]["redactUserContentMessage"]),
860+
)
861+
if self._session_manager:
862+
self._session_manager.redact_latest_message(last_user_msg, self)
855863
yield event
856864

857865
# Capture the result from the final event if available

tests/strands/agent/test_agent.py

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1580,7 +1580,49 @@ def test_agent_restored_from_session_management_with_redacted_input():
15801580
assert agent.messages[0] == agent_2.messages[0]
15811581

15821582

1583-
def test_agent_restored_from_session_management_with_correct_index():
1583+
def test_agent_redacts_user_message_not_ltm_context():
1584+
"""Test that guardrail redacts the last *user* message, not a trailing LTM assistant message.
1585+
1586+
Reproduces: https://github.com/strands-agents/sdk-python/issues/1639
1587+
When long-term memory (LTM) session managers append an assistant message with
1588+
user context after the user turn, the redact logic must still target the user
1589+
message rather than the trailing assistant LTM message.
1590+
"""
1591+
mocked_model = MockedModelProvider(
1592+
[{"redactedUserContent": "BLOCKED!", "redactedAssistantContent": "INPUT BLOCKED!"}]
1593+
)
1594+
1595+
agent = Agent(
1596+
model=mocked_model,
1597+
system_prompt="You are a helpful assistant.",
1598+
callback_handler=None,
1599+
)
1600+
1601+
# Simulate LTM session manager appending context after user message:
1602+
# messages[0] = user input, messages[1] = assistant LTM context
1603+
agent.messages.append({"role": "user", "content": [{"text": "Tell me something bad"}]})
1604+
agent.messages.append(
1605+
{"role": "assistant", "content": [{"text": "<user_context>Preference: likes cats</user_context>"}]}
1606+
)
1607+
1608+
# Run the agent — guardrail should redact the user message (index 0), not the LTM message
1609+
response = agent("ignored") # noqa: F841 -- triggers model which triggers redact
1610+
1611+
# Find the user messages — the first user message (the actual input) should be redacted
1612+
user_messages = [m for m in agent.messages if m["role"] == "user"]
1613+
assert len(user_messages) >= 1
1614+
# The last user message before the LTM context should have been redacted
1615+
# Check that at least one user message was redacted
1616+
redacted_user = [m for m in user_messages if m["content"] == [{"text": "BLOCKED!"}]]
1617+
assert len(redacted_user) >= 1, f"Expected at least one redacted user message, got: {user_messages}"
1618+
1619+
# The assistant LTM message should NOT have been redacted
1620+
assistant_messages = [m for m in agent.messages if m["role"] == "assistant"]
1621+
ltm_messages = [m for m in assistant_messages if any("<user_context>" in str(c) for c in m.get("content", []))]
1622+
for ltm in ltm_messages:
1623+
assert ltm["content"] != [{"text": "BLOCKED!"}], "LTM context message should not be redacted"
1624+
1625+
15841626
mock_model_provider = MockedModelProvider(
15851627
[{"role": "assistant", "content": [{"text": "hello!"}]}, {"role": "assistant", "content": [{"text": "world!"}]}]
15861628
)

0 commit comments

Comments
 (0)