"""S3-based session manager for cloud storage."""

import base64
import json
import logging
from concurrent.futures import ThreadPoolExecutor, as_completed

MESSAGE_PREFIX = "message_"
MULTI_AGENT_PREFIX = "multi_agent_"

# Marker key identifying a JSON object that stands in for a bytes value.
_BYTES_TYPE_TAG = "__bytes_encoded__"


class _BytesEncoder(json.JSONEncoder):
    """JSON encoder that serialises bytes-like values as tagged base64 objects.

    Each ``bytes`` / ``bytearray`` value is emitted as
    ``{"__bytes_encoded__": True, "data": "<base64 text>"}``. This is the same
    tagging convention as :func:`~strands.types.session.encode_bytes_values`,
    so :func:`~strands.types.session.decode_bytes_values` can decode it too.

    Note:
        The tag does not record the original type, so a ``bytearray`` comes
        back as ``bytes`` after a round trip through ``_bytes_decoder_hook``.
    """

    def default(self, o: Any) -> Any:
        """Return a JSON-serialisable stand-in for *o*.

        Args:
            o: A value the base encoder could not serialise on its own.

        Returns:
            The tagged base64 dict for ``bytes`` / ``bytearray`` input.

        Raises:
            TypeError: For any other unsupported type (raised by the base class).
        """
        if isinstance(o, (bytes, bytearray)):
            return {_BYTES_TYPE_TAG: True, "data": base64.b64encode(o).decode("ascii")}
        return super().default(o)


def _bytes_decoder_hook(obj: dict[str, Any]) -> Any:
    """``object_hook`` for :func:`json.loads` that restores base64-encoded bytes.

    Matches the tagging convention used by ``encode_bytes_values``.

    Args:
        obj: A JSON object that ``json.loads`` has just finished parsing.

    Returns:
        The decoded ``bytes`` when *obj* carries the ``__bytes_encoded__: True``
        tag together with a string ``data`` payload; otherwise *obj* unchanged.

    Raises:
        binascii.Error: If ``data`` is a string that is not valid base64.
    """
    if obj.get(_BYTES_TYPE_TAG) is not True:
        return obj

    payload = obj.get("data")
    # The hook sees every object in the document, including arbitrary user
    # data. The encoder always writes ``data`` as a string, so a missing or
    # non-string payload is not ours: hand it back untouched instead of letting
    # b64decode raise TypeError and abort the whole read.
    if not isinstance(payload, str):
        return obj

    return base64.b64decode(payload)


# The S3SessionManager class definition follows in the original file; its body
# lies outside this excerpt and is left as-is.
# Production-side change exercised by this test (src/strands/session/s3_session_manager.py):
#   _read_s3_object  -> json.loads(content, object_hook=_bytes_decoder_hook)
#   _write_s3_object -> json.dumps(data, cls=_BytesEncoder, indent=2, ensure_ascii=False)


def test_write_and_read_message_with_binary_document_content(s3_manager, sample_session, sample_agent, sample_message):
    """Round-trip a message that embeds raw bytes (e.g., an inline PDF document) through S3.

    Reproduces: https://github.com/strands-agents/sdk-python/issues/1864
    """
    # Persist the session and its agent so a message can be attached to them.
    s3_manager.create_session(sample_session)
    s3_manager.create_agent(sample_session.session_id, sample_agent)

    # A multimodal user prompt: one text block plus one document block whose
    # source is raw binary data.
    pdf_bytes = b"%PDF-1.4 fake content for test"
    text_block = {"text": "Analyze this PDF"}
    document_block = {
        "document": {
            "format": "pdf",
            "name": "document.pdf",
            "source": {
                "bytes": pdf_bytes,
            },
        }
    }
    raw_message = {"role": "user", "content": [text_block, document_block]}
    message_with_bytes = SessionMessage.from_message(message=raw_message, index=0)

    # Storing must not fail with "TypeError: Object of type bytes is not JSON serializable".
    s3_manager.create_message(sample_session.session_id, sample_agent.agent_id, message_with_bytes)

    # Loading must hand back the original bytes, not their encoded form.
    result = s3_manager.read_message(sample_session.session_id, sample_agent.agent_id, 0)
    assert result is not None
    content = result.message["content"]
    assert len(content) == 2
    assert content[0]["text"] == "Analyze this PDF"
    assert content[1]["document"]["source"]["bytes"] == pdf_bytes