diff --git a/py/noxfile.py b/py/noxfile.py index 7a79ab64..d542e3d1 100644 --- a/py/noxfile.py +++ b/py/noxfile.py @@ -62,6 +62,7 @@ def _pinned_python_version(): # validate things work with or without them. VENDOR_PACKAGES = ( "agno", + "agentscope", "anthropic", "dspy", "openai", @@ -89,6 +90,7 @@ def _pinned_python_version(): # Keep LATEST for newest API coverage, and pin 2.4.0 to cover the 2.4 -> 2.5 breaking change # to internals we leverage for instrumentation. AGNO_VERSIONS = (LATEST, "2.4.0", "2.1.0") +AGENTSCOPE_VERSIONS = (LATEST, "1.0.0") # pydantic_ai 1.x requires Python >= 3.10 # Two test suites with different version requirements: # 1. wrap_openai approach: works with older versions (0.1.9+) @@ -172,6 +174,16 @@ def test_agno(session, version): _run_core_tests(session) +@nox.session() +@nox.parametrize("version", AGENTSCOPE_VERSIONS, ids=AGENTSCOPE_VERSIONS) +def test_agentscope(session, version): + _install_test_deps(session) + _install(session, "agentscope", version) + _install(session, "openai") + _run_tests(session, f"{INTEGRATION_DIR}/agentscope/test_agentscope.py") + _run_core_tests(session) + + @nox.session() @nox.parametrize("version", ANTHROPIC_VERSIONS, ids=ANTHROPIC_VERSIONS) def test_anthropic(session, version): diff --git a/py/src/braintrust/auto.py b/py/src/braintrust/auto.py index c71dd140..531531d6 100644 --- a/py/src/braintrust/auto.py +++ b/py/src/braintrust/auto.py @@ -9,6 +9,7 @@ from braintrust.integrations import ( ADKIntegration, + AgentScopeIntegration, AgnoIntegration, AnthropicIntegration, ClaudeAgentSDKIntegration, @@ -41,6 +42,7 @@ def auto_instrument( pydantic_ai: bool = True, google_genai: bool = True, agno: bool = True, + agentscope: bool = True, claude_agent_sdk: bool = True, dspy: bool = True, adk: bool = True, @@ -61,6 +63,7 @@ def auto_instrument( pydantic_ai: Enable Pydantic AI instrumentation (default: True) google_genai: Enable Google GenAI instrumentation (default: True) agno: Enable Agno instrumentation 
(default: True) + agentscope: Enable AgentScope instrumentation (default: True) claude_agent_sdk: Enable Claude Agent SDK instrumentation (default: True) dspy: Enable DSPy instrumentation (default: True) adk: Enable Google ADK instrumentation (default: True) @@ -123,6 +126,8 @@ def auto_instrument( results["google_genai"] = _instrument_integration(GoogleGenAIIntegration) if agno: results["agno"] = _instrument_integration(AgnoIntegration) + if agentscope: + results["agentscope"] = _instrument_integration(AgentScopeIntegration) if claude_agent_sdk: results["claude_agent_sdk"] = _instrument_integration(ClaudeAgentSDKIntegration) if dspy: diff --git a/py/src/braintrust/conftest.py b/py/src/braintrust/conftest.py index 0fbdf40b..2345b227 100644 --- a/py/src/braintrust/conftest.py +++ b/py/src/braintrust/conftest.py @@ -191,6 +191,7 @@ def get_vcr_config(): "decode_compressed_response": True, "filter_headers": [ "authorization", + "Authorization", "openai-organization", "x-api-key", "api-key", diff --git a/py/src/braintrust/integrations/__init__.py b/py/src/braintrust/integrations/__init__.py index 4d02c323..cf18a7a3 100644 --- a/py/src/braintrust/integrations/__init__.py +++ b/py/src/braintrust/integrations/__init__.py @@ -1,4 +1,5 @@ from .adk import ADKIntegration +from .agentscope import AgentScopeIntegration from .agno import AgnoIntegration from .anthropic import AnthropicIntegration from .claude_agent_sdk import ClaudeAgentSDKIntegration @@ -8,6 +9,7 @@ __all__ = [ "ADKIntegration", + "AgentScopeIntegration", "AgnoIntegration", "AnthropicIntegration", "ClaudeAgentSDKIntegration", diff --git a/py/src/braintrust/integrations/adk/test_adk.py b/py/src/braintrust/integrations/adk/test_adk.py index bed6f3e6..9d9be979 100644 --- a/py/src/braintrust/integrations/adk/test_adk.py +++ b/py/src/braintrust/integrations/adk/test_adk.py @@ -41,6 +41,7 @@ def before_record_request(request): "cassette_library_dir": str(Path(__file__).parent / "cassettes"), "filter_headers": [ 
"authorization", + "Authorization", "x-goog-api-key", ], "before_record_request": before_record_request, diff --git a/py/src/braintrust/integrations/agentscope/__init__.py b/py/src/braintrust/integrations/agentscope/__init__.py new file mode 100644 index 00000000..534f1db8 --- /dev/null +++ b/py/src/braintrust/integrations/agentscope/__init__.py @@ -0,0 +1,20 @@ +"""Braintrust integration for AgentScope.""" + +from braintrust.logger import NOOP_SPAN, current_span, init_logger + +from .integration import AgentScopeIntegration + + +__all__ = ["AgentScopeIntegration", "setup_agentscope"] + + +def setup_agentscope( + api_key: str | None = None, + project_id: str | None = None, + project_name: str | None = None, +) -> bool: + """Setup Braintrust integration with AgentScope.""" + if current_span() == NOOP_SPAN: + init_logger(project=project_name, api_key=api_key, project_id=project_id) + + return AgentScopeIntegration.setup() diff --git a/py/src/braintrust/integrations/agentscope/cassettes/test_agentscope_sequential_pipeline_creates_parent_span.yaml b/py/src/braintrust/integrations/agentscope/cassettes/test_agentscope_sequential_pipeline_creates_parent_span.yaml new file mode 100644 index 00000000..1ceea647 --- /dev/null +++ b/py/src/braintrust/integrations/agentscope/cassettes/test_agentscope_sequential_pipeline_creates_parent_span.yaml @@ -0,0 +1,545 @@ +interactions: +- request: + body: '{"messages":[{"role":"system","content":[{"type":"text","text":"You rewrite + the input as a short title."}]},{"role":"user","content":[{"type":"text","text":"# + Conversation History\nThe content between tags contains + your conversation history\n\nuser: Summarize why tests should use real + recorded traffic.\n"}]}],"model":"gpt-4o-mini","stream":false,"temperature":0}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '403' + Content-Type: + - application/json + Host: + - api.openai.com + User-Agent: + - 
AsyncOpenAI/Python 2.29.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - async:asyncio + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.29.0 + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdfELyCzzdwMAAAD//41SwW7bMAy95ysMneMi + yZwou/a2a5EehqIwFImytcmiIcpbhyH/PspOarfrgF0EmI/v+fGR9+0ed1V1+Hl4HI5f2oevzekU + BrHODDx/A51urDuNzIPkMEywjqASZNWtlFUld/KzHIEODfhMa/pUVlh2Lrhyt9lV5UaW2+OV3aLT + QNz2xJ9F8Xt8s89g4IXLm/Wt0gGRaoBrtyYuRvS5IhSRo6RCEusZ1BgShNH6PQSwLlGBtngkF5ri + AZTnR2M0YIpTVNY6XbhQnIASNyyFItiBVB4mDN4vABUCJpXDGEd4viKXV9Memz7imd5RheUwqK05 + O+Ig2SAl7MWIXvh9HsMZ3swrWKjrU53wO4y/28tJTswrmcHjFUvsz8/lw379gVhtICnnaZGt0Eq3 + YGbmvAg1GIcLYLUY+W8vH2lPY3PI/yM/A1pDz6dW9xGM02/nndsi5Hv9V9trxKNhQRB/8AHWyUHM + azBg1eCnKxL0ixJ0Ne+qgdhHN52S7WutPoE0cnu2YnVZ/QE7MxO8WAMAAA== + headers: + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - 9e20e166fbae67b2-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 25 Mar 2026 21:06:38 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - braintrust-data + openai-processing-ms: + - '664' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + set-cookie: + - __cf_bm=0vN_GTd.Taalah9PDsZg_Ru.1_PcZ_NBP9qkR2MCsFE-1774472797.273874-1.0.1.1-_HsrwKoaPPyMDTMtccbmEvGb.WDYukiKlNhKyLTp32aZR8vwDwqATyzmrwTg82HAg9bVn2GQnmrENihz.LTaMxGxvJCORGScpnet2yitftoFB0LwZa12LFWkWMzlprHK; + HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 25 
Mar 2026 + 21:36:38 GMT + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999940' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_bcc5d231b4f24f049b2e0a0ba5d880a9 + status: + code: 200 + message: OK +- request: + body: '{"messages":[{"role":"system","content":[{"type":"text","text":"You rewrite + the input as a short title."}]},{"role":"user","content":[{"type":"text","text":"# + Conversation History\nThe content between tags contains + your conversation history\n\nuser: Summarize why tests should use real + recorded traffic.\n"}]}],"model":"gpt-4o-mini","stream":false,"temperature":0}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '403' + Content-Type: + - application/json + Host: + - api.openai.com + User-Agent: + - AsyncOpenAI/Python 2.29.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - async:asyncio + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.29.0 + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAA/4xSwW7bMAy95ysEneMhcdI56G3FTus2DFt3GgpDkShHjSwJIj2sKPLvo+2kdrcO + 2EWA+fieHx/5tBBCOiOvhdQHRbpNvnj/+cvDR328uXUfNv7WfTuGlSrfVZ9Sh4+tXPaMuH8ATRfW + Gx2ZB+RiGGGdQRH0quuq2m6rclduBqCNBnxPaxIV21i0LriiXJXbYlUV692ZfYhOA3LbD/4U4ml4 + e5/BwC8ur5aXSguIqgGuXZq4mKPvK1IhOiQVSC4nUMdAEAbrNxDAOkIRrfiOLjTiKyjPj47ZgBF3 + WVnrtHBB3AESN8yFMtgOVT9M6LyfASqESKoPYxjh/oycnk372KQc9/gHVVoOAw81Z4ccJBtEikkO + 6Inf+yGc7sW8koXaRDXFIwy/u6pGOTmtZAJ3Z4zYn5/Kb6+Wr4jVBkg5j7NspVb6AGZiTotQnXFx + 
BixmI//t5TXtcWwO+X/kJ0BrSHxqdcpgnH4579SWob/Xf7U9RzwYlgj5Jx9gTQ5yvwYDVnV+vCKJ + j0jQ1ryrBnLKbjwlm2qtNlCZar23cnFa/AYAAP//AwB2CCbyWAMAAA== + headers: + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - 9e20e208d9766142-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 25 Mar 2026 21:07:03 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - braintrust-data + openai-processing-ms: + - '652' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + set-cookie: + - __cf_bm=nPA03jdSYpPIt1MCtq5c3dH__SdRu5fP0HUZaKsaPsk-1774472823.1741066-1.0.1.1-QeI7TdvczvDfZcpacbJMilgyA_s79AH1EgKxSLO_1Z_BUh6jqZue4vjSpv9Sr.ihxNrRzZkdyq7EzAFHuIxCU9THwR_hF6iuXTWOT0BzCGX_rHDteSnXa7BlEfiRqaBH; + HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 25 Mar 2026 + 21:37:03 GMT + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999940' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_09e84f9d413d4a8d9495fef9e4133024 + status: + code: 200 + message: OK +- request: + body: '{"messages":[{"role":"system","content":[{"type":"text","text":"You answer + the previous message in one sentence."}]},{"role":"user","content":[{"type":"text","text":"# + Conversation History\nThe content between tags contains + your conversation history\n\nAlice: Benefits of Using Real Recorded + Traffic in Testing\n"}]}],"model":"gpt-4o-mini","stream":false,"temperature":0}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + 
- '410' + Content-Type: + - application/json + Host: + - api.openai.com + User-Agent: + - AsyncOpenAI/Python 2.29.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - async:asyncio + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.29.0 + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdfELyPKtcAYAAAD//4xTy27bMBC8+ysInqXA + dpQqPRfIMUBb9FQEAk2upG35ApcyYhT+9y6lOHLaFOiFgHZ2docz1OeTg+bT1y/mMH54GB7H5+ZW + pV5WhREOP0DnC+tGB+ZBxuAXWCdQGcrUXds2Tbu/3zcz4IIBW2hDzHUTaoce6/1239Tbtt7dv7DH + gBqI277zpxC/5rPo9AaeubytLhUHRGoArl2auJiCLRWpiJCy8llWK6iDz+Bn6d8I/SBYquVDh2TA + iJxU36MW6EUGyqUB/Kg86xFK6ykpfaoEupjCkUsRUh+SKzhTCIcxUyWUN2IEG0mg4V3Yn0QMZSvy + KiSamMkLlHAhwSyAdfJS8EdMwTvuvLnWnKCfSBXf/GTtFaC8D1kV32e3nl6Q86s/Ngys9EB/UGXP + vtPY8WrizNgLyiHKGT3z+TTnML2xVvIgF3OXw0+Y193dLePkmv4KLnkzmFmgXevtx+qdaZ2BrNDS + VY5SKz2CWZlr6GoyGK6AzdWd/xbz3uzl3hzu/4xfAa0h8rPuYgKD+u2F17YE5d/4V9urx7NgSZCO + /Ni7jJBKDgZ6NdnlxUo6UQbXcVgDpJhwebZ97LS6hda0u0MvN+fNbwAAAP//AwDs2pZ8xAMAAA== + headers: + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - 9e20e20db9eddfce-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 25 Mar 2026 21:07:04 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - braintrust-data + openai-processing-ms: + - '669' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + set-cookie: + - 
__cf_bm=HayDHNMCRsqHCSCg5YnFHqR8EGszTInfOwZSaeO5eyg-1774472823.957494-1.0.1.1-T1qVHFP.Enq2BHk4byqWQwdinEXG5pjJnQ6C55kaXNumTZcQVGLUbKoXNUrcL8OlUtndtdfReMfEtfZ5GDvmyZ8zIPnM28rC8KnQNIAYwe3oTr24xzHY4xyLaaAQfXw_; + HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 25 Mar 2026 + 21:37:04 GMT + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999940' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_074bde6fdf2a4c4caf77d081e3a5af31 + status: + code: 200 + message: OK +- request: + body: '{"messages":[{"role":"system","content":[{"type":"text","text":"You rewrite + the input as a short title."}]},{"role":"user","content":[{"type":"text","text":"# + Conversation History\nThe content between tags contains + your conversation history\n\nuser: Summarize why tests should use real + recorded traffic.\n"}]}],"model":"gpt-4o-mini","stream":false,"temperature":0}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '403' + Content-Type: + - application/json + Host: + - api.openai.com + User-Agent: + - AsyncOpenAI/Python 2.29.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - async:asyncio + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.29.0 + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdfELyDb29QAAAAD//4xSTW/bMAy951cYOsdD + vlrl3AI99DJsaIGiQ2EoEuWolSVPpLsMQ/77KDup3a0DdhFgPr7nx0c+SL99PHxZP94crg/XPz6v + v6/tw62YZ0bcPYOmM+uTjswDcjEMsE6gCLLqUsrNRq62l7IHmmjAZ1rdUrmJZeOCK1eL1aZcyHK5 + 
PbH30WlAbvvGn0Xxq3+zz2DgwOXF/FxpAFHVwLVzExdT9LkiFKJDUoHEfAR1DASht34FAawjLKIt + 7tGFuvgKyvOjYzJgirukrHW6cKG4AyRumAolsB2qPEzovJ8AKoRIKofRj/B0Qo5vpn2s2xR3+AdV + WA4D9xVnhxwkG0SKrejRI79PfTjdu3kFCzUtVRRfoP/dhRzkxLiSEdyeMGJ/fixfXsw/EKsMkHIe + J9kKrfQezMgcF6E64+IEmE1G/tvLR9rD2Bzy/8iPgNbQ8qlVbQLj9Pt5x7YE+V7/1fYWcW9YIKRX + PsCKHKS8BgNWdX64IoE/kaCpeFc1pDa54ZRsW2m1BmnkcmfF7Dj7DQAA//8DANl/ZwFYAwAA + headers: + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - 9e20e31aaae4cf8f-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 25 Mar 2026 21:07:47 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - braintrust-data + openai-processing-ms: + - '724' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + set-cookie: + - __cf_bm=Mzctd7Vf4Pj8hLREk5uV3xfhGAJqHTBdS7Z68xdqJ3E-1774472866.9835687-1.0.1.1-9060awKZeUAS__HpRhOh3ZDNpHvhVrftE8gP2f5h5qVQ7SUhdLjp1QfnUBiOHckFloYERDcS4nSTfy3q7RPbo.rO9ak4DB2RBS7.iNAo4m7Yek2xS4nBLaBUTUBc7VfU; + HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 25 Mar 2026 + 21:37:47 GMT + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999940' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_923040bc8f624770996dc8d06bd77fef + status: + code: 200 + message: OK +- request: + body: '{"messages":[{"role":"system","content":[{"type":"text","text":"You answer + the previous message in one sentence."}]},{"role":"user","content":[{"type":"text","text":"# + Conversation History\nThe content between tags contains + your conversation 
history\n\nAlice: Benefits of Using Real Recorded + Traffic in Testing\n"}]}],"model":"gpt-4o-mini","stream":false,"temperature":0}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '410' + Content-Type: + - application/json + Host: + - api.openai.com + User-Agent: + - AsyncOpenAI/Python 2.29.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - async:asyncio + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.29.0 + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdfELyDbOSAcAAAD//4xTwW7bMAy95ysEnZ0i + yZy6uw/tYehp6y5DYSgSbbOVJUGUswZD/n2UndTp2gG7CDAfH/n0nvz19un2W7i//rK6t9u777vD + rx/q4UUWmeF3T6DTmXWlPfMgoXcTrCOoBHnquqrKstrcXFcj0HsDNtPakJalX/bocLlZbcrlqlqu + b07szqMG4raf/CnE7/HMOp2BFy6vinOlByLVAtfOTVyM3uaKVERISbkkixnU3iVwo/QHQtcKlmr5 + 0D4aMCJF1TSoBTqRgFJuANcpx3qE0nqISh8KgX2Ifs+lALHxsc84UwjbLlEhlDOiAxtIoOFd2BxE + 8Hkr8iokGpjJC5TofYRRAOvkpeD2GL3rufPqUnOEZiCVfXODtReAcs4nlX0f3Xo8IcdXf6xvWemO + /qLKhn2nrubVxJmxF5R8kCN65PNxzGF4Y63kQX1IdfLPMK7bbqdxck5/BjflCUws0M716nPxwbTa + QFJo6SJHqZXuwMzMOXQ1GPQXwOLizu/FfDR7ujeH+z/jZ0BrCPys6xDBoH574bktQv43/tX26vEo + WBLEPT/2OiHEnIOBRg12erGSDpSgrzmsFmKIOD3bJtRafYLKVOtdIxfHxR8AAAD//wMAQ8dY3cQD + AAA= + headers: + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - 9e20e31fec3dc132-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 25 Mar 2026 21:07:48 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + 
openai-organization: + - braintrust-data + openai-processing-ms: + - '768' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + set-cookie: + - __cf_bm=JYxrn1QLzqghAM1PJW6V89UMbmiLatd1atmmQw6NPr0-1774472867.8239858-1.0.1.1-hwGgJY4XYCiJtbeBfm6fdTUACpklHSNTd64qYVFPn23d73s8.NBRfBlLx6nUV4d3.tfIInsBJq50FlZh7Wv9iTFtj7HY1hVkAYRHdnNzeq_4_piEn49lFlc_GkLVdmE0; + HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 25 Mar 2026 + 21:37:48 GMT + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999940' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_13f99272933545f8bac789ebd64bb0f1 + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/integrations/agentscope/cassettes/test_agentscope_simple_agent_run.yaml b/py/src/braintrust/integrations/agentscope/cassettes/test_agentscope_simple_agent_run.yaml new file mode 100644 index 00000000..87f6358e --- /dev/null +++ b/py/src/braintrust/integrations/agentscope/cassettes/test_agentscope_simple_agent_run.yaml @@ -0,0 +1,320 @@ +interactions: +- request: + body: '{"messages":[{"role":"system","name":"system","content":[{"type":"text","text":"You + are a concise assistant. 
Answer in one sentence."}]},{"role":"user","name":"user","content":[{"type":"text","text":"Say + hello in exactly two words."}]}],"model":"gpt-4o-mini","stream":false,"temperature":0}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '290' + Content-Type: + - application/json + Host: + - api.openai.com + User-Agent: + - AsyncOpenAI/Python 2.29.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - async:asyncio + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.29.0 + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdfELyCzNtgAAAAD//4xSsU7DMBDd+xWV56Rq + 05TQGYbODCyoilz7khgc29hOFYT675ydtglQJBZLvnfv/N7zPRzNkb33u02/qXTPN48AT89bkgSG + PrwC8xfWgmnkgRdaDTCzQD2EqauiyPMiK7Z3EWg1BxlotfFprtNWKJFmyyxPl0W6uj+zGy0YOGx7 + wet8/hnPoFNx6LG8TC6VFpyjNWDt0oRFq2WoEOqccJ4qT5IRZFp5UFH6DqTUc9+AhcW0xULVORpk + qk7KCUCV0p4Gm1Hc/oycrnKkro3VB/eDSiq06ZoSU3EYET7tvDYkoic899F2980JwUGt8aXXbxCf + W2fDODKGPQHPmEd9clLeJDeGlRw8FdJNUiOMsgb4yBwjph0XegLMJpZ/a7k1e7AtVP2f8SPAGBhc + otJY4IJ99zu2WQib+FfbNeIomDiwR1yt0guw4Rs4VLSTw34Q9+E8tCX+VQ3WWDEsSWVKRtdQ8GJ1 + qMjsNPsCAAD//wMAQQCebTIDAAA= + headers: + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - 9e20e1608aaf6142-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 25 Mar 2026 21:06:37 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - braintrust-data + openai-processing-ms: + - '373' + 
openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + set-cookie: + - __cf_bm=0r2bm90d_zmUe.y8EYZpgoGyTVS4QQSDoJVx.yDpJng-1774472796.2480545-1.0.1.1-onYisUL_Bju9EhfGXQnZBZkwk3gdjG7tHXVdr34BVePUh3JL0OqfVWApVIaF_KDBKfw4HIiGBvzONzv_AS91kbK7eL.FzFDwILNg8_F1h3hsPpZO.pIoeUN1dp_.acW6; + HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 25 Mar 2026 + 21:36:37 GMT + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999975' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_0b44866bb8cd459db8712e04e4248889 + status: + code: 200 + message: OK +- request: + body: '{"messages":[{"role":"system","name":"system","content":[{"type":"text","text":"You + are a concise assistant. Answer in one sentence."}]},{"role":"user","name":"user","content":[{"type":"text","text":"Say + hello in exactly two words."}]}],"model":"gpt-4o-mini","stream":false,"temperature":0}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '290' + Content-Type: + - application/json + Host: + - api.openai.com + User-Agent: + - AsyncOpenAI/Python 2.29.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - async:asyncio + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.29.0 + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdfELyPJOswAAAAD//4xS0WrCMBR99yskz61o + 7aiv7mX6MJgwtsGQEpPbNjNNQpKOjeG/76ZVWzcHewnknntuzjm5t/dsebeB9Wq/qZ7WD4/L55eU + 
piQKDL17A+ZPrAnTyAMvtOpgZoF6CFNnWZamWbJIkhaoNQcZaKXxcarjWigRJ9MkjadZPFsc2ZUW + DBy2veJ1PP5qz6BTcfjA8jQ6VWpwjpaAtVMTFq2WoUKoc8J5qjyJepBp5UG10lcgpR77CixMhi0W + isbRIFM1Ug4AqpT2NNhsxW2PyOEsR+rSWL1zP6ikQJuuyjEVhxHh085rQ1r0gOe2td1cOCE4qDY+ + 93oP7XPzpBtH+rAH4BHzqE8OyjfRlWE5B0+FdIPUCKOsAt4z+4hpw4UeAKOB5d9ars3ubAtV/md8 + DzAGBpcoNxa4YJd++zYLYRP/ajtH3AomDuw7rlbuBdjwDRwK2shuP4j7dB7qHP+qBGus6JakMDmj + c8h4NtsVZHQYfQMAAP//AwDajPLhMgMAAA== + headers: + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - 9e20e202698a251d-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 25 Mar 2026 21:07:03 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - braintrust-data + openai-processing-ms: + - '334' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + set-cookie: + - __cf_bm=GJt8W0lyxqfgtKXEJ4M8twcG5pNqc4RmQiPqQ_IukiU-1774472822.1494222-1.0.1.1-Jv3zKpnCjFAAQQaXBEt3RElEP.QjtEFbqrvr8BASrk5X7XSiOj1UBc4tUR3t9QbKmOM0VrcVW6R3HYLaYxbTHQqz4Dpjl7Z.Sz9BslefycjBprbfLjQ1aoOYxrSO7lkO; + HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 25 Mar 2026 + 21:37:03 GMT + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999977' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_8c118a37a5da44069de28fba911081e0 + status: + code: 200 + message: OK +- request: + body: '{"messages":[{"role":"system","name":"system","content":[{"type":"text","text":"You + are a concise assistant. 
Answer in one sentence."}]},{"role":"user","name":"user","content":[{"type":"text","text":"Say + hello in exactly two words."}]}],"model":"gpt-4o-mini","stream":false,"temperature":0}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '290' + Content-Type: + - application/json + Host: + - api.openai.com + User-Agent: + - AsyncOpenAI/Python 2.29.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - async:asyncio + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.29.0 + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdfELyDbKMgIAAAD//4xSQU7DMBC89xWVz0mV + pKHhDALBDSFxQlXk2pvE4NiW7dAi1L+zTtomhSJxseSdnfXMeO+zuxuWbp9vt4/pU/WyonrHk45E + gaE3b8D8kbVgGnnghVYDzCxQD2FqWhR5XmTXq1UPtJqDDLTa+DjXcSuUiLMky+OkiNPrA7vRgoHD + tle8zudf/Rl0Kg47LCfRsdKCc7QGrB2bsGi1DBVCnRPOU+VJNIJMKw+ql/4AUuq5b8DCYtpioeoc + DTJVJ+UEoEppT4PNXtz6gOxPcqSujdUb94NKKrTpmhJTcRgRPu28NqRH93iue9vdmROCg1rjS6/f + oX9umQ3jyBj2BDxgHvXJSfkqujCs5OCpkG6SGmGUNcBH5hgx7bjQE2A2sfxby6XZg22h6v+MHwHG + wOASlcYCF+zc79hmIWziX22niHvBxIH9wNUqvQAbvoFDRTs57Adxn85DW+Jf1WCNFcOSVKZkdAkF + L9JNRWb72TcAAAD//wMAHqai9DIDAAA= + headers: + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - 9e20e3168f481d99-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 25 Mar 2026 21:07:46 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - braintrust-data + openai-processing-ms: + - '370' + 
openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + set-cookie: + - __cf_bm=QmJ_mn8jkQXEiES_f8MtNfFrG.TdUhMi9F3CsYesW.Y-1774472866.3273165-1.0.1.1-9jM9NCSYxoeVuM4GWc_rXbAFSxA2USepp4rz5niKNzmDK.TtF1V7PSKWLir8akfbpuyXvva5QQRFgDCcMT3P0xa_1Tzm9mW9uidzQIoQBbq5O6t0XUQzY8bQC4ddKj1i; + HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 25 Mar 2026 + 21:37:46 GMT + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999977' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_2ac33c8b77a343ea881d0fe0fbeccc14 + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/integrations/agentscope/cassettes/test_agentscope_tool_use_creates_tool_span.yaml b/py/src/braintrust/integrations/agentscope/cassettes/test_agentscope_tool_use_creates_tool_span.yaml new file mode 100644 index 00000000..8b272df9 --- /dev/null +++ b/py/src/braintrust/integrations/agentscope/cassettes/test_agentscope_tool_use_creates_tool_span.yaml @@ -0,0 +1,451 @@ +interactions: +- request: + body: '{"messages":[{"role":"system","name":"system","content":[{"type":"text","text":"You + are a helpful assistant. 
Use tools when required and keep answers brief."}]},{"role":"user","name":"user","content":[{"type":"text","text":"Use + Python to compute 6 * 7 and return just the result."}]}],"model":"gpt-4o-mini","stream":false,"temperature":0,"tools":[{"type":"function","function":{"name":"execute_python_code","parameters":{"properties":{"code":{"description":"The + Python code to be executed.","type":"string"},"timeout":{"default":300,"description":"The + maximum time (in seconds) allowed for the code to run.","type":"number"}},"required":["code"],"type":"object"},"description":"Execute + the given python code in a temp file and capture the return\ncode, standard + output and error. Note you must `print` the output to get\nthe result, and the + tmp file will be removed right after the execution."}}]}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '897' + Content-Type: + - application/json + Host: + - api.openai.com + User-Agent: + - AsyncOpenAI/Python 2.29.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - async:asyncio + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.29.0 + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdfELyKxINAYAAAD//41TXW+bMBR951cgP21T + mICSklTawzZtUvuwTZ32sI7Kcs0NeDM2tU3TKMp/n20SIGkmjQcE99xzfD+ObxT9unj88PPmS5as + 85q+v9usSYVmjiEffgM1B9ZbKi0PDJOih6kCYsCpJnmeZXmaL5ceaGQJ3NGq1kSZjBomWJTGaRbF + eZQs9uxaMgrapv2yv2G49W9Xpyjh2Ybj2SHSgNakAhs7JNmgktxFENGaaUOEQbMRpFIYEK500XE+ + AYyUHFPC+Xhw/2wn3+OwbCI2Fx8vmutvn0hbPz3yz3fXvLldf/+xmJzXS29aX9CqE3QY0gQf4lcn + h1lMkMZz4RloZwC3G1NLgakd5ImMTSaq6hrbnWsBbQvksgp0VSAFuuMmfBdehm/CvChEq5gwr/rw + 
6wLt0JHULjj3fT+ZloJVpwl/OUYihDTEdePneL9HdsPKuKxaJR/0CRWtrBV0ja1ztJ/EdCHBoRBf + AuqOdo6sXNMabOQf8Icm87RXRaMvRzSd70Fj6+QTVp7PzujhEgxh3hSDDymhNZQjdfQj6UomJ0Aw + 6f1lNee0+/6ZqP5HfgQohdbeONwqKBk97nhMU+Cu7b/Shin7gpEG9WTvITYMlNtHCSti3dLfUb3R + Bhpsl1aB8mbyBm9xHM/LyyRe5gsU7IK/crsEFF8EAAA= + headers: + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - 9e20e16f0b9aaf0d-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 25 Mar 2026 21:06:40 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - braintrust-data + openai-processing-ms: + - '1239' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + set-cookie: + - __cf_bm=ncH1zypNLZZs6ohCgRXU6EAQ0iJm0PqJ2cW8rY7HVI8-1774472798.5665667-1.0.1.1-k0DS.3SU2tAkF.r3ZjBzLnG4fwZB2Fu5meB0y3aUWpZtAKeGqa66nGagt.hDaY2vSKIEbmMrwf7bozk5KFrU29xqoOc0X32xTuik1N0lOr3jOPFP1_u5ceUvxEjzvGTM; + HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 25 Mar 2026 + 21:36:40 GMT + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999962' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_02a17794efcb4607a4ef7c7281e6d187 + status: + code: 200 + message: OK +- request: + body: '{"messages":[{"role":"system","name":"system","content":[{"type":"text","text":"You + are a helpful assistant. 
Use tools when required and keep answers brief."}]},{"role":"user","name":"user","content":[{"type":"text","text":"Use + Python to compute 6 * 7 and return just the result."}]}],"model":"gpt-4o-mini","stream":false,"temperature":0,"tools":[{"type":"function","function":{"name":"execute_python_code","parameters":{"properties":{"code":{"description":"The + Python code to be executed.","type":"string"},"timeout":{"default":300,"description":"The + maximum time (in seconds) allowed for the code to run.","type":"number"}},"required":["code"],"type":"object"},"description":"Execute + the given python code in a temp file and capture the return\ncode, standard + output and error. Note you must `print` the output to get\nthe result, and the + tmp file will be removed right after the execution."}}]}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '897' + Content-Type: + - application/json + Host: + - api.openai.com + User-Agent: + - AsyncOpenAI/Python 2.29.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - async:asyncio + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.29.0 + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAA/4xTXWvbMBR9968wetpGPBzj1ElgD2NlHSsrhY21tC5ClW9sdbJkJDkkDfnvk+Qk + dtIM5gdj33PP0f042gRhiFiB5iGiFTG0bnh0eXP78uP2Cyw+x79urq713e9LU94v5fr+q6jRyDHk + 8wtQs2d9pNLywDApOpgqIAac6jjL0jRLpknqgVoWwB2tbEyUyqhmgkVJnKRRnEXj6Y5dSUZB27RH + +xuGG/92dYoCVjYcj/aRGrQmJdjYPskGleQugojWTBsiDBr1IJXCgHCli5bzAWCk5JgSzvuDu2cz + +O6HZRPx3Wv6IK+vxvKbjOPlagbVKjM/Z98H53XS68YXtGgFPQxpgB/i85PDLCZI7bmwAtoawM3a + VFJgagd5ImOTiSrb2nbnWkCbHLmsHM1zpEC33ISfwovwQ5jluWgUE+ZdF36foy06ktoG576fBtNS + 
sGg14W/HSISQhrhu/Byfdsj2sDIuy0bJZ31CRQtrBV1h6xztJzFcSLAvxJeA2qOdIytXNwYb+Qf8 + oeNJ0qmi3pc9mkx2oLF18gEry0Zn9HABhjBvioMPKaEVFD219yNpCyYHQDDo/W0157S7/pko/0e+ + ByiFxt443CgoGD3uuE9T4K7tv9IOU/YFIw1qae8hNgyU20cBC2Ld0t1RvdYGamyXVoLyZvIGb3Ac + T4qLcTzLpijYBn8BAAD//wMAHPLnAF8EAAA= + headers: + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - 9e20e212f84f7e56-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 25 Mar 2026 21:07:05 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - braintrust-data + openai-processing-ms: + - '760' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + set-cookie: + - __cf_bm=cEkxxhx5PI02Ywh8nf.Xs6jrFBlPOsqdtqnmu2dQoUs-1774472824.7966123-1.0.1.1-l8rKWW5YXcEa4qiqAmWX7gYkTwxcPBKKDdUiau.bKkThNBJdZMRBk8E6aelB55XL8mGvLIuOpvyEL1u_F6R5238q7PZg1iu7hVZPFcKKIeqC1cnWzWuetLrt0ahyQJcC; + HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 25 Mar 2026 + 21:37:05 GMT + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999965' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_5e3d6b5e29b545dc85b5ddedeb732996 + status: + code: 200 + message: OK +- request: + body: '{"messages":[{"role":"system","name":"system","content":[{"type":"text","text":"You + are a helpful assistant. 
Use tools when required and keep answers brief."}]},{"role":"user","name":"user","content":[{"type":"text","text":"Use + Python to compute 6 * 7 and return just the result."}]}],"model":"gpt-4o-mini","stream":false,"temperature":0,"tools":[{"type":"function","function":{"name":"execute_python_code","parameters":{"properties":{"code":{"description":"The + Python code to be executed.","type":"string"},"timeout":{"default":300,"description":"The + maximum time (in seconds) allowed for the code to run.","type":"number"}},"required":["code"],"type":"object"},"description":"Execute + the given python code in a temp file and capture the return\ncode, standard + output and error. Note you must `print` the output to get\nthe result, and the + tmp file will be removed right after the execution."}}]}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '897' + Content-Type: + - application/json + Host: + - api.openai.com + User-Agent: + - AsyncOpenAI/Python 2.29.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - async:asyncio + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.29.0 + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdfELyDYtDwEAAAD//4xTXW+bMBR951cgP21T + mADR0FTaw6ZuL6m2h+WlHZXlmBvi1tjMNmmzKP99tkmApJk0HhDcc8/x/Tguk+dGP8hNJedPt2Jz + //B1/rKYo4ljyOUTUHNkfaTS8sAwKTqYKiAGnGqS51mWp9fTmQdqWQJ3tKoxUSajmgkWpXGaRXEe + JdcH9loyCtqm/bK/Ybjzb1enKOHVhuPJMVKD1qQCGzsm2aCS3EUQ0ZppQ4RBkwGkUhgQrnTRcj4C + jJQcU8L5cHD37Ebfw7BsIs7n2Uv14/Zukc9+t1/ufork23fx+c9idF4nvW18QatW0H5II7yP35wd + ZjFBas+FV6CtAdxszVoKTO0gz2RsMlFVW9vuXAtoVyCXVaCbAinQLTfhp3AafgjzohCNYsK868Lv + 
C7RHJ1L74NL342haClatJvztGIkQ0hDXjZ/j4wHZ9yvjsmqUXOozKlpZK+g1ts7RfhLjhQTHQnwJ + qD3ZObJydWOwkc/gD02u0k4VDb4c0PTqABpbJx+x8nxyQQ+XYAjzpuh9SAldQzlQBz+StmRyBASj + 3t9Wc0m765+J6n/kB4BSaOyNw42CktHTjoc0Be7a/iutn7IvGGlQG3sPsWGg3D5KWBHrlu6O6q02 + UGO7tAqUN5M3eIOzlKbTeJnHMxTsg78AAAD//wMAxy9FZF8EAAA= + headers: + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - 9e20e3264eb7f005-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 25 Mar 2026 21:07:49 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - braintrust-data + openai-processing-ms: + - '724' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + set-cookie: + - __cf_bm=0XpUc1fNMZh7X5an8o2lKE5Me2U0kr5yZMBg0xANHr0-1774472868.8462937-1.0.1.1-dprcxP4hyQ0IPDL_vk1NK7FsJ11DyabA3P8I942JNvTn7zKho4A0pRDev9WYlxrW1LwDshBeG3MLMUsQs5Y9hxWVr.Wu3JoHqUF6i2Ho7_hr0NaKJDRn8f.qyG5mv6hc; + HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 25 Mar 2026 + 21:37:49 GMT + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999962' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_4413081d046145c59f7df6e62731b407 + status: + code: 200 + message: OK +- request: + body: '{"messages":[{"role":"system","name":"system","content":[{"type":"text","text":"You + are a helpful assistant. 
Use tools when required and keep answers brief."}]},{"role":"user","name":"user","content":[{"type":"text","text":"Use + Python to compute 6 * 7 and return just the result."}]},{"role":"assistant","name":"Jarvis","content":null,"tool_calls":[{"id":"call_7K4wgODLT79quBLSn1FNnAzT","type":"function","function":{"name":"execute_python_code","arguments":"{\"code\": + \"result = 6 * 7\\nprint(result)\"}"}}]},{"role":"tool","tool_call_id":"call_7K4wgODLT79quBLSn1FNnAzT","content":"042\n","name":"execute_python_code"}],"model":"gpt-4o-mini","stream":false,"temperature":0,"tools":[{"type":"function","function":{"name":"execute_python_code","parameters":{"properties":{"code":{"description":"The + Python code to be executed.","type":"string"},"timeout":{"default":300,"description":"The + maximum time (in seconds) allowed for the code to run.","type":"number"}},"required":["code"],"type":"object"},"description":"Execute + the given python code in a temp file and capture the return\ncode, standard + output and error. 
Note you must `print` the output to get\nthe result, and the + tmp file will be removed right after the execution."}}]}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '1293' + Content-Type: + - application/json + Cookie: + - __cf_bm=0XpUc1fNMZh7X5an8o2lKE5Me2U0kr5yZMBg0xANHr0-1774472868.8462937-1.0.1.1-dprcxP4hyQ0IPDL_vk1NK7FsJ11DyabA3P8I942JNvTn7zKho4A0pRDev9WYlxrW1LwDshBeG3MLMUsQs5Y9hxWVr.Wu3JoHqUF6i2Ho7_hr0NaKJDRn8f.qyG5mv6hc + Host: + - api.openai.com + User-Agent: + - AsyncOpenAI/Python 2.29.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - async:asyncio + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.29.0 + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdfELyDZzdwUAAAD//41SwW6cMBC971dYPqXS + EgGiC7lGinLLKbcmQo4ZwAnYyDO0Sav9947NJpB2K/Vi7c6b93jzZp4r3/rsZ3t9c3Orf3RV22d3 + RSH3geGenkHTO+tSO+YBGWcXWHtQBEE1K8uiKPOqTCMwugaGQOsmSgqXjMaaJE/zIknLJKtO7N4Z + Dcht3/ivEL/iG3zaBl65HLViZQRE1QHX3pu46N0QKlIhGiRlSe5XUDtLYKP1+x6EB5wHEq4VDw8X + 4sAvGRYVJf/6IgyKIr/c0j20M6owgp2HYQMoax2pEEE0/nhCjh9WB9dN3j3hH1TZcgTY15wYcnxs + C8lNMqJHfh9jJPOnKSULjRPV5F4gfi5PrxY9uW5iRbPyBBI7HDas/LA/o1c3QMoMuAlVaqV7aFbq + ugE1N8ZtgN1m6r/dnNNeJje2+x/5FdAaJr6xevLQGP154rXNQzjUf7V9pBwNSwT/nS+vJgM+bKKB + VvF1LFeJb0gw1ryuDvzkzXJD7VSn6dfmkKVXZSV3x91vPJeasFEDAAA= + headers: + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - 9e20e32d6800f95b-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 25 Mar 2026 21:07:51 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + 
X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - braintrust-data + openai-processing-ms: + - '642' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999947' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_962b361c03444494ac60f59571e1d91c + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/integrations/agentscope/cassettes/test_auto_agentscope.yaml b/py/src/braintrust/integrations/agentscope/cassettes/test_auto_agentscope.yaml new file mode 100644 index 00000000..221dcabb --- /dev/null +++ b/py/src/braintrust/integrations/agentscope/cassettes/test_auto_agentscope.yaml @@ -0,0 +1,122 @@ +interactions: +- request: + body: '{"messages":[{"role":"system","name":"system","content":[{"type":"text","text":"You + are a helpful assistant. 
Be brief."}]},{"role":"user","name":"user","content":[{"type":"text","text":"Say + hi in two words."}]}],"model":"gpt-4o-mini","stream":true,"stream_options":{"include_usage":true},"temperature":0}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '304' + Content-Type: + - application/json + Host: + - api.openai.com + User-Agent: + - AsyncOpenAI/Python 2.29.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - async:asyncio + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.29.0 + X-Stainless-Raw-Response: + - 'true' + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-DNPizxiSOmMQFp69qki8RpeoxlQWv","object":"chat.completion.chunk","created":1774472801,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"bSxoCuDli"} + + + data: {"id":"chatcmpl-DNPizxiSOmMQFp69qki8RpeoxlQWv","object":"chat.completion.chunk","created":1774472801,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"RR3fPq"} + + + data: {"id":"chatcmpl-DNPizxiSOmMQFp69qki8RpeoxlQWv","object":"chat.completion.chunk","created":1774472801,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":" + there"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"Ks8Wg"} + + + data: 
{"id":"chatcmpl-DNPizxiSOmMQFp69qki8RpeoxlQWv","object":"chat.completion.chunk","created":1774472801,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"OTuTCk6Yhl"} + + + data: {"id":"chatcmpl-DNPizxiSOmMQFp69qki8RpeoxlQWv","object":"chat.completion.chunk","created":1774472801,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null,"obfuscation":"Ky1jT"} + + + data: {"id":"chatcmpl-DNPizxiSOmMQFp69qki8RpeoxlQWv","object":"chat.completion.chunk","created":1774472801,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[],"usage":{"prompt_tokens":29,"completion_tokens":3,"total_tokens":32,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}},"obfuscation":"tz0KSnmcPPt"} + + + data: [DONE] + + + ' + headers: + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - 9e20e1806b8d67a6-SJC + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Wed, 25 Mar 2026 21:06:43 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - braintrust-data + openai-processing-ms: + - '1276' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + set-cookie: + - 
__cf_bm=26gJQ7Ja3taZFTBb7A3G23kqPcIKnacz5qIbjguQkYs-1774472801.3417854-1.0.1.1-9JqXxOO8Hh_qhFlPUB0VZAqRq_.bnwDdhOr_sD9UVAZbZxHIG013WyWO1wxnxzpoHB2eF6tlQndU7CKalttwp.wptRHYq2G6erRwpDHPPQiZU_8r.6r_TsmfH2ya11Un; + HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 25 Mar 2026 + 21:36:43 GMT + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999982' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_369a94628d4547e69137ec894aa584f3 + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/integrations/agentscope/integration.py b/py/src/braintrust/integrations/agentscope/integration.py new file mode 100644 index 00000000..fac8fe8d --- /dev/null +++ b/py/src/braintrust/integrations/agentscope/integration.py @@ -0,0 +1,26 @@ +"""AgentScope integration orchestration.""" + +from braintrust.integrations.base import BaseIntegration + +from .patchers import ( + AgentCallPatcher, + ChatModelPatcher, + FanoutPipelinePatcher, + SequentialPipelinePatcher, + ToolkitCallToolFunctionPatcher, +) + + +class AgentScopeIntegration(BaseIntegration): + """Braintrust instrumentation for AgentScope. 
Requires AgentScope v1.0.0 or higher.""" + + name = "agentscope" + import_names = ("agentscope",) + min_version = "1.0.0" + patchers = ( + AgentCallPatcher, + SequentialPipelinePatcher, + FanoutPipelinePatcher, + ToolkitCallToolFunctionPatcher, + ChatModelPatcher, + ) diff --git a/py/src/braintrust/integrations/agentscope/patchers.py b/py/src/braintrust/integrations/agentscope/patchers.py new file mode 100644 index 00000000..a0a9ba21 --- /dev/null +++ b/py/src/braintrust/integrations/agentscope/patchers.py @@ -0,0 +1,103 @@ +"""AgentScope patchers.""" + +from braintrust.integrations.base import CompositeFunctionWrapperPatcher, FunctionWrapperPatcher + +from .tracing import ( + _agent_call_wrapper, + _fanout_pipeline_wrapper, + _model_call_wrapper, + _sequential_pipeline_wrapper, + _toolkit_call_tool_function_wrapper, +) + + +class AgentCallPatcher(FunctionWrapperPatcher): + """Patch AgentScope agent execution.""" + + name = "agentscope.agent.call" + target_module = "agentscope.agent" + target_path = "AgentBase.__call__" + wrapper = _agent_call_wrapper + + +class SequentialPipelinePatcher(FunctionWrapperPatcher): + """Patch AgentScope sequential pipeline execution.""" + + name = "agentscope.pipeline.sequential" + target_module = "agentscope.pipeline" + target_path = "sequential_pipeline" + wrapper = _sequential_pipeline_wrapper + + +class FanoutPipelinePatcher(FunctionWrapperPatcher): + """Patch AgentScope fanout pipeline execution.""" + + name = "agentscope.pipeline.fanout" + target_module = "agentscope.pipeline" + target_path = "fanout_pipeline" + wrapper = _fanout_pipeline_wrapper + + +class ToolkitCallToolFunctionPatcher(FunctionWrapperPatcher): + """Patch AgentScope toolkit execution.""" + + name = "agentscope.tool.call_tool_function" + target_module = "agentscope.tool" + target_path = "Toolkit.call_tool_function" + wrapper = _toolkit_call_tool_function_wrapper + + +class _OpenAIChatModelPatcher(FunctionWrapperPatcher): + name = "agentscope.model.openai" + 
target_module = "agentscope.model" + target_path = "OpenAIChatModel.__call__" + wrapper = _model_call_wrapper + + +class _DashScopeChatModelPatcher(FunctionWrapperPatcher): + name = "agentscope.model.dashscope" + target_module = "agentscope.model" + target_path = "DashScopeChatModel.__call__" + wrapper = _model_call_wrapper + + +class _AnthropicChatModelPatcher(FunctionWrapperPatcher): + name = "agentscope.model.anthropic" + target_module = "agentscope.model" + target_path = "AnthropicChatModel.__call__" + wrapper = _model_call_wrapper + + +class _OllamaChatModelPatcher(FunctionWrapperPatcher): + name = "agentscope.model.ollama" + target_module = "agentscope.model" + target_path = "OllamaChatModel.__call__" + wrapper = _model_call_wrapper + + +class _GeminiChatModelPatcher(FunctionWrapperPatcher): + name = "agentscope.model.gemini" + target_module = "agentscope.model" + target_path = "GeminiChatModel.__call__" + wrapper = _model_call_wrapper + + +class _TrinityChatModelPatcher(FunctionWrapperPatcher): + name = "agentscope.model.trinity" + target_module = "agentscope.model" + target_path = "TrinityChatModel.__call__" + wrapper = _model_call_wrapper + + +class ChatModelPatcher(CompositeFunctionWrapperPatcher): + """Patch the built-in AgentScope chat model implementations.""" + + name = "agentscope.model" + sub_patchers = ( + _OpenAIChatModelPatcher, + _DashScopeChatModelPatcher, + _AnthropicChatModelPatcher, + _OllamaChatModelPatcher, + _GeminiChatModelPatcher, + _TrinityChatModelPatcher, + ) diff --git a/py/src/braintrust/integrations/agentscope/test_agentscope.py b/py/src/braintrust/integrations/agentscope/test_agentscope.py new file mode 100644 index 00000000..77962ba8 --- /dev/null +++ b/py/src/braintrust/integrations/agentscope/test_agentscope.py @@ -0,0 +1,303 @@ +from pathlib import Path + +import pytest +from braintrust import logger +from braintrust.integrations.agentscope import setup_agentscope +from braintrust.span_types import SpanTypeAttribute +from 
braintrust.test_helpers import init_test_logger +from braintrust.wrappers.test_utils import verify_autoinstrument_script + + +PROJECT_NAME = "test_agentscope" + +setup_agentscope(project_name=PROJECT_NAME) + + +@pytest.fixture(scope="module") +def vcr_config(): + return { + "cassette_library_dir": str(Path(__file__).parent / "cassettes"), + } + + +@pytest.fixture +def memory_logger(): + init_test_logger(PROJECT_NAME) + with logger._internal_with_memory_background_logger() as bgl: + yield bgl + + +def _span_type(span): + span_type = span["span_attributes"]["type"] + return span_type.value if hasattr(span_type, "value") else span_type + + +def _make_model(*, stream: bool = False): + from agentscope.model import OpenAIChatModel + + return OpenAIChatModel( + model_name="gpt-4o-mini", + stream=stream, + generate_kwargs={"temperature": 0}, + ) + + +def _make_agent(name: str, sys_prompt: str, *, toolkit=None, multi_agent: bool = False): + from agentscope.agent import ReActAgent + from agentscope.formatter import OpenAIChatFormatter, OpenAIMultiAgentFormatter + from agentscope.memory import InMemoryMemory + from agentscope.tool import Toolkit + + agent = ReActAgent( + name=name, + sys_prompt=sys_prompt, + model=_make_model(), + formatter=OpenAIMultiAgentFormatter() if multi_agent else OpenAIChatFormatter(), + toolkit=toolkit or Toolkit(), + memory=InMemoryMemory(), + ) + if hasattr(agent, "set_console_output_enabled"): + agent.set_console_output_enabled(False) + elif hasattr(agent, "disable_console_output"): + agent.disable_console_output() + return agent + + +@pytest.mark.vcr +@pytest.mark.asyncio +async def test_agentscope_simple_agent_run(memory_logger): + from agentscope.message import Msg + + assert not memory_logger.pop() + + agent = _make_agent( + "Friday", + "You are a concise assistant. 
Answer in one sentence.", + ) + + response = await agent( + Msg( + name="user", + content="Say hello in exactly two words.", + role="user", + ) + ) + + assert response is not None + + spans = memory_logger.pop() + agent_span = next(span for span in spans if span["span_attributes"]["name"] == "Friday.reply") + llm_spans = [span for span in spans if _span_type(span) == SpanTypeAttribute.LLM] + + assert _span_type(agent_span) == "task" + assert llm_spans + assert llm_spans[0]["metadata"]["model"] == "gpt-4o-mini" + assert "args" not in llm_spans[0]["input"] + assert llm_spans[0]["input"]["messages"][0]["role"] == "system" + assert llm_spans[0]["input"]["messages"][1]["role"] == "user" + assert llm_spans[0]["input"]["messages"][1]["content"][0]["text"] == "Say hello in exactly two words." + assert llm_spans[0]["output"]["role"] == "assistant" + assert llm_spans[0]["output"]["content"][0]["text"] == "Hello there." + assert "usage" not in llm_spans[0]["output"] + assert agent_span["span_id"] in llm_spans[0]["span_parents"] + + +@pytest.mark.vcr +@pytest.mark.asyncio +async def test_agentscope_sequential_pipeline_creates_parent_span(memory_logger): + from agentscope.message import Msg + from agentscope.pipeline import sequential_pipeline + + assert not memory_logger.pop() + + agents = [ + _make_agent("Alice", "You rewrite the input as a short title.", multi_agent=True), + _make_agent("Bob", "You answer the previous message in one sentence.", multi_agent=True), + ] + + result = await sequential_pipeline( + agents=agents, + msg=Msg( + name="user", + content="Summarize why tests should use real recorded traffic.", + role="user", + ), + ) + + assert result is not None + + spans = memory_logger.pop() + pipeline_span = next(span for span in spans if span["span_attributes"]["name"] == "sequential_pipeline.run") + alice_span = next(span for span in spans if span["span_attributes"]["name"] == "Alice.reply") + bob_span = next(span for span in spans if 
span["span_attributes"]["name"] == "Bob.reply") + + assert _span_type(pipeline_span) == "task" + assert pipeline_span["span_id"] in alice_span["span_parents"] + assert pipeline_span["span_id"] in bob_span["span_parents"] + + +@pytest.mark.vcr +@pytest.mark.asyncio +async def test_agentscope_tool_use_creates_tool_span(memory_logger): + from agentscope.message import Msg + from agentscope.tool import Toolkit, execute_python_code + + assert not memory_logger.pop() + + toolkit = Toolkit() + toolkit.register_tool_function(execute_python_code) + agent = _make_agent( + "Jarvis", + "You are a helpful assistant. Use tools when required and keep answers brief.", + toolkit=toolkit, + ) + + response = await agent( + Msg( + name="user", + content="Use Python to compute 6 * 7 and return just the result.", + role="user", + ) + ) + + assert response is not None + + spans = memory_logger.pop() + tool_spans = [span for span in spans if _span_type(span) == "tool"] + + assert tool_spans + assert tool_spans[0]["span_attributes"]["name"] == "execute_python_code.execute" + assert tool_spans[0]["input"]["tool_name"] == "execute_python_code" + assert tool_spans[0]["output"]["content"] + + llm_spans = [span for span in spans if _span_type(span) == SpanTypeAttribute.LLM] + assert llm_spans + assert llm_spans[0]["output"]["role"] == "assistant" + assert llm_spans[0]["output"]["content"][0]["type"] == "tool_use" + assert "usage" not in llm_spans[0]["output"] + + +@pytest.mark.asyncio +async def test_model_call_wrapper_stream_logs_final_output_and_metrics(memory_logger): + from braintrust.integrations.agentscope.tracing import _model_call_wrapper + + assert not memory_logger.pop() + + class FakeOpenAIChatModel: + model_name = "gpt-4o-mini" + + async def wrapped(*_args, **_kwargs): + async def _stream(): + yield {"content": [{"type": "text", "text": "Hello"}]} + yield { + "content": [{"type": "text", "text": "Hello there!"}], + "usage": {"prompt_tokens": 29, "completion_tokens": 3, 
"total_tokens": 32}, + } + + return _stream() + + stream = await _model_call_wrapper( + wrapped, + FakeOpenAIChatModel(), + args=([{"role": "user", "content": "Say hi in two words."}],), + kwargs={}, + ) + + chunks = [chunk async for chunk in stream] + + assert chunks[-1]["content"][0]["text"] == "Hello there!" + + spans = memory_logger.pop() + assert len(spans) == 1 + llm_span = spans[0] + + assert _span_type(llm_span) == SpanTypeAttribute.LLM + assert llm_span["output"]["role"] == "assistant" + assert llm_span["output"]["content"][0]["text"] == "Hello there!" + assert llm_span["metrics"]["prompt_tokens"] == 29 + assert llm_span["metrics"]["completion_tokens"] == 3 + assert llm_span["metrics"]["tokens"] == 32 + + +@pytest.mark.asyncio +async def test_model_call_wrapper_stream_span_covers_full_stream_duration(memory_logger): + """Span end timestamp must be recorded after the stream is fully consumed, not before.""" + import asyncio + + from braintrust.integrations.agentscope.tracing import _model_call_wrapper + + assert not memory_logger.pop() + + class FakeModel: + model_name = "gpt-4o-mini" + + async def wrapped(*_args, **_kwargs): + async def _stream(): + for i in range(3): + await asyncio.sleep(0.1) + yield {"content": [{"type": "text", "text": f"chunk-{i}"}]} + + return _stream() + + stream = await _model_call_wrapper( + wrapped, + FakeModel(), + args=([{"role": "user", "content": "hi"}],), + kwargs={}, + ) + async for _ in stream: + pass + + spans = memory_logger.pop() + assert len(spans) == 1 + span = spans[0] + m = span.get("metrics", {}) + duration_ms = (m["end"] - m["start"]) * 1000 + # Stream takes ~300ms (3 chunks × 100ms). The span duration must reflect that. 
# --- tail of py/src/braintrust/integrations/agentscope/test_agentscope.py (new file) ---
# (the closing assert of the preceding model-call stream test is trimmed here)


@pytest.mark.asyncio
async def test_toolkit_call_tool_function_wrapper_stream_span_covers_full_stream_duration(memory_logger):
    """Tool span end timestamp must be recorded after the stream is fully consumed, not before."""
    import asyncio

    from braintrust.integrations.agentscope.tracing import _toolkit_call_tool_function_wrapper

    assert not memory_logger.pop()

    class FakeToolkit:
        pass

    class FakeToolCall:
        name = "my_tool"

    async def wrapped(*_args, **_kwargs):
        # Simulate a streaming tool: ~300ms of total work spread over 3 chunks.
        async def _stream():
            for i in range(3):
                await asyncio.sleep(0.1)
                yield f"chunk-{i}"

        return _stream()

    stream = await _toolkit_call_tool_function_wrapper(
        wrapped,
        FakeToolkit(),
        args=(FakeToolCall(),),
        kwargs={},
    )
    async for _ in stream:
        pass

    spans = memory_logger.pop()
    assert len(spans) == 1
    span = spans[0]
    m = span.get("metrics", {})
    duration_ms = (m["end"] - m["start]"[:3]] if False else m["start"]) and (m["end"] - m["start"]) * 1000
    # Stream takes ~300ms (3 chunks × 100ms). The span duration must reflect that.
    # 200ms floor (not 300) tolerates scheduler jitter on slow CI machines.
    assert duration_ms >= 200, f"Span duration {duration_ms:.0f}ms is too short; span ended before stream was consumed"


class TestAutoInstrumentAgentScope:
    def test_auto_instrument_agentscope(self):
        verify_autoinstrument_script("test_auto_agentscope.py")


# --- py/src/braintrust/integrations/agentscope/tracing.py (new file) ---
"""AgentScope-specific span creation and stream aggregation."""

import contextlib
from contextlib import aclosing
from typing import Any

from braintrust.logger import start_span
from braintrust.span_types import SpanTypeAttribute


def _clean(mapping: dict[str, Any]) -> dict[str, Any]:
    """Drop None-valued keys so span payloads stay compact."""
    return {key: value for key, value in mapping.items() if value is not None}


def _args_kwargs_input(args: Any, kwargs: dict[str, Any]) -> dict[str, Any]:
    """Generic span input: positional args and keyword args, omitting empties."""
    return _clean(
        {
            "args": list(args) if args else None,
            "kwargs": kwargs if kwargs else None,
        }
    )


def _agent_name(instance: Any) -> str:
    """Prefer the agent's configured name; fall back to its class name."""
    return getattr(instance, "name", None) or instance.__class__.__name__


def _pipeline_metadata(args: Any, kwargs: dict[str, Any]) -> dict[str, Any]:
    """Collect the participating agent names for a pipeline span.

    NOTE(review): iterating `agents` assumes it is a re-iterable sequence; if a
    caller ever passes a generator this would consume it — confirm against the
    agentscope pipeline signatures.
    """
    agents = kwargs.get("agents")
    if agents is None and args:
        agents = args[0]

    agent_names = None
    if agents:
        agent_names = [getattr(agent, "name", agent.__class__.__name__) for agent in agents]

    return _clean({"agent_names": agent_names})


def _extract_metrics(*candidates: Any) -> dict[str, float] | None:
    """Pull token-usage metrics from the first candidate that carries any.

    Each candidate is checked for a `usage` field first, then treated as the
    usage object itself. Provider-specific key names are normalized to
    Braintrust's prompt_tokens / completion_tokens / tokens. When both spellings
    of a metric are present (e.g. prompt_tokens AND input_tokens), the later
    entry in the map wins because the dict is iterated in insertion order.
    """
    key_map = {
        "prompt_tokens": "prompt_tokens",
        "input_tokens": "prompt_tokens",
        "completion_tokens": "completion_tokens",
        "output_tokens": "completion_tokens",
        "total_tokens": "tokens",
        "tokens": "tokens",
    }

    for candidate in candidates:
        data = _field_value(candidate, "usage") or candidate

        metrics = {}
        for source_key, target_key in key_map.items():
            value = _field_value(data, source_key)
            if isinstance(value, (int, float)):
                metrics[target_key] = float(value)
        if metrics:
            return metrics

    return None


def _model_provider_name(instance: Any) -> str:
    """Derive a provider label from the model class name (OpenAIChatModel -> OpenAIChat)."""
    class_name = instance.__class__.__name__
    if class_name.endswith("Model"):
        return class_name[: -len("Model")]
    return class_name


def _model_metadata(instance: Any) -> dict[str, Any]:
    """Static metadata describing the model instance behind an LLM span."""
    return _clean(
        {
            "model": getattr(instance, "model_name", None),
            "provider": _model_provider_name(instance),
            "model_class": instance.__class__.__name__,
        }
    )


def _model_call_input(args: Any, kwargs: dict[str, Any]) -> dict[str, Any]:
    """Reconstruct the model call's named inputs from positional or keyword form.

    Positional order mirrors the AgentScope model __call__ signature:
    (messages, tools, tool_choice, structured_model) — keyword form wins when both
    are given for a slot.
    """
    messages = kwargs.get("messages")
    if messages is None and args:
        messages = args[0]

    tools = kwargs.get("tools")
    if tools is None and len(args) > 1:
        tools = args[1]

    tool_choice = kwargs.get("tool_choice")
    if tool_choice is None and len(args) > 2:
        tool_choice = args[2]

    structured_model = kwargs.get("structured_model")
    if structured_model is None and len(args) > 3:
        structured_model = args[3]

    return _clean(
        {
            "messages": messages,
            "tools": tools,
            "tool_choice": tool_choice,
            "structured_model": structured_model,
        }
    )


def _model_call_metadata(instance: Any, kwargs: dict[str, Any]) -> dict[str, Any]:
    """Model metadata plus any extra keyword arguments not captured as span input."""
    extra_kwargs = {
        key: value
        for key, value in kwargs.items()
        if key not in {"messages", "tools", "tool_choice", "structured_model"} and value is not None
    }
    return {**_model_metadata(instance), **extra_kwargs}


def _model_call_output(result: Any) -> Any:
    """Normalize a model response into {role, content, metadata} when possible.

    Dicts and objects exposing content/metadata are normalized; anything else is
    logged verbatim. Falls back to the raw data when normalization strips
    everything (e.g. content and metadata are both None).
    """
    if isinstance(result, dict):
        data = result
    elif _field_value(result, "content") is not None or _field_value(result, "metadata") is not None:
        data = {
            "content": _field_value(result, "content"),
            "metadata": _field_value(result, "metadata"),
        }
    else:
        return result

    normalized = _clean(
        {
            "role": "assistant" if data.get("content") is not None else None,
            "content": data.get("content"),
            "metadata": data.get("metadata"),
        }
    )
    return normalized or data


def _field_value(data: Any, key: str) -> Any:
    """Read `key` from a dict or as an attribute; never raise (properties may throw)."""
    if isinstance(data, dict):
        return data.get(key)
    try:
        return getattr(data, key, None)
    except Exception:
        return None


def _tool_name(tool_call: Any) -> str:
    """Best-effort tool name from a dict-shaped or object-shaped tool call."""
    if isinstance(tool_call, dict):
        return str(tool_call.get("name") or "unknown_tool")
    return str(getattr(tool_call, "name", "unknown_tool"))


def _make_task_wrapper(
    *,
    name_fn: Any,
    metadata_fn: Any,
    input_fn: Any = _args_kwargs_input,
) -> Any:
    """Build a simple async wrapper that creates a TASK span and logs the result.

    The produced wrapper follows the wrapt (wrapped, instance, args, kwargs)
    convention; errors are logged on the span and re-raised.
    """

    async def _wrapper(wrapped: Any, instance: Any, args: Any, kwargs: dict[str, Any]) -> Any:
        with start_span(
            name=name_fn(instance, args, kwargs),
            type=SpanTypeAttribute.TASK,
            input=input_fn(args, kwargs),
            metadata=metadata_fn(instance, args, kwargs),
        ) as span:
            try:
                result = await wrapped(*args, **kwargs)
                span.log(output=result)
                return result
            except Exception as exc:
                span.log(error=str(exc))
                raise

    return _wrapper


# Agent replies and pipeline runs are plain TASK spans with differing names/metadata.
_agent_call_wrapper = _make_task_wrapper(
    name_fn=lambda instance, _a, _k: f"{_agent_name(instance)}.reply",
    metadata_fn=lambda instance, _a, _k: _clean({"agent_class": instance.__class__.__name__}),
)

_sequential_pipeline_wrapper = _make_task_wrapper(
    name_fn=lambda _i, _a, _k: "sequential_pipeline.run",
    metadata_fn=lambda _i, args, kwargs: _pipeline_metadata(args, kwargs),
)

_fanout_pipeline_wrapper = _make_task_wrapper(
    name_fn=lambda _i, _a, _k: "fanout_pipeline.run",
    metadata_fn=lambda _i, args, kwargs: _pipeline_metadata(args, kwargs),
)


def _is_async_iterator(value: Any) -> bool:
    """True when `value` implements the async-iterator protocol (__aiter__ + __anext__)."""
    try:
        return getattr(value, "__aiter__", None) is not None and getattr(value, "__anext__", None) is not None
    except Exception:
        return False


def _deferred_stream_trace(result: Any, span: Any, stack: contextlib.ExitStack, log_fn: Any) -> Any:
    """Wrap an async iterator so the span stays open until the stream is consumed.

    `stack.pop_all()` transfers ownership of the open span out of the caller's
    ExitStack; the span is then closed when the returned generator finishes (or
    is closed early, which aclosing propagates into the source iterator). Only
    the last chunk is logged as output — AgentScope streams yield cumulative
    chunks, so the final one is the complete result.

    FIX: errors raised while the stream is being consumed are now logged on the
    span before re-raising, matching the error handling of every other wrapper
    in this module; previously the span closed silently with no error field.
    """
    deferred = stack.pop_all()

    async def _trace():
        with deferred:
            last_chunk = None
            try:
                async with aclosing(result) as agen:
                    async for chunk in agen:
                        last_chunk = chunk
                        yield chunk
            except Exception as exc:
                span.log(error=str(exc))
                raise
            if last_chunk is not None:
                log_fn(span, last_chunk)

    return _trace()


async def _toolkit_call_tool_function_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: dict[str, Any]) -> Any:
    """TOOL span around Toolkit.call_tool_function; streaming results keep the span open."""
    tool_call = args[0] if args else kwargs.get("tool_call")
    tool_name = _tool_name(tool_call)
    with contextlib.ExitStack() as stack:
        span = stack.enter_context(
            start_span(
                name=f"{tool_name}.execute",
                type=SpanTypeAttribute.TOOL,
                input=_clean(
                    {
                        "tool_name": tool_name,
                        "tool_call": tool_call,
                    }
                ),
                metadata=_clean({"toolkit_class": instance.__class__.__name__}),
            )
        )
        try:
            result = await wrapped(*args, **kwargs)
            if _is_async_iterator(result):
                # Defer span closure to the stream consumer.
                return _deferred_stream_trace(result, span, stack, lambda s, chunk: s.log(output=chunk))

            span.log(output=result)
            return result
        except Exception as exc:
            span.log(error=str(exc))
            raise


async def _model_call_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: dict[str, Any]) -> Any:
    """LLM span around a model __call__; logs normalized output and token metrics."""
    with contextlib.ExitStack() as stack:
        span = stack.enter_context(
            start_span(
                name=f"{_model_provider_name(instance)}.call",
                type=SpanTypeAttribute.LLM,
                input=_model_call_input(args, kwargs),
                metadata=_model_call_metadata(instance, kwargs),
            )
        )
        try:
            result = await wrapped(*args, **kwargs)
            if _is_async_iterator(result):
                # Defer span closure; the final chunk carries the full output and usage.
                return _deferred_stream_trace(
                    result,
                    span,
                    stack,
                    lambda s, chunk: s.log(output=_model_call_output(chunk), metrics=_extract_metrics(chunk)),
                )

            span.log(output=_model_call_output(result), metrics=_extract_metrics(result))
            return result
        except Exception as exc:
            span.log(error=str(exc))
            raise
# --- py/src/braintrust/integrations/auto_test_scripts/test_auto_agentscope.py (new file) ---
"""Test auto_instrument for AgentScope."""

import asyncio
import os
from pathlib import Path

# Must be set BEFORE any braintrust import so cassette playback is configured
# by the time the integration modules load.
os.environ["BRAINTRUST_CASSETTES_DIR"] = str(Path(__file__).resolve().parent.parent / "agentscope" / "cassettes")

from braintrust.auto import auto_instrument
from braintrust.wrappers.test_utils import autoinstrument_test_context

results = auto_instrument()
assert results.get("agentscope") is True, "auto_instrument should return True for agentscope"

# Instrumentation must be idempotent: a second call reports success rather than
# double-patching or erroring.
results2 = auto_instrument()
assert results2.get("agentscope") is True, "auto_instrument should still return True on second call"

from agentscope.agent import AgentBase, ReActAgent
from agentscope.formatter import OpenAIChatFormatter
from agentscope.memory import InMemoryMemory
from agentscope.message import Msg
from agentscope.model import OpenAIChatModel
from agentscope.pipeline import sequential_pipeline
from agentscope.tool import Toolkit

# fanout_pipeline does not exist in every supported agentscope version.
try:
    from agentscope.pipeline import fanout_pipeline
except ImportError:
    fanout_pipeline = None

# Each patched entry point should expose wrapt's __wrapped__ attribute.
assert hasattr(AgentBase.__call__, "__wrapped__"), "AgentBase.__call__ should be wrapped"
assert hasattr(sequential_pipeline, "__wrapped__"), "sequential_pipeline should be wrapped"
if fanout_pipeline is not None:
    assert hasattr(fanout_pipeline, "__wrapped__"), "fanout_pipeline should be wrapped"
assert hasattr(Toolkit.call_tool_function, "__wrapped__"), "Toolkit.call_tool_function should be wrapped"
assert hasattr(OpenAIChatModel.__call__, "__wrapped__"), "OpenAIChatModel.__call__ should be wrapped"

with autoinstrument_test_context("test_auto_agentscope") as memory_logger:
    agent = ReActAgent(
        name="Test Agent",
        sys_prompt="You are a helpful assistant. Be brief.",
        model=OpenAIChatModel(
            model_name="gpt-4o-mini",
            generate_kwargs={"temperature": 0},
        ),
        formatter=OpenAIChatFormatter(),
        toolkit=Toolkit(),
        memory=InMemoryMemory(),
    )
    # Silence console echo; the method name differs across agentscope versions.
    if hasattr(agent, "set_console_output_enabled"):
        agent.set_console_output_enabled(False)
    elif hasattr(agent, "disable_console_output"):
        agent.disable_console_output()

    response = agent(
        Msg(
            name="user",
            content="Say hi in two words.",
            role="user",
        )
    )

    # The wrapped agent __call__ returns a coroutine; drive it to completion.
    result = asyncio.run(response)
    assert result is not None

    spans = memory_logger.pop()
    assert len(spans) >= 2, f"Expected at least 2 spans (agent + model), got {len(spans)}"

    agent_span = next(span for span in spans if span["span_attributes"]["name"] == "Test Agent.reply")
    llm_spans = [span for span in spans if span["span_attributes"]["type"].value == "llm"]

    assert agent_span["span_attributes"]["type"].value == "task"
    assert llm_spans, "Should have at least one LLM span"
    assert llm_spans[0]["metadata"]["model"] == "gpt-4o-mini"
    assert agent_span["span_id"] in llm_spans[0]["span_parents"]

print("SUCCESS")

# --- trailing diff hunk for google_genai/test_google_genai.py (adds "Authorization"
# to the VCR filter_headers list) trimmed here; that file is not visible in this chunk. ---