From 7ce51e93023f66f3e343e379fc1930ddba335e9b Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Wed, 9 Jul 2025 02:26:12 +0000 Subject: [PATCH 01/20] chore(internal): bump pinned h11 dep --- requirements-dev.lock | 4 ++-- requirements.lock | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/requirements-dev.lock b/requirements-dev.lock index a84b5f4..7999ff4 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -48,9 +48,9 @@ filelock==3.12.4 frozenlist==1.6.2 # via aiohttp # via aiosignal -h11==0.14.0 +h11==0.16.0 # via httpcore -httpcore==1.0.2 +httpcore==1.0.9 # via httpx httpx==0.28.1 # via codex-sdk diff --git a/requirements.lock b/requirements.lock index a0807d8..bde9133 100644 --- a/requirements.lock +++ b/requirements.lock @@ -36,9 +36,9 @@ exceptiongroup==1.2.2 frozenlist==1.6.2 # via aiohttp # via aiosignal -h11==0.14.0 +h11==0.16.0 # via httpcore -httpcore==1.0.2 +httpcore==1.0.9 # via httpx httpx==0.28.1 # via codex-sdk From 5cba94956fff8ca4de99426a20e5c67f0ce6a2ac Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Wed, 9 Jul 2025 02:45:16 +0000 Subject: [PATCH 02/20] chore(package): mark python 3.13 as supported --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 16e362c..e3d7c1f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ classifiers = [ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Operating System :: OS Independent", "Operating System :: POSIX", "Operating System :: MacOS", From b374589baf01ca1236cf0823305e6bca037cf12b Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Thu, 10 Jul 2025 02:40:45 +0000 Subject: [PATCH 03/20] fix(parsing): correctly handle nested discriminated unions --- src/codex/_models.py | 13 ++++++++----- tests/test_models.py | 45 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 5 deletions(-) diff --git a/src/codex/_models.py b/src/codex/_models.py index 4f21498..528d568 100644 --- a/src/codex/_models.py +++ b/src/codex/_models.py @@ -2,9 +2,10 @@ import os import inspect -from typing import TYPE_CHECKING, Any, Type, Union, Generic, TypeVar, Callable, cast +from typing import TYPE_CHECKING, Any, Type, Union, Generic, TypeVar, Callable, Optional, cast from datetime import date, datetime from typing_extensions import ( + List, Unpack, Literal, ClassVar, @@ -366,7 +367,7 @@ def _construct_field(value: object, field: FieldInfo, key: str) -> object: if type_ is None: raise RuntimeError(f"Unexpected field type is None for {key}") - return construct_type(value=value, type_=type_) + return construct_type(value=value, type_=type_, metadata=getattr(field, "metadata", None)) def is_basemodel(type_: type) -> bool: @@ -420,7 +421,7 @@ def construct_type_unchecked(*, value: object, type_: type[_T]) -> _T: return cast(_T, construct_type(value=value, type_=type_)) -def construct_type(*, value: object, type_: object) -> object: +def construct_type(*, value: object, type_: object, metadata: Optional[List[Any]] = None) -> object: """Loose coercion to the expected type with construction of nested values. If the given value does not match the expected type then it is returned as-is. 
@@ -438,8 +439,10 @@ def construct_type(*, value: object, type_: object) -> object: type_ = type_.__value__ # type: ignore[unreachable] # unwrap `Annotated[T, ...]` -> `T` - if is_annotated_type(type_): - meta: tuple[Any, ...] = get_args(type_)[1:] + if metadata is not None: + meta: tuple[Any, ...] = tuple(metadata) + elif is_annotated_type(type_): + meta = get_args(type_)[1:] type_ = extract_type_arg(type_, 0) else: meta = tuple() diff --git a/tests/test_models.py b/tests/test_models.py index c96609c..3452a61 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -889,3 +889,48 @@ class ModelB(BaseModel): ) assert isinstance(m, ModelB) + + +def test_nested_discriminated_union() -> None: + class InnerType1(BaseModel): + type: Literal["type_1"] + + class InnerModel(BaseModel): + inner_value: str + + class InnerType2(BaseModel): + type: Literal["type_2"] + some_inner_model: InnerModel + + class Type1(BaseModel): + base_type: Literal["base_type_1"] + value: Annotated[ + Union[ + InnerType1, + InnerType2, + ], + PropertyInfo(discriminator="type"), + ] + + class Type2(BaseModel): + base_type: Literal["base_type_2"] + + T = Annotated[ + Union[ + Type1, + Type2, + ], + PropertyInfo(discriminator="base_type"), + ] + + model = construct_type( + type_=T, + value={ + "base_type": "base_type_1", + "value": { + "type": "type_2", + }, + }, + ) + assert isinstance(model, Type1) + assert isinstance(model.value, InnerType2) From d05336d89f5a49b09d7b1f85e7cb3ed74035157a Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Fri, 11 Jul 2025 02:58:34 +0000 Subject: [PATCH 04/20] chore(readme): fix version rendering on pypi --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 5a5a805..71cf0a0 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ # Codex SDK -[![PyPI version]()](https://pypi.org/project/codex-sdk/) + +[![PyPI version](https://img.shields.io/pypi/v/codex-sdk.svg?label=pypi%20(stable))](https://pypi.org/project/codex-sdk/) This library is not meant to be used directly. Refer to https://pypi.org/project/cleanlab-codex/ instead. 
From 4732aaeb03872abffb4e13df6dd1994711bd4268 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Sat, 12 Jul 2025 02:05:53 +0000 Subject: [PATCH 05/20] fix(client): don't send Content-Type header on GET requests --- pyproject.toml | 2 +- src/codex/_base_client.py | 11 +++++++++-- tests/test_client.py | 4 ++-- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e3d7c1f..964b48a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,7 +39,7 @@ Homepage = "https://github.com/cleanlab/codex-python" Repository = "https://github.com/cleanlab/codex-python" [project.optional-dependencies] -aiohttp = ["aiohttp", "httpx_aiohttp>=0.1.6"] +aiohttp = ["aiohttp", "httpx_aiohttp>=0.1.8"] [tool.rye] managed = true diff --git a/src/codex/_base_client.py b/src/codex/_base_client.py index 1eca89e..6da89f6 100644 --- a/src/codex/_base_client.py +++ b/src/codex/_base_client.py @@ -529,6 +529,15 @@ def _build_request( # work around https://github.com/encode/httpx/discussions/2880 kwargs["extensions"] = {"sni_hostname": prepared_url.host.replace("_", "-")} + is_body_allowed = options.method.lower() != "get" + + if is_body_allowed: + kwargs["json"] = json_data if is_given(json_data) else None + kwargs["files"] = files + else: + headers.pop("Content-Type", None) + kwargs.pop("data", None) + # TODO: report this error to httpx return self._client.build_request( # pyright: ignore[reportUnknownMemberType] headers=headers, @@ -540,8 +549,6 @@ def _build_request( # so that passing a `TypedDict` doesn't cause an error. # https://github.com/microsoft/pyright/issues/3526#event-6715453066 params=self.qs.stringify(cast(Mapping[str, Any], params)) if params else None, - json=json_data if is_given(json_data) else None, - files=files, **kwargs, ) diff --git a/tests/test_client.py b/tests/test_client.py index c012f4d..2474915 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -427,7 +427,7 @@ def test_request_extra_query(self) -> None: def test_multipart_repeating_array(self, client: Codex) -> None: request = client._build_request( FinalRequestOptions.construct( - method="get", + method="post", url="/foo", headers={"Content-Type": "multipart/form-data; boundary=6b7ba517decee4a450543ea6ae821c82"}, json_data={"array": ["foo", "bar"]}, @@ -1211,7 +1211,7 @@ def test_request_extra_query(self) -> None: def test_multipart_repeating_array(self, async_client: AsyncCodex) -> None: request = async_client._build_request( FinalRequestOptions.construct( - method="get", + method="post", url="/foo", headers={"Content-Type": "multipart/form-data; boundary=6b7ba517decee4a450543ea6ae821c82"}, json_data={"array": ["foo", "bar"]}, From b956ce083ef3c507a7649577724f337a562c427a Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Mon, 14 Jul 2025 18:17:47 +0000 Subject: [PATCH 06/20] feat(api): api update --- .stats.yml | 2 +- src/codex/types/project_validate_params.py | 102 ++++---- src/codex/types/project_validate_response.py | 4 +- .../query_log_list_by_group_response.py | 239 +++++++++++++++++- .../query_log_list_groups_response.py | 231 ++++++++++++++++- .../types/projects/query_log_list_response.py | 231 ++++++++++++++++- .../projects/query_log_retrieve_response.py | 231 ++++++++++++++++- ...remediation_list_resolved_logs_response.py | 237 ++++++++++++++++- .../projects/test_remediations.py | 20 +- tests/api_resources/test_projects.py | 20 +- 10 files changed, 
1238 insertions(+), 79 deletions(-) diff --git a/.stats.yml b/.stats.yml index 889336e..20ee827 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,3 +1,3 @@ configured_endpoints: 55 -openapi_spec_hash: 922886934580d0b2addcb6e26ada0e09 +openapi_spec_hash: b3a1a58600b52a20671bef2b25f5dbc4 config_hash: 8f6e5c3b064cbb77569a6bf654954a56 diff --git a/src/codex/types/project_validate_params.py b/src/codex/types/project_validate_params.py index 0862cbc..7b85d06 100644 --- a/src/codex/types/project_validate_params.py +++ b/src/codex/types/project_validate_params.py @@ -33,23 +33,23 @@ "MessageChatCompletionDeveloperMessageParamContentUnionMember1", "MessageChatCompletionSystemMessageParam", "MessageChatCompletionSystemMessageParamContentUnionMember1", - "MessageChatCompletionUserMessageParam", - "MessageChatCompletionUserMessageParamContentUnionMember1", - "MessageChatCompletionUserMessageParamContentUnionMember1ChatCompletionContentPartTextParam", - "MessageChatCompletionUserMessageParamContentUnionMember1ChatCompletionContentPartImageParam", - "MessageChatCompletionUserMessageParamContentUnionMember1ChatCompletionContentPartImageParamImageURL", - "MessageChatCompletionUserMessageParamContentUnionMember1ChatCompletionContentPartInputAudioParam", - "MessageChatCompletionUserMessageParamContentUnionMember1ChatCompletionContentPartInputAudioParamInputAudio", - "MessageChatCompletionUserMessageParamContentUnionMember1File", - "MessageChatCompletionUserMessageParamContentUnionMember1FileFile", - "MessageChatCompletionAssistantMessageParam", - "MessageChatCompletionAssistantMessageParamAudio", - "MessageChatCompletionAssistantMessageParamContentUnionMember1", - "MessageChatCompletionAssistantMessageParamContentUnionMember1ChatCompletionContentPartTextParam", - "MessageChatCompletionAssistantMessageParamContentUnionMember1ChatCompletionContentPartRefusalParam", - "MessageChatCompletionAssistantMessageParamFunctionCall", - "MessageChatCompletionAssistantMessageParamToolCall", - "MessageChatCompletionAssistantMessageParamToolCallFunction", + "MessageChatCompletionUserMessageParamInput", + "MessageChatCompletionUserMessageParamInputContentUnionMember1", + "MessageChatCompletionUserMessageParamInputContentUnionMember1ChatCompletionContentPartTextParam", + "MessageChatCompletionUserMessageParamInputContentUnionMember1ChatCompletionContentPartImageParam", + "MessageChatCompletionUserMessageParamInputContentUnionMember1ChatCompletionContentPartImageParamImageURL", + "MessageChatCompletionUserMessageParamInputContentUnionMember1ChatCompletionContentPartInputAudioParam", + "MessageChatCompletionUserMessageParamInputContentUnionMember1ChatCompletionContentPartInputAudioParamInputAudio", + "MessageChatCompletionUserMessageParamInputContentUnionMember1File", + "MessageChatCompletionUserMessageParamInputContentUnionMember1FileFile", + "MessageChatCompletionAssistantMessageParamInput", + "MessageChatCompletionAssistantMessageParamInputAudio", + "MessageChatCompletionAssistantMessageParamInputContentUnionMember1", + "MessageChatCompletionAssistantMessageParamInputContentUnionMember1ChatCompletionContentPartTextParam", + "MessageChatCompletionAssistantMessageParamInputContentUnionMember1ChatCompletionContentPartRefusalParam", + "MessageChatCompletionAssistantMessageParamInputFunctionCall", + "MessageChatCompletionAssistantMessageParamInputToolCall", + "MessageChatCompletionAssistantMessageParamInputToolCallFunction", "MessageChatCompletionToolMessageParam", "MessageChatCompletionToolMessageParamContentUnionMember1", 
"MessageChatCompletionFunctionMessageParam", @@ -468,7 +468,7 @@ class MessageChatCompletionSystemMessageParam(TypedDict, total=False): name: str -class MessageChatCompletionUserMessageParamContentUnionMember1ChatCompletionContentPartTextParam( +class MessageChatCompletionUserMessageParamInputContentUnionMember1ChatCompletionContentPartTextParam( TypedDict, total=False ): text: Required[str] @@ -476,7 +476,7 @@ class MessageChatCompletionUserMessageParamContentUnionMember1ChatCompletionCont type: Required[Literal["text"]] -class MessageChatCompletionUserMessageParamContentUnionMember1ChatCompletionContentPartImageParamImageURL( +class MessageChatCompletionUserMessageParamInputContentUnionMember1ChatCompletionContentPartImageParamImageURL( TypedDict, total=False ): url: Required[str] @@ -484,17 +484,17 @@ class MessageChatCompletionUserMessageParamContentUnionMember1ChatCompletionCont detail: Literal["auto", "low", "high"] -class MessageChatCompletionUserMessageParamContentUnionMember1ChatCompletionContentPartImageParam( +class MessageChatCompletionUserMessageParamInputContentUnionMember1ChatCompletionContentPartImageParam( TypedDict, total=False ): image_url: Required[ - MessageChatCompletionUserMessageParamContentUnionMember1ChatCompletionContentPartImageParamImageURL + MessageChatCompletionUserMessageParamInputContentUnionMember1ChatCompletionContentPartImageParamImageURL ] type: Required[Literal["image_url"]] -class MessageChatCompletionUserMessageParamContentUnionMember1ChatCompletionContentPartInputAudioParamInputAudio( +class MessageChatCompletionUserMessageParamInputContentUnionMember1ChatCompletionContentPartInputAudioParamInputAudio( TypedDict, total=False ): data: Required[str] @@ -502,17 +502,17 @@ class MessageChatCompletionUserMessageParamContentUnionMember1ChatCompletionCont format: Required[Literal["wav", "mp3"]] -class MessageChatCompletionUserMessageParamContentUnionMember1ChatCompletionContentPartInputAudioParam( +class MessageChatCompletionUserMessageParamInputContentUnionMember1ChatCompletionContentPartInputAudioParam( TypedDict, total=False ): input_audio: Required[ - MessageChatCompletionUserMessageParamContentUnionMember1ChatCompletionContentPartInputAudioParamInputAudio + MessageChatCompletionUserMessageParamInputContentUnionMember1ChatCompletionContentPartInputAudioParamInputAudio ] type: Required[Literal["input_audio"]] -class MessageChatCompletionUserMessageParamContentUnionMember1FileFile(TypedDict, total=False): +class MessageChatCompletionUserMessageParamInputContentUnionMember1FileFile(TypedDict, total=False): file_data: str file_id: str @@ -520,33 +520,33 @@ class MessageChatCompletionUserMessageParamContentUnionMember1FileFile(TypedDict filename: str -class MessageChatCompletionUserMessageParamContentUnionMember1File(TypedDict, total=False): - file: Required[MessageChatCompletionUserMessageParamContentUnionMember1FileFile] +class MessageChatCompletionUserMessageParamInputContentUnionMember1File(TypedDict, total=False): + file: Required[MessageChatCompletionUserMessageParamInputContentUnionMember1FileFile] type: Required[Literal["file"]] -MessageChatCompletionUserMessageParamContentUnionMember1: TypeAlias = Union[ - MessageChatCompletionUserMessageParamContentUnionMember1ChatCompletionContentPartTextParam, - MessageChatCompletionUserMessageParamContentUnionMember1ChatCompletionContentPartImageParam, - MessageChatCompletionUserMessageParamContentUnionMember1ChatCompletionContentPartInputAudioParam, - 
MessageChatCompletionUserMessageParamContentUnionMember1File, +MessageChatCompletionUserMessageParamInputContentUnionMember1: TypeAlias = Union[ + MessageChatCompletionUserMessageParamInputContentUnionMember1ChatCompletionContentPartTextParam, + MessageChatCompletionUserMessageParamInputContentUnionMember1ChatCompletionContentPartImageParam, + MessageChatCompletionUserMessageParamInputContentUnionMember1ChatCompletionContentPartInputAudioParam, + MessageChatCompletionUserMessageParamInputContentUnionMember1File, ] -class MessageChatCompletionUserMessageParam(TypedDict, total=False): - content: Required[Union[str, Iterable[MessageChatCompletionUserMessageParamContentUnionMember1]]] +class MessageChatCompletionUserMessageParamInput(TypedDict, total=False): + content: Required[Union[str, Iterable[MessageChatCompletionUserMessageParamInputContentUnionMember1]]] role: Required[Literal["user"]] name: str -class MessageChatCompletionAssistantMessageParamAudio(TypedDict, total=False): +class MessageChatCompletionAssistantMessageParamInputAudio(TypedDict, total=False): id: Required[str] -class MessageChatCompletionAssistantMessageParamContentUnionMember1ChatCompletionContentPartTextParam( +class MessageChatCompletionAssistantMessageParamInputContentUnionMember1ChatCompletionContentPartTextParam( TypedDict, total=False ): text: Required[str] @@ -554,7 +554,7 @@ class MessageChatCompletionAssistantMessageParamContentUnionMember1ChatCompletio type: Required[Literal["text"]] -class MessageChatCompletionAssistantMessageParamContentUnionMember1ChatCompletionContentPartRefusalParam( +class MessageChatCompletionAssistantMessageParamInputContentUnionMember1ChatCompletionContentPartRefusalParam( TypedDict, total=False ): refusal: Required[str] @@ -562,46 +562,46 @@ class MessageChatCompletionAssistantMessageParamContentUnionMember1ChatCompletio type: Required[Literal["refusal"]] -MessageChatCompletionAssistantMessageParamContentUnionMember1: TypeAlias = Union[ - MessageChatCompletionAssistantMessageParamContentUnionMember1ChatCompletionContentPartTextParam, - MessageChatCompletionAssistantMessageParamContentUnionMember1ChatCompletionContentPartRefusalParam, +MessageChatCompletionAssistantMessageParamInputContentUnionMember1: TypeAlias = Union[ + MessageChatCompletionAssistantMessageParamInputContentUnionMember1ChatCompletionContentPartTextParam, + MessageChatCompletionAssistantMessageParamInputContentUnionMember1ChatCompletionContentPartRefusalParam, ] -class MessageChatCompletionAssistantMessageParamFunctionCall(TypedDict, total=False): +class MessageChatCompletionAssistantMessageParamInputFunctionCall(TypedDict, total=False): arguments: Required[str] name: Required[str] -class MessageChatCompletionAssistantMessageParamToolCallFunction(TypedDict, total=False): +class MessageChatCompletionAssistantMessageParamInputToolCallFunction(TypedDict, total=False): arguments: Required[str] name: Required[str] -class MessageChatCompletionAssistantMessageParamToolCall(TypedDict, total=False): +class MessageChatCompletionAssistantMessageParamInputToolCall(TypedDict, total=False): id: Required[str] - function: Required[MessageChatCompletionAssistantMessageParamToolCallFunction] + function: Required[MessageChatCompletionAssistantMessageParamInputToolCallFunction] type: Required[Literal["function"]] -class MessageChatCompletionAssistantMessageParam(TypedDict, total=False): +class MessageChatCompletionAssistantMessageParamInput(TypedDict, total=False): role: Required[Literal["assistant"]] - audio: 
Optional[MessageChatCompletionAssistantMessageParamAudio] + audio: Optional[MessageChatCompletionAssistantMessageParamInputAudio] - content: Union[str, Iterable[MessageChatCompletionAssistantMessageParamContentUnionMember1], None] + content: Union[str, Iterable[MessageChatCompletionAssistantMessageParamInputContentUnionMember1], None] - function_call: Optional[MessageChatCompletionAssistantMessageParamFunctionCall] + function_call: Optional[MessageChatCompletionAssistantMessageParamInputFunctionCall] name: str refusal: Optional[str] - tool_calls: Iterable[MessageChatCompletionAssistantMessageParamToolCall] + tool_calls: Iterable[MessageChatCompletionAssistantMessageParamInputToolCall] class MessageChatCompletionToolMessageParamContentUnionMember1(TypedDict, total=False): @@ -629,8 +629,8 @@ class MessageChatCompletionFunctionMessageParam(TypedDict, total=False): Message: TypeAlias = Union[ MessageChatCompletionDeveloperMessageParam, MessageChatCompletionSystemMessageParam, - MessageChatCompletionUserMessageParam, - MessageChatCompletionAssistantMessageParam, + MessageChatCompletionUserMessageParamInput, + MessageChatCompletionAssistantMessageParamInput, MessageChatCompletionToolMessageParam, MessageChatCompletionFunctionMessageParam, ] diff --git a/src/codex/types/project_validate_response.py b/src/codex/types/project_validate_response.py index 3b06db2..4488311 100644 --- a/src/codex/types/project_validate_response.py +++ b/src/codex/types/project_validate_response.py @@ -48,8 +48,8 @@ class ProjectValidateResponse(BaseModel): expert_answer: Optional[str] = None """ - Alternate SME-provided answer from Codex if the response was flagged as bad and - an answer was found in the Codex Project, or None otherwise. + Alternate SME-provided answer from Codex if a relevant answer was found in the + Codex Project, or None otherwise. 
""" is_bad_response: bool diff --git a/src/codex/types/projects/query_log_list_by_group_response.py b/src/codex/types/projects/query_log_list_by_group_response.py index 1685073..ccd2d5e 100644 --- a/src/codex/types/projects/query_log_list_by_group_response.py +++ b/src/codex/types/projects/query_log_list_by_group_response.py @@ -2,7 +2,7 @@ from typing import Dict, List, Union, Optional from datetime import datetime -from typing_extensions import Literal +from typing_extensions import Literal, TypeAlias from ..._models import BaseModel @@ -16,6 +16,31 @@ "QueryLogsByGroupQueryLogFormattedNonGuardrailEvalScores", "QueryLogsByGroupQueryLogContext", "QueryLogsByGroupQueryLogDeterministicGuardrailsResults", + "QueryLogsByGroupQueryLogMessage", + "QueryLogsByGroupQueryLogMessageChatCompletionDeveloperMessageParam", + "QueryLogsByGroupQueryLogMessageChatCompletionDeveloperMessageParamContentUnionMember1", + "QueryLogsByGroupQueryLogMessageChatCompletionSystemMessageParam", + "QueryLogsByGroupQueryLogMessageChatCompletionSystemMessageParamContentUnionMember1", + "QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutput", + "QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1", + "QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam", + "QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParam", + "QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParamImageURL", + "QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParam", + "QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParamInputAudio", + "QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1File", + "QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1FileFile", + "QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutput", + "QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputAudio", + "QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputContentUnionMember1", + "QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam", + "QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartRefusalParam", + "QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputFunctionCall", + "QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputToolCall", + "QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputToolCallFunction", + "QueryLogsByGroupQueryLogMessageChatCompletionToolMessageParam", + "QueryLogsByGroupQueryLogMessageChatCompletionToolMessageParamContentUnionMember1", + "QueryLogsByGroupQueryLogMessageChatCompletionFunctionMessageParam", ] @@ -68,6 +93,200 @@ class QueryLogsByGroupQueryLogDeterministicGuardrailsResults(BaseModel): matches: Optional[List[str]] = None +class QueryLogsByGroupQueryLogMessageChatCompletionDeveloperMessageParamContentUnionMember1(BaseModel): + text: str + + type: Literal["text"] + + +class QueryLogsByGroupQueryLogMessageChatCompletionDeveloperMessageParam(BaseModel): + content: Union[str, 
List[QueryLogsByGroupQueryLogMessageChatCompletionDeveloperMessageParamContentUnionMember1]] + + role: Literal["developer"] + + name: Optional[str] = None + + +class QueryLogsByGroupQueryLogMessageChatCompletionSystemMessageParamContentUnionMember1(BaseModel): + text: str + + type: Literal["text"] + + +class QueryLogsByGroupQueryLogMessageChatCompletionSystemMessageParam(BaseModel): + content: Union[str, List[QueryLogsByGroupQueryLogMessageChatCompletionSystemMessageParamContentUnionMember1]] + + role: Literal["system"] + + name: Optional[str] = None + + +class QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam( + BaseModel +): + text: str + + type: Literal["text"] + + +class QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + detail: Optional[Literal["auto", "low", "high"]] = None + + +class QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParam( + BaseModel +): + image_url: QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParamImageURL + + type: Literal["image_url"] + + +class QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParamInputAudio( + BaseModel +): + data: str + + format: Literal["wav", "mp3"] + + +class QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParam( + BaseModel +): + input_audio: QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParamInputAudio + + type: Literal["input_audio"] + + +class QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1FileFile(BaseModel): + file_data: Optional[str] = None + + file_id: Optional[str] = None + + filename: Optional[str] = None + + +class QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1File(BaseModel): + file: QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1FileFile + + type: Literal["file"] + + +QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1: TypeAlias = Union[ + QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam, + QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParam, + QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParam, + QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1File, +] + + +class QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutput(BaseModel): + content: Union[str, List[QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1]] + + role: Literal["user"] + + name: Optional[str] = None + + +class QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputAudio(BaseModel): + id: str + + +class QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam( + BaseModel +): + text: str + + type: Literal["text"] + + +class 
QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartRefusalParam( + BaseModel +): + refusal: str + + type: Literal["refusal"] + + +QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputContentUnionMember1: TypeAlias = Union[ + QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam, + QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartRefusalParam, +] + + +class QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputFunctionCall(BaseModel): + arguments: str + + name: str + + +class QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputToolCallFunction(BaseModel): + arguments: str + + name: str + + +class QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputToolCall(BaseModel): + id: str + + function: QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputToolCallFunction + + type: Literal["function"] + + +class QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutput(BaseModel): + role: Literal["assistant"] + + audio: Optional[QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputAudio] = None + + content: Union[ + str, List[QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputContentUnionMember1], None + ] = None + + function_call: Optional[QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputFunctionCall] = None + + name: Optional[str] = None + + refusal: Optional[str] = None + + tool_calls: Optional[List[QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputToolCall]] = None + + +class QueryLogsByGroupQueryLogMessageChatCompletionToolMessageParamContentUnionMember1(BaseModel): + text: str + + type: Literal["text"] + + +class QueryLogsByGroupQueryLogMessageChatCompletionToolMessageParam(BaseModel): + content: Union[str, List[QueryLogsByGroupQueryLogMessageChatCompletionToolMessageParamContentUnionMember1]] + + role: Literal["tool"] + + tool_call_id: str + + +class QueryLogsByGroupQueryLogMessageChatCompletionFunctionMessageParam(BaseModel): + content: Optional[str] = None + + name: str + + role: Literal["function"] + + +QueryLogsByGroupQueryLogMessage: TypeAlias = Union[ + QueryLogsByGroupQueryLogMessageChatCompletionDeveloperMessageParam, + QueryLogsByGroupQueryLogMessageChatCompletionSystemMessageParam, + QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutput, + QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutput, + QueryLogsByGroupQueryLogMessageChatCompletionToolMessageParam, + QueryLogsByGroupQueryLogMessageChatCompletionFunctionMessageParam, +] + + class QueryLogsByGroupQueryLog(BaseModel): id: str @@ -85,10 +304,14 @@ class QueryLogsByGroupQueryLog(BaseModel): formatted_guardrail_eval_scores: Optional[Dict[str, QueryLogsByGroupQueryLogFormattedGuardrailEvalScores]] = None + formatted_messages: Optional[str] = None + formatted_non_guardrail_eval_scores: Optional[ Dict[str, QueryLogsByGroupQueryLogFormattedNonGuardrailEvalScores] ] = None + formatted_original_question: Optional[str] = None + is_bad_response: bool needs_review: bool @@ -140,6 +363,20 @@ class QueryLogsByGroupQueryLog(BaseModel): guardrailed: Optional[bool] = None """If true, the response was guardrailed""" + messages: Optional[List[QueryLogsByGroupQueryLogMessage]] = None + """Optional message history to provide 
conversation context for the query. + + Used to rewrite query into a self-contained version of itself. If not provided, + the query will be treated as self-contained. + """ + + original_question: Optional[str] = None + """The original question that was asked before any rewriting or processing. + + For all non-conversational RAG, original_question should be the same as the + final question seen in Codex. + """ + primary_eval_issue: Optional[str] = None """Primary issue identified in evaluation""" diff --git a/src/codex/types/projects/query_log_list_groups_response.py b/src/codex/types/projects/query_log_list_groups_response.py index 5d9222b..9adb422 100644 --- a/src/codex/types/projects/query_log_list_groups_response.py +++ b/src/codex/types/projects/query_log_list_groups_response.py @@ -2,7 +2,7 @@ from typing import Dict, List, Union, Optional from datetime import datetime -from typing_extensions import Literal +from typing_extensions import Literal, TypeAlias from ..._models import BaseModel @@ -14,6 +14,31 @@ "FormattedNonGuardrailEvalScores", "Context", "DeterministicGuardrailsResults", + "Message", + "MessageChatCompletionDeveloperMessageParam", + "MessageChatCompletionDeveloperMessageParamContentUnionMember1", + "MessageChatCompletionSystemMessageParam", + "MessageChatCompletionSystemMessageParamContentUnionMember1", + "MessageChatCompletionUserMessageParamOutput", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParam", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParamImageURL", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParam", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParamInputAudio", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1File", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1FileFile", + "MessageChatCompletionAssistantMessageParamOutput", + "MessageChatCompletionAssistantMessageParamOutputAudio", + "MessageChatCompletionAssistantMessageParamOutputContentUnionMember1", + "MessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam", + "MessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartRefusalParam", + "MessageChatCompletionAssistantMessageParamOutputFunctionCall", + "MessageChatCompletionAssistantMessageParamOutputToolCall", + "MessageChatCompletionAssistantMessageParamOutputToolCallFunction", + "MessageChatCompletionToolMessageParam", + "MessageChatCompletionToolMessageParamContentUnionMember1", + "MessageChatCompletionFunctionMessageParam", ] @@ -66,6 +91,192 @@ class DeterministicGuardrailsResults(BaseModel): matches: Optional[List[str]] = None +class MessageChatCompletionDeveloperMessageParamContentUnionMember1(BaseModel): + text: str + + type: Literal["text"] + + +class MessageChatCompletionDeveloperMessageParam(BaseModel): + content: Union[str, List[MessageChatCompletionDeveloperMessageParamContentUnionMember1]] + + role: Literal["developer"] + + name: Optional[str] = None + + +class MessageChatCompletionSystemMessageParamContentUnionMember1(BaseModel): + text: str + + type: Literal["text"] + + +class MessageChatCompletionSystemMessageParam(BaseModel): + content: Union[str, 
List[MessageChatCompletionSystemMessageParamContentUnionMember1]] + + role: Literal["system"] + + name: Optional[str] = None + + +class MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam(BaseModel): + text: str + + type: Literal["text"] + + +class MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + detail: Optional[Literal["auto", "low", "high"]] = None + + +class MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParam(BaseModel): + image_url: MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParamImageURL + + type: Literal["image_url"] + + +class MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParamInputAudio( + BaseModel +): + data: str + + format: Literal["wav", "mp3"] + + +class MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParam(BaseModel): + input_audio: ( + MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParamInputAudio + ) + + type: Literal["input_audio"] + + +class MessageChatCompletionUserMessageParamOutputContentUnionMember1FileFile(BaseModel): + file_data: Optional[str] = None + + file_id: Optional[str] = None + + filename: Optional[str] = None + + +class MessageChatCompletionUserMessageParamOutputContentUnionMember1File(BaseModel): + file: MessageChatCompletionUserMessageParamOutputContentUnionMember1FileFile + + type: Literal["file"] + + +MessageChatCompletionUserMessageParamOutputContentUnionMember1: TypeAlias = Union[ + MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam, + MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParam, + MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParam, + MessageChatCompletionUserMessageParamOutputContentUnionMember1File, +] + + +class MessageChatCompletionUserMessageParamOutput(BaseModel): + content: Union[str, List[MessageChatCompletionUserMessageParamOutputContentUnionMember1]] + + role: Literal["user"] + + name: Optional[str] = None + + +class MessageChatCompletionAssistantMessageParamOutputAudio(BaseModel): + id: str + + +class MessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam(BaseModel): + text: str + + type: Literal["text"] + + +class MessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartRefusalParam( + BaseModel +): + refusal: str + + type: Literal["refusal"] + + +MessageChatCompletionAssistantMessageParamOutputContentUnionMember1: TypeAlias = Union[ + MessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam, + MessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartRefusalParam, +] + + +class MessageChatCompletionAssistantMessageParamOutputFunctionCall(BaseModel): + arguments: str + + name: str + + +class MessageChatCompletionAssistantMessageParamOutputToolCallFunction(BaseModel): + arguments: str + + name: str + + +class MessageChatCompletionAssistantMessageParamOutputToolCall(BaseModel): + id: str + + function: MessageChatCompletionAssistantMessageParamOutputToolCallFunction + + type: Literal["function"] + + +class 
MessageChatCompletionAssistantMessageParamOutput(BaseModel): + role: Literal["assistant"] + + audio: Optional[MessageChatCompletionAssistantMessageParamOutputAudio] = None + + content: Union[str, List[MessageChatCompletionAssistantMessageParamOutputContentUnionMember1], None] = None + + function_call: Optional[MessageChatCompletionAssistantMessageParamOutputFunctionCall] = None + + name: Optional[str] = None + + refusal: Optional[str] = None + + tool_calls: Optional[List[MessageChatCompletionAssistantMessageParamOutputToolCall]] = None + + +class MessageChatCompletionToolMessageParamContentUnionMember1(BaseModel): + text: str + + type: Literal["text"] + + +class MessageChatCompletionToolMessageParam(BaseModel): + content: Union[str, List[MessageChatCompletionToolMessageParamContentUnionMember1]] + + role: Literal["tool"] + + tool_call_id: str + + +class MessageChatCompletionFunctionMessageParam(BaseModel): + content: Optional[str] = None + + name: str + + role: Literal["function"] + + +Message: TypeAlias = Union[ + MessageChatCompletionDeveloperMessageParam, + MessageChatCompletionSystemMessageParam, + MessageChatCompletionUserMessageParamOutput, + MessageChatCompletionAssistantMessageParamOutput, + MessageChatCompletionToolMessageParam, + MessageChatCompletionFunctionMessageParam, +] + + class QueryLogListGroupsResponse(BaseModel): id: str @@ -83,8 +294,12 @@ class QueryLogListGroupsResponse(BaseModel): formatted_guardrail_eval_scores: Optional[Dict[str, FormattedGuardrailEvalScores]] = None + formatted_messages: Optional[str] = None + formatted_non_guardrail_eval_scores: Optional[Dict[str, FormattedNonGuardrailEvalScores]] = None + formatted_original_question: Optional[str] = None + is_bad_response: bool needs_review: bool @@ -138,6 +353,20 @@ class QueryLogListGroupsResponse(BaseModel): guardrailed: Optional[bool] = None """If true, the response was guardrailed""" + messages: Optional[List[Message]] = None + """Optional message history to provide conversation context for the query. + + Used to rewrite query into a self-contained version of itself. If not provided, + the query will be treated as self-contained. + """ + + original_question: Optional[str] = None + """The original question that was asked before any rewriting or processing. + + For all non-conversational RAG, original_question should be the same as the + final question seen in Codex. 
+ """ + primary_eval_issue: Optional[str] = None """Primary issue identified in evaluation""" diff --git a/src/codex/types/projects/query_log_list_response.py b/src/codex/types/projects/query_log_list_response.py index ccdeb03..f6fbba1 100644 --- a/src/codex/types/projects/query_log_list_response.py +++ b/src/codex/types/projects/query_log_list_response.py @@ -2,7 +2,7 @@ from typing import Dict, List, Union, Optional from datetime import datetime -from typing_extensions import Literal +from typing_extensions import Literal, TypeAlias from ..._models import BaseModel @@ -14,6 +14,31 @@ "FormattedNonGuardrailEvalScores", "Context", "DeterministicGuardrailsResults", + "Message", + "MessageChatCompletionDeveloperMessageParam", + "MessageChatCompletionDeveloperMessageParamContentUnionMember1", + "MessageChatCompletionSystemMessageParam", + "MessageChatCompletionSystemMessageParamContentUnionMember1", + "MessageChatCompletionUserMessageParamOutput", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParam", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParamImageURL", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParam", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParamInputAudio", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1File", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1FileFile", + "MessageChatCompletionAssistantMessageParamOutput", + "MessageChatCompletionAssistantMessageParamOutputAudio", + "MessageChatCompletionAssistantMessageParamOutputContentUnionMember1", + "MessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam", + "MessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartRefusalParam", + "MessageChatCompletionAssistantMessageParamOutputFunctionCall", + "MessageChatCompletionAssistantMessageParamOutputToolCall", + "MessageChatCompletionAssistantMessageParamOutputToolCallFunction", + "MessageChatCompletionToolMessageParam", + "MessageChatCompletionToolMessageParamContentUnionMember1", + "MessageChatCompletionFunctionMessageParam", ] @@ -66,6 +91,192 @@ class DeterministicGuardrailsResults(BaseModel): matches: Optional[List[str]] = None +class MessageChatCompletionDeveloperMessageParamContentUnionMember1(BaseModel): + text: str + + type: Literal["text"] + + +class MessageChatCompletionDeveloperMessageParam(BaseModel): + content: Union[str, List[MessageChatCompletionDeveloperMessageParamContentUnionMember1]] + + role: Literal["developer"] + + name: Optional[str] = None + + +class MessageChatCompletionSystemMessageParamContentUnionMember1(BaseModel): + text: str + + type: Literal["text"] + + +class MessageChatCompletionSystemMessageParam(BaseModel): + content: Union[str, List[MessageChatCompletionSystemMessageParamContentUnionMember1]] + + role: Literal["system"] + + name: Optional[str] = None + + +class MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam(BaseModel): + text: str + + type: Literal["text"] + + +class MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + 
detail: Optional[Literal["auto", "low", "high"]] = None + + +class MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParam(BaseModel): + image_url: MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParamImageURL + + type: Literal["image_url"] + + +class MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParamInputAudio( + BaseModel +): + data: str + + format: Literal["wav", "mp3"] + + +class MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParam(BaseModel): + input_audio: ( + MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParamInputAudio + ) + + type: Literal["input_audio"] + + +class MessageChatCompletionUserMessageParamOutputContentUnionMember1FileFile(BaseModel): + file_data: Optional[str] = None + + file_id: Optional[str] = None + + filename: Optional[str] = None + + +class MessageChatCompletionUserMessageParamOutputContentUnionMember1File(BaseModel): + file: MessageChatCompletionUserMessageParamOutputContentUnionMember1FileFile + + type: Literal["file"] + + +MessageChatCompletionUserMessageParamOutputContentUnionMember1: TypeAlias = Union[ + MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam, + MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParam, + MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParam, + MessageChatCompletionUserMessageParamOutputContentUnionMember1File, +] + + +class MessageChatCompletionUserMessageParamOutput(BaseModel): + content: Union[str, List[MessageChatCompletionUserMessageParamOutputContentUnionMember1]] + + role: Literal["user"] + + name: Optional[str] = None + + +class MessageChatCompletionAssistantMessageParamOutputAudio(BaseModel): + id: str + + +class MessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam(BaseModel): + text: str + + type: Literal["text"] + + +class MessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartRefusalParam( + BaseModel +): + refusal: str + + type: Literal["refusal"] + + +MessageChatCompletionAssistantMessageParamOutputContentUnionMember1: TypeAlias = Union[ + MessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam, + MessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartRefusalParam, +] + + +class MessageChatCompletionAssistantMessageParamOutputFunctionCall(BaseModel): + arguments: str + + name: str + + +class MessageChatCompletionAssistantMessageParamOutputToolCallFunction(BaseModel): + arguments: str + + name: str + + +class MessageChatCompletionAssistantMessageParamOutputToolCall(BaseModel): + id: str + + function: MessageChatCompletionAssistantMessageParamOutputToolCallFunction + + type: Literal["function"] + + +class MessageChatCompletionAssistantMessageParamOutput(BaseModel): + role: Literal["assistant"] + + audio: Optional[MessageChatCompletionAssistantMessageParamOutputAudio] = None + + content: Union[str, List[MessageChatCompletionAssistantMessageParamOutputContentUnionMember1], None] = None + + function_call: Optional[MessageChatCompletionAssistantMessageParamOutputFunctionCall] = None + + name: Optional[str] = None + + refusal: Optional[str] = None + + tool_calls: 
Optional[List[MessageChatCompletionAssistantMessageParamOutputToolCall]] = None + + +class MessageChatCompletionToolMessageParamContentUnionMember1(BaseModel): + text: str + + type: Literal["text"] + + +class MessageChatCompletionToolMessageParam(BaseModel): + content: Union[str, List[MessageChatCompletionToolMessageParamContentUnionMember1]] + + role: Literal["tool"] + + tool_call_id: str + + +class MessageChatCompletionFunctionMessageParam(BaseModel): + content: Optional[str] = None + + name: str + + role: Literal["function"] + + +Message: TypeAlias = Union[ + MessageChatCompletionDeveloperMessageParam, + MessageChatCompletionSystemMessageParam, + MessageChatCompletionUserMessageParamOutput, + MessageChatCompletionAssistantMessageParamOutput, + MessageChatCompletionToolMessageParam, + MessageChatCompletionFunctionMessageParam, +] + + class QueryLogListResponse(BaseModel): id: str @@ -83,8 +294,12 @@ class QueryLogListResponse(BaseModel): formatted_guardrail_eval_scores: Optional[Dict[str, FormattedGuardrailEvalScores]] = None + formatted_messages: Optional[str] = None + formatted_non_guardrail_eval_scores: Optional[Dict[str, FormattedNonGuardrailEvalScores]] = None + formatted_original_question: Optional[str] = None + is_bad_response: bool project_id: str @@ -132,6 +347,20 @@ class QueryLogListResponse(BaseModel): guardrailed: Optional[bool] = None """If true, the response was guardrailed""" + messages: Optional[List[Message]] = None + """Optional message history to provide conversation context for the query. + + Used to rewrite query into a self-contained version of itself. If not provided, + the query will be treated as self-contained. + """ + + original_question: Optional[str] = None + """The original question that was asked before any rewriting or processing. + + For all non-conversational RAG, original_question should be the same as the + final question seen in Codex. 
+ """ + primary_eval_issue: Optional[str] = None """Primary issue identified in evaluation""" diff --git a/src/codex/types/projects/query_log_retrieve_response.py b/src/codex/types/projects/query_log_retrieve_response.py index 380bacb..784009c 100644 --- a/src/codex/types/projects/query_log_retrieve_response.py +++ b/src/codex/types/projects/query_log_retrieve_response.py @@ -2,7 +2,7 @@ from typing import Dict, List, Union, Optional from datetime import datetime -from typing_extensions import Literal +from typing_extensions import Literal, TypeAlias from ..._models import BaseModel @@ -14,6 +14,31 @@ "FormattedNonGuardrailEvalScores", "Context", "DeterministicGuardrailsResults", + "Message", + "MessageChatCompletionDeveloperMessageParam", + "MessageChatCompletionDeveloperMessageParamContentUnionMember1", + "MessageChatCompletionSystemMessageParam", + "MessageChatCompletionSystemMessageParamContentUnionMember1", + "MessageChatCompletionUserMessageParamOutput", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParam", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParamImageURL", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParam", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParamInputAudio", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1File", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1FileFile", + "MessageChatCompletionAssistantMessageParamOutput", + "MessageChatCompletionAssistantMessageParamOutputAudio", + "MessageChatCompletionAssistantMessageParamOutputContentUnionMember1", + "MessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam", + "MessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartRefusalParam", + "MessageChatCompletionAssistantMessageParamOutputFunctionCall", + "MessageChatCompletionAssistantMessageParamOutputToolCall", + "MessageChatCompletionAssistantMessageParamOutputToolCallFunction", + "MessageChatCompletionToolMessageParam", + "MessageChatCompletionToolMessageParamContentUnionMember1", + "MessageChatCompletionFunctionMessageParam", ] @@ -66,6 +91,192 @@ class DeterministicGuardrailsResults(BaseModel): matches: Optional[List[str]] = None +class MessageChatCompletionDeveloperMessageParamContentUnionMember1(BaseModel): + text: str + + type: Literal["text"] + + +class MessageChatCompletionDeveloperMessageParam(BaseModel): + content: Union[str, List[MessageChatCompletionDeveloperMessageParamContentUnionMember1]] + + role: Literal["developer"] + + name: Optional[str] = None + + +class MessageChatCompletionSystemMessageParamContentUnionMember1(BaseModel): + text: str + + type: Literal["text"] + + +class MessageChatCompletionSystemMessageParam(BaseModel): + content: Union[str, List[MessageChatCompletionSystemMessageParamContentUnionMember1]] + + role: Literal["system"] + + name: Optional[str] = None + + +class MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam(BaseModel): + text: str + + type: Literal["text"] + + +class MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParamImageURL( + BaseModel +): + 
url: str + + detail: Optional[Literal["auto", "low", "high"]] = None + + +class MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParam(BaseModel): + image_url: MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParamImageURL + + type: Literal["image_url"] + + +class MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParamInputAudio( + BaseModel +): + data: str + + format: Literal["wav", "mp3"] + + +class MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParam(BaseModel): + input_audio: ( + MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParamInputAudio + ) + + type: Literal["input_audio"] + + +class MessageChatCompletionUserMessageParamOutputContentUnionMember1FileFile(BaseModel): + file_data: Optional[str] = None + + file_id: Optional[str] = None + + filename: Optional[str] = None + + +class MessageChatCompletionUserMessageParamOutputContentUnionMember1File(BaseModel): + file: MessageChatCompletionUserMessageParamOutputContentUnionMember1FileFile + + type: Literal["file"] + + +MessageChatCompletionUserMessageParamOutputContentUnionMember1: TypeAlias = Union[ + MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam, + MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParam, + MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParam, + MessageChatCompletionUserMessageParamOutputContentUnionMember1File, +] + + +class MessageChatCompletionUserMessageParamOutput(BaseModel): + content: Union[str, List[MessageChatCompletionUserMessageParamOutputContentUnionMember1]] + + role: Literal["user"] + + name: Optional[str] = None + + +class MessageChatCompletionAssistantMessageParamOutputAudio(BaseModel): + id: str + + +class MessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam(BaseModel): + text: str + + type: Literal["text"] + + +class MessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartRefusalParam( + BaseModel +): + refusal: str + + type: Literal["refusal"] + + +MessageChatCompletionAssistantMessageParamOutputContentUnionMember1: TypeAlias = Union[ + MessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam, + MessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartRefusalParam, +] + + +class MessageChatCompletionAssistantMessageParamOutputFunctionCall(BaseModel): + arguments: str + + name: str + + +class MessageChatCompletionAssistantMessageParamOutputToolCallFunction(BaseModel): + arguments: str + + name: str + + +class MessageChatCompletionAssistantMessageParamOutputToolCall(BaseModel): + id: str + + function: MessageChatCompletionAssistantMessageParamOutputToolCallFunction + + type: Literal["function"] + + +class MessageChatCompletionAssistantMessageParamOutput(BaseModel): + role: Literal["assistant"] + + audio: Optional[MessageChatCompletionAssistantMessageParamOutputAudio] = None + + content: Union[str, List[MessageChatCompletionAssistantMessageParamOutputContentUnionMember1], None] = None + + function_call: Optional[MessageChatCompletionAssistantMessageParamOutputFunctionCall] = None + + name: Optional[str] = None + + refusal: Optional[str] = None + + tool_calls: 
Optional[List[MessageChatCompletionAssistantMessageParamOutputToolCall]] = None + + +class MessageChatCompletionToolMessageParamContentUnionMember1(BaseModel): + text: str + + type: Literal["text"] + + +class MessageChatCompletionToolMessageParam(BaseModel): + content: Union[str, List[MessageChatCompletionToolMessageParamContentUnionMember1]] + + role: Literal["tool"] + + tool_call_id: str + + +class MessageChatCompletionFunctionMessageParam(BaseModel): + content: Optional[str] = None + + name: str + + role: Literal["function"] + + +Message: TypeAlias = Union[ + MessageChatCompletionDeveloperMessageParam, + MessageChatCompletionSystemMessageParam, + MessageChatCompletionUserMessageParamOutput, + MessageChatCompletionAssistantMessageParamOutput, + MessageChatCompletionToolMessageParam, + MessageChatCompletionFunctionMessageParam, +] + + class QueryLogRetrieveResponse(BaseModel): id: str @@ -83,8 +294,12 @@ class QueryLogRetrieveResponse(BaseModel): formatted_guardrail_eval_scores: Optional[Dict[str, FormattedGuardrailEvalScores]] = None + formatted_messages: Optional[str] = None + formatted_non_guardrail_eval_scores: Optional[Dict[str, FormattedNonGuardrailEvalScores]] = None + formatted_original_question: Optional[str] = None + is_bad_response: bool needs_review: bool @@ -136,6 +351,20 @@ class QueryLogRetrieveResponse(BaseModel): guardrailed: Optional[bool] = None """If true, the response was guardrailed""" + messages: Optional[List[Message]] = None + """Optional message history to provide conversation context for the query. + + Used to rewrite query into a self-contained version of itself. If not provided, + the query will be treated as self-contained. + """ + + original_question: Optional[str] = None + """The original question that was asked before any rewriting or processing. + + For all non-conversational RAG, original_question should be the same as the + final question seen in Codex. 
+ """ + primary_eval_issue: Optional[str] = None """Primary issue identified in evaluation""" diff --git a/src/codex/types/projects/remediation_list_resolved_logs_response.py b/src/codex/types/projects/remediation_list_resolved_logs_response.py index 876e7ce..1e0154c 100644 --- a/src/codex/types/projects/remediation_list_resolved_logs_response.py +++ b/src/codex/types/projects/remediation_list_resolved_logs_response.py @@ -2,7 +2,7 @@ from typing import Dict, List, Union, Optional from datetime import datetime -from typing_extensions import Literal +from typing_extensions import Literal, TypeAlias from ..._models import BaseModel @@ -15,6 +15,31 @@ "QueryLogFormattedNonGuardrailEvalScores", "QueryLogContext", "QueryLogDeterministicGuardrailsResults", + "QueryLogMessage", + "QueryLogMessageChatCompletionDeveloperMessageParam", + "QueryLogMessageChatCompletionDeveloperMessageParamContentUnionMember1", + "QueryLogMessageChatCompletionSystemMessageParam", + "QueryLogMessageChatCompletionSystemMessageParamContentUnionMember1", + "QueryLogMessageChatCompletionUserMessageParamOutput", + "QueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1", + "QueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam", + "QueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParam", + "QueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParamImageURL", + "QueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParam", + "QueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParamInputAudio", + "QueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1File", + "QueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1FileFile", + "QueryLogMessageChatCompletionAssistantMessageParamOutput", + "QueryLogMessageChatCompletionAssistantMessageParamOutputAudio", + "QueryLogMessageChatCompletionAssistantMessageParamOutputContentUnionMember1", + "QueryLogMessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam", + "QueryLogMessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartRefusalParam", + "QueryLogMessageChatCompletionAssistantMessageParamOutputFunctionCall", + "QueryLogMessageChatCompletionAssistantMessageParamOutputToolCall", + "QueryLogMessageChatCompletionAssistantMessageParamOutputToolCallFunction", + "QueryLogMessageChatCompletionToolMessageParam", + "QueryLogMessageChatCompletionToolMessageParamContentUnionMember1", + "QueryLogMessageChatCompletionFunctionMessageParam", ] @@ -67,6 +92,198 @@ class QueryLogDeterministicGuardrailsResults(BaseModel): matches: Optional[List[str]] = None +class QueryLogMessageChatCompletionDeveloperMessageParamContentUnionMember1(BaseModel): + text: str + + type: Literal["text"] + + +class QueryLogMessageChatCompletionDeveloperMessageParam(BaseModel): + content: Union[str, List[QueryLogMessageChatCompletionDeveloperMessageParamContentUnionMember1]] + + role: Literal["developer"] + + name: Optional[str] = None + + +class QueryLogMessageChatCompletionSystemMessageParamContentUnionMember1(BaseModel): + text: str + + type: Literal["text"] + + +class QueryLogMessageChatCompletionSystemMessageParam(BaseModel): + content: Union[str, List[QueryLogMessageChatCompletionSystemMessageParamContentUnionMember1]] + + role: 
Literal["system"] + + name: Optional[str] = None + + +class QueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam( + BaseModel +): + text: str + + type: Literal["text"] + + +class QueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + detail: Optional[Literal["auto", "low", "high"]] = None + + +class QueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParam( + BaseModel +): + image_url: QueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParamImageURL + + type: Literal["image_url"] + + +class QueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParamInputAudio( + BaseModel +): + data: str + + format: Literal["wav", "mp3"] + + +class QueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParam( + BaseModel +): + input_audio: QueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParamInputAudio + + type: Literal["input_audio"] + + +class QueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1FileFile(BaseModel): + file_data: Optional[str] = None + + file_id: Optional[str] = None + + filename: Optional[str] = None + + +class QueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1File(BaseModel): + file: QueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1FileFile + + type: Literal["file"] + + +QueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1: TypeAlias = Union[ + QueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam, + QueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParam, + QueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParam, + QueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1File, +] + + +class QueryLogMessageChatCompletionUserMessageParamOutput(BaseModel): + content: Union[str, List[QueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1]] + + role: Literal["user"] + + name: Optional[str] = None + + +class QueryLogMessageChatCompletionAssistantMessageParamOutputAudio(BaseModel): + id: str + + +class QueryLogMessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam( + BaseModel +): + text: str + + type: Literal["text"] + + +class QueryLogMessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartRefusalParam( + BaseModel +): + refusal: str + + type: Literal["refusal"] + + +QueryLogMessageChatCompletionAssistantMessageParamOutputContentUnionMember1: TypeAlias = Union[ + QueryLogMessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam, + QueryLogMessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartRefusalParam, +] + + +class QueryLogMessageChatCompletionAssistantMessageParamOutputFunctionCall(BaseModel): + arguments: str + + name: str + + +class QueryLogMessageChatCompletionAssistantMessageParamOutputToolCallFunction(BaseModel): + arguments: str + + name: str + + +class QueryLogMessageChatCompletionAssistantMessageParamOutputToolCall(BaseModel): + id: str + + function: 
QueryLogMessageChatCompletionAssistantMessageParamOutputToolCallFunction + + type: Literal["function"] + + +class QueryLogMessageChatCompletionAssistantMessageParamOutput(BaseModel): + role: Literal["assistant"] + + audio: Optional[QueryLogMessageChatCompletionAssistantMessageParamOutputAudio] = None + + content: Union[str, List[QueryLogMessageChatCompletionAssistantMessageParamOutputContentUnionMember1], None] = None + + function_call: Optional[QueryLogMessageChatCompletionAssistantMessageParamOutputFunctionCall] = None + + name: Optional[str] = None + + refusal: Optional[str] = None + + tool_calls: Optional[List[QueryLogMessageChatCompletionAssistantMessageParamOutputToolCall]] = None + + +class QueryLogMessageChatCompletionToolMessageParamContentUnionMember1(BaseModel): + text: str + + type: Literal["text"] + + +class QueryLogMessageChatCompletionToolMessageParam(BaseModel): + content: Union[str, List[QueryLogMessageChatCompletionToolMessageParamContentUnionMember1]] + + role: Literal["tool"] + + tool_call_id: str + + +class QueryLogMessageChatCompletionFunctionMessageParam(BaseModel): + content: Optional[str] = None + + name: str + + role: Literal["function"] + + +QueryLogMessage: TypeAlias = Union[ + QueryLogMessageChatCompletionDeveloperMessageParam, + QueryLogMessageChatCompletionSystemMessageParam, + QueryLogMessageChatCompletionUserMessageParamOutput, + QueryLogMessageChatCompletionAssistantMessageParamOutput, + QueryLogMessageChatCompletionToolMessageParam, + QueryLogMessageChatCompletionFunctionMessageParam, +] + + class QueryLog(BaseModel): id: str @@ -84,8 +301,12 @@ class QueryLog(BaseModel): formatted_guardrail_eval_scores: Optional[Dict[str, QueryLogFormattedGuardrailEvalScores]] = None + formatted_messages: Optional[str] = None + formatted_non_guardrail_eval_scores: Optional[Dict[str, QueryLogFormattedNonGuardrailEvalScores]] = None + formatted_original_question: Optional[str] = None + is_bad_response: bool project_id: str @@ -133,6 +354,20 @@ class QueryLog(BaseModel): guardrailed: Optional[bool] = None """If true, the response was guardrailed""" + messages: Optional[List[QueryLogMessage]] = None + """Optional message history to provide conversation context for the query. + + Used to rewrite query into a self-contained version of itself. If not provided, + the query will be treated as self-contained. + """ + + original_question: Optional[str] = None + """The original question that was asked before any rewriting or processing. + + For all non-conversational RAG, original_question should be the same as the + final question seen in Codex. 
+ """ + primary_eval_issue: Optional[str] = None """Primary issue identified in evaluation""" diff --git a/tests/api_resources/projects/test_remediations.py b/tests/api_resources/projects/test_remediations.py index 947850f..5866dbe 100644 --- a/tests/api_resources/projects/test_remediations.py +++ b/tests/api_resources/projects/test_remediations.py @@ -35,7 +35,7 @@ class TestRemediations: def test_method_create(self, client: Codex) -> None: remediation = client.projects.remediations.create( project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - question="question", + question="x", ) assert_matches_type(RemediationCreateResponse, remediation, path=["response"]) @@ -44,7 +44,7 @@ def test_method_create(self, client: Codex) -> None: def test_method_create_with_all_params(self, client: Codex) -> None: remediation = client.projects.remediations.create( project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - question="question", + question="x", answer="answer", draft_answer="draft_answer", ) @@ -55,7 +55,7 @@ def test_method_create_with_all_params(self, client: Codex) -> None: def test_raw_response_create(self, client: Codex) -> None: response = client.projects.remediations.with_raw_response.create( project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - question="question", + question="x", ) assert response.is_closed is True @@ -68,7 +68,7 @@ def test_raw_response_create(self, client: Codex) -> None: def test_streaming_response_create(self, client: Codex) -> None: with client.projects.remediations.with_streaming_response.create( project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - question="question", + question="x", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -84,7 +84,7 @@ def test_path_params_create(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): client.projects.remediations.with_raw_response.create( project_id="", - question="question", + question="x", ) @pytest.mark.skip() @@ -636,7 +636,7 @@ class TestAsyncRemediations: async def test_method_create(self, async_client: AsyncCodex) -> None: remediation = await async_client.projects.remediations.create( project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - question="question", + question="x", ) assert_matches_type(RemediationCreateResponse, remediation, path=["response"]) @@ -645,7 +645,7 @@ async def test_method_create(self, async_client: AsyncCodex) -> None: async def test_method_create_with_all_params(self, async_client: AsyncCodex) -> None: remediation = await async_client.projects.remediations.create( project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - question="question", + question="x", answer="answer", draft_answer="draft_answer", ) @@ -656,7 +656,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncCodex) -> async def test_raw_response_create(self, async_client: AsyncCodex) -> None: response = await async_client.projects.remediations.with_raw_response.create( project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - question="question", + question="x", ) assert response.is_closed is True @@ -669,7 +669,7 @@ async def test_raw_response_create(self, async_client: AsyncCodex) -> None: async def test_streaming_response_create(self, async_client: AsyncCodex) -> None: async with async_client.projects.remediations.with_streaming_response.create( project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - question="question", + question="x", ) as response: assert not 
response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -685,7 +685,7 @@ async def test_path_params_create(self, async_client: AsyncCodex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): await async_client.projects.remediations.with_raw_response.create( project_id="", - question="question", + question="x", ) @pytest.mark.skip() diff --git a/tests/api_resources/test_projects.py b/tests/api_resources/test_projects.py index 9ecffa0..4507741 100644 --- a/tests/api_resources/test_projects.py +++ b/tests/api_resources/test_projects.py @@ -632,7 +632,7 @@ def test_method_validate(self, client: Codex) -> None: project = client.projects.validate( project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", context="context", - query="query", + query="x", response="string", ) assert_matches_type(ProjectValidateResponse, project, path=["response"]) @@ -643,7 +643,7 @@ def test_method_validate_with_all_params(self, client: Codex) -> None: project = client.projects.validate( project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", context="context", - query="query", + query="x", response="string", use_llm_matching=True, constrain_outputs=["string"], @@ -685,7 +685,7 @@ def test_raw_response_validate(self, client: Codex) -> None: response = client.projects.with_raw_response.validate( project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", context="context", - query="query", + query="x", response="string", ) @@ -700,7 +700,7 @@ def test_streaming_response_validate(self, client: Codex) -> None: with client.projects.with_streaming_response.validate( project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", context="context", - query="query", + query="x", response="string", ) as response: assert not response.is_closed @@ -718,7 +718,7 @@ def test_path_params_validate(self, client: Codex) -> None: client.projects.with_raw_response.validate( project_id="", context="context", - query="query", + query="x", response="string", ) @@ -1334,7 +1334,7 @@ async def test_method_validate(self, async_client: AsyncCodex) -> None: project = await async_client.projects.validate( project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", context="context", - query="query", + query="x", response="string", ) assert_matches_type(ProjectValidateResponse, project, path=["response"]) @@ -1345,7 +1345,7 @@ async def test_method_validate_with_all_params(self, async_client: AsyncCodex) - project = await async_client.projects.validate( project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", context="context", - query="query", + query="x", response="string", use_llm_matching=True, constrain_outputs=["string"], @@ -1387,7 +1387,7 @@ async def test_raw_response_validate(self, async_client: AsyncCodex) -> None: response = await async_client.projects.with_raw_response.validate( project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", context="context", - query="query", + query="x", response="string", ) @@ -1402,7 +1402,7 @@ async def test_streaming_response_validate(self, async_client: AsyncCodex) -> No async with async_client.projects.with_streaming_response.validate( project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", context="context", - query="query", + query="x", response="string", ) as response: assert not response.is_closed @@ -1420,6 +1420,6 @@ async def test_path_params_validate(self, async_client: AsyncCodex) -> None: await async_client.projects.with_raw_response.validate( project_id="", context="context", - query="query", + query="x", response="string", ) From 
f55f4b768f8c1d00bdf61e56b0a7227c8424c5b6 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Tue, 15 Jul 2025 22:17:46 +0000 Subject: [PATCH 07/20] feat(api): api update --- .stats.yml | 2 +- api.md | 2 +- src/codex/resources/projects/evals.py | 38 ++++++++++++++++--- src/codex/types/projects/__init__.py | 1 + src/codex/types/projects/eval_list_params.py | 16 ++++++++ .../types/projects/eval_list_response.py | 11 ++++-- tests/api_resources/projects/test_evals.py | 38 +++++++++++++++---- 7 files changed, 89 insertions(+), 19 deletions(-) create mode 100644 src/codex/types/projects/eval_list_params.py diff --git a/.stats.yml b/.stats.yml index 20ee827..b16c056 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,3 +1,3 @@ configured_endpoints: 55 -openapi_spec_hash: b3a1a58600b52a20671bef2b25f5dbc4 +openapi_spec_hash: 1e86d5a7384400f4c3ddfb824fb31d84 config_hash: 8f6e5c3b064cbb77569a6bf654954a56 diff --git a/api.md b/api.md index 0c02791..22281ae 100644 --- a/api.md +++ b/api.md @@ -192,7 +192,7 @@ Methods: - client.projects.evals.create(project_id, \*\*params) -> ProjectReturnSchema - client.projects.evals.update(path_eval_key, \*, project_id, \*\*params) -> ProjectReturnSchema -- client.projects.evals.list(project_id) -> EvalListResponse +- client.projects.evals.list(project_id, \*\*params) -> EvalListResponse - client.projects.evals.delete(eval_key, \*, project_id) -> ProjectReturnSchema ## QueryLogs diff --git a/src/codex/resources/projects/evals.py b/src/codex/resources/projects/evals.py index 1fc9589..9de41b7 100644 --- a/src/codex/resources/projects/evals.py +++ b/src/codex/resources/projects/evals.py @@ -18,7 +18,7 @@ async_to_streamed_response_wrapper, ) from ..._base_client import make_request_options -from ...types.projects import eval_create_params, eval_update_params +from ...types.projects import eval_list_params, eval_create_params, eval_update_params from ...types.project_return_schema import ProjectReturnSchema from ...types.projects.eval_list_response import EvalListResponse @@ -324,6 +324,9 @@ def list( self, project_id: str, *, + guardrails_only: bool | NotGiven = NOT_GIVEN, + limit: Optional[int] | NotGiven = NOT_GIVEN, + offset: int | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -332,7 +335,7 @@ def list( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> EvalListResponse: """ - Get the evaluations config for a project. + Get the evaluations config for a project with optional pagination. 
Args: extra_headers: Send extra headers @@ -348,7 +351,18 @@ def list( return self._get( f"/api/projects/{project_id}/evals", options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "guardrails_only": guardrails_only, + "limit": limit, + "offset": offset, + }, + eval_list_params.EvalListParams, + ), ), cast_to=EvalListResponse, ) @@ -689,6 +703,9 @@ async def list( self, project_id: str, *, + guardrails_only: bool | NotGiven = NOT_GIVEN, + limit: Optional[int] | NotGiven = NOT_GIVEN, + offset: int | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -697,7 +714,7 @@ async def list( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> EvalListResponse: """ - Get the evaluations config for a project. + Get the evaluations config for a project with optional pagination. Args: extra_headers: Send extra headers @@ -713,7 +730,18 @@ async def list( return await self._get( f"/api/projects/{project_id}/evals", options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=await async_maybe_transform( + { + "guardrails_only": guardrails_only, + "limit": limit, + "offset": offset, + }, + eval_list_params.EvalListParams, + ), ), cast_to=EvalListResponse, ) diff --git a/src/codex/types/projects/__init__.py b/src/codex/types/projects/__init__.py index 4f75470..cb2989f 100644 --- a/src/codex/types/projects/__init__.py +++ b/src/codex/types/projects/__init__.py @@ -2,6 +2,7 @@ from __future__ import annotations +from .eval_list_params import EvalListParams as EvalListParams from .access_key_schema import AccessKeySchema as AccessKeySchema from .eval_create_params import EvalCreateParams as EvalCreateParams from .eval_list_response import EvalListResponse as EvalListResponse diff --git a/src/codex/types/projects/eval_list_params.py b/src/codex/types/projects/eval_list_params.py new file mode 100644 index 0000000..b0f2fb6 --- /dev/null +++ b/src/codex/types/projects/eval_list_params.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Optional +from typing_extensions import TypedDict + +__all__ = ["EvalListParams"] + + +class EvalListParams(TypedDict, total=False): + guardrails_only: bool + + limit: Optional[int] + + offset: int diff --git a/src/codex/types/projects/eval_list_response.py b/src/codex/types/projects/eval_list_response.py index 48859b8..eb2cb9a 100644 --- a/src/codex/types/projects/eval_list_response.py +++ b/src/codex/types/projects/eval_list_response.py @@ -1,14 +1,14 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from typing import List, Optional -from typing_extensions import Literal, TypeAlias +from typing_extensions import Literal from ..._models import BaseModel -__all__ = ["EvalListResponse", "EvalListResponseItem"] +__all__ = ["EvalListResponse", "Eval"] -class EvalListResponseItem(BaseModel): +class Eval(BaseModel): criteria: str """ The evaluation criteria text that describes what aspect is being evaluated and @@ -69,4 +69,7 @@ class EvalListResponseItem(BaseModel): """Whether the evaluation fails when score is above or below the threshold""" -EvalListResponse: TypeAlias = List[EvalListResponseItem] +class EvalListResponse(BaseModel): + evals: List[Eval] + + total_count: int diff --git a/tests/api_resources/projects/test_evals.py b/tests/api_resources/projects/test_evals.py index 22b8380..f36de27 100644 --- a/tests/api_resources/projects/test_evals.py +++ b/tests/api_resources/projects/test_evals.py @@ -259,7 +259,18 @@ def test_path_params_update_overload_2(self, client: Codex) -> None: @parametrize def test_method_list(self, client: Codex) -> None: eval = client.projects.evals.list( - "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + ) + assert_matches_type(EvalListResponse, eval, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_method_list_with_all_params(self, client: Codex) -> None: + eval = client.projects.evals.list( + project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + guardrails_only=True, + limit=1, + offset=0, ) assert_matches_type(EvalListResponse, eval, path=["response"]) @@ -267,7 +278,7 @@ def test_method_list(self, client: Codex) -> None: @parametrize def test_raw_response_list(self, client: Codex) -> None: response = client.projects.evals.with_raw_response.list( - "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", ) assert response.is_closed is True @@ -279,7 +290,7 @@ def test_raw_response_list(self, client: Codex) -> None: @parametrize def test_streaming_response_list(self, client: Codex) -> None: with client.projects.evals.with_streaming_response.list( - "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -294,7 +305,7 @@ def test_streaming_response_list(self, client: Codex) -> None: def test_path_params_list(self, client: Codex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): client.projects.evals.with_raw_response.list( - "", + project_id="", ) @pytest.mark.skip() @@ -596,7 +607,18 @@ async def test_path_params_update_overload_2(self, async_client: AsyncCodex) -> @parametrize async def test_method_list(self, async_client: AsyncCodex) -> None: eval = await async_client.projects.evals.list( - "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + ) + assert_matches_type(EvalListResponse, eval, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_method_list_with_all_params(self, async_client: AsyncCodex) -> None: + eval = await async_client.projects.evals.list( + project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + guardrails_only=True, + limit=1, + offset=0, ) assert_matches_type(EvalListResponse, eval, path=["response"]) @@ -604,7 +626,7 @@ async def test_method_list(self, async_client: AsyncCodex) -> None: @parametrize async def test_raw_response_list(self, async_client: 
AsyncCodex) -> None: response = await async_client.projects.evals.with_raw_response.list( - "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", ) assert response.is_closed is True @@ -616,7 +638,7 @@ async def test_raw_response_list(self, async_client: AsyncCodex) -> None: @parametrize async def test_streaming_response_list(self, async_client: AsyncCodex) -> None: async with async_client.projects.evals.with_streaming_response.list( - "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -631,7 +653,7 @@ async def test_streaming_response_list(self, async_client: AsyncCodex) -> None: async def test_path_params_list(self, async_client: AsyncCodex) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): await async_client.projects.evals.with_raw_response.list( - "", + project_id="", ) @pytest.mark.skip() From 575d1901319984fea901ce216323a5259e17f98c Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Wed, 16 Jul 2025 17:17:47 +0000 Subject: [PATCH 08/20] feat(api): api update --- .stats.yml | 2 +- .../types/projects/query_log_list_by_group_response.py | 6 +++--- src/codex/types/projects/query_log_list_groups_response.py | 6 +++--- src/codex/types/projects/query_log_list_response.py | 6 +++--- src/codex/types/projects/query_log_retrieve_response.py | 6 +++--- .../projects/remediation_list_resolved_logs_response.py | 6 +++--- 6 files changed, 16 insertions(+), 16 deletions(-) diff --git a/.stats.yml b/.stats.yml index b16c056..138e8d5 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,3 +1,3 @@ configured_endpoints: 55 -openapi_spec_hash: 1e86d5a7384400f4c3ddfb824fb31d84 +openapi_spec_hash: fd2542df68972f34edeb819c58600791 config_hash: 8f6e5c3b064cbb77569a6bf654954a56 diff --git a/src/codex/types/projects/query_log_list_by_group_response.py b/src/codex/types/projects/query_log_list_by_group_response.py index ccd2d5e..0ff5909 100644 --- a/src/codex/types/projects/query_log_list_by_group_response.py +++ b/src/codex/types/projects/query_log_list_by_group_response.py @@ -364,10 +364,10 @@ class QueryLogsByGroupQueryLog(BaseModel): """If true, the response was guardrailed""" messages: Optional[List[QueryLogsByGroupQueryLogMessage]] = None - """Optional message history to provide conversation context for the query. + """Message history to provide conversation context for the query. - Used to rewrite query into a self-contained version of itself. If not provided, - the query will be treated as self-contained. + Used for TrustworthyRAG and to rewrite query into a self-contained version of + itself. """ original_question: Optional[str] = None diff --git a/src/codex/types/projects/query_log_list_groups_response.py b/src/codex/types/projects/query_log_list_groups_response.py index 9adb422..495fc56 100644 --- a/src/codex/types/projects/query_log_list_groups_response.py +++ b/src/codex/types/projects/query_log_list_groups_response.py @@ -354,10 +354,10 @@ class QueryLogListGroupsResponse(BaseModel): """If true, the response was guardrailed""" messages: Optional[List[Message]] = None - """Optional message history to provide conversation context for the query. + """Message history to provide conversation context for the query. - Used to rewrite query into a self-contained version of itself. 
If not provided, - the query will be treated as self-contained. + Used for TrustworthyRAG and to rewrite query into a self-contained version of + itself. """ original_question: Optional[str] = None diff --git a/src/codex/types/projects/query_log_list_response.py b/src/codex/types/projects/query_log_list_response.py index f6fbba1..72a8cab 100644 --- a/src/codex/types/projects/query_log_list_response.py +++ b/src/codex/types/projects/query_log_list_response.py @@ -348,10 +348,10 @@ class QueryLogListResponse(BaseModel): """If true, the response was guardrailed""" messages: Optional[List[Message]] = None - """Optional message history to provide conversation context for the query. + """Message history to provide conversation context for the query. - Used to rewrite query into a self-contained version of itself. If not provided, - the query will be treated as self-contained. + Used for TrustworthyRAG and to rewrite query into a self-contained version of + itself. """ original_question: Optional[str] = None diff --git a/src/codex/types/projects/query_log_retrieve_response.py b/src/codex/types/projects/query_log_retrieve_response.py index 784009c..4324269 100644 --- a/src/codex/types/projects/query_log_retrieve_response.py +++ b/src/codex/types/projects/query_log_retrieve_response.py @@ -352,10 +352,10 @@ class QueryLogRetrieveResponse(BaseModel): """If true, the response was guardrailed""" messages: Optional[List[Message]] = None - """Optional message history to provide conversation context for the query. + """Message history to provide conversation context for the query. - Used to rewrite query into a self-contained version of itself. If not provided, - the query will be treated as self-contained. + Used for TrustworthyRAG and to rewrite query into a self-contained version of + itself. """ original_question: Optional[str] = None diff --git a/src/codex/types/projects/remediation_list_resolved_logs_response.py b/src/codex/types/projects/remediation_list_resolved_logs_response.py index 1e0154c..cebfaf4 100644 --- a/src/codex/types/projects/remediation_list_resolved_logs_response.py +++ b/src/codex/types/projects/remediation_list_resolved_logs_response.py @@ -355,10 +355,10 @@ class QueryLog(BaseModel): """If true, the response was guardrailed""" messages: Optional[List[QueryLogMessage]] = None - """Optional message history to provide conversation context for the query. + """Message history to provide conversation context for the query. - Used to rewrite query into a self-contained version of itself. If not provided, - the query will be treated as self-contained. + Used for TrustworthyRAG and to rewrite query into a self-contained version of + itself. """ original_question: Optional[str] = None From 1cdf391742b196d5a723307e8c202a69e00b371d Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Tue, 22 Jul 2025 02:05:22 +0000 Subject: [PATCH 09/20] fix(parsing): ignore empty metadata --- src/codex/_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/codex/_models.py b/src/codex/_models.py index 528d568..ffcbf67 100644 --- a/src/codex/_models.py +++ b/src/codex/_models.py @@ -439,7 +439,7 @@ def construct_type(*, value: object, type_: object, metadata: Optional[List[Any] type_ = type_.__value__ # type: ignore[unreachable] # unwrap `Annotated[T, ...]` -> `T` - if metadata is not None: + if metadata is not None and len(metadata) > 0: meta: tuple[Any, ...] 
= tuple(metadata) elif is_annotated_type(type_): meta = get_args(type_)[1:] From 0a33c4710d4890d17ddd973ba4a2ed183e45e4c7 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Tue, 22 Jul 2025 22:17:30 +0000 Subject: [PATCH 10/20] feat(api): api update --- .stats.yml | 4 +- api.md | 1 - src/codex/resources/projects/projects.py | 102 --------------- src/codex/types/__init__.py | 1 - .../types/project_increment_queries_params.py | 11 -- tests/api_resources/test_projects.py | 118 ------------------ 6 files changed, 2 insertions(+), 235 deletions(-) delete mode 100644 src/codex/types/project_increment_queries_params.py diff --git a/.stats.yml b/.stats.yml index 138e8d5..c31fbb0 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,3 +1,3 @@ -configured_endpoints: 55 -openapi_spec_hash: fd2542df68972f34edeb819c58600791 +configured_endpoints: 54 +openapi_spec_hash: 168bdf5a611596d39812ce7259416529 config_hash: 8f6e5c3b064cbb77569a6bf654954a56 diff --git a/api.md b/api.md index 22281ae..1646d0b 100644 --- a/api.md +++ b/api.md @@ -153,7 +153,6 @@ Methods: - client.projects.list(\*\*params) -> ProjectListResponse - client.projects.delete(project_id) -> None - client.projects.export(project_id) -> object -- client.projects.increment_queries(project_id, \*\*params) -> object - client.projects.invite_sme(project_id, \*\*params) -> ProjectInviteSmeResponse - client.projects.retrieve_analytics(project_id, \*\*params) -> ProjectRetrieveAnalyticsResponse - client.projects.validate(project_id, \*\*params) -> ProjectValidateResponse diff --git a/src/codex/resources/projects/projects.py b/src/codex/resources/projects/projects.py index dc01b11..3a109ed 100644 --- a/src/codex/resources/projects/projects.py +++ b/src/codex/resources/projects/projects.py @@ -2,7 +2,6 @@ from __future__ import annotations -import typing_extensions from typing import Dict, List, Iterable, Optional from typing_extensions import Literal @@ -22,7 +21,6 @@ project_update_params, project_validate_params, project_invite_sme_params, - project_increment_queries_params, project_retrieve_analytics_params, ) from ..._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven @@ -349,45 +347,6 @@ def export( cast_to=object, ) - @typing_extensions.deprecated("deprecated") - def increment_queries( - self, - project_id: str, - *, - count: int | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> object: - """ - Increment the queries metric for a project. 
- - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not project_id: - raise ValueError(f"Expected a non-empty value for `project_id` but received {project_id!r}") - return self._post( - f"/api/projects/{project_id}/increment_queries", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - query=maybe_transform({"count": count}, project_increment_queries_params.ProjectIncrementQueriesParams), - ), - cast_to=object, - ) - def invite_sme( self, project_id: str, @@ -956,47 +915,6 @@ async def export( cast_to=object, ) - @typing_extensions.deprecated("deprecated") - async def increment_queries( - self, - project_id: str, - *, - count: int | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> object: - """ - Increment the queries metric for a project. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not project_id: - raise ValueError(f"Expected a non-empty value for `project_id` but received {project_id!r}") - return await self._post( - f"/api/projects/{project_id}/increment_queries", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - query=await async_maybe_transform( - {"count": count}, project_increment_queries_params.ProjectIncrementQueriesParams - ), - ), - cast_to=object, - ) - async def invite_sme( self, project_id: str, @@ -1308,11 +1226,6 @@ def __init__(self, projects: ProjectsResource) -> None: self.export = to_raw_response_wrapper( projects.export, ) - self.increment_queries = ( # pyright: ignore[reportDeprecated] - to_raw_response_wrapper( - projects.increment_queries # pyright: ignore[reportDeprecated], - ) - ) self.invite_sme = to_raw_response_wrapper( projects.invite_sme, ) @@ -1362,11 +1275,6 @@ def __init__(self, projects: AsyncProjectsResource) -> None: self.export = async_to_raw_response_wrapper( projects.export, ) - self.increment_queries = ( # pyright: ignore[reportDeprecated] - async_to_raw_response_wrapper( - projects.increment_queries # pyright: ignore[reportDeprecated], - ) - ) self.invite_sme = async_to_raw_response_wrapper( projects.invite_sme, ) @@ -1416,11 +1324,6 @@ def __init__(self, projects: ProjectsResource) -> None: self.export = to_streamed_response_wrapper( projects.export, ) - self.increment_queries = ( # pyright: ignore[reportDeprecated] - to_streamed_response_wrapper( - projects.increment_queries # pyright: ignore[reportDeprecated], - ) - ) self.invite_sme = to_streamed_response_wrapper( projects.invite_sme, ) @@ -1470,11 +1373,6 @@ def __init__(self, projects: AsyncProjectsResource) -> None: self.export = async_to_streamed_response_wrapper( projects.export, ) - self.increment_queries = ( # 
pyright: ignore[reportDeprecated] - async_to_streamed_response_wrapper( - projects.increment_queries # pyright: ignore[reportDeprecated], - ) - ) self.invite_sme = async_to_streamed_response_wrapper( projects.invite_sme, ) diff --git a/src/codex/types/__init__.py b/src/codex/types/__init__.py index 70713a3..daa1635 100644 --- a/src/codex/types/__init__.py +++ b/src/codex/types/__init__.py @@ -19,7 +19,6 @@ from .organization_schema_public import OrganizationSchemaPublic as OrganizationSchemaPublic from .project_invite_sme_response import ProjectInviteSmeResponse as ProjectInviteSmeResponse from .user_activate_account_params import UserActivateAccountParams as UserActivateAccountParams -from .project_increment_queries_params import ProjectIncrementQueriesParams as ProjectIncrementQueriesParams from .project_retrieve_analytics_params import ProjectRetrieveAnalyticsParams as ProjectRetrieveAnalyticsParams from .organization_list_members_response import OrganizationListMembersResponse as OrganizationListMembersResponse from .project_retrieve_analytics_response import ProjectRetrieveAnalyticsResponse as ProjectRetrieveAnalyticsResponse diff --git a/src/codex/types/project_increment_queries_params.py b/src/codex/types/project_increment_queries_params.py deleted file mode 100644 index f6043a7..0000000 --- a/src/codex/types/project_increment_queries_params.py +++ /dev/null @@ -1,11 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing_extensions import TypedDict - -__all__ = ["ProjectIncrementQueriesParams"] - - -class ProjectIncrementQueriesParams(TypedDict, total=False): - count: int diff --git a/tests/api_resources/test_projects.py b/tests/api_resources/test_projects.py index 4507741..ce4b7f5 100644 --- a/tests/api_resources/test_projects.py +++ b/tests/api_resources/test_projects.py @@ -18,8 +18,6 @@ ) from tests.utils import assert_matches_type -# pyright: reportDeprecated=false - base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") @@ -462,64 +460,6 @@ def test_path_params_export(self, client: Codex) -> None: "", ) - @pytest.mark.skip() - @parametrize - def test_method_increment_queries(self, client: Codex) -> None: - with pytest.warns(DeprecationWarning): - project = client.projects.increment_queries( - project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - ) - - assert_matches_type(object, project, path=["response"]) - - @pytest.mark.skip() - @parametrize - def test_method_increment_queries_with_all_params(self, client: Codex) -> None: - with pytest.warns(DeprecationWarning): - project = client.projects.increment_queries( - project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - count=0, - ) - - assert_matches_type(object, project, path=["response"]) - - @pytest.mark.skip() - @parametrize - def test_raw_response_increment_queries(self, client: Codex) -> None: - with pytest.warns(DeprecationWarning): - response = client.projects.with_raw_response.increment_queries( - project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - project = response.parse() - assert_matches_type(object, project, path=["response"]) - - @pytest.mark.skip() - @parametrize - def test_streaming_response_increment_queries(self, client: Codex) -> None: - with pytest.warns(DeprecationWarning): - with client.projects.with_streaming_response.increment_queries( - 
project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - project = response.parse() - assert_matches_type(object, project, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @pytest.mark.skip() - @parametrize - def test_path_params_increment_queries(self, client: Codex) -> None: - with pytest.warns(DeprecationWarning): - with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): - client.projects.with_raw_response.increment_queries( - project_id="", - ) - @pytest.mark.skip() @parametrize def test_method_invite_sme(self, client: Codex) -> None: @@ -1164,64 +1104,6 @@ async def test_path_params_export(self, async_client: AsyncCodex) -> None: "", ) - @pytest.mark.skip() - @parametrize - async def test_method_increment_queries(self, async_client: AsyncCodex) -> None: - with pytest.warns(DeprecationWarning): - project = await async_client.projects.increment_queries( - project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - ) - - assert_matches_type(object, project, path=["response"]) - - @pytest.mark.skip() - @parametrize - async def test_method_increment_queries_with_all_params(self, async_client: AsyncCodex) -> None: - with pytest.warns(DeprecationWarning): - project = await async_client.projects.increment_queries( - project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - count=0, - ) - - assert_matches_type(object, project, path=["response"]) - - @pytest.mark.skip() - @parametrize - async def test_raw_response_increment_queries(self, async_client: AsyncCodex) -> None: - with pytest.warns(DeprecationWarning): - response = await async_client.projects.with_raw_response.increment_queries( - project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - project = await response.parse() - assert_matches_type(object, project, path=["response"]) - - @pytest.mark.skip() - @parametrize - async def test_streaming_response_increment_queries(self, async_client: AsyncCodex) -> None: - with pytest.warns(DeprecationWarning): - async with async_client.projects.with_streaming_response.increment_queries( - project_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - project = await response.parse() - assert_matches_type(object, project, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @pytest.mark.skip() - @parametrize - async def test_path_params_increment_queries(self, async_client: AsyncCodex) -> None: - with pytest.warns(DeprecationWarning): - with pytest.raises(ValueError, match=r"Expected a non-empty value for `project_id` but received ''"): - await async_client.projects.with_raw_response.increment_queries( - project_id="", - ) - @pytest.mark.skip() @parametrize async def test_method_invite_sme(self, async_client: AsyncCodex) -> None: From 3c74ca0f1a913bed65cc4c6580dda25a07a90b74 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Wed, 23 Jul 2025 02:06:52 +0000 Subject: [PATCH 11/20] fix(parsing): parse extra field types --- src/codex/_models.py | 25 +++++++++++++++++++++++-- tests/test_models.py | 29 ++++++++++++++++++++++++++++- 2 files changed, 51 insertions(+), 3 deletions(-) diff --git a/src/codex/_models.py 
b/src/codex/_models.py index ffcbf67..b8387ce 100644 --- a/src/codex/_models.py +++ b/src/codex/_models.py @@ -208,14 +208,18 @@ def construct( # pyright: ignore[reportIncompatibleMethodOverride] else: fields_values[name] = field_get_default(field) + extra_field_type = _get_extra_fields_type(__cls) + _extra = {} for key, value in values.items(): if key not in model_fields: + parsed = construct_type(value=value, type_=extra_field_type) if extra_field_type is not None else value + if PYDANTIC_V2: - _extra[key] = value + _extra[key] = parsed else: _fields_set.add(key) - fields_values[key] = value + fields_values[key] = parsed object.__setattr__(m, "__dict__", fields_values) @@ -370,6 +374,23 @@ def _construct_field(value: object, field: FieldInfo, key: str) -> object: return construct_type(value=value, type_=type_, metadata=getattr(field, "metadata", None)) +def _get_extra_fields_type(cls: type[pydantic.BaseModel]) -> type | None: + if not PYDANTIC_V2: + # TODO + return None + + schema = cls.__pydantic_core_schema__ + if schema["type"] == "model": + fields = schema["schema"] + if fields["type"] == "model-fields": + extras = fields.get("extras_schema") + if extras and "cls" in extras: + # mypy can't narrow the type + return extras["cls"] # type: ignore[no-any-return] + + return None + + def is_basemodel(type_: type) -> bool: """Returns whether or not the given type is either a `BaseModel` or a union of `BaseModel`""" if is_union(type_): diff --git a/tests/test_models.py b/tests/test_models.py index 3452a61..a989702 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -1,5 +1,5 @@ import json -from typing import Any, Dict, List, Union, Optional, cast +from typing import TYPE_CHECKING, Any, Dict, List, Union, Optional, cast from datetime import datetime, timezone from typing_extensions import Literal, Annotated, TypeAliasType @@ -934,3 +934,30 @@ class Type2(BaseModel): ) assert isinstance(model, Type1) assert isinstance(model.value, InnerType2) + + +@pytest.mark.skipif(not PYDANTIC_V2, reason="this is only supported in pydantic v2 for now") +def test_extra_properties() -> None: + class Item(BaseModel): + prop: int + + class Model(BaseModel): + __pydantic_extra__: Dict[str, Item] = Field(init=False) # pyright: ignore[reportIncompatibleVariableOverride] + + other: str + + if TYPE_CHECKING: + + def __getattr__(self, attr: str) -> Item: ... 
+ + model = construct_type( + type_=Model, + value={ + "a": {"prop": 1}, + "other": "foo", + }, + ) + assert isinstance(model, Model) + assert model.a.prop == 1 + assert isinstance(model.a, Item) + assert model.other == "foo" From 7e7caf9a3ad214c5df3686122e4f26b850dcb8b0 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Wed, 23 Jul 2025 20:17:58 +0000 Subject: [PATCH 12/20] feat(api): api update --- .stats.yml | 2 +- src/codex/resources/projects/query_logs.py | 4 ++-- src/codex/types/projects/query_log_list_groups_params.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.stats.yml b/.stats.yml index c31fbb0..4cb0619 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,3 +1,3 @@ configured_endpoints: 54 -openapi_spec_hash: 168bdf5a611596d39812ce7259416529 +openapi_spec_hash: 1ef62145e2247a442b75c87b23267e2d config_hash: 8f6e5c3b064cbb77569a6bf654954a56 diff --git a/src/codex/resources/projects/query_logs.py b/src/codex/resources/projects/query_logs.py index 9ccecbe..6fa490e 100644 --- a/src/codex/resources/projects/query_logs.py +++ b/src/codex/resources/projects/query_logs.py @@ -286,7 +286,7 @@ def list_groups( List[Literal["hallucination", "search_failure", "unhelpful", "difficult_query", "ungrounded"]] ] | NotGiven = NOT_GIVEN, - sort: Optional[Literal["created_at", "primary_eval_issue_score", "total_count", "custom_rank"]] + sort: Optional[Literal["created_at", "primary_eval_issue_score", "total_count", "custom_rank", "impact_score"]] | NotGiven = NOT_GIVEN, was_cache_hit: Optional[bool] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. @@ -646,7 +646,7 @@ def list_groups( List[Literal["hallucination", "search_failure", "unhelpful", "difficult_query", "ungrounded"]] ] | NotGiven = NOT_GIVEN, - sort: Optional[Literal["created_at", "primary_eval_issue_score", "total_count", "custom_rank"]] + sort: Optional[Literal["created_at", "primary_eval_issue_score", "total_count", "custom_rank", "impact_score"]] | NotGiven = NOT_GIVEN, was_cache_hit: Optional[bool] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
diff --git a/src/codex/types/projects/query_log_list_groups_params.py b/src/codex/types/projects/query_log_list_groups_params.py index cd82d9a..f75ee29 100644 --- a/src/codex/types/projects/query_log_list_groups_params.py +++ b/src/codex/types/projects/query_log_list_groups_params.py @@ -44,7 +44,7 @@ class QueryLogListGroupsParams(TypedDict, total=False): ] """Filter logs that have ANY of these primary evaluation issues (OR operation)""" - sort: Optional[Literal["created_at", "primary_eval_issue_score", "total_count", "custom_rank"]] + sort: Optional[Literal["created_at", "primary_eval_issue_score", "total_count", "custom_rank", "impact_score"]] was_cache_hit: Optional[bool] """Filter by cache hit status""" From 6992031e6aa610031f24d818040050b0fc185c34 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Thu, 24 Jul 2025 18:17:59 +0000 Subject: [PATCH 13/20] feat(api): api update --- .stats.yml | 2 +- src/codex/types/project_create_params.py | 2 ++ src/codex/types/project_list_response.py | 2 ++ src/codex/types/project_retrieve_response.py | 2 ++ src/codex/types/project_return_schema.py | 2 ++ src/codex/types/project_update_params.py | 2 ++ tests/api_resources/test_projects.py | 4 ++++ 7 files changed, 15 insertions(+), 1 deletion(-) diff --git a/.stats.yml b/.stats.yml index 4cb0619..de1764d 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,3 +1,3 @@ configured_endpoints: 54 -openapi_spec_hash: 1ef62145e2247a442b75c87b23267e2d +openapi_spec_hash: 1c6e7d5ed06d72868a57e64381bc473c config_hash: 8f6e5c3b064cbb77569a6bf654954a56 diff --git a/src/codex/types/project_create_params.py b/src/codex/types/project_create_params.py index a836978..c75023f 100644 --- a/src/codex/types/project_create_params.py +++ b/src/codex/types/project_create_params.py @@ -333,4 +333,6 @@ class Config(TypedDict, total=False): query_use_llm_matching: bool + tlm_evals_model: str + upper_llm_match_distance_threshold: float diff --git a/src/codex/types/project_list_response.py b/src/codex/types/project_list_response.py index c39bf08..4ac3849 100644 --- a/src/codex/types/project_list_response.py +++ b/src/codex/types/project_list_response.py @@ -323,6 +323,8 @@ class ProjectConfig(BaseModel): query_use_llm_matching: Optional[bool] = None + tlm_evals_model: Optional[str] = None + upper_llm_match_distance_threshold: Optional[float] = None diff --git a/src/codex/types/project_retrieve_response.py b/src/codex/types/project_retrieve_response.py index 7d1f8ed..6e87d65 100644 --- a/src/codex/types/project_retrieve_response.py +++ b/src/codex/types/project_retrieve_response.py @@ -322,6 +322,8 @@ class Config(BaseModel): query_use_llm_matching: Optional[bool] = None + tlm_evals_model: Optional[str] = None + upper_llm_match_distance_threshold: Optional[float] = None diff --git a/src/codex/types/project_return_schema.py b/src/codex/types/project_return_schema.py index 170d799..bb087cd 100644 --- a/src/codex/types/project_return_schema.py +++ b/src/codex/types/project_return_schema.py @@ -322,6 +322,8 @@ class Config(BaseModel): query_use_llm_matching: Optional[bool] = None + tlm_evals_model: Optional[str] = None + upper_llm_match_distance_threshold: Optional[float] = None diff --git a/src/codex/types/project_update_params.py b/src/codex/types/project_update_params.py index 3e24441..c550b43 100644 --- a/src/codex/types/project_update_params.py +++ b/src/codex/types/project_update_params.py @@ -331,4 +331,6 @@ class Config(TypedDict, total=False): query_use_llm_matching: bool 
+ tlm_evals_model: str + upper_llm_match_distance_threshold: float diff --git a/tests/api_resources/test_projects.py b/tests/api_resources/test_projects.py index ce4b7f5..9312ca0 100644 --- a/tests/api_resources/test_projects.py +++ b/tests/api_resources/test_projects.py @@ -118,6 +118,7 @@ def test_method_create_with_all_params(self, client: Codex) -> None: "lower_llm_match_distance_threshold": 0, "max_distance": 0, "query_use_llm_matching": True, + "tlm_evals_model": "tlm_evals_model", "upper_llm_match_distance_threshold": 0, }, name="name", @@ -293,6 +294,7 @@ def test_method_update_with_all_params(self, client: Codex) -> None: "lower_llm_match_distance_threshold": 0, "max_distance": 0, "query_use_llm_matching": True, + "tlm_evals_model": "tlm_evals_model", "upper_llm_match_distance_threshold": 0, }, description="description", @@ -762,6 +764,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncCodex) -> "lower_llm_match_distance_threshold": 0, "max_distance": 0, "query_use_llm_matching": True, + "tlm_evals_model": "tlm_evals_model", "upper_llm_match_distance_threshold": 0, }, name="name", @@ -937,6 +940,7 @@ async def test_method_update_with_all_params(self, async_client: AsyncCodex) -> "lower_llm_match_distance_threshold": 0, "max_distance": 0, "query_use_llm_matching": True, + "tlm_evals_model": "tlm_evals_model", "upper_llm_match_distance_threshold": 0, }, description="description", From 00df8ec35d44e5bdc6e68661a92d9d21905222c7 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Fri, 25 Jul 2025 03:08:09 +0000 Subject: [PATCH 14/20] chore(project): add settings file for vscode --- .gitignore | 1 - .vscode/settings.json | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 .vscode/settings.json diff --git a/.gitignore b/.gitignore index 8779740..95ceb18 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,4 @@ .prism.log -.vscode _dev __pycache__ diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..5b01030 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "python.analysis.importFormat": "relative", +} From 2ee809593ddb15c4de776a2048883287ec5c0cdb Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Fri, 25 Jul 2025 21:18:01 +0000 Subject: [PATCH 15/20] feat(api): api update --- .stats.yml | 2 +- src/codex/types/project_validate_params.py | 172 ++++++++--------- .../query_log_list_by_group_response.py | 176 +++++++++--------- .../query_log_list_groups_response.py | 168 ++++++++--------- .../types/projects/query_log_list_response.py | 168 ++++++++--------- .../projects/query_log_retrieve_response.py | 168 ++++++++--------- ...remediation_list_resolved_logs_response.py | 172 ++++++++--------- tests/api_resources/test_projects.py | 36 +++- 8 files changed, 547 insertions(+), 515 deletions(-) diff --git a/.stats.yml b/.stats.yml index de1764d..9fb5140 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,3 +1,3 @@ configured_endpoints: 54 -openapi_spec_hash: 1c6e7d5ed06d72868a57e64381bc473c +openapi_spec_hash: f7b67b502828e6d0ca3944d40d00d89b config_hash: 8f6e5c3b064cbb77569a6bf654954a56 diff --git a/src/codex/types/project_validate_params.py b/src/codex/types/project_validate_params.py index 7b85d06..081dd2a 100644 --- a/src/codex/types/project_validate_params.py +++ b/src/codex/types/project_validate_params.py @@ -29,19 +29,6 @@ 
"ResponseChatCompletionUsageCompletionTokensDetails", "ResponseChatCompletionUsagePromptTokensDetails", "Message", - "MessageChatCompletionDeveloperMessageParam", - "MessageChatCompletionDeveloperMessageParamContentUnionMember1", - "MessageChatCompletionSystemMessageParam", - "MessageChatCompletionSystemMessageParamContentUnionMember1", - "MessageChatCompletionUserMessageParamInput", - "MessageChatCompletionUserMessageParamInputContentUnionMember1", - "MessageChatCompletionUserMessageParamInputContentUnionMember1ChatCompletionContentPartTextParam", - "MessageChatCompletionUserMessageParamInputContentUnionMember1ChatCompletionContentPartImageParam", - "MessageChatCompletionUserMessageParamInputContentUnionMember1ChatCompletionContentPartImageParamImageURL", - "MessageChatCompletionUserMessageParamInputContentUnionMember1ChatCompletionContentPartInputAudioParam", - "MessageChatCompletionUserMessageParamInputContentUnionMember1ChatCompletionContentPartInputAudioParamInputAudio", - "MessageChatCompletionUserMessageParamInputContentUnionMember1File", - "MessageChatCompletionUserMessageParamInputContentUnionMember1FileFile", "MessageChatCompletionAssistantMessageParamInput", "MessageChatCompletionAssistantMessageParamInputAudio", "MessageChatCompletionAssistantMessageParamInputContentUnionMember1", @@ -52,7 +39,20 @@ "MessageChatCompletionAssistantMessageParamInputToolCallFunction", "MessageChatCompletionToolMessageParam", "MessageChatCompletionToolMessageParamContentUnionMember1", + "MessageChatCompletionUserMessageParamInput", + "MessageChatCompletionUserMessageParamInputContentUnionMember1", + "MessageChatCompletionUserMessageParamInputContentUnionMember1ChatCompletionContentPartTextParam", + "MessageChatCompletionUserMessageParamInputContentUnionMember1ChatCompletionContentPartImageParam", + "MessageChatCompletionUserMessageParamInputContentUnionMember1ChatCompletionContentPartImageParamImageURL", + "MessageChatCompletionUserMessageParamInputContentUnionMember1ChatCompletionContentPartInputAudioParam", + "MessageChatCompletionUserMessageParamInputContentUnionMember1ChatCompletionContentPartInputAudioParamInputAudio", + "MessageChatCompletionUserMessageParamInputContentUnionMember1File", + "MessageChatCompletionUserMessageParamInputContentUnionMember1FileFile", + "MessageChatCompletionSystemMessageParam", + "MessageChatCompletionSystemMessageParamContentUnionMember1", "MessageChatCompletionFunctionMessageParam", + "MessageChatCompletionDeveloperMessageParam", + "MessageChatCompletionDeveloperMessageParamContentUnionMember1", "Options", ] @@ -440,32 +440,80 @@ class ResponseChatCompletionTyped(TypedDict, total=False): Response: TypeAlias = Union[str, ResponseChatCompletion] -class MessageChatCompletionDeveloperMessageParamContentUnionMember1(TypedDict, total=False): +class MessageChatCompletionAssistantMessageParamInputAudio(TypedDict, total=False): + id: Required[str] + + +class MessageChatCompletionAssistantMessageParamInputContentUnionMember1ChatCompletionContentPartTextParam( + TypedDict, total=False +): text: Required[str] type: Required[Literal["text"]] -class MessageChatCompletionDeveloperMessageParam(TypedDict, total=False): - content: Required[Union[str, Iterable[MessageChatCompletionDeveloperMessageParamContentUnionMember1]]] +class MessageChatCompletionAssistantMessageParamInputContentUnionMember1ChatCompletionContentPartRefusalParam( + TypedDict, total=False +): + refusal: Required[str] + + type: Required[Literal["refusal"]] - role: Required[Literal["developer"]] + 
+MessageChatCompletionAssistantMessageParamInputContentUnionMember1: TypeAlias = Union[ + MessageChatCompletionAssistantMessageParamInputContentUnionMember1ChatCompletionContentPartTextParam, + MessageChatCompletionAssistantMessageParamInputContentUnionMember1ChatCompletionContentPartRefusalParam, +] + + +class MessageChatCompletionAssistantMessageParamInputFunctionCall(TypedDict, total=False): + arguments: Required[str] + + name: Required[str] + + +class MessageChatCompletionAssistantMessageParamInputToolCallFunction(TypedDict, total=False): + arguments: Required[str] + + name: Required[str] + + +class MessageChatCompletionAssistantMessageParamInputToolCall(TypedDict, total=False): + id: Required[str] + + function: Required[MessageChatCompletionAssistantMessageParamInputToolCallFunction] + + type: Required[Literal["function"]] + + +class MessageChatCompletionAssistantMessageParamInput(TypedDict, total=False): + role: Required[Literal["assistant"]] + + audio: Optional[MessageChatCompletionAssistantMessageParamInputAudio] + + content: Union[str, Iterable[MessageChatCompletionAssistantMessageParamInputContentUnionMember1], None] + + function_call: Optional[MessageChatCompletionAssistantMessageParamInputFunctionCall] name: str + refusal: Optional[str] -class MessageChatCompletionSystemMessageParamContentUnionMember1(TypedDict, total=False): + tool_calls: Iterable[MessageChatCompletionAssistantMessageParamInputToolCall] + + +class MessageChatCompletionToolMessageParamContentUnionMember1(TypedDict, total=False): text: Required[str] type: Required[Literal["text"]] -class MessageChatCompletionSystemMessageParam(TypedDict, total=False): - content: Required[Union[str, Iterable[MessageChatCompletionSystemMessageParamContentUnionMember1]]] +class MessageChatCompletionToolMessageParam(TypedDict, total=False): + content: Required[Union[str, Iterable[MessageChatCompletionToolMessageParamContentUnionMember1]]] - role: Required[Literal["system"]] + role: Required[Literal["tool"]] - name: str + tool_call_id: Required[str] class MessageChatCompletionUserMessageParamInputContentUnionMember1ChatCompletionContentPartTextParam( @@ -542,97 +590,49 @@ class MessageChatCompletionUserMessageParamInput(TypedDict, total=False): name: str -class MessageChatCompletionAssistantMessageParamInputAudio(TypedDict, total=False): - id: Required[str] - - -class MessageChatCompletionAssistantMessageParamInputContentUnionMember1ChatCompletionContentPartTextParam( - TypedDict, total=False -): +class MessageChatCompletionSystemMessageParamContentUnionMember1(TypedDict, total=False): text: Required[str] type: Required[Literal["text"]] -class MessageChatCompletionAssistantMessageParamInputContentUnionMember1ChatCompletionContentPartRefusalParam( - TypedDict, total=False -): - refusal: Required[str] - - type: Required[Literal["refusal"]] - - -MessageChatCompletionAssistantMessageParamInputContentUnionMember1: TypeAlias = Union[ - MessageChatCompletionAssistantMessageParamInputContentUnionMember1ChatCompletionContentPartTextParam, - MessageChatCompletionAssistantMessageParamInputContentUnionMember1ChatCompletionContentPartRefusalParam, -] - +class MessageChatCompletionSystemMessageParam(TypedDict, total=False): + content: Required[Union[str, Iterable[MessageChatCompletionSystemMessageParamContentUnionMember1]]] -class MessageChatCompletionAssistantMessageParamInputFunctionCall(TypedDict, total=False): - arguments: Required[str] + role: Required[Literal["system"]] - name: Required[str] + name: str -class 
MessageChatCompletionAssistantMessageParamInputToolCallFunction(TypedDict, total=False): - arguments: Required[str] +class MessageChatCompletionFunctionMessageParam(TypedDict, total=False): + content: Required[Optional[str]] name: Required[str] - -class MessageChatCompletionAssistantMessageParamInputToolCall(TypedDict, total=False): - id: Required[str] - - function: Required[MessageChatCompletionAssistantMessageParamInputToolCallFunction] - - type: Required[Literal["function"]] - - -class MessageChatCompletionAssistantMessageParamInput(TypedDict, total=False): - role: Required[Literal["assistant"]] - - audio: Optional[MessageChatCompletionAssistantMessageParamInputAudio] - - content: Union[str, Iterable[MessageChatCompletionAssistantMessageParamInputContentUnionMember1], None] - - function_call: Optional[MessageChatCompletionAssistantMessageParamInputFunctionCall] - - name: str - - refusal: Optional[str] - - tool_calls: Iterable[MessageChatCompletionAssistantMessageParamInputToolCall] + role: Required[Literal["function"]] -class MessageChatCompletionToolMessageParamContentUnionMember1(TypedDict, total=False): +class MessageChatCompletionDeveloperMessageParamContentUnionMember1(TypedDict, total=False): text: Required[str] type: Required[Literal["text"]] -class MessageChatCompletionToolMessageParam(TypedDict, total=False): - content: Required[Union[str, Iterable[MessageChatCompletionToolMessageParamContentUnionMember1]]] - - role: Required[Literal["tool"]] - - tool_call_id: Required[str] - - -class MessageChatCompletionFunctionMessageParam(TypedDict, total=False): - content: Required[Optional[str]] +class MessageChatCompletionDeveloperMessageParam(TypedDict, total=False): + content: Required[Union[str, Iterable[MessageChatCompletionDeveloperMessageParamContentUnionMember1]]] - name: Required[str] + role: Required[Literal["developer"]] - role: Required[Literal["function"]] + name: str Message: TypeAlias = Union[ - MessageChatCompletionDeveloperMessageParam, - MessageChatCompletionSystemMessageParam, - MessageChatCompletionUserMessageParamInput, MessageChatCompletionAssistantMessageParamInput, MessageChatCompletionToolMessageParam, + MessageChatCompletionUserMessageParamInput, + MessageChatCompletionSystemMessageParam, MessageChatCompletionFunctionMessageParam, + MessageChatCompletionDeveloperMessageParam, ] diff --git a/src/codex/types/projects/query_log_list_by_group_response.py b/src/codex/types/projects/query_log_list_by_group_response.py index 0ff5909..ae49b95 100644 --- a/src/codex/types/projects/query_log_list_by_group_response.py +++ b/src/codex/types/projects/query_log_list_by_group_response.py @@ -17,19 +17,6 @@ "QueryLogsByGroupQueryLogContext", "QueryLogsByGroupQueryLogDeterministicGuardrailsResults", "QueryLogsByGroupQueryLogMessage", - "QueryLogsByGroupQueryLogMessageChatCompletionDeveloperMessageParam", - "QueryLogsByGroupQueryLogMessageChatCompletionDeveloperMessageParamContentUnionMember1", - "QueryLogsByGroupQueryLogMessageChatCompletionSystemMessageParam", - "QueryLogsByGroupQueryLogMessageChatCompletionSystemMessageParamContentUnionMember1", - "QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutput", - "QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1", - "QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam", - "QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParam", - 
"QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParamImageURL", - "QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParam", - "QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParamInputAudio", - "QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1File", - "QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1FileFile", "QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutput", "QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputAudio", "QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputContentUnionMember1", @@ -40,7 +27,20 @@ "QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputToolCallFunction", "QueryLogsByGroupQueryLogMessageChatCompletionToolMessageParam", "QueryLogsByGroupQueryLogMessageChatCompletionToolMessageParamContentUnionMember1", + "QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutput", + "QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1", + "QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam", + "QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParam", + "QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParamImageURL", + "QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParam", + "QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParamInputAudio", + "QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1File", + "QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1FileFile", + "QueryLogsByGroupQueryLogMessageChatCompletionSystemMessageParam", + "QueryLogsByGroupQueryLogMessageChatCompletionSystemMessageParamContentUnionMember1", "QueryLogsByGroupQueryLogMessageChatCompletionFunctionMessageParam", + "QueryLogsByGroupQueryLogMessageChatCompletionDeveloperMessageParam", + "QueryLogsByGroupQueryLogMessageChatCompletionDeveloperMessageParamContentUnionMember1", ] @@ -93,32 +93,82 @@ class QueryLogsByGroupQueryLogDeterministicGuardrailsResults(BaseModel): matches: Optional[List[str]] = None -class QueryLogsByGroupQueryLogMessageChatCompletionDeveloperMessageParamContentUnionMember1(BaseModel): +class QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputAudio(BaseModel): + id: str + + +class QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam( + BaseModel +): text: str type: Literal["text"] -class QueryLogsByGroupQueryLogMessageChatCompletionDeveloperMessageParam(BaseModel): - content: Union[str, List[QueryLogsByGroupQueryLogMessageChatCompletionDeveloperMessageParamContentUnionMember1]] +class QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartRefusalParam( + BaseModel +): + refusal: str - role: Literal["developer"] + type: Literal["refusal"] + + 
+QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputContentUnionMember1: TypeAlias = Union[ + QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam, + QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartRefusalParam, +] + + +class QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputFunctionCall(BaseModel): + arguments: str + + name: str + + +class QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputToolCallFunction(BaseModel): + arguments: str + + name: str + + +class QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputToolCall(BaseModel): + id: str + + function: QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputToolCallFunction + + type: Literal["function"] + + +class QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutput(BaseModel): + role: Literal["assistant"] + + audio: Optional[QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputAudio] = None + + content: Union[ + str, List[QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputContentUnionMember1], None + ] = None + + function_call: Optional[QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputFunctionCall] = None name: Optional[str] = None + refusal: Optional[str] = None -class QueryLogsByGroupQueryLogMessageChatCompletionSystemMessageParamContentUnionMember1(BaseModel): + tool_calls: Optional[List[QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputToolCall]] = None + + +class QueryLogsByGroupQueryLogMessageChatCompletionToolMessageParamContentUnionMember1(BaseModel): text: str type: Literal["text"] -class QueryLogsByGroupQueryLogMessageChatCompletionSystemMessageParam(BaseModel): - content: Union[str, List[QueryLogsByGroupQueryLogMessageChatCompletionSystemMessageParamContentUnionMember1]] +class QueryLogsByGroupQueryLogMessageChatCompletionToolMessageParam(BaseModel): + content: Union[str, List[QueryLogsByGroupQueryLogMessageChatCompletionToolMessageParamContentUnionMember1]] - role: Literal["system"] + role: Literal["tool"] - name: Optional[str] = None + tool_call_id: str class QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam( @@ -191,99 +241,49 @@ class QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutput(BaseMo name: Optional[str] = None -class QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputAudio(BaseModel): - id: str - - -class QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam( - BaseModel -): +class QueryLogsByGroupQueryLogMessageChatCompletionSystemMessageParamContentUnionMember1(BaseModel): text: str type: Literal["text"] -class QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartRefusalParam( - BaseModel -): - refusal: str - - type: Literal["refusal"] - - -QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputContentUnionMember1: TypeAlias = Union[ - QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam, - QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartRefusalParam, -] - +class 
QueryLogsByGroupQueryLogMessageChatCompletionSystemMessageParam(BaseModel): + content: Union[str, List[QueryLogsByGroupQueryLogMessageChatCompletionSystemMessageParamContentUnionMember1]] -class QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputFunctionCall(BaseModel): - arguments: str + role: Literal["system"] - name: str + name: Optional[str] = None -class QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputToolCallFunction(BaseModel): - arguments: str +class QueryLogsByGroupQueryLogMessageChatCompletionFunctionMessageParam(BaseModel): + content: Optional[str] = None name: str - -class QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputToolCall(BaseModel): - id: str - - function: QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputToolCallFunction - - type: Literal["function"] - - -class QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutput(BaseModel): - role: Literal["assistant"] - - audio: Optional[QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputAudio] = None - - content: Union[ - str, List[QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputContentUnionMember1], None - ] = None - - function_call: Optional[QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputFunctionCall] = None - - name: Optional[str] = None - - refusal: Optional[str] = None - - tool_calls: Optional[List[QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputToolCall]] = None + role: Literal["function"] -class QueryLogsByGroupQueryLogMessageChatCompletionToolMessageParamContentUnionMember1(BaseModel): +class QueryLogsByGroupQueryLogMessageChatCompletionDeveloperMessageParamContentUnionMember1(BaseModel): text: str type: Literal["text"] -class QueryLogsByGroupQueryLogMessageChatCompletionToolMessageParam(BaseModel): - content: Union[str, List[QueryLogsByGroupQueryLogMessageChatCompletionToolMessageParamContentUnionMember1]] - - role: Literal["tool"] - - tool_call_id: str - - -class QueryLogsByGroupQueryLogMessageChatCompletionFunctionMessageParam(BaseModel): - content: Optional[str] = None +class QueryLogsByGroupQueryLogMessageChatCompletionDeveloperMessageParam(BaseModel): + content: Union[str, List[QueryLogsByGroupQueryLogMessageChatCompletionDeveloperMessageParamContentUnionMember1]] - name: str + role: Literal["developer"] - role: Literal["function"] + name: Optional[str] = None QueryLogsByGroupQueryLogMessage: TypeAlias = Union[ - QueryLogsByGroupQueryLogMessageChatCompletionDeveloperMessageParam, - QueryLogsByGroupQueryLogMessageChatCompletionSystemMessageParam, - QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutput, QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutput, QueryLogsByGroupQueryLogMessageChatCompletionToolMessageParam, + QueryLogsByGroupQueryLogMessageChatCompletionUserMessageParamOutput, + QueryLogsByGroupQueryLogMessageChatCompletionSystemMessageParam, QueryLogsByGroupQueryLogMessageChatCompletionFunctionMessageParam, + QueryLogsByGroupQueryLogMessageChatCompletionDeveloperMessageParam, ] diff --git a/src/codex/types/projects/query_log_list_groups_response.py b/src/codex/types/projects/query_log_list_groups_response.py index 495fc56..cc3b208 100644 --- a/src/codex/types/projects/query_log_list_groups_response.py +++ b/src/codex/types/projects/query_log_list_groups_response.py @@ -15,19 +15,6 @@ "Context", "DeterministicGuardrailsResults", "Message", - 
"MessageChatCompletionDeveloperMessageParam", - "MessageChatCompletionDeveloperMessageParamContentUnionMember1", - "MessageChatCompletionSystemMessageParam", - "MessageChatCompletionSystemMessageParamContentUnionMember1", - "MessageChatCompletionUserMessageParamOutput", - "MessageChatCompletionUserMessageParamOutputContentUnionMember1", - "MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam", - "MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParam", - "MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParamImageURL", - "MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParam", - "MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParamInputAudio", - "MessageChatCompletionUserMessageParamOutputContentUnionMember1File", - "MessageChatCompletionUserMessageParamOutputContentUnionMember1FileFile", "MessageChatCompletionAssistantMessageParamOutput", "MessageChatCompletionAssistantMessageParamOutputAudio", "MessageChatCompletionAssistantMessageParamOutputContentUnionMember1", @@ -38,7 +25,20 @@ "MessageChatCompletionAssistantMessageParamOutputToolCallFunction", "MessageChatCompletionToolMessageParam", "MessageChatCompletionToolMessageParamContentUnionMember1", + "MessageChatCompletionUserMessageParamOutput", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParam", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParamImageURL", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParam", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParamInputAudio", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1File", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1FileFile", + "MessageChatCompletionSystemMessageParam", + "MessageChatCompletionSystemMessageParamContentUnionMember1", "MessageChatCompletionFunctionMessageParam", + "MessageChatCompletionDeveloperMessageParam", + "MessageChatCompletionDeveloperMessageParamContentUnionMember1", ] @@ -91,32 +91,78 @@ class DeterministicGuardrailsResults(BaseModel): matches: Optional[List[str]] = None -class MessageChatCompletionDeveloperMessageParamContentUnionMember1(BaseModel): +class MessageChatCompletionAssistantMessageParamOutputAudio(BaseModel): + id: str + + +class MessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam(BaseModel): text: str type: Literal["text"] -class MessageChatCompletionDeveloperMessageParam(BaseModel): - content: Union[str, List[MessageChatCompletionDeveloperMessageParamContentUnionMember1]] +class MessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartRefusalParam( + BaseModel +): + refusal: str - role: Literal["developer"] + type: Literal["refusal"] + + +MessageChatCompletionAssistantMessageParamOutputContentUnionMember1: TypeAlias = Union[ + MessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam, + 
MessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartRefusalParam, +] + + +class MessageChatCompletionAssistantMessageParamOutputFunctionCall(BaseModel): + arguments: str + + name: str + + +class MessageChatCompletionAssistantMessageParamOutputToolCallFunction(BaseModel): + arguments: str + + name: str + + +class MessageChatCompletionAssistantMessageParamOutputToolCall(BaseModel): + id: str + + function: MessageChatCompletionAssistantMessageParamOutputToolCallFunction + + type: Literal["function"] + + +class MessageChatCompletionAssistantMessageParamOutput(BaseModel): + role: Literal["assistant"] + + audio: Optional[MessageChatCompletionAssistantMessageParamOutputAudio] = None + + content: Union[str, List[MessageChatCompletionAssistantMessageParamOutputContentUnionMember1], None] = None + + function_call: Optional[MessageChatCompletionAssistantMessageParamOutputFunctionCall] = None name: Optional[str] = None + refusal: Optional[str] = None -class MessageChatCompletionSystemMessageParamContentUnionMember1(BaseModel): + tool_calls: Optional[List[MessageChatCompletionAssistantMessageParamOutputToolCall]] = None + + +class MessageChatCompletionToolMessageParamContentUnionMember1(BaseModel): text: str type: Literal["text"] -class MessageChatCompletionSystemMessageParam(BaseModel): - content: Union[str, List[MessageChatCompletionSystemMessageParamContentUnionMember1]] +class MessageChatCompletionToolMessageParam(BaseModel): + content: Union[str, List[MessageChatCompletionToolMessageParamContentUnionMember1]] - role: Literal["system"] + role: Literal["tool"] - name: Optional[str] = None + tool_call_id: str class MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam(BaseModel): @@ -185,95 +231,49 @@ class MessageChatCompletionUserMessageParamOutput(BaseModel): name: Optional[str] = None -class MessageChatCompletionAssistantMessageParamOutputAudio(BaseModel): - id: str - - -class MessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam(BaseModel): +class MessageChatCompletionSystemMessageParamContentUnionMember1(BaseModel): text: str type: Literal["text"] -class MessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartRefusalParam( - BaseModel -): - refusal: str - - type: Literal["refusal"] - - -MessageChatCompletionAssistantMessageParamOutputContentUnionMember1: TypeAlias = Union[ - MessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam, - MessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartRefusalParam, -] - +class MessageChatCompletionSystemMessageParam(BaseModel): + content: Union[str, List[MessageChatCompletionSystemMessageParamContentUnionMember1]] -class MessageChatCompletionAssistantMessageParamOutputFunctionCall(BaseModel): - arguments: str + role: Literal["system"] - name: str + name: Optional[str] = None -class MessageChatCompletionAssistantMessageParamOutputToolCallFunction(BaseModel): - arguments: str +class MessageChatCompletionFunctionMessageParam(BaseModel): + content: Optional[str] = None name: str - -class MessageChatCompletionAssistantMessageParamOutputToolCall(BaseModel): - id: str - - function: MessageChatCompletionAssistantMessageParamOutputToolCallFunction - - type: Literal["function"] - - -class MessageChatCompletionAssistantMessageParamOutput(BaseModel): - role: Literal["assistant"] - - audio: 
Optional[MessageChatCompletionAssistantMessageParamOutputAudio] = None - - content: Union[str, List[MessageChatCompletionAssistantMessageParamOutputContentUnionMember1], None] = None - - function_call: Optional[MessageChatCompletionAssistantMessageParamOutputFunctionCall] = None - - name: Optional[str] = None - - refusal: Optional[str] = None - - tool_calls: Optional[List[MessageChatCompletionAssistantMessageParamOutputToolCall]] = None + role: Literal["function"] -class MessageChatCompletionToolMessageParamContentUnionMember1(BaseModel): +class MessageChatCompletionDeveloperMessageParamContentUnionMember1(BaseModel): text: str type: Literal["text"] -class MessageChatCompletionToolMessageParam(BaseModel): - content: Union[str, List[MessageChatCompletionToolMessageParamContentUnionMember1]] - - role: Literal["tool"] - - tool_call_id: str - - -class MessageChatCompletionFunctionMessageParam(BaseModel): - content: Optional[str] = None +class MessageChatCompletionDeveloperMessageParam(BaseModel): + content: Union[str, List[MessageChatCompletionDeveloperMessageParamContentUnionMember1]] - name: str + role: Literal["developer"] - role: Literal["function"] + name: Optional[str] = None Message: TypeAlias = Union[ - MessageChatCompletionDeveloperMessageParam, - MessageChatCompletionSystemMessageParam, - MessageChatCompletionUserMessageParamOutput, MessageChatCompletionAssistantMessageParamOutput, MessageChatCompletionToolMessageParam, + MessageChatCompletionUserMessageParamOutput, + MessageChatCompletionSystemMessageParam, MessageChatCompletionFunctionMessageParam, + MessageChatCompletionDeveloperMessageParam, ] diff --git a/src/codex/types/projects/query_log_list_response.py b/src/codex/types/projects/query_log_list_response.py index 72a8cab..0778898 100644 --- a/src/codex/types/projects/query_log_list_response.py +++ b/src/codex/types/projects/query_log_list_response.py @@ -15,19 +15,6 @@ "Context", "DeterministicGuardrailsResults", "Message", - "MessageChatCompletionDeveloperMessageParam", - "MessageChatCompletionDeveloperMessageParamContentUnionMember1", - "MessageChatCompletionSystemMessageParam", - "MessageChatCompletionSystemMessageParamContentUnionMember1", - "MessageChatCompletionUserMessageParamOutput", - "MessageChatCompletionUserMessageParamOutputContentUnionMember1", - "MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam", - "MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParam", - "MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParamImageURL", - "MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParam", - "MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParamInputAudio", - "MessageChatCompletionUserMessageParamOutputContentUnionMember1File", - "MessageChatCompletionUserMessageParamOutputContentUnionMember1FileFile", "MessageChatCompletionAssistantMessageParamOutput", "MessageChatCompletionAssistantMessageParamOutputAudio", "MessageChatCompletionAssistantMessageParamOutputContentUnionMember1", @@ -38,7 +25,20 @@ "MessageChatCompletionAssistantMessageParamOutputToolCallFunction", "MessageChatCompletionToolMessageParam", "MessageChatCompletionToolMessageParamContentUnionMember1", + "MessageChatCompletionUserMessageParamOutput", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1", + 
"MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParam", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParamImageURL", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParam", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParamInputAudio", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1File", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1FileFile", + "MessageChatCompletionSystemMessageParam", + "MessageChatCompletionSystemMessageParamContentUnionMember1", "MessageChatCompletionFunctionMessageParam", + "MessageChatCompletionDeveloperMessageParam", + "MessageChatCompletionDeveloperMessageParamContentUnionMember1", ] @@ -91,32 +91,78 @@ class DeterministicGuardrailsResults(BaseModel): matches: Optional[List[str]] = None -class MessageChatCompletionDeveloperMessageParamContentUnionMember1(BaseModel): +class MessageChatCompletionAssistantMessageParamOutputAudio(BaseModel): + id: str + + +class MessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam(BaseModel): text: str type: Literal["text"] -class MessageChatCompletionDeveloperMessageParam(BaseModel): - content: Union[str, List[MessageChatCompletionDeveloperMessageParamContentUnionMember1]] +class MessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartRefusalParam( + BaseModel +): + refusal: str - role: Literal["developer"] + type: Literal["refusal"] + + +MessageChatCompletionAssistantMessageParamOutputContentUnionMember1: TypeAlias = Union[ + MessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam, + MessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartRefusalParam, +] + + +class MessageChatCompletionAssistantMessageParamOutputFunctionCall(BaseModel): + arguments: str + + name: str + + +class MessageChatCompletionAssistantMessageParamOutputToolCallFunction(BaseModel): + arguments: str + + name: str + + +class MessageChatCompletionAssistantMessageParamOutputToolCall(BaseModel): + id: str + + function: MessageChatCompletionAssistantMessageParamOutputToolCallFunction + + type: Literal["function"] + + +class MessageChatCompletionAssistantMessageParamOutput(BaseModel): + role: Literal["assistant"] + + audio: Optional[MessageChatCompletionAssistantMessageParamOutputAudio] = None + + content: Union[str, List[MessageChatCompletionAssistantMessageParamOutputContentUnionMember1], None] = None + + function_call: Optional[MessageChatCompletionAssistantMessageParamOutputFunctionCall] = None name: Optional[str] = None + refusal: Optional[str] = None -class MessageChatCompletionSystemMessageParamContentUnionMember1(BaseModel): + tool_calls: Optional[List[MessageChatCompletionAssistantMessageParamOutputToolCall]] = None + + +class MessageChatCompletionToolMessageParamContentUnionMember1(BaseModel): text: str type: Literal["text"] -class MessageChatCompletionSystemMessageParam(BaseModel): - content: Union[str, List[MessageChatCompletionSystemMessageParamContentUnionMember1]] +class MessageChatCompletionToolMessageParam(BaseModel): + content: Union[str, List[MessageChatCompletionToolMessageParamContentUnionMember1]] - role: Literal["system"] + role: Literal["tool"] 
- name: Optional[str] = None + tool_call_id: str class MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam(BaseModel): @@ -185,95 +231,49 @@ class MessageChatCompletionUserMessageParamOutput(BaseModel): name: Optional[str] = None -class MessageChatCompletionAssistantMessageParamOutputAudio(BaseModel): - id: str - - -class MessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam(BaseModel): +class MessageChatCompletionSystemMessageParamContentUnionMember1(BaseModel): text: str type: Literal["text"] -class MessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartRefusalParam( - BaseModel -): - refusal: str - - type: Literal["refusal"] - - -MessageChatCompletionAssistantMessageParamOutputContentUnionMember1: TypeAlias = Union[ - MessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam, - MessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartRefusalParam, -] - +class MessageChatCompletionSystemMessageParam(BaseModel): + content: Union[str, List[MessageChatCompletionSystemMessageParamContentUnionMember1]] -class MessageChatCompletionAssistantMessageParamOutputFunctionCall(BaseModel): - arguments: str + role: Literal["system"] - name: str + name: Optional[str] = None -class MessageChatCompletionAssistantMessageParamOutputToolCallFunction(BaseModel): - arguments: str +class MessageChatCompletionFunctionMessageParam(BaseModel): + content: Optional[str] = None name: str - -class MessageChatCompletionAssistantMessageParamOutputToolCall(BaseModel): - id: str - - function: MessageChatCompletionAssistantMessageParamOutputToolCallFunction - - type: Literal["function"] - - -class MessageChatCompletionAssistantMessageParamOutput(BaseModel): - role: Literal["assistant"] - - audio: Optional[MessageChatCompletionAssistantMessageParamOutputAudio] = None - - content: Union[str, List[MessageChatCompletionAssistantMessageParamOutputContentUnionMember1], None] = None - - function_call: Optional[MessageChatCompletionAssistantMessageParamOutputFunctionCall] = None - - name: Optional[str] = None - - refusal: Optional[str] = None - - tool_calls: Optional[List[MessageChatCompletionAssistantMessageParamOutputToolCall]] = None + role: Literal["function"] -class MessageChatCompletionToolMessageParamContentUnionMember1(BaseModel): +class MessageChatCompletionDeveloperMessageParamContentUnionMember1(BaseModel): text: str type: Literal["text"] -class MessageChatCompletionToolMessageParam(BaseModel): - content: Union[str, List[MessageChatCompletionToolMessageParamContentUnionMember1]] - - role: Literal["tool"] - - tool_call_id: str - - -class MessageChatCompletionFunctionMessageParam(BaseModel): - content: Optional[str] = None +class MessageChatCompletionDeveloperMessageParam(BaseModel): + content: Union[str, List[MessageChatCompletionDeveloperMessageParamContentUnionMember1]] - name: str + role: Literal["developer"] - role: Literal["function"] + name: Optional[str] = None Message: TypeAlias = Union[ - MessageChatCompletionDeveloperMessageParam, - MessageChatCompletionSystemMessageParam, - MessageChatCompletionUserMessageParamOutput, MessageChatCompletionAssistantMessageParamOutput, MessageChatCompletionToolMessageParam, + MessageChatCompletionUserMessageParamOutput, + MessageChatCompletionSystemMessageParam, MessageChatCompletionFunctionMessageParam, + MessageChatCompletionDeveloperMessageParam, ] diff --git 
a/src/codex/types/projects/query_log_retrieve_response.py b/src/codex/types/projects/query_log_retrieve_response.py index 4324269..2751ef2 100644 --- a/src/codex/types/projects/query_log_retrieve_response.py +++ b/src/codex/types/projects/query_log_retrieve_response.py @@ -15,19 +15,6 @@ "Context", "DeterministicGuardrailsResults", "Message", - "MessageChatCompletionDeveloperMessageParam", - "MessageChatCompletionDeveloperMessageParamContentUnionMember1", - "MessageChatCompletionSystemMessageParam", - "MessageChatCompletionSystemMessageParamContentUnionMember1", - "MessageChatCompletionUserMessageParamOutput", - "MessageChatCompletionUserMessageParamOutputContentUnionMember1", - "MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam", - "MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParam", - "MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParamImageURL", - "MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParam", - "MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParamInputAudio", - "MessageChatCompletionUserMessageParamOutputContentUnionMember1File", - "MessageChatCompletionUserMessageParamOutputContentUnionMember1FileFile", "MessageChatCompletionAssistantMessageParamOutput", "MessageChatCompletionAssistantMessageParamOutputAudio", "MessageChatCompletionAssistantMessageParamOutputContentUnionMember1", @@ -38,7 +25,20 @@ "MessageChatCompletionAssistantMessageParamOutputToolCallFunction", "MessageChatCompletionToolMessageParam", "MessageChatCompletionToolMessageParamContentUnionMember1", + "MessageChatCompletionUserMessageParamOutput", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParam", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParamImageURL", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParam", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParamInputAudio", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1File", + "MessageChatCompletionUserMessageParamOutputContentUnionMember1FileFile", + "MessageChatCompletionSystemMessageParam", + "MessageChatCompletionSystemMessageParamContentUnionMember1", "MessageChatCompletionFunctionMessageParam", + "MessageChatCompletionDeveloperMessageParam", + "MessageChatCompletionDeveloperMessageParamContentUnionMember1", ] @@ -91,32 +91,78 @@ class DeterministicGuardrailsResults(BaseModel): matches: Optional[List[str]] = None -class MessageChatCompletionDeveloperMessageParamContentUnionMember1(BaseModel): +class MessageChatCompletionAssistantMessageParamOutputAudio(BaseModel): + id: str + + +class MessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam(BaseModel): text: str type: Literal["text"] -class MessageChatCompletionDeveloperMessageParam(BaseModel): - content: Union[str, List[MessageChatCompletionDeveloperMessageParamContentUnionMember1]] +class MessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartRefusalParam( + BaseModel +): + refusal: str - role: 
Literal["developer"] + type: Literal["refusal"] + + +MessageChatCompletionAssistantMessageParamOutputContentUnionMember1: TypeAlias = Union[ + MessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam, + MessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartRefusalParam, +] + + +class MessageChatCompletionAssistantMessageParamOutputFunctionCall(BaseModel): + arguments: str + + name: str + + +class MessageChatCompletionAssistantMessageParamOutputToolCallFunction(BaseModel): + arguments: str + + name: str + + +class MessageChatCompletionAssistantMessageParamOutputToolCall(BaseModel): + id: str + + function: MessageChatCompletionAssistantMessageParamOutputToolCallFunction + + type: Literal["function"] + + +class MessageChatCompletionAssistantMessageParamOutput(BaseModel): + role: Literal["assistant"] + + audio: Optional[MessageChatCompletionAssistantMessageParamOutputAudio] = None + + content: Union[str, List[MessageChatCompletionAssistantMessageParamOutputContentUnionMember1], None] = None + + function_call: Optional[MessageChatCompletionAssistantMessageParamOutputFunctionCall] = None name: Optional[str] = None + refusal: Optional[str] = None -class MessageChatCompletionSystemMessageParamContentUnionMember1(BaseModel): + tool_calls: Optional[List[MessageChatCompletionAssistantMessageParamOutputToolCall]] = None + + +class MessageChatCompletionToolMessageParamContentUnionMember1(BaseModel): text: str type: Literal["text"] -class MessageChatCompletionSystemMessageParam(BaseModel): - content: Union[str, List[MessageChatCompletionSystemMessageParamContentUnionMember1]] +class MessageChatCompletionToolMessageParam(BaseModel): + content: Union[str, List[MessageChatCompletionToolMessageParamContentUnionMember1]] - role: Literal["system"] + role: Literal["tool"] - name: Optional[str] = None + tool_call_id: str class MessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam(BaseModel): @@ -185,95 +231,49 @@ class MessageChatCompletionUserMessageParamOutput(BaseModel): name: Optional[str] = None -class MessageChatCompletionAssistantMessageParamOutputAudio(BaseModel): - id: str - - -class MessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam(BaseModel): +class MessageChatCompletionSystemMessageParamContentUnionMember1(BaseModel): text: str type: Literal["text"] -class MessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartRefusalParam( - BaseModel -): - refusal: str - - type: Literal["refusal"] - - -MessageChatCompletionAssistantMessageParamOutputContentUnionMember1: TypeAlias = Union[ - MessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam, - MessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartRefusalParam, -] - +class MessageChatCompletionSystemMessageParam(BaseModel): + content: Union[str, List[MessageChatCompletionSystemMessageParamContentUnionMember1]] -class MessageChatCompletionAssistantMessageParamOutputFunctionCall(BaseModel): - arguments: str + role: Literal["system"] - name: str + name: Optional[str] = None -class MessageChatCompletionAssistantMessageParamOutputToolCallFunction(BaseModel): - arguments: str +class MessageChatCompletionFunctionMessageParam(BaseModel): + content: Optional[str] = None name: str - -class MessageChatCompletionAssistantMessageParamOutputToolCall(BaseModel): - id: str - - 
function: MessageChatCompletionAssistantMessageParamOutputToolCallFunction - - type: Literal["function"] - - -class MessageChatCompletionAssistantMessageParamOutput(BaseModel): - role: Literal["assistant"] - - audio: Optional[MessageChatCompletionAssistantMessageParamOutputAudio] = None - - content: Union[str, List[MessageChatCompletionAssistantMessageParamOutputContentUnionMember1], None] = None - - function_call: Optional[MessageChatCompletionAssistantMessageParamOutputFunctionCall] = None - - name: Optional[str] = None - - refusal: Optional[str] = None - - tool_calls: Optional[List[MessageChatCompletionAssistantMessageParamOutputToolCall]] = None + role: Literal["function"] -class MessageChatCompletionToolMessageParamContentUnionMember1(BaseModel): +class MessageChatCompletionDeveloperMessageParamContentUnionMember1(BaseModel): text: str type: Literal["text"] -class MessageChatCompletionToolMessageParam(BaseModel): - content: Union[str, List[MessageChatCompletionToolMessageParamContentUnionMember1]] - - role: Literal["tool"] - - tool_call_id: str - - -class MessageChatCompletionFunctionMessageParam(BaseModel): - content: Optional[str] = None +class MessageChatCompletionDeveloperMessageParam(BaseModel): + content: Union[str, List[MessageChatCompletionDeveloperMessageParamContentUnionMember1]] - name: str + role: Literal["developer"] - role: Literal["function"] + name: Optional[str] = None Message: TypeAlias = Union[ - MessageChatCompletionDeveloperMessageParam, - MessageChatCompletionSystemMessageParam, - MessageChatCompletionUserMessageParamOutput, MessageChatCompletionAssistantMessageParamOutput, MessageChatCompletionToolMessageParam, + MessageChatCompletionUserMessageParamOutput, + MessageChatCompletionSystemMessageParam, MessageChatCompletionFunctionMessageParam, + MessageChatCompletionDeveloperMessageParam, ] diff --git a/src/codex/types/projects/remediation_list_resolved_logs_response.py b/src/codex/types/projects/remediation_list_resolved_logs_response.py index cebfaf4..d56f9a4 100644 --- a/src/codex/types/projects/remediation_list_resolved_logs_response.py +++ b/src/codex/types/projects/remediation_list_resolved_logs_response.py @@ -16,19 +16,6 @@ "QueryLogContext", "QueryLogDeterministicGuardrailsResults", "QueryLogMessage", - "QueryLogMessageChatCompletionDeveloperMessageParam", - "QueryLogMessageChatCompletionDeveloperMessageParamContentUnionMember1", - "QueryLogMessageChatCompletionSystemMessageParam", - "QueryLogMessageChatCompletionSystemMessageParamContentUnionMember1", - "QueryLogMessageChatCompletionUserMessageParamOutput", - "QueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1", - "QueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam", - "QueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParam", - "QueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParamImageURL", - "QueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParam", - "QueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParamInputAudio", - "QueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1File", - "QueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1FileFile", "QueryLogMessageChatCompletionAssistantMessageParamOutput", "QueryLogMessageChatCompletionAssistantMessageParamOutputAudio", 
"QueryLogMessageChatCompletionAssistantMessageParamOutputContentUnionMember1", @@ -39,7 +26,20 @@ "QueryLogMessageChatCompletionAssistantMessageParamOutputToolCallFunction", "QueryLogMessageChatCompletionToolMessageParam", "QueryLogMessageChatCompletionToolMessageParamContentUnionMember1", + "QueryLogMessageChatCompletionUserMessageParamOutput", + "QueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1", + "QueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam", + "QueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParam", + "QueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartImageParamImageURL", + "QueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParam", + "QueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartInputAudioParamInputAudio", + "QueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1File", + "QueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1FileFile", + "QueryLogMessageChatCompletionSystemMessageParam", + "QueryLogMessageChatCompletionSystemMessageParamContentUnionMember1", "QueryLogMessageChatCompletionFunctionMessageParam", + "QueryLogMessageChatCompletionDeveloperMessageParam", + "QueryLogMessageChatCompletionDeveloperMessageParamContentUnionMember1", ] @@ -92,32 +92,80 @@ class QueryLogDeterministicGuardrailsResults(BaseModel): matches: Optional[List[str]] = None -class QueryLogMessageChatCompletionDeveloperMessageParamContentUnionMember1(BaseModel): +class QueryLogMessageChatCompletionAssistantMessageParamOutputAudio(BaseModel): + id: str + + +class QueryLogMessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam( + BaseModel +): text: str type: Literal["text"] -class QueryLogMessageChatCompletionDeveloperMessageParam(BaseModel): - content: Union[str, List[QueryLogMessageChatCompletionDeveloperMessageParamContentUnionMember1]] +class QueryLogMessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartRefusalParam( + BaseModel +): + refusal: str + + type: Literal["refusal"] - role: Literal["developer"] + +QueryLogMessageChatCompletionAssistantMessageParamOutputContentUnionMember1: TypeAlias = Union[ + QueryLogMessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam, + QueryLogMessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartRefusalParam, +] + + +class QueryLogMessageChatCompletionAssistantMessageParamOutputFunctionCall(BaseModel): + arguments: str + + name: str + + +class QueryLogMessageChatCompletionAssistantMessageParamOutputToolCallFunction(BaseModel): + arguments: str + + name: str + + +class QueryLogMessageChatCompletionAssistantMessageParamOutputToolCall(BaseModel): + id: str + + function: QueryLogMessageChatCompletionAssistantMessageParamOutputToolCallFunction + + type: Literal["function"] + + +class QueryLogMessageChatCompletionAssistantMessageParamOutput(BaseModel): + role: Literal["assistant"] + + audio: Optional[QueryLogMessageChatCompletionAssistantMessageParamOutputAudio] = None + + content: Union[str, List[QueryLogMessageChatCompletionAssistantMessageParamOutputContentUnionMember1], None] = None + + function_call: Optional[QueryLogMessageChatCompletionAssistantMessageParamOutputFunctionCall] = None name: 
Optional[str] = None + refusal: Optional[str] = None -class QueryLogMessageChatCompletionSystemMessageParamContentUnionMember1(BaseModel): + tool_calls: Optional[List[QueryLogMessageChatCompletionAssistantMessageParamOutputToolCall]] = None + + +class QueryLogMessageChatCompletionToolMessageParamContentUnionMember1(BaseModel): text: str type: Literal["text"] -class QueryLogMessageChatCompletionSystemMessageParam(BaseModel): - content: Union[str, List[QueryLogMessageChatCompletionSystemMessageParamContentUnionMember1]] +class QueryLogMessageChatCompletionToolMessageParam(BaseModel): + content: Union[str, List[QueryLogMessageChatCompletionToolMessageParamContentUnionMember1]] - role: Literal["system"] + role: Literal["tool"] - name: Optional[str] = None + tool_call_id: str class QueryLogMessageChatCompletionUserMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam( @@ -190,97 +238,49 @@ class QueryLogMessageChatCompletionUserMessageParamOutput(BaseModel): name: Optional[str] = None -class QueryLogMessageChatCompletionAssistantMessageParamOutputAudio(BaseModel): - id: str - - -class QueryLogMessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam( - BaseModel -): +class QueryLogMessageChatCompletionSystemMessageParamContentUnionMember1(BaseModel): text: str type: Literal["text"] -class QueryLogMessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartRefusalParam( - BaseModel -): - refusal: str - - type: Literal["refusal"] - - -QueryLogMessageChatCompletionAssistantMessageParamOutputContentUnionMember1: TypeAlias = Union[ - QueryLogMessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartTextParam, - QueryLogMessageChatCompletionAssistantMessageParamOutputContentUnionMember1ChatCompletionContentPartRefusalParam, -] - +class QueryLogMessageChatCompletionSystemMessageParam(BaseModel): + content: Union[str, List[QueryLogMessageChatCompletionSystemMessageParamContentUnionMember1]] -class QueryLogMessageChatCompletionAssistantMessageParamOutputFunctionCall(BaseModel): - arguments: str + role: Literal["system"] - name: str + name: Optional[str] = None -class QueryLogMessageChatCompletionAssistantMessageParamOutputToolCallFunction(BaseModel): - arguments: str +class QueryLogMessageChatCompletionFunctionMessageParam(BaseModel): + content: Optional[str] = None name: str - -class QueryLogMessageChatCompletionAssistantMessageParamOutputToolCall(BaseModel): - id: str - - function: QueryLogMessageChatCompletionAssistantMessageParamOutputToolCallFunction - - type: Literal["function"] - - -class QueryLogMessageChatCompletionAssistantMessageParamOutput(BaseModel): - role: Literal["assistant"] - - audio: Optional[QueryLogMessageChatCompletionAssistantMessageParamOutputAudio] = None - - content: Union[str, List[QueryLogMessageChatCompletionAssistantMessageParamOutputContentUnionMember1], None] = None - - function_call: Optional[QueryLogMessageChatCompletionAssistantMessageParamOutputFunctionCall] = None - - name: Optional[str] = None - - refusal: Optional[str] = None - - tool_calls: Optional[List[QueryLogMessageChatCompletionAssistantMessageParamOutputToolCall]] = None + role: Literal["function"] -class QueryLogMessageChatCompletionToolMessageParamContentUnionMember1(BaseModel): +class QueryLogMessageChatCompletionDeveloperMessageParamContentUnionMember1(BaseModel): text: str type: Literal["text"] -class QueryLogMessageChatCompletionToolMessageParam(BaseModel): - content: 
Union[str, List[QueryLogMessageChatCompletionToolMessageParamContentUnionMember1]] - - role: Literal["tool"] - - tool_call_id: str - - -class QueryLogMessageChatCompletionFunctionMessageParam(BaseModel): - content: Optional[str] = None +class QueryLogMessageChatCompletionDeveloperMessageParam(BaseModel): + content: Union[str, List[QueryLogMessageChatCompletionDeveloperMessageParamContentUnionMember1]] - name: str + role: Literal["developer"] - role: Literal["function"] + name: Optional[str] = None QueryLogMessage: TypeAlias = Union[ - QueryLogMessageChatCompletionDeveloperMessageParam, - QueryLogMessageChatCompletionSystemMessageParam, - QueryLogMessageChatCompletionUserMessageParamOutput, QueryLogMessageChatCompletionAssistantMessageParamOutput, QueryLogMessageChatCompletionToolMessageParam, + QueryLogMessageChatCompletionUserMessageParamOutput, + QueryLogMessageChatCompletionSystemMessageParam, QueryLogMessageChatCompletionFunctionMessageParam, + QueryLogMessageChatCompletionDeveloperMessageParam, ] diff --git a/tests/api_resources/test_projects.py b/tests/api_resources/test_projects.py index 9312ca0..ae3f4f0 100644 --- a/tests/api_resources/test_projects.py +++ b/tests/api_resources/test_projects.py @@ -594,9 +594,25 @@ def test_method_validate_with_all_params(self, client: Codex) -> None: eval_scores={"foo": 0}, messages=[ { + "role": "assistant", + "audio": {"id": "id"}, "content": "string", - "role": "developer", + "function_call": { + "arguments": "arguments", + "name": "name", + }, "name": "name", + "refusal": "refusal", + "tool_calls": [ + { + "id": "id", + "function": { + "arguments": "arguments", + "name": "name", + }, + "type": "function", + } + ], } ], options={ @@ -1240,9 +1256,25 @@ async def test_method_validate_with_all_params(self, async_client: AsyncCodex) - eval_scores={"foo": 0}, messages=[ { + "role": "assistant", + "audio": {"id": "id"}, "content": "string", - "role": "developer", + "function_call": { + "arguments": "arguments", + "name": "name", + }, "name": "name", + "refusal": "refusal", + "tool_calls": [ + { + "id": "id", + "function": { + "arguments": "arguments", + "name": "name", + }, + "type": "function", + } + ], } ], options={ From 3039fdde263eb1a0da9b733958dcdcf653a4509b Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Fri, 25 Jul 2025 23:18:00 +0000 Subject: [PATCH 16/20] codegen metadata --- .stats.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.stats.yml b/.stats.yml index 9fb5140..19c8465 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,3 +1,3 @@ configured_endpoints: 54 -openapi_spec_hash: f7b67b502828e6d0ca3944d40d00d89b +openapi_spec_hash: 57e29e33aec4bbc20171ec3128594e75 config_hash: 8f6e5c3b064cbb77569a6bf654954a56 From 1a06cfc7c19943ac468b2ec9f2787215363cf77e Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Mon, 28 Jul 2025 17:03:15 +0000 Subject: [PATCH 17/20] feat(api): api update --- .stats.yml | 2 +- src/codex/resources/projects/projects.py | 74 +++++----- src/codex/resources/tlm.py | 128 +++++++++--------- src/codex/types/project_validate_params.py | 58 +++++--- .../query_log_list_by_group_response.py | 46 +++++++ .../query_log_list_groups_response.py | 46 +++++++ .../types/projects/query_log_list_response.py | 46 +++++++ .../projects/query_log_retrieve_response.py | 46 +++++++ ...remediation_list_resolved_logs_response.py | 46 +++++++ src/codex/types/tlm_prompt_params.py | 34 ++--- 
src/codex/types/tlm_score_params.py | 34 ++--- tests/api_resources/test_projects.py | 24 ++++ tests/api_resources/test_tlm.py | 4 + 13 files changed, 443 insertions(+), 145 deletions(-) diff --git a/.stats.yml b/.stats.yml index 19c8465..3fdd5d0 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,3 +1,3 @@ configured_endpoints: 54 -openapi_spec_hash: 57e29e33aec4bbc20171ec3128594e75 +openapi_spec_hash: 49989625bf633c5fdb3e11140f788f2d config_hash: 8f6e5c3b064cbb77569a6bf654954a56 diff --git a/src/codex/resources/projects/projects.py b/src/codex/resources/projects/projects.py index 3a109ed..f82bcd0 100644 --- a/src/codex/resources/projects/projects.py +++ b/src/codex/resources/projects/projects.py @@ -460,6 +460,7 @@ def validate( quality_preset: Literal["best", "high", "medium", "low", "base"] | NotGiven = NOT_GIVEN, rewritten_question: Optional[str] | NotGiven = NOT_GIVEN, task: Optional[str] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[project_validate_params.Tool]] | NotGiven = NOT_GIVEN, x_client_library_version: str | NotGiven = NOT_GIVEN, x_integration_type: str | NotGiven = NOT_GIVEN, x_source: str | NotGiven = NOT_GIVEN, @@ -504,17 +505,16 @@ def validate( The default values corresponding to each quality preset are: - - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8, - `use_self_reflection` = True. This preset improves LLM responses. - - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8, - `use_self_reflection` = True. This preset improves LLM responses. - - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8, - `use_self_reflection` = True. - - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4, - `use_self_reflection` = True. - - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0, - `use_self_reflection` = False. When using `get_trustworthiness_score()` on - "base" preset, a faster self-reflection is employed. + - **best:** `num_consistency_samples` = 8, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **high:** `num_consistency_samples` = 4, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **medium:** `num_consistency_samples` = 0, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **low:** `num_consistency_samples` = 0, `num_self_reflections` = 3, + `reasoning_effort` = `"none"`. + - **base:** `num_consistency_samples` = 0, `num_self_reflections` = 1, + `reasoning_effort` = `"none"`. By default, TLM uses the: "medium" `quality_preset`, "gpt-4.1-mini" base `model`, and `max_tokens` is set to 512. You can set custom values for these @@ -550,12 +550,11 @@ def validate( strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. TLM measures consistency via the degree of contradiction between sampled responses that the model considers plausible. - use_self_reflection (bool, default = `True`): whether the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. - Setting this False disables reflection and will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. - Reflection helps quantify aleatoric uncertainty associated with challenging prompts - and catches responses that are noticeably incorrect/bad upon further analysis. + num_self_reflections(int, default = 3): the number of self-reflections to perform where the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. 
+ The maximum number of self-reflections currently supported is 3. Lower values will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. + Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. - similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "semantic"): how the + similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "discrepancy"): how the trustworthiness scoring's consistency algorithm measures similarity between alternative responses considered plausible by the model. Supported similarity measures include - "semantic" (based on natural language inference), "embedding" (based on vector embedding similarity), "embedding_large" (based on a larger embedding model), @@ -574,6 +573,8 @@ def validate( - name: Name of the evaluation criteria. - criteria: Instructions specifying the evaluation criteria. + use_self_reflection (bool, default = `True`): deprecated. Use `num_self_reflections` instead. + prompt: The prompt to use for the TLM call. If not provided, the prompt will be generated from the messages. @@ -582,6 +583,9 @@ def validate( rewritten_question: The re-written query if it was provided by the client to Codex from a user to be used instead of the original query. + tools: Tools to use for the LLM call. If not provided, it is assumed no tools were + provided to the LLM. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -620,6 +624,7 @@ def validate( "quality_preset": quality_preset, "rewritten_question": rewritten_question, "task": task, + "tools": tools, }, project_validate_params.ProjectValidateParams, ), @@ -1028,6 +1033,7 @@ async def validate( quality_preset: Literal["best", "high", "medium", "low", "base"] | NotGiven = NOT_GIVEN, rewritten_question: Optional[str] | NotGiven = NOT_GIVEN, task: Optional[str] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[project_validate_params.Tool]] | NotGiven = NOT_GIVEN, x_client_library_version: str | NotGiven = NOT_GIVEN, x_integration_type: str | NotGiven = NOT_GIVEN, x_source: str | NotGiven = NOT_GIVEN, @@ -1072,17 +1078,16 @@ async def validate( The default values corresponding to each quality preset are: - - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8, - `use_self_reflection` = True. This preset improves LLM responses. - - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8, - `use_self_reflection` = True. This preset improves LLM responses. - - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8, - `use_self_reflection` = True. - - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4, - `use_self_reflection` = True. - - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0, - `use_self_reflection` = False. When using `get_trustworthiness_score()` on - "base" preset, a faster self-reflection is employed. + - **best:** `num_consistency_samples` = 8, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **high:** `num_consistency_samples` = 4, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **medium:** `num_consistency_samples` = 0, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **low:** `num_consistency_samples` = 0, `num_self_reflections` = 3, + `reasoning_effort` = `"none"`. 
+ - **base:** `num_consistency_samples` = 0, `num_self_reflections` = 1, + `reasoning_effort` = `"none"`. By default, TLM uses the: "medium" `quality_preset`, "gpt-4.1-mini" base `model`, and `max_tokens` is set to 512. You can set custom values for these @@ -1118,12 +1123,11 @@ async def validate( strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. TLM measures consistency via the degree of contradiction between sampled responses that the model considers plausible. - use_self_reflection (bool, default = `True`): whether the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. - Setting this False disables reflection and will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. - Reflection helps quantify aleatoric uncertainty associated with challenging prompts - and catches responses that are noticeably incorrect/bad upon further analysis. + num_self_reflections(int, default = 3): the number of self-reflections to perform where the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. + The maximum number of self-reflections currently supported is 3. Lower values will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. + Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. - similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "semantic"): how the + similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "discrepancy"): how the trustworthiness scoring's consistency algorithm measures similarity between alternative responses considered plausible by the model. Supported similarity measures include - "semantic" (based on natural language inference), "embedding" (based on vector embedding similarity), "embedding_large" (based on a larger embedding model), @@ -1142,6 +1146,8 @@ async def validate( - name: Name of the evaluation criteria. - criteria: Instructions specifying the evaluation criteria. + use_self_reflection (bool, default = `True`): deprecated. Use `num_self_reflections` instead. + prompt: The prompt to use for the TLM call. If not provided, the prompt will be generated from the messages. @@ -1150,6 +1156,9 @@ async def validate( rewritten_question: The re-written query if it was provided by the client to Codex from a user to be used instead of the original query. + tools: Tools to use for the LLM call. If not provided, it is assumed no tools were + provided to the LLM. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -1188,6 +1197,7 @@ async def validate( "quality_preset": quality_preset, "rewritten_question": rewritten_question, "task": task, + "tools": tools, }, project_validate_params.ProjectValidateParams, ), diff --git a/src/codex/resources/tlm.py b/src/codex/resources/tlm.py index 12ff6c0..c6064ed 100644 --- a/src/codex/resources/tlm.py +++ b/src/codex/resources/tlm.py @@ -79,17 +79,16 @@ def prompt( The default values corresponding to each quality preset are: - - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8, - `use_self_reflection` = True. This preset improves LLM responses. - - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8, - `use_self_reflection` = True. 
This preset improves LLM responses. - - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8, - `use_self_reflection` = True. - - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4, - `use_self_reflection` = True. - - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0, - `use_self_reflection` = False. When using `get_trustworthiness_score()` on - "base" preset, a faster self-reflection is employed. + - **best:** `num_consistency_samples` = 8, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **high:** `num_consistency_samples` = 4, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **medium:** `num_consistency_samples` = 0, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **low:** `num_consistency_samples` = 0, `num_self_reflections` = 3, + `reasoning_effort` = `"none"`. + - **base:** `num_consistency_samples` = 0, `num_self_reflections` = 1, + `reasoning_effort` = `"none"`. By default, TLM uses the: "medium" `quality_preset`, "gpt-4.1-mini" base `model`, and `max_tokens` is set to 512. You can set custom values for these @@ -125,12 +124,11 @@ def prompt( strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. TLM measures consistency via the degree of contradiction between sampled responses that the model considers plausible. - use_self_reflection (bool, default = `True`): whether the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. - Setting this False disables reflection and will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. - Reflection helps quantify aleatoric uncertainty associated with challenging prompts - and catches responses that are noticeably incorrect/bad upon further analysis. + num_self_reflections(int, default = 3): the number of self-reflections to perform where the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. + The maximum number of self-reflections currently supported is 3. Lower values will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. + Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. - similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "semantic"): how the + similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "discrepancy"): how the trustworthiness scoring's consistency algorithm measures similarity between alternative responses considered plausible by the model. Supported similarity measures include - "semantic" (based on natural language inference), "embedding" (based on vector embedding similarity), "embedding_large" (based on a larger embedding model), @@ -149,6 +147,8 @@ def prompt( - name: Name of the evaluation criteria. - criteria: Instructions specifying the evaluation criteria. + use_self_reflection (bool, default = `True`): deprecated. Use `num_self_reflections` instead. + quality_preset: The quality preset to use for the TLM or Trustworthy RAG API. extra_headers: Send extra headers @@ -217,17 +217,16 @@ def score( The default values corresponding to each quality preset are: - - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8, - `use_self_reflection` = True. 
This preset improves LLM responses. - - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8, - `use_self_reflection` = True. This preset improves LLM responses. - - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8, - `use_self_reflection` = True. - - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4, - `use_self_reflection` = True. - - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0, - `use_self_reflection` = False. When using `get_trustworthiness_score()` on - "base" preset, a faster self-reflection is employed. + - **best:** `num_consistency_samples` = 8, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **high:** `num_consistency_samples` = 4, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **medium:** `num_consistency_samples` = 0, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **low:** `num_consistency_samples` = 0, `num_self_reflections` = 3, + `reasoning_effort` = `"none"`. + - **base:** `num_consistency_samples` = 0, `num_self_reflections` = 1, + `reasoning_effort` = `"none"`. By default, TLM uses the: "medium" `quality_preset`, "gpt-4.1-mini" base `model`, and `max_tokens` is set to 512. You can set custom values for these @@ -263,12 +262,11 @@ def score( strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. TLM measures consistency via the degree of contradiction between sampled responses that the model considers plausible. - use_self_reflection (bool, default = `True`): whether the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. - Setting this False disables reflection and will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. - Reflection helps quantify aleatoric uncertainty associated with challenging prompts - and catches responses that are noticeably incorrect/bad upon further analysis. + num_self_reflections(int, default = 3): the number of self-reflections to perform where the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. + The maximum number of self-reflections currently supported is 3. Lower values will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. + Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. - similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "semantic"): how the + similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "discrepancy"): how the trustworthiness scoring's consistency algorithm measures similarity between alternative responses considered plausible by the model. Supported similarity measures include - "semantic" (based on natural language inference), "embedding" (based on vector embedding similarity), "embedding_large" (based on a larger embedding model), @@ -287,6 +285,8 @@ def score( - name: Name of the evaluation criteria. - criteria: Instructions specifying the evaluation criteria. + use_self_reflection (bool, default = `True`): deprecated. Use `num_self_reflections` instead. + quality_preset: The quality preset to use for the TLM or Trustworthy RAG API. 
extra_headers: Send extra headers @@ -371,17 +371,16 @@ async def prompt( The default values corresponding to each quality preset are: - - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8, - `use_self_reflection` = True. This preset improves LLM responses. - - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8, - `use_self_reflection` = True. This preset improves LLM responses. - - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8, - `use_self_reflection` = True. - - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4, - `use_self_reflection` = True. - - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0, - `use_self_reflection` = False. When using `get_trustworthiness_score()` on - "base" preset, a faster self-reflection is employed. + - **best:** `num_consistency_samples` = 8, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **high:** `num_consistency_samples` = 4, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **medium:** `num_consistency_samples` = 0, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **low:** `num_consistency_samples` = 0, `num_self_reflections` = 3, + `reasoning_effort` = `"none"`. + - **base:** `num_consistency_samples` = 0, `num_self_reflections` = 1, + `reasoning_effort` = `"none"`. By default, TLM uses the: "medium" `quality_preset`, "gpt-4.1-mini" base `model`, and `max_tokens` is set to 512. You can set custom values for these @@ -417,12 +416,11 @@ async def prompt( strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. TLM measures consistency via the degree of contradiction between sampled responses that the model considers plausible. - use_self_reflection (bool, default = `True`): whether the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. - Setting this False disables reflection and will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. - Reflection helps quantify aleatoric uncertainty associated with challenging prompts - and catches responses that are noticeably incorrect/bad upon further analysis. + num_self_reflections(int, default = 3): the number of self-reflections to perform where the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. + The maximum number of self-reflections currently supported is 3. Lower values will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. + Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. - similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "semantic"): how the + similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "discrepancy"): how the trustworthiness scoring's consistency algorithm measures similarity between alternative responses considered plausible by the model. Supported similarity measures include - "semantic" (based on natural language inference), "embedding" (based on vector embedding similarity), "embedding_large" (based on a larger embedding model), @@ -441,6 +439,8 @@ async def prompt( - name: Name of the evaluation criteria. - criteria: Instructions specifying the evaluation criteria. 
+ use_self_reflection (bool, default = `True`): deprecated. Use `num_self_reflections` instead. + quality_preset: The quality preset to use for the TLM or Trustworthy RAG API. extra_headers: Send extra headers @@ -509,17 +509,16 @@ async def score( The default values corresponding to each quality preset are: - - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8, - `use_self_reflection` = True. This preset improves LLM responses. - - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8, - `use_self_reflection` = True. This preset improves LLM responses. - - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8, - `use_self_reflection` = True. - - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4, - `use_self_reflection` = True. - - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0, - `use_self_reflection` = False. When using `get_trustworthiness_score()` on - "base" preset, a faster self-reflection is employed. + - **best:** `num_consistency_samples` = 8, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **high:** `num_consistency_samples` = 4, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **medium:** `num_consistency_samples` = 0, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **low:** `num_consistency_samples` = 0, `num_self_reflections` = 3, + `reasoning_effort` = `"none"`. + - **base:** `num_consistency_samples` = 0, `num_self_reflections` = 1, + `reasoning_effort` = `"none"`. By default, TLM uses the: "medium" `quality_preset`, "gpt-4.1-mini" base `model`, and `max_tokens` is set to 512. You can set custom values for these @@ -555,12 +554,11 @@ async def score( strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. TLM measures consistency via the degree of contradiction between sampled responses that the model considers plausible. - use_self_reflection (bool, default = `True`): whether the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. - Setting this False disables reflection and will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. - Reflection helps quantify aleatoric uncertainty associated with challenging prompts - and catches responses that are noticeably incorrect/bad upon further analysis. + num_self_reflections(int, default = 3): the number of self-reflections to perform where the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. + The maximum number of self-reflections currently supported is 3. Lower values will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. + Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. - similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "semantic"): how the + similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "discrepancy"): how the trustworthiness scoring's consistency algorithm measures similarity between alternative responses considered plausible by the model. 
Supported similarity measures include - "semantic" (based on natural language inference), "embedding" (based on vector embedding similarity), "embedding_large" (based on a larger embedding model), @@ -579,6 +577,8 @@ async def score( - name: Name of the evaluation criteria. - criteria: Instructions specifying the evaluation criteria. + use_self_reflection (bool, default = `True`): deprecated. Use `num_self_reflections` instead. + quality_preset: The quality preset to use for the TLM or Trustworthy RAG API. extra_headers: Send extra headers diff --git a/src/codex/types/project_validate_params.py b/src/codex/types/project_validate_params.py index 081dd2a..6231367 100644 --- a/src/codex/types/project_validate_params.py +++ b/src/codex/types/project_validate_params.py @@ -54,6 +54,8 @@ "MessageChatCompletionDeveloperMessageParam", "MessageChatCompletionDeveloperMessageParamContentUnionMember1", "Options", + "Tool", + "ToolFunction", ] @@ -106,17 +108,16 @@ class ProjectValidateParams(TypedDict, total=False): The default values corresponding to each quality preset are: - - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8, - `use_self_reflection` = True. This preset improves LLM responses. - - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8, - `use_self_reflection` = True. This preset improves LLM responses. - - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8, - `use_self_reflection` = True. - - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4, - `use_self_reflection` = True. - - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0, - `use_self_reflection` = False. When using `get_trustworthiness_score()` on - "base" preset, a faster self-reflection is employed. + - **best:** `num_consistency_samples` = 8, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **high:** `num_consistency_samples` = 4, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **medium:** `num_consistency_samples` = 0, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **low:** `num_consistency_samples` = 0, `num_self_reflections` = 3, + `reasoning_effort` = `"none"`. + - **base:** `num_consistency_samples` = 0, `num_self_reflections` = 1, + `reasoning_effort` = `"none"`. By default, TLM uses the: "medium" `quality_preset`, "gpt-4.1-mini" base `model`, and `max_tokens` is set to 512. You can set custom values for these @@ -152,12 +153,11 @@ class ProjectValidateParams(TypedDict, total=False): strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. TLM measures consistency via the degree of contradiction between sampled responses that the model considers plausible. - use_self_reflection (bool, default = `True`): whether the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. - Setting this False disables reflection and will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. - Reflection helps quantify aleatoric uncertainty associated with challenging prompts - and catches responses that are noticeably incorrect/bad upon further analysis. + num_self_reflections(int, default = 3): the number of self-reflections to perform where the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. + The maximum number of self-reflections currently supported is 3. 
Lower values will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. + Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. - similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "semantic"): how the + similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "discrepancy"): how the trustworthiness scoring's consistency algorithm measures similarity between alternative responses considered plausible by the model. Supported similarity measures include - "semantic" (based on natural language inference), "embedding" (based on vector embedding similarity), "embedding_large" (based on a larger embedding model), @@ -175,6 +175,8 @@ class ProjectValidateParams(TypedDict, total=False): The expected input format is a list of dictionaries, where each dictionary has the following keys: - name: Name of the evaluation criteria. - criteria: Instructions specifying the evaluation criteria. + + use_self_reflection (bool, default = `True`): deprecated. Use `num_self_reflections` instead. """ prompt: Optional[str] @@ -194,6 +196,12 @@ class ProjectValidateParams(TypedDict, total=False): task: Optional[str] + tools: Optional[Iterable[Tool]] + """Tools to use for the LLM call. + + If not provided, it is assumed no tools were provided to the LLM. + """ + x_client_library_version: Annotated[str, PropertyInfo(alias="x-client-library-version")] x_integration_type: Annotated[str, PropertyInfo(alias="x-integration-type")] @@ -649,8 +657,26 @@ class Options(TypedDict, total=False): num_consistency_samples: int + num_self_reflections: int + reasoning_effort: str similarity_measure: str use_self_reflection: bool + + +class ToolFunction(TypedDict, total=False): + name: Required[str] + + description: str + + parameters: object + + strict: Optional[bool] + + +class Tool(TypedDict, total=False): + function: Required[ToolFunction] + + type: Required[Literal["function"]] diff --git a/src/codex/types/projects/query_log_list_by_group_response.py b/src/codex/types/projects/query_log_list_by_group_response.py index ae49b95..b3c774b 100644 --- a/src/codex/types/projects/query_log_list_by_group_response.py +++ b/src/codex/types/projects/query_log_list_by_group_response.py @@ -16,6 +16,8 @@ "QueryLogsByGroupQueryLogFormattedNonGuardrailEvalScores", "QueryLogsByGroupQueryLogContext", "QueryLogsByGroupQueryLogDeterministicGuardrailsResults", + "QueryLogsByGroupQueryLogEvaluatedResponseToolCall", + "QueryLogsByGroupQueryLogEvaluatedResponseToolCallFunction", "QueryLogsByGroupQueryLogMessage", "QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutput", "QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputAudio", @@ -41,6 +43,8 @@ "QueryLogsByGroupQueryLogMessageChatCompletionFunctionMessageParam", "QueryLogsByGroupQueryLogMessageChatCompletionDeveloperMessageParam", "QueryLogsByGroupQueryLogMessageChatCompletionDeveloperMessageParamContentUnionMember1", + "QueryLogsByGroupQueryLogTool", + "QueryLogsByGroupQueryLogToolFunction", ] @@ -93,6 +97,20 @@ class QueryLogsByGroupQueryLogDeterministicGuardrailsResults(BaseModel): matches: Optional[List[str]] = None +class QueryLogsByGroupQueryLogEvaluatedResponseToolCallFunction(BaseModel): + arguments: str + + name: str + + +class QueryLogsByGroupQueryLogEvaluatedResponseToolCall(BaseModel): + id: str + + 
function: QueryLogsByGroupQueryLogEvaluatedResponseToolCallFunction + + type: Literal["function"] + + class QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputAudio(BaseModel): id: str @@ -287,6 +305,22 @@ class QueryLogsByGroupQueryLogMessageChatCompletionDeveloperMessageParam(BaseMod ] +class QueryLogsByGroupQueryLogToolFunction(BaseModel): + name: str + + description: Optional[str] = None + + parameters: Optional[object] = None + + strict: Optional[bool] = None + + +class QueryLogsByGroupQueryLogTool(BaseModel): + function: QueryLogsByGroupQueryLogToolFunction + + type: Literal["function"] + + class QueryLogsByGroupQueryLog(BaseModel): id: str @@ -357,6 +391,12 @@ class QueryLogsByGroupQueryLog(BaseModel): evaluated_response: Optional[str] = None """The response being evaluated from the RAG system (before any remediation)""" + evaluated_response_tool_calls: Optional[List[QueryLogsByGroupQueryLogEvaluatedResponseToolCall]] = None + """Tool calls from the evaluated response, if any. + + Used to log tool calls in the query log. + """ + guardrail_evals: Optional[List[str]] = None """Evals that should trigger guardrail""" @@ -383,6 +423,12 @@ class QueryLogsByGroupQueryLog(BaseModel): primary_eval_issue_score: Optional[float] = None """Score of the primary eval issue""" + tools: Optional[List[QueryLogsByGroupQueryLogTool]] = None + """Tools to use for the LLM call. + + If not provided, it is assumed no tools were provided to the LLM. + """ + class QueryLogsByGroup(BaseModel): query_logs: List[QueryLogsByGroupQueryLog] diff --git a/src/codex/types/projects/query_log_list_groups_response.py b/src/codex/types/projects/query_log_list_groups_response.py index cc3b208..6ed4d14 100644 --- a/src/codex/types/projects/query_log_list_groups_response.py +++ b/src/codex/types/projects/query_log_list_groups_response.py @@ -14,6 +14,8 @@ "FormattedNonGuardrailEvalScores", "Context", "DeterministicGuardrailsResults", + "EvaluatedResponseToolCall", + "EvaluatedResponseToolCallFunction", "Message", "MessageChatCompletionAssistantMessageParamOutput", "MessageChatCompletionAssistantMessageParamOutputAudio", @@ -39,6 +41,8 @@ "MessageChatCompletionFunctionMessageParam", "MessageChatCompletionDeveloperMessageParam", "MessageChatCompletionDeveloperMessageParamContentUnionMember1", + "Tool", + "ToolFunction", ] @@ -91,6 +95,20 @@ class DeterministicGuardrailsResults(BaseModel): matches: Optional[List[str]] = None +class EvaluatedResponseToolCallFunction(BaseModel): + arguments: str + + name: str + + +class EvaluatedResponseToolCall(BaseModel): + id: str + + function: EvaluatedResponseToolCallFunction + + type: Literal["function"] + + class MessageChatCompletionAssistantMessageParamOutputAudio(BaseModel): id: str @@ -277,6 +295,22 @@ class MessageChatCompletionDeveloperMessageParam(BaseModel): ] +class ToolFunction(BaseModel): + name: str + + description: Optional[str] = None + + parameters: Optional[object] = None + + strict: Optional[bool] = None + + +class Tool(BaseModel): + function: ToolFunction + + type: Literal["function"] + + class QueryLogListGroupsResponse(BaseModel): id: str @@ -347,6 +381,12 @@ class QueryLogListGroupsResponse(BaseModel): evaluated_response: Optional[str] = None """The response being evaluated from the RAG system (before any remediation)""" + evaluated_response_tool_calls: Optional[List[EvaluatedResponseToolCall]] = None + """Tool calls from the evaluated response, if any. + + Used to log tool calls in the query log. 
+ """ + guardrail_evals: Optional[List[str]] = None """Evals that should trigger guardrail""" @@ -372,3 +412,9 @@ class QueryLogListGroupsResponse(BaseModel): primary_eval_issue_score: Optional[float] = None """Score of the primary eval issue""" + + tools: Optional[List[Tool]] = None + """Tools to use for the LLM call. + + If not provided, it is assumed no tools were provided to the LLM. + """ diff --git a/src/codex/types/projects/query_log_list_response.py b/src/codex/types/projects/query_log_list_response.py index 0778898..c6737b2 100644 --- a/src/codex/types/projects/query_log_list_response.py +++ b/src/codex/types/projects/query_log_list_response.py @@ -14,6 +14,8 @@ "FormattedNonGuardrailEvalScores", "Context", "DeterministicGuardrailsResults", + "EvaluatedResponseToolCall", + "EvaluatedResponseToolCallFunction", "Message", "MessageChatCompletionAssistantMessageParamOutput", "MessageChatCompletionAssistantMessageParamOutputAudio", @@ -39,6 +41,8 @@ "MessageChatCompletionFunctionMessageParam", "MessageChatCompletionDeveloperMessageParam", "MessageChatCompletionDeveloperMessageParamContentUnionMember1", + "Tool", + "ToolFunction", ] @@ -91,6 +95,20 @@ class DeterministicGuardrailsResults(BaseModel): matches: Optional[List[str]] = None +class EvaluatedResponseToolCallFunction(BaseModel): + arguments: str + + name: str + + +class EvaluatedResponseToolCall(BaseModel): + id: str + + function: EvaluatedResponseToolCallFunction + + type: Literal["function"] + + class MessageChatCompletionAssistantMessageParamOutputAudio(BaseModel): id: str @@ -277,6 +295,22 @@ class MessageChatCompletionDeveloperMessageParam(BaseModel): ] +class ToolFunction(BaseModel): + name: str + + description: Optional[str] = None + + parameters: Optional[object] = None + + strict: Optional[bool] = None + + +class Tool(BaseModel): + function: ToolFunction + + type: Literal["function"] + + class QueryLogListResponse(BaseModel): id: str @@ -341,6 +375,12 @@ class QueryLogListResponse(BaseModel): evaluated_response: Optional[str] = None """The response being evaluated from the RAG system (before any remediation)""" + evaluated_response_tool_calls: Optional[List[EvaluatedResponseToolCall]] = None + """Tool calls from the evaluated response, if any. + + Used to log tool calls in the query log. + """ + guardrail_evals: Optional[List[str]] = None """Evals that should trigger guardrail""" @@ -366,3 +406,9 @@ class QueryLogListResponse(BaseModel): primary_eval_issue_score: Optional[float] = None """Score of the primary eval issue""" + + tools: Optional[List[Tool]] = None + """Tools to use for the LLM call. + + If not provided, it is assumed no tools were provided to the LLM. 
+ """ diff --git a/src/codex/types/projects/query_log_retrieve_response.py b/src/codex/types/projects/query_log_retrieve_response.py index 2751ef2..8fd8662 100644 --- a/src/codex/types/projects/query_log_retrieve_response.py +++ b/src/codex/types/projects/query_log_retrieve_response.py @@ -14,6 +14,8 @@ "FormattedNonGuardrailEvalScores", "Context", "DeterministicGuardrailsResults", + "EvaluatedResponseToolCall", + "EvaluatedResponseToolCallFunction", "Message", "MessageChatCompletionAssistantMessageParamOutput", "MessageChatCompletionAssistantMessageParamOutputAudio", @@ -39,6 +41,8 @@ "MessageChatCompletionFunctionMessageParam", "MessageChatCompletionDeveloperMessageParam", "MessageChatCompletionDeveloperMessageParamContentUnionMember1", + "Tool", + "ToolFunction", ] @@ -91,6 +95,20 @@ class DeterministicGuardrailsResults(BaseModel): matches: Optional[List[str]] = None +class EvaluatedResponseToolCallFunction(BaseModel): + arguments: str + + name: str + + +class EvaluatedResponseToolCall(BaseModel): + id: str + + function: EvaluatedResponseToolCallFunction + + type: Literal["function"] + + class MessageChatCompletionAssistantMessageParamOutputAudio(BaseModel): id: str @@ -277,6 +295,22 @@ class MessageChatCompletionDeveloperMessageParam(BaseModel): ] +class ToolFunction(BaseModel): + name: str + + description: Optional[str] = None + + parameters: Optional[object] = None + + strict: Optional[bool] = None + + +class Tool(BaseModel): + function: ToolFunction + + type: Literal["function"] + + class QueryLogRetrieveResponse(BaseModel): id: str @@ -345,6 +379,12 @@ class QueryLogRetrieveResponse(BaseModel): evaluated_response: Optional[str] = None """The response being evaluated from the RAG system (before any remediation)""" + evaluated_response_tool_calls: Optional[List[EvaluatedResponseToolCall]] = None + """Tool calls from the evaluated response, if any. + + Used to log tool calls in the query log. + """ + guardrail_evals: Optional[List[str]] = None """Evals that should trigger guardrail""" @@ -370,3 +410,9 @@ class QueryLogRetrieveResponse(BaseModel): primary_eval_issue_score: Optional[float] = None """Score of the primary eval issue""" + + tools: Optional[List[Tool]] = None + """Tools to use for the LLM call. + + If not provided, it is assumed no tools were provided to the LLM. 
+ """ diff --git a/src/codex/types/projects/remediation_list_resolved_logs_response.py b/src/codex/types/projects/remediation_list_resolved_logs_response.py index d56f9a4..567a086 100644 --- a/src/codex/types/projects/remediation_list_resolved_logs_response.py +++ b/src/codex/types/projects/remediation_list_resolved_logs_response.py @@ -15,6 +15,8 @@ "QueryLogFormattedNonGuardrailEvalScores", "QueryLogContext", "QueryLogDeterministicGuardrailsResults", + "QueryLogEvaluatedResponseToolCall", + "QueryLogEvaluatedResponseToolCallFunction", "QueryLogMessage", "QueryLogMessageChatCompletionAssistantMessageParamOutput", "QueryLogMessageChatCompletionAssistantMessageParamOutputAudio", @@ -40,6 +42,8 @@ "QueryLogMessageChatCompletionFunctionMessageParam", "QueryLogMessageChatCompletionDeveloperMessageParam", "QueryLogMessageChatCompletionDeveloperMessageParamContentUnionMember1", + "QueryLogTool", + "QueryLogToolFunction", ] @@ -92,6 +96,20 @@ class QueryLogDeterministicGuardrailsResults(BaseModel): matches: Optional[List[str]] = None +class QueryLogEvaluatedResponseToolCallFunction(BaseModel): + arguments: str + + name: str + + +class QueryLogEvaluatedResponseToolCall(BaseModel): + id: str + + function: QueryLogEvaluatedResponseToolCallFunction + + type: Literal["function"] + + class QueryLogMessageChatCompletionAssistantMessageParamOutputAudio(BaseModel): id: str @@ -284,6 +302,22 @@ class QueryLogMessageChatCompletionDeveloperMessageParam(BaseModel): ] +class QueryLogToolFunction(BaseModel): + name: str + + description: Optional[str] = None + + parameters: Optional[object] = None + + strict: Optional[bool] = None + + +class QueryLogTool(BaseModel): + function: QueryLogToolFunction + + type: Literal["function"] + + class QueryLog(BaseModel): id: str @@ -348,6 +382,12 @@ class QueryLog(BaseModel): evaluated_response: Optional[str] = None """The response being evaluated from the RAG system (before any remediation)""" + evaluated_response_tool_calls: Optional[List[QueryLogEvaluatedResponseToolCall]] = None + """Tool calls from the evaluated response, if any. + + Used to log tool calls in the query log. + """ + guardrail_evals: Optional[List[str]] = None """Evals that should trigger guardrail""" @@ -374,6 +414,12 @@ class QueryLog(BaseModel): primary_eval_issue_score: Optional[float] = None """Score of the primary eval issue""" + tools: Optional[List[QueryLogTool]] = None + """Tools to use for the LLM call. + + If not provided, it is assumed no tools were provided to the LLM. + """ + class RemediationListResolvedLogsResponse(BaseModel): query_logs: List[QueryLog] diff --git a/src/codex/types/tlm_prompt_params.py b/src/codex/types/tlm_prompt_params.py index 3c04bfc..8749c5a 100644 --- a/src/codex/types/tlm_prompt_params.py +++ b/src/codex/types/tlm_prompt_params.py @@ -30,17 +30,16 @@ class TlmPromptParams(TypedDict, total=False): The default values corresponding to each quality preset are: - - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8, - `use_self_reflection` = True. This preset improves LLM responses. - - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8, - `use_self_reflection` = True. This preset improves LLM responses. - - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8, - `use_self_reflection` = True. - - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4, - `use_self_reflection` = True. - - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0, - `use_self_reflection` = False. 
When using `get_trustworthiness_score()` on - "base" preset, a faster self-reflection is employed. + - **best:** `num_consistency_samples` = 8, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **high:** `num_consistency_samples` = 4, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **medium:** `num_consistency_samples` = 0, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **low:** `num_consistency_samples` = 0, `num_self_reflections` = 3, + `reasoning_effort` = `"none"`. + - **base:** `num_consistency_samples` = 0, `num_self_reflections` = 1, + `reasoning_effort` = `"none"`. By default, TLM uses the: "medium" `quality_preset`, "gpt-4.1-mini" base `model`, and `max_tokens` is set to 512. You can set custom values for these @@ -76,12 +75,11 @@ class TlmPromptParams(TypedDict, total=False): strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. TLM measures consistency via the degree of contradiction between sampled responses that the model considers plausible. - use_self_reflection (bool, default = `True`): whether the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. - Setting this False disables reflection and will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. - Reflection helps quantify aleatoric uncertainty associated with challenging prompts - and catches responses that are noticeably incorrect/bad upon further analysis. + num_self_reflections(int, default = 3): the number of self-reflections to perform where the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. + The maximum number of self-reflections currently supported is 3. Lower values will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. + Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. - similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "semantic"): how the + similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "discrepancy"): how the trustworthiness scoring's consistency algorithm measures similarity between alternative responses considered plausible by the model. Supported similarity measures include - "semantic" (based on natural language inference), "embedding" (based on vector embedding similarity), "embedding_large" (based on a larger embedding model), @@ -99,6 +97,8 @@ class TlmPromptParams(TypedDict, total=False): The expected input format is a list of dictionaries, where each dictionary has the following keys: - name: Name of the evaluation criteria. - criteria: Instructions specifying the evaluation criteria. + + use_self_reflection (bool, default = `True`): deprecated. Use `num_self_reflections` instead. 
""" quality_preset: Literal["best", "high", "medium", "low", "base"] @@ -120,6 +120,8 @@ class Options(TypedDict, total=False): num_consistency_samples: int + num_self_reflections: int + reasoning_effort: str similarity_measure: str diff --git a/src/codex/types/tlm_score_params.py b/src/codex/types/tlm_score_params.py index 95bcc4c..4a0a32a 100644 --- a/src/codex/types/tlm_score_params.py +++ b/src/codex/types/tlm_score_params.py @@ -32,17 +32,16 @@ class TlmScoreParams(TypedDict, total=False): The default values corresponding to each quality preset are: - - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8, - `use_self_reflection` = True. This preset improves LLM responses. - - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8, - `use_self_reflection` = True. This preset improves LLM responses. - - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8, - `use_self_reflection` = True. - - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4, - `use_self_reflection` = True. - - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0, - `use_self_reflection` = False. When using `get_trustworthiness_score()` on - "base" preset, a faster self-reflection is employed. + - **best:** `num_consistency_samples` = 8, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **high:** `num_consistency_samples` = 4, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **medium:** `num_consistency_samples` = 0, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **low:** `num_consistency_samples` = 0, `num_self_reflections` = 3, + `reasoning_effort` = `"none"`. + - **base:** `num_consistency_samples` = 0, `num_self_reflections` = 1, + `reasoning_effort` = `"none"`. By default, TLM uses the: "medium" `quality_preset`, "gpt-4.1-mini" base `model`, and `max_tokens` is set to 512. You can set custom values for these @@ -78,12 +77,11 @@ class TlmScoreParams(TypedDict, total=False): strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. TLM measures consistency via the degree of contradiction between sampled responses that the model considers plausible. - use_self_reflection (bool, default = `True`): whether the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. - Setting this False disables reflection and will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. - Reflection helps quantify aleatoric uncertainty associated with challenging prompts - and catches responses that are noticeably incorrect/bad upon further analysis. + num_self_reflections(int, default = 3): the number of self-reflections to perform where the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. + The maximum number of self-reflections currently supported is 3. Lower values will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. + Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. 
- similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "semantic"): how the + similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "discrepancy"): how the trustworthiness scoring's consistency algorithm measures similarity between alternative responses considered plausible by the model. Supported similarity measures include - "semantic" (based on natural language inference), "embedding" (based on vector embedding similarity), "embedding_large" (based on a larger embedding model), @@ -101,6 +99,8 @@ class TlmScoreParams(TypedDict, total=False): The expected input format is a list of dictionaries, where each dictionary has the following keys: - name: Name of the evaluation criteria. - criteria: Instructions specifying the evaluation criteria. + + use_self_reflection (bool, default = `True`): deprecated. Use `num_self_reflections` instead. """ quality_preset: Literal["best", "high", "medium", "low", "base"] @@ -122,6 +122,8 @@ class Options(TypedDict, total=False): num_consistency_samples: int + num_self_reflections: int + reasoning_effort: str similarity_measure: str diff --git a/tests/api_resources/test_projects.py b/tests/api_resources/test_projects.py index ae3f4f0..7884db0 100644 --- a/tests/api_resources/test_projects.py +++ b/tests/api_resources/test_projects.py @@ -622,6 +622,7 @@ def test_method_validate_with_all_params(self, client: Codex) -> None: "model": "model", "num_candidate_responses": 0, "num_consistency_samples": 0, + "num_self_reflections": 0, "reasoning_effort": "reasoning_effort", "similarity_measure": "similarity_measure", "use_self_reflection": True, @@ -630,6 +631,17 @@ def test_method_validate_with_all_params(self, client: Codex) -> None: quality_preset="best", rewritten_question="rewritten_question", task="task", + tools=[ + { + "function": { + "name": "name", + "description": "description", + "parameters": {}, + "strict": True, + }, + "type": "function", + } + ], x_client_library_version="x-client-library-version", x_integration_type="x-integration-type", x_source="x-source", @@ -1284,6 +1296,7 @@ async def test_method_validate_with_all_params(self, async_client: AsyncCodex) - "model": "model", "num_candidate_responses": 0, "num_consistency_samples": 0, + "num_self_reflections": 0, "reasoning_effort": "reasoning_effort", "similarity_measure": "similarity_measure", "use_self_reflection": True, @@ -1292,6 +1305,17 @@ async def test_method_validate_with_all_params(self, async_client: AsyncCodex) - quality_preset="best", rewritten_question="rewritten_question", task="task", + tools=[ + { + "function": { + "name": "name", + "description": "description", + "parameters": {}, + "strict": True, + }, + "type": "function", + } + ], x_client_library_version="x-client-library-version", x_integration_type="x-integration-type", x_source="x-source", diff --git a/tests/api_resources/test_tlm.py b/tests/api_resources/test_tlm.py index 41376a4..da0a9ad 100644 --- a/tests/api_resources/test_tlm.py +++ b/tests/api_resources/test_tlm.py @@ -38,6 +38,7 @@ def test_method_prompt_with_all_params(self, client: Codex) -> None: "model": "model", "num_candidate_responses": 0, "num_consistency_samples": 0, + "num_self_reflections": 0, "reasoning_effort": "reasoning_effort", "similarity_measure": "similarity_measure", "use_self_reflection": True, @@ -96,6 +97,7 @@ def test_method_score_with_all_params(self, client: Codex) -> None: "model": "model", "num_candidate_responses": 0, 
"num_consistency_samples": 0, + "num_self_reflections": 0, "reasoning_effort": "reasoning_effort", "similarity_measure": "similarity_measure", "use_self_reflection": True, @@ -160,6 +162,7 @@ async def test_method_prompt_with_all_params(self, async_client: AsyncCodex) -> "model": "model", "num_candidate_responses": 0, "num_consistency_samples": 0, + "num_self_reflections": 0, "reasoning_effort": "reasoning_effort", "similarity_measure": "similarity_measure", "use_self_reflection": True, @@ -218,6 +221,7 @@ async def test_method_score_with_all_params(self, async_client: AsyncCodex) -> N "model": "model", "num_candidate_responses": 0, "num_consistency_samples": 0, + "num_self_reflections": 0, "reasoning_effort": "reasoning_effort", "similarity_measure": "similarity_measure", "use_self_reflection": True, From 6b52a985af9df1b6618d0685fafee2bae7e98566 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Mon, 28 Jul 2025 17:04:05 +0000 Subject: [PATCH 18/20] feat(api): remove deprecated endpoint increment_queries --- .stats.yml | 4 +- src/codex/resources/projects/projects.py | 74 +++++----- src/codex/resources/tlm.py | 128 +++++++++--------- src/codex/types/project_validate_params.py | 58 +++----- .../query_log_list_by_group_response.py | 46 ------- .../query_log_list_groups_response.py | 46 ------- .../types/projects/query_log_list_response.py | 46 ------- .../projects/query_log_retrieve_response.py | 46 ------- ...remediation_list_resolved_logs_response.py | 46 ------- src/codex/types/tlm_prompt_params.py | 34 +++-- src/codex/types/tlm_score_params.py | 34 +++-- tests/api_resources/test_projects.py | 24 ---- tests/api_resources/test_tlm.py | 4 - 13 files changed, 146 insertions(+), 444 deletions(-) diff --git a/.stats.yml b/.stats.yml index 3fdd5d0..031dedf 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,3 +1,3 @@ configured_endpoints: 54 -openapi_spec_hash: 49989625bf633c5fdb3e11140f788f2d -config_hash: 8f6e5c3b064cbb77569a6bf654954a56 +openapi_spec_hash: 57e29e33aec4bbc20171ec3128594e75 +config_hash: 930284cfa37f835d949c8a1b124f4807 diff --git a/src/codex/resources/projects/projects.py b/src/codex/resources/projects/projects.py index f82bcd0..3a109ed 100644 --- a/src/codex/resources/projects/projects.py +++ b/src/codex/resources/projects/projects.py @@ -460,7 +460,6 @@ def validate( quality_preset: Literal["best", "high", "medium", "low", "base"] | NotGiven = NOT_GIVEN, rewritten_question: Optional[str] | NotGiven = NOT_GIVEN, task: Optional[str] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[project_validate_params.Tool]] | NotGiven = NOT_GIVEN, x_client_library_version: str | NotGiven = NOT_GIVEN, x_integration_type: str | NotGiven = NOT_GIVEN, x_source: str | NotGiven = NOT_GIVEN, @@ -505,16 +504,17 @@ def validate( The default values corresponding to each quality preset are: - - **best:** `num_consistency_samples` = 8, `num_self_reflections` = 3, - `reasoning_effort` = `"high"`. - - **high:** `num_consistency_samples` = 4, `num_self_reflections` = 3, - `reasoning_effort` = `"high"`. - - **medium:** `num_consistency_samples` = 0, `num_self_reflections` = 3, - `reasoning_effort` = `"high"`. - - **low:** `num_consistency_samples` = 0, `num_self_reflections` = 3, - `reasoning_effort` = `"none"`. - - **base:** `num_consistency_samples` = 0, `num_self_reflections` = 1, - `reasoning_effort` = `"none"`. + - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8, + `use_self_reflection` = True. 
This preset improves LLM responses. + - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8, + `use_self_reflection` = True. This preset improves LLM responses. + - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8, + `use_self_reflection` = True. + - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4, + `use_self_reflection` = True. + - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0, + `use_self_reflection` = False. When using `get_trustworthiness_score()` on + "base" preset, a faster self-reflection is employed. By default, TLM uses the: "medium" `quality_preset`, "gpt-4.1-mini" base `model`, and `max_tokens` is set to 512. You can set custom values for these @@ -550,11 +550,12 @@ def validate( strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. TLM measures consistency via the degree of contradiction between sampled responses that the model considers plausible. - num_self_reflections(int, default = 3): the number of self-reflections to perform where the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. - The maximum number of self-reflections currently supported is 3. Lower values will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. - Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. + use_self_reflection (bool, default = `True`): whether the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. + Setting this False disables reflection and will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. + Reflection helps quantify aleatoric uncertainty associated with challenging prompts + and catches responses that are noticeably incorrect/bad upon further analysis. - similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "discrepancy"): how the + similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "semantic"): how the trustworthiness scoring's consistency algorithm measures similarity between alternative responses considered plausible by the model. Supported similarity measures include - "semantic" (based on natural language inference), "embedding" (based on vector embedding similarity), "embedding_large" (based on a larger embedding model), @@ -573,8 +574,6 @@ def validate( - name: Name of the evaluation criteria. - criteria: Instructions specifying the evaluation criteria. - use_self_reflection (bool, default = `True`): deprecated. Use `num_self_reflections` instead. - prompt: The prompt to use for the TLM call. If not provided, the prompt will be generated from the messages. @@ -583,9 +582,6 @@ def validate( rewritten_question: The re-written query if it was provided by the client to Codex from a user to be used instead of the original query. - tools: Tools to use for the LLM call. If not provided, it is assumed no tools were - provided to the LLM. 
- extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -624,7 +620,6 @@ def validate( "quality_preset": quality_preset, "rewritten_question": rewritten_question, "task": task, - "tools": tools, }, project_validate_params.ProjectValidateParams, ), @@ -1033,7 +1028,6 @@ async def validate( quality_preset: Literal["best", "high", "medium", "low", "base"] | NotGiven = NOT_GIVEN, rewritten_question: Optional[str] | NotGiven = NOT_GIVEN, task: Optional[str] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[project_validate_params.Tool]] | NotGiven = NOT_GIVEN, x_client_library_version: str | NotGiven = NOT_GIVEN, x_integration_type: str | NotGiven = NOT_GIVEN, x_source: str | NotGiven = NOT_GIVEN, @@ -1078,16 +1072,17 @@ async def validate( The default values corresponding to each quality preset are: - - **best:** `num_consistency_samples` = 8, `num_self_reflections` = 3, - `reasoning_effort` = `"high"`. - - **high:** `num_consistency_samples` = 4, `num_self_reflections` = 3, - `reasoning_effort` = `"high"`. - - **medium:** `num_consistency_samples` = 0, `num_self_reflections` = 3, - `reasoning_effort` = `"high"`. - - **low:** `num_consistency_samples` = 0, `num_self_reflections` = 3, - `reasoning_effort` = `"none"`. - - **base:** `num_consistency_samples` = 0, `num_self_reflections` = 1, - `reasoning_effort` = `"none"`. + - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8, + `use_self_reflection` = True. This preset improves LLM responses. + - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8, + `use_self_reflection` = True. This preset improves LLM responses. + - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8, + `use_self_reflection` = True. + - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4, + `use_self_reflection` = True. + - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0, + `use_self_reflection` = False. When using `get_trustworthiness_score()` on + "base" preset, a faster self-reflection is employed. By default, TLM uses the: "medium" `quality_preset`, "gpt-4.1-mini" base `model`, and `max_tokens` is set to 512. You can set custom values for these @@ -1123,11 +1118,12 @@ async def validate( strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. TLM measures consistency via the degree of contradiction between sampled responses that the model considers plausible. - num_self_reflections(int, default = 3): the number of self-reflections to perform where the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. - The maximum number of self-reflections currently supported is 3. Lower values will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. - Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. + use_self_reflection (bool, default = `True`): whether the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. + Setting this False disables reflection and will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. + Reflection helps quantify aleatoric uncertainty associated with challenging prompts + and catches responses that are noticeably incorrect/bad upon further analysis. 
- similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "discrepancy"): how the + similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "semantic"): how the trustworthiness scoring's consistency algorithm measures similarity between alternative responses considered plausible by the model. Supported similarity measures include - "semantic" (based on natural language inference), "embedding" (based on vector embedding similarity), "embedding_large" (based on a larger embedding model), @@ -1146,8 +1142,6 @@ async def validate( - name: Name of the evaluation criteria. - criteria: Instructions specifying the evaluation criteria. - use_self_reflection (bool, default = `True`): deprecated. Use `num_self_reflections` instead. - prompt: The prompt to use for the TLM call. If not provided, the prompt will be generated from the messages. @@ -1156,9 +1150,6 @@ async def validate( rewritten_question: The re-written query if it was provided by the client to Codex from a user to be used instead of the original query. - tools: Tools to use for the LLM call. If not provided, it is assumed no tools were - provided to the LLM. - extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -1197,7 +1188,6 @@ async def validate( "quality_preset": quality_preset, "rewritten_question": rewritten_question, "task": task, - "tools": tools, }, project_validate_params.ProjectValidateParams, ), diff --git a/src/codex/resources/tlm.py b/src/codex/resources/tlm.py index c6064ed..12ff6c0 100644 --- a/src/codex/resources/tlm.py +++ b/src/codex/resources/tlm.py @@ -79,16 +79,17 @@ def prompt( The default values corresponding to each quality preset are: - - **best:** `num_consistency_samples` = 8, `num_self_reflections` = 3, - `reasoning_effort` = `"high"`. - - **high:** `num_consistency_samples` = 4, `num_self_reflections` = 3, - `reasoning_effort` = `"high"`. - - **medium:** `num_consistency_samples` = 0, `num_self_reflections` = 3, - `reasoning_effort` = `"high"`. - - **low:** `num_consistency_samples` = 0, `num_self_reflections` = 3, - `reasoning_effort` = `"none"`. - - **base:** `num_consistency_samples` = 0, `num_self_reflections` = 1, - `reasoning_effort` = `"none"`. + - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8, + `use_self_reflection` = True. This preset improves LLM responses. + - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8, + `use_self_reflection` = True. This preset improves LLM responses. + - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8, + `use_self_reflection` = True. + - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4, + `use_self_reflection` = True. + - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0, + `use_self_reflection` = False. When using `get_trustworthiness_score()` on + "base" preset, a faster self-reflection is employed. By default, TLM uses the: "medium" `quality_preset`, "gpt-4.1-mini" base `model`, and `max_tokens` is set to 512. You can set custom values for these @@ -124,11 +125,12 @@ def prompt( strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. TLM measures consistency via the degree of contradiction between sampled responses that the model considers plausible. 
- num_self_reflections(int, default = 3): the number of self-reflections to perform where the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. - The maximum number of self-reflections currently supported is 3. Lower values will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. - Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. + use_self_reflection (bool, default = `True`): whether the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. + Setting this False disables reflection and will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. + Reflection helps quantify aleatoric uncertainty associated with challenging prompts + and catches responses that are noticeably incorrect/bad upon further analysis. - similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "discrepancy"): how the + similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "semantic"): how the trustworthiness scoring's consistency algorithm measures similarity between alternative responses considered plausible by the model. Supported similarity measures include - "semantic" (based on natural language inference), "embedding" (based on vector embedding similarity), "embedding_large" (based on a larger embedding model), @@ -147,8 +149,6 @@ def prompt( - name: Name of the evaluation criteria. - criteria: Instructions specifying the evaluation criteria. - use_self_reflection (bool, default = `True`): deprecated. Use `num_self_reflections` instead. - quality_preset: The quality preset to use for the TLM or Trustworthy RAG API. extra_headers: Send extra headers @@ -217,16 +217,17 @@ def score( The default values corresponding to each quality preset are: - - **best:** `num_consistency_samples` = 8, `num_self_reflections` = 3, - `reasoning_effort` = `"high"`. - - **high:** `num_consistency_samples` = 4, `num_self_reflections` = 3, - `reasoning_effort` = `"high"`. - - **medium:** `num_consistency_samples` = 0, `num_self_reflections` = 3, - `reasoning_effort` = `"high"`. - - **low:** `num_consistency_samples` = 0, `num_self_reflections` = 3, - `reasoning_effort` = `"none"`. - - **base:** `num_consistency_samples` = 0, `num_self_reflections` = 1, - `reasoning_effort` = `"none"`. + - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8, + `use_self_reflection` = True. This preset improves LLM responses. + - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8, + `use_self_reflection` = True. This preset improves LLM responses. + - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8, + `use_self_reflection` = True. + - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4, + `use_self_reflection` = True. + - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0, + `use_self_reflection` = False. When using `get_trustworthiness_score()` on + "base" preset, a faster self-reflection is employed. By default, TLM uses the: "medium" `quality_preset`, "gpt-4.1-mini" base `model`, and `max_tokens` is set to 512. You can set custom values for these @@ -262,11 +263,12 @@ def score( strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. 
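A usage illustration for the prompt endpoint documented above, as a sketch under stated assumptions rather than code from this patch: the import path and the prompt keyword are assumed from the package layout and tests, and the client is assumed to read its credentials from the environment.

from codex import Codex  # assumed public import, matching the src/codex package layout

client = Codex()  # assumed to read its API key from the environment

# Hypothetical prompt call; the `prompt` keyword is an assumption, while `options` and
# `quality_preset` mirror the parameters documented above.
result = client.tlm.prompt(
    prompt="Summarize our refund policy in one sentence.",
    quality_preset="low",
    options={"max_tokens": 128},
)
print(result)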
TLM measures consistency via the degree of contradiction between sampled responses that the model considers plausible. - num_self_reflections(int, default = 3): the number of self-reflections to perform where the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. - The maximum number of self-reflections currently supported is 3. Lower values will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. - Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. + use_self_reflection (bool, default = `True`): whether the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. + Setting this False disables reflection and will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. + Reflection helps quantify aleatoric uncertainty associated with challenging prompts + and catches responses that are noticeably incorrect/bad upon further analysis. - similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "discrepancy"): how the + similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "semantic"): how the trustworthiness scoring's consistency algorithm measures similarity between alternative responses considered plausible by the model. Supported similarity measures include - "semantic" (based on natural language inference), "embedding" (based on vector embedding similarity), "embedding_large" (based on a larger embedding model), @@ -285,8 +287,6 @@ def score( - name: Name of the evaluation criteria. - criteria: Instructions specifying the evaluation criteria. - use_self_reflection (bool, default = `True`): deprecated. Use `num_self_reflections` instead. - quality_preset: The quality preset to use for the TLM or Trustworthy RAG API. extra_headers: Send extra headers @@ -371,16 +371,17 @@ async def prompt( The default values corresponding to each quality preset are: - - **best:** `num_consistency_samples` = 8, `num_self_reflections` = 3, - `reasoning_effort` = `"high"`. - - **high:** `num_consistency_samples` = 4, `num_self_reflections` = 3, - `reasoning_effort` = `"high"`. - - **medium:** `num_consistency_samples` = 0, `num_self_reflections` = 3, - `reasoning_effort` = `"high"`. - - **low:** `num_consistency_samples` = 0, `num_self_reflections` = 3, - `reasoning_effort` = `"none"`. - - **base:** `num_consistency_samples` = 0, `num_self_reflections` = 1, - `reasoning_effort` = `"none"`. + - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8, + `use_self_reflection` = True. This preset improves LLM responses. + - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8, + `use_self_reflection` = True. This preset improves LLM responses. + - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8, + `use_self_reflection` = True. + - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4, + `use_self_reflection` = True. + - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0, + `use_self_reflection` = False. When using `get_trustworthiness_score()` on + "base" preset, a faster self-reflection is employed. By default, TLM uses the: "medium" `quality_preset`, "gpt-4.1-mini" base `model`, and `max_tokens` is set to 512. 
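A matching sketch for the scoring endpoint described here; the prompt/response keyword names are assumptions (they are not shown in this hunk), while options and quality presets mirror the documented parameters.

from codex import Codex  # assumed public import

client = Codex()  # assumed to read credentials from the environment

# Hypothetical scoring call for an existing prompt/response pair.
score = client.tlm.score(
    prompt="What is the capital of France?",      # assumed keyword
    response="The capital of France is Paris.",   # assumed keyword
    options={"num_consistency_samples": 4, "similarity_measure": "semantic"},
)
print(score)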
You can set custom values for these @@ -416,11 +417,12 @@ async def prompt( strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. TLM measures consistency via the degree of contradiction between sampled responses that the model considers plausible. - num_self_reflections(int, default = 3): the number of self-reflections to perform where the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. - The maximum number of self-reflections currently supported is 3. Lower values will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. - Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. + use_self_reflection (bool, default = `True`): whether the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. + Setting this False disables reflection and will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. + Reflection helps quantify aleatoric uncertainty associated with challenging prompts + and catches responses that are noticeably incorrect/bad upon further analysis. - similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "discrepancy"): how the + similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "semantic"): how the trustworthiness scoring's consistency algorithm measures similarity between alternative responses considered plausible by the model. Supported similarity measures include - "semantic" (based on natural language inference), "embedding" (based on vector embedding similarity), "embedding_large" (based on a larger embedding model), @@ -439,8 +441,6 @@ async def prompt( - name: Name of the evaluation criteria. - criteria: Instructions specifying the evaluation criteria. - use_self_reflection (bool, default = `True`): deprecated. Use `num_self_reflections` instead. - quality_preset: The quality preset to use for the TLM or Trustworthy RAG API. extra_headers: Send extra headers @@ -509,16 +509,17 @@ async def score( The default values corresponding to each quality preset are: - - **best:** `num_consistency_samples` = 8, `num_self_reflections` = 3, - `reasoning_effort` = `"high"`. - - **high:** `num_consistency_samples` = 4, `num_self_reflections` = 3, - `reasoning_effort` = `"high"`. - - **medium:** `num_consistency_samples` = 0, `num_self_reflections` = 3, - `reasoning_effort` = `"high"`. - - **low:** `num_consistency_samples` = 0, `num_self_reflections` = 3, - `reasoning_effort` = `"none"`. - - **base:** `num_consistency_samples` = 0, `num_self_reflections` = 1, - `reasoning_effort` = `"none"`. + - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8, + `use_self_reflection` = True. This preset improves LLM responses. + - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8, + `use_self_reflection` = True. This preset improves LLM responses. + - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8, + `use_self_reflection` = True. + - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4, + `use_self_reflection` = True. + - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0, + `use_self_reflection` = False. 
When using `get_trustworthiness_score()` on + "base" preset, a faster self-reflection is employed. By default, TLM uses the: "medium" `quality_preset`, "gpt-4.1-mini" base `model`, and `max_tokens` is set to 512. You can set custom values for these @@ -554,11 +555,12 @@ async def score( strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. TLM measures consistency via the degree of contradiction between sampled responses that the model considers plausible. - num_self_reflections(int, default = 3): the number of self-reflections to perform where the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. - The maximum number of self-reflections currently supported is 3. Lower values will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. - Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. + use_self_reflection (bool, default = `True`): whether the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. + Setting this False disables reflection and will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. + Reflection helps quantify aleatoric uncertainty associated with challenging prompts + and catches responses that are noticeably incorrect/bad upon further analysis. - similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "discrepancy"): how the + similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "semantic"): how the trustworthiness scoring's consistency algorithm measures similarity between alternative responses considered plausible by the model. Supported similarity measures include - "semantic" (based on natural language inference), "embedding" (based on vector embedding similarity), "embedding_large" (based on a larger embedding model), @@ -577,8 +579,6 @@ async def score( - name: Name of the evaluation criteria. - criteria: Instructions specifying the evaluation criteria. - use_self_reflection (bool, default = `True`): deprecated. Use `num_self_reflections` instead. - quality_preset: The quality preset to use for the TLM or Trustworthy RAG API. extra_headers: Send extra headers diff --git a/src/codex/types/project_validate_params.py b/src/codex/types/project_validate_params.py index 6231367..081dd2a 100644 --- a/src/codex/types/project_validate_params.py +++ b/src/codex/types/project_validate_params.py @@ -54,8 +54,6 @@ "MessageChatCompletionDeveloperMessageParam", "MessageChatCompletionDeveloperMessageParamContentUnionMember1", "Options", - "Tool", - "ToolFunction", ] @@ -108,16 +106,17 @@ class ProjectValidateParams(TypedDict, total=False): The default values corresponding to each quality preset are: - - **best:** `num_consistency_samples` = 8, `num_self_reflections` = 3, - `reasoning_effort` = `"high"`. - - **high:** `num_consistency_samples` = 4, `num_self_reflections` = 3, - `reasoning_effort` = `"high"`. - - **medium:** `num_consistency_samples` = 0, `num_self_reflections` = 3, - `reasoning_effort` = `"high"`. - - **low:** `num_consistency_samples` = 0, `num_self_reflections` = 3, - `reasoning_effort` = `"none"`. - - **base:** `num_consistency_samples` = 0, `num_self_reflections` = 1, - `reasoning_effort` = `"none"`. 
+ - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8, + `use_self_reflection` = True. This preset improves LLM responses. + - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8, + `use_self_reflection` = True. This preset improves LLM responses. + - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8, + `use_self_reflection` = True. + - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4, + `use_self_reflection` = True. + - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0, + `use_self_reflection` = False. When using `get_trustworthiness_score()` on + "base" preset, a faster self-reflection is employed. By default, TLM uses the: "medium" `quality_preset`, "gpt-4.1-mini" base `model`, and `max_tokens` is set to 512. You can set custom values for these @@ -153,11 +152,12 @@ class ProjectValidateParams(TypedDict, total=False): strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. TLM measures consistency via the degree of contradiction between sampled responses that the model considers plausible. - num_self_reflections(int, default = 3): the number of self-reflections to perform where the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. - The maximum number of self-reflections currently supported is 3. Lower values will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. - Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. + use_self_reflection (bool, default = `True`): whether the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. + Setting this False disables reflection and will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. + Reflection helps quantify aleatoric uncertainty associated with challenging prompts + and catches responses that are noticeably incorrect/bad upon further analysis. - similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "discrepancy"): how the + similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "semantic"): how the trustworthiness scoring's consistency algorithm measures similarity between alternative responses considered plausible by the model. Supported similarity measures include - "semantic" (based on natural language inference), "embedding" (based on vector embedding similarity), "embedding_large" (based on a larger embedding model), @@ -175,8 +175,6 @@ class ProjectValidateParams(TypedDict, total=False): The expected input format is a list of dictionaries, where each dictionary has the following keys: - name: Name of the evaluation criteria. - criteria: Instructions specifying the evaluation criteria. - - use_self_reflection (bool, default = `True`): deprecated. Use `num_self_reflections` instead. """ prompt: Optional[str] @@ -196,12 +194,6 @@ class ProjectValidateParams(TypedDict, total=False): task: Optional[str] - tools: Optional[Iterable[Tool]] - """Tools to use for the LLM call. - - If not provided, it is assumed no tools were provided to the LLM. 
- """ - x_client_library_version: Annotated[str, PropertyInfo(alias="x-client-library-version")] x_integration_type: Annotated[str, PropertyInfo(alias="x-integration-type")] @@ -657,26 +649,8 @@ class Options(TypedDict, total=False): num_consistency_samples: int - num_self_reflections: int - reasoning_effort: str similarity_measure: str use_self_reflection: bool - - -class ToolFunction(TypedDict, total=False): - name: Required[str] - - description: str - - parameters: object - - strict: Optional[bool] - - -class Tool(TypedDict, total=False): - function: Required[ToolFunction] - - type: Required[Literal["function"]] diff --git a/src/codex/types/projects/query_log_list_by_group_response.py b/src/codex/types/projects/query_log_list_by_group_response.py index b3c774b..ae49b95 100644 --- a/src/codex/types/projects/query_log_list_by_group_response.py +++ b/src/codex/types/projects/query_log_list_by_group_response.py @@ -16,8 +16,6 @@ "QueryLogsByGroupQueryLogFormattedNonGuardrailEvalScores", "QueryLogsByGroupQueryLogContext", "QueryLogsByGroupQueryLogDeterministicGuardrailsResults", - "QueryLogsByGroupQueryLogEvaluatedResponseToolCall", - "QueryLogsByGroupQueryLogEvaluatedResponseToolCallFunction", "QueryLogsByGroupQueryLogMessage", "QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutput", "QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputAudio", @@ -43,8 +41,6 @@ "QueryLogsByGroupQueryLogMessageChatCompletionFunctionMessageParam", "QueryLogsByGroupQueryLogMessageChatCompletionDeveloperMessageParam", "QueryLogsByGroupQueryLogMessageChatCompletionDeveloperMessageParamContentUnionMember1", - "QueryLogsByGroupQueryLogTool", - "QueryLogsByGroupQueryLogToolFunction", ] @@ -97,20 +93,6 @@ class QueryLogsByGroupQueryLogDeterministicGuardrailsResults(BaseModel): matches: Optional[List[str]] = None -class QueryLogsByGroupQueryLogEvaluatedResponseToolCallFunction(BaseModel): - arguments: str - - name: str - - -class QueryLogsByGroupQueryLogEvaluatedResponseToolCall(BaseModel): - id: str - - function: QueryLogsByGroupQueryLogEvaluatedResponseToolCallFunction - - type: Literal["function"] - - class QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputAudio(BaseModel): id: str @@ -305,22 +287,6 @@ class QueryLogsByGroupQueryLogMessageChatCompletionDeveloperMessageParam(BaseMod ] -class QueryLogsByGroupQueryLogToolFunction(BaseModel): - name: str - - description: Optional[str] = None - - parameters: Optional[object] = None - - strict: Optional[bool] = None - - -class QueryLogsByGroupQueryLogTool(BaseModel): - function: QueryLogsByGroupQueryLogToolFunction - - type: Literal["function"] - - class QueryLogsByGroupQueryLog(BaseModel): id: str @@ -391,12 +357,6 @@ class QueryLogsByGroupQueryLog(BaseModel): evaluated_response: Optional[str] = None """The response being evaluated from the RAG system (before any remediation)""" - evaluated_response_tool_calls: Optional[List[QueryLogsByGroupQueryLogEvaluatedResponseToolCall]] = None - """Tool calls from the evaluated response, if any. - - Used to log tool calls in the query log. - """ - guardrail_evals: Optional[List[str]] = None """Evals that should trigger guardrail""" @@ -423,12 +383,6 @@ class QueryLogsByGroupQueryLog(BaseModel): primary_eval_issue_score: Optional[float] = None """Score of the primary eval issue""" - tools: Optional[List[QueryLogsByGroupQueryLogTool]] = None - """Tools to use for the LLM call. - - If not provided, it is assumed no tools were provided to the LLM. 
- """ - class QueryLogsByGroup(BaseModel): query_logs: List[QueryLogsByGroupQueryLog] diff --git a/src/codex/types/projects/query_log_list_groups_response.py b/src/codex/types/projects/query_log_list_groups_response.py index 6ed4d14..cc3b208 100644 --- a/src/codex/types/projects/query_log_list_groups_response.py +++ b/src/codex/types/projects/query_log_list_groups_response.py @@ -14,8 +14,6 @@ "FormattedNonGuardrailEvalScores", "Context", "DeterministicGuardrailsResults", - "EvaluatedResponseToolCall", - "EvaluatedResponseToolCallFunction", "Message", "MessageChatCompletionAssistantMessageParamOutput", "MessageChatCompletionAssistantMessageParamOutputAudio", @@ -41,8 +39,6 @@ "MessageChatCompletionFunctionMessageParam", "MessageChatCompletionDeveloperMessageParam", "MessageChatCompletionDeveloperMessageParamContentUnionMember1", - "Tool", - "ToolFunction", ] @@ -95,20 +91,6 @@ class DeterministicGuardrailsResults(BaseModel): matches: Optional[List[str]] = None -class EvaluatedResponseToolCallFunction(BaseModel): - arguments: str - - name: str - - -class EvaluatedResponseToolCall(BaseModel): - id: str - - function: EvaluatedResponseToolCallFunction - - type: Literal["function"] - - class MessageChatCompletionAssistantMessageParamOutputAudio(BaseModel): id: str @@ -295,22 +277,6 @@ class MessageChatCompletionDeveloperMessageParam(BaseModel): ] -class ToolFunction(BaseModel): - name: str - - description: Optional[str] = None - - parameters: Optional[object] = None - - strict: Optional[bool] = None - - -class Tool(BaseModel): - function: ToolFunction - - type: Literal["function"] - - class QueryLogListGroupsResponse(BaseModel): id: str @@ -381,12 +347,6 @@ class QueryLogListGroupsResponse(BaseModel): evaluated_response: Optional[str] = None """The response being evaluated from the RAG system (before any remediation)""" - evaluated_response_tool_calls: Optional[List[EvaluatedResponseToolCall]] = None - """Tool calls from the evaluated response, if any. - - Used to log tool calls in the query log. - """ - guardrail_evals: Optional[List[str]] = None """Evals that should trigger guardrail""" @@ -412,9 +372,3 @@ class QueryLogListGroupsResponse(BaseModel): primary_eval_issue_score: Optional[float] = None """Score of the primary eval issue""" - - tools: Optional[List[Tool]] = None - """Tools to use for the LLM call. - - If not provided, it is assumed no tools were provided to the LLM. 
- """ diff --git a/src/codex/types/projects/query_log_list_response.py b/src/codex/types/projects/query_log_list_response.py index c6737b2..0778898 100644 --- a/src/codex/types/projects/query_log_list_response.py +++ b/src/codex/types/projects/query_log_list_response.py @@ -14,8 +14,6 @@ "FormattedNonGuardrailEvalScores", "Context", "DeterministicGuardrailsResults", - "EvaluatedResponseToolCall", - "EvaluatedResponseToolCallFunction", "Message", "MessageChatCompletionAssistantMessageParamOutput", "MessageChatCompletionAssistantMessageParamOutputAudio", @@ -41,8 +39,6 @@ "MessageChatCompletionFunctionMessageParam", "MessageChatCompletionDeveloperMessageParam", "MessageChatCompletionDeveloperMessageParamContentUnionMember1", - "Tool", - "ToolFunction", ] @@ -95,20 +91,6 @@ class DeterministicGuardrailsResults(BaseModel): matches: Optional[List[str]] = None -class EvaluatedResponseToolCallFunction(BaseModel): - arguments: str - - name: str - - -class EvaluatedResponseToolCall(BaseModel): - id: str - - function: EvaluatedResponseToolCallFunction - - type: Literal["function"] - - class MessageChatCompletionAssistantMessageParamOutputAudio(BaseModel): id: str @@ -295,22 +277,6 @@ class MessageChatCompletionDeveloperMessageParam(BaseModel): ] -class ToolFunction(BaseModel): - name: str - - description: Optional[str] = None - - parameters: Optional[object] = None - - strict: Optional[bool] = None - - -class Tool(BaseModel): - function: ToolFunction - - type: Literal["function"] - - class QueryLogListResponse(BaseModel): id: str @@ -375,12 +341,6 @@ class QueryLogListResponse(BaseModel): evaluated_response: Optional[str] = None """The response being evaluated from the RAG system (before any remediation)""" - evaluated_response_tool_calls: Optional[List[EvaluatedResponseToolCall]] = None - """Tool calls from the evaluated response, if any. - - Used to log tool calls in the query log. - """ - guardrail_evals: Optional[List[str]] = None """Evals that should trigger guardrail""" @@ -406,9 +366,3 @@ class QueryLogListResponse(BaseModel): primary_eval_issue_score: Optional[float] = None """Score of the primary eval issue""" - - tools: Optional[List[Tool]] = None - """Tools to use for the LLM call. - - If not provided, it is assumed no tools were provided to the LLM. 
- """ diff --git a/src/codex/types/projects/query_log_retrieve_response.py b/src/codex/types/projects/query_log_retrieve_response.py index 8fd8662..2751ef2 100644 --- a/src/codex/types/projects/query_log_retrieve_response.py +++ b/src/codex/types/projects/query_log_retrieve_response.py @@ -14,8 +14,6 @@ "FormattedNonGuardrailEvalScores", "Context", "DeterministicGuardrailsResults", - "EvaluatedResponseToolCall", - "EvaluatedResponseToolCallFunction", "Message", "MessageChatCompletionAssistantMessageParamOutput", "MessageChatCompletionAssistantMessageParamOutputAudio", @@ -41,8 +39,6 @@ "MessageChatCompletionFunctionMessageParam", "MessageChatCompletionDeveloperMessageParam", "MessageChatCompletionDeveloperMessageParamContentUnionMember1", - "Tool", - "ToolFunction", ] @@ -95,20 +91,6 @@ class DeterministicGuardrailsResults(BaseModel): matches: Optional[List[str]] = None -class EvaluatedResponseToolCallFunction(BaseModel): - arguments: str - - name: str - - -class EvaluatedResponseToolCall(BaseModel): - id: str - - function: EvaluatedResponseToolCallFunction - - type: Literal["function"] - - class MessageChatCompletionAssistantMessageParamOutputAudio(BaseModel): id: str @@ -295,22 +277,6 @@ class MessageChatCompletionDeveloperMessageParam(BaseModel): ] -class ToolFunction(BaseModel): - name: str - - description: Optional[str] = None - - parameters: Optional[object] = None - - strict: Optional[bool] = None - - -class Tool(BaseModel): - function: ToolFunction - - type: Literal["function"] - - class QueryLogRetrieveResponse(BaseModel): id: str @@ -379,12 +345,6 @@ class QueryLogRetrieveResponse(BaseModel): evaluated_response: Optional[str] = None """The response being evaluated from the RAG system (before any remediation)""" - evaluated_response_tool_calls: Optional[List[EvaluatedResponseToolCall]] = None - """Tool calls from the evaluated response, if any. - - Used to log tool calls in the query log. - """ - guardrail_evals: Optional[List[str]] = None """Evals that should trigger guardrail""" @@ -410,9 +370,3 @@ class QueryLogRetrieveResponse(BaseModel): primary_eval_issue_score: Optional[float] = None """Score of the primary eval issue""" - - tools: Optional[List[Tool]] = None - """Tools to use for the LLM call. - - If not provided, it is assumed no tools were provided to the LLM. 
- """ diff --git a/src/codex/types/projects/remediation_list_resolved_logs_response.py b/src/codex/types/projects/remediation_list_resolved_logs_response.py index 567a086..d56f9a4 100644 --- a/src/codex/types/projects/remediation_list_resolved_logs_response.py +++ b/src/codex/types/projects/remediation_list_resolved_logs_response.py @@ -15,8 +15,6 @@ "QueryLogFormattedNonGuardrailEvalScores", "QueryLogContext", "QueryLogDeterministicGuardrailsResults", - "QueryLogEvaluatedResponseToolCall", - "QueryLogEvaluatedResponseToolCallFunction", "QueryLogMessage", "QueryLogMessageChatCompletionAssistantMessageParamOutput", "QueryLogMessageChatCompletionAssistantMessageParamOutputAudio", @@ -42,8 +40,6 @@ "QueryLogMessageChatCompletionFunctionMessageParam", "QueryLogMessageChatCompletionDeveloperMessageParam", "QueryLogMessageChatCompletionDeveloperMessageParamContentUnionMember1", - "QueryLogTool", - "QueryLogToolFunction", ] @@ -96,20 +92,6 @@ class QueryLogDeterministicGuardrailsResults(BaseModel): matches: Optional[List[str]] = None -class QueryLogEvaluatedResponseToolCallFunction(BaseModel): - arguments: str - - name: str - - -class QueryLogEvaluatedResponseToolCall(BaseModel): - id: str - - function: QueryLogEvaluatedResponseToolCallFunction - - type: Literal["function"] - - class QueryLogMessageChatCompletionAssistantMessageParamOutputAudio(BaseModel): id: str @@ -302,22 +284,6 @@ class QueryLogMessageChatCompletionDeveloperMessageParam(BaseModel): ] -class QueryLogToolFunction(BaseModel): - name: str - - description: Optional[str] = None - - parameters: Optional[object] = None - - strict: Optional[bool] = None - - -class QueryLogTool(BaseModel): - function: QueryLogToolFunction - - type: Literal["function"] - - class QueryLog(BaseModel): id: str @@ -382,12 +348,6 @@ class QueryLog(BaseModel): evaluated_response: Optional[str] = None """The response being evaluated from the RAG system (before any remediation)""" - evaluated_response_tool_calls: Optional[List[QueryLogEvaluatedResponseToolCall]] = None - """Tool calls from the evaluated response, if any. - - Used to log tool calls in the query log. - """ - guardrail_evals: Optional[List[str]] = None """Evals that should trigger guardrail""" @@ -414,12 +374,6 @@ class QueryLog(BaseModel): primary_eval_issue_score: Optional[float] = None """Score of the primary eval issue""" - tools: Optional[List[QueryLogTool]] = None - """Tools to use for the LLM call. - - If not provided, it is assumed no tools were provided to the LLM. - """ - class RemediationListResolvedLogsResponse(BaseModel): query_logs: List[QueryLog] diff --git a/src/codex/types/tlm_prompt_params.py b/src/codex/types/tlm_prompt_params.py index 8749c5a..3c04bfc 100644 --- a/src/codex/types/tlm_prompt_params.py +++ b/src/codex/types/tlm_prompt_params.py @@ -30,16 +30,17 @@ class TlmPromptParams(TypedDict, total=False): The default values corresponding to each quality preset are: - - **best:** `num_consistency_samples` = 8, `num_self_reflections` = 3, - `reasoning_effort` = `"high"`. - - **high:** `num_consistency_samples` = 4, `num_self_reflections` = 3, - `reasoning_effort` = `"high"`. - - **medium:** `num_consistency_samples` = 0, `num_self_reflections` = 3, - `reasoning_effort` = `"high"`. - - **low:** `num_consistency_samples` = 0, `num_self_reflections` = 3, - `reasoning_effort` = `"none"`. - - **base:** `num_consistency_samples` = 0, `num_self_reflections` = 1, - `reasoning_effort` = `"none"`. 
+ - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8, + `use_self_reflection` = True. This preset improves LLM responses. + - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8, + `use_self_reflection` = True. This preset improves LLM responses. + - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8, + `use_self_reflection` = True. + - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4, + `use_self_reflection` = True. + - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0, + `use_self_reflection` = False. When using `get_trustworthiness_score()` on + "base" preset, a faster self-reflection is employed. By default, TLM uses the: "medium" `quality_preset`, "gpt-4.1-mini" base `model`, and `max_tokens` is set to 512. You can set custom values for these @@ -75,11 +76,12 @@ class TlmPromptParams(TypedDict, total=False): strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. TLM measures consistency via the degree of contradiction between sampled responses that the model considers plausible. - num_self_reflections(int, default = 3): the number of self-reflections to perform where the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. - The maximum number of self-reflections currently supported is 3. Lower values will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. - Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. + use_self_reflection (bool, default = `True`): whether the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. + Setting this False disables reflection and will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. + Reflection helps quantify aleatoric uncertainty associated with challenging prompts + and catches responses that are noticeably incorrect/bad upon further analysis. - similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "discrepancy"): how the + similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "semantic"): how the trustworthiness scoring's consistency algorithm measures similarity between alternative responses considered plausible by the model. Supported similarity measures include - "semantic" (based on natural language inference), "embedding" (based on vector embedding similarity), "embedding_large" (based on a larger embedding model), @@ -97,8 +99,6 @@ class TlmPromptParams(TypedDict, total=False): The expected input format is a list of dictionaries, where each dictionary has the following keys: - name: Name of the evaluation criteria. - criteria: Instructions specifying the evaluation criteria. - - use_self_reflection (bool, default = `True`): deprecated. Use `num_self_reflections` instead. 
""" quality_preset: Literal["best", "high", "medium", "low", "base"] @@ -120,8 +120,6 @@ class Options(TypedDict, total=False): num_consistency_samples: int - num_self_reflections: int - reasoning_effort: str similarity_measure: str diff --git a/src/codex/types/tlm_score_params.py b/src/codex/types/tlm_score_params.py index 4a0a32a..95bcc4c 100644 --- a/src/codex/types/tlm_score_params.py +++ b/src/codex/types/tlm_score_params.py @@ -32,16 +32,17 @@ class TlmScoreParams(TypedDict, total=False): The default values corresponding to each quality preset are: - - **best:** `num_consistency_samples` = 8, `num_self_reflections` = 3, - `reasoning_effort` = `"high"`. - - **high:** `num_consistency_samples` = 4, `num_self_reflections` = 3, - `reasoning_effort` = `"high"`. - - **medium:** `num_consistency_samples` = 0, `num_self_reflections` = 3, - `reasoning_effort` = `"high"`. - - **low:** `num_consistency_samples` = 0, `num_self_reflections` = 3, - `reasoning_effort` = `"none"`. - - **base:** `num_consistency_samples` = 0, `num_self_reflections` = 1, - `reasoning_effort` = `"none"`. + - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8, + `use_self_reflection` = True. This preset improves LLM responses. + - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8, + `use_self_reflection` = True. This preset improves LLM responses. + - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8, + `use_self_reflection` = True. + - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4, + `use_self_reflection` = True. + - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0, + `use_self_reflection` = False. When using `get_trustworthiness_score()` on + "base" preset, a faster self-reflection is employed. By default, TLM uses the: "medium" `quality_preset`, "gpt-4.1-mini" base `model`, and `max_tokens` is set to 512. You can set custom values for these @@ -77,11 +78,12 @@ class TlmScoreParams(TypedDict, total=False): strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. TLM measures consistency via the degree of contradiction between sampled responses that the model considers plausible. - num_self_reflections(int, default = 3): the number of self-reflections to perform where the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. - The maximum number of self-reflections currently supported is 3. Lower values will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. - Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. + use_self_reflection (bool, default = `True`): whether the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. + Setting this False disables reflection and will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. + Reflection helps quantify aleatoric uncertainty associated with challenging prompts + and catches responses that are noticeably incorrect/bad upon further analysis. 
- similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "discrepancy"): how the + similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "semantic"): how the trustworthiness scoring's consistency algorithm measures similarity between alternative responses considered plausible by the model. Supported similarity measures include - "semantic" (based on natural language inference), "embedding" (based on vector embedding similarity), "embedding_large" (based on a larger embedding model), @@ -99,8 +101,6 @@ class TlmScoreParams(TypedDict, total=False): The expected input format is a list of dictionaries, where each dictionary has the following keys: - name: Name of the evaluation criteria. - criteria: Instructions specifying the evaluation criteria. - - use_self_reflection (bool, default = `True`): deprecated. Use `num_self_reflections` instead. """ quality_preset: Literal["best", "high", "medium", "low", "base"] @@ -122,8 +122,6 @@ class Options(TypedDict, total=False): num_consistency_samples: int - num_self_reflections: int - reasoning_effort: str similarity_measure: str diff --git a/tests/api_resources/test_projects.py b/tests/api_resources/test_projects.py index 7884db0..ae3f4f0 100644 --- a/tests/api_resources/test_projects.py +++ b/tests/api_resources/test_projects.py @@ -622,7 +622,6 @@ def test_method_validate_with_all_params(self, client: Codex) -> None: "model": "model", "num_candidate_responses": 0, "num_consistency_samples": 0, - "num_self_reflections": 0, "reasoning_effort": "reasoning_effort", "similarity_measure": "similarity_measure", "use_self_reflection": True, @@ -631,17 +630,6 @@ def test_method_validate_with_all_params(self, client: Codex) -> None: quality_preset="best", rewritten_question="rewritten_question", task="task", - tools=[ - { - "function": { - "name": "name", - "description": "description", - "parameters": {}, - "strict": True, - }, - "type": "function", - } - ], x_client_library_version="x-client-library-version", x_integration_type="x-integration-type", x_source="x-source", @@ -1296,7 +1284,6 @@ async def test_method_validate_with_all_params(self, async_client: AsyncCodex) - "model": "model", "num_candidate_responses": 0, "num_consistency_samples": 0, - "num_self_reflections": 0, "reasoning_effort": "reasoning_effort", "similarity_measure": "similarity_measure", "use_self_reflection": True, @@ -1305,17 +1292,6 @@ async def test_method_validate_with_all_params(self, async_client: AsyncCodex) - quality_preset="best", rewritten_question="rewritten_question", task="task", - tools=[ - { - "function": { - "name": "name", - "description": "description", - "parameters": {}, - "strict": True, - }, - "type": "function", - } - ], x_client_library_version="x-client-library-version", x_integration_type="x-integration-type", x_source="x-source", diff --git a/tests/api_resources/test_tlm.py b/tests/api_resources/test_tlm.py index da0a9ad..41376a4 100644 --- a/tests/api_resources/test_tlm.py +++ b/tests/api_resources/test_tlm.py @@ -38,7 +38,6 @@ def test_method_prompt_with_all_params(self, client: Codex) -> None: "model": "model", "num_candidate_responses": 0, "num_consistency_samples": 0, - "num_self_reflections": 0, "reasoning_effort": "reasoning_effort", "similarity_measure": "similarity_measure", "use_self_reflection": True, @@ -97,7 +96,6 @@ def test_method_score_with_all_params(self, client: Codex) -> None: "model": "model", "num_candidate_responses": 0, 
"num_consistency_samples": 0, - "num_self_reflections": 0, "reasoning_effort": "reasoning_effort", "similarity_measure": "similarity_measure", "use_self_reflection": True, @@ -162,7 +160,6 @@ async def test_method_prompt_with_all_params(self, async_client: AsyncCodex) -> "model": "model", "num_candidate_responses": 0, "num_consistency_samples": 0, - "num_self_reflections": 0, "reasoning_effort": "reasoning_effort", "similarity_measure": "similarity_measure", "use_self_reflection": True, @@ -221,7 +218,6 @@ async def test_method_score_with_all_params(self, async_client: AsyncCodex) -> N "model": "model", "num_candidate_responses": 0, "num_consistency_samples": 0, - "num_self_reflections": 0, "reasoning_effort": "reasoning_effort", "similarity_measure": "similarity_measure", "use_self_reflection": True, From ce6d89f3c885765b21c6ba43b1b7b9a1ebf8a61e Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Mon, 28 Jul 2025 17:18:00 +0000 Subject: [PATCH 19/20] feat(api): api update --- .stats.yml | 2 +- src/codex/resources/projects/projects.py | 74 +++++----- src/codex/resources/tlm.py | 128 +++++++++--------- src/codex/types/project_validate_params.py | 58 +++++--- .../query_log_list_by_group_response.py | 46 +++++++ .../query_log_list_groups_response.py | 46 +++++++ .../types/projects/query_log_list_response.py | 46 +++++++ .../projects/query_log_retrieve_response.py | 46 +++++++ ...remediation_list_resolved_logs_response.py | 46 +++++++ src/codex/types/tlm_prompt_params.py | 34 ++--- src/codex/types/tlm_score_params.py | 34 ++--- tests/api_resources/test_projects.py | 24 ++++ tests/api_resources/test_tlm.py | 4 + 13 files changed, 443 insertions(+), 145 deletions(-) diff --git a/.stats.yml b/.stats.yml index 031dedf..4f2aa48 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,3 +1,3 @@ configured_endpoints: 54 -openapi_spec_hash: 57e29e33aec4bbc20171ec3128594e75 +openapi_spec_hash: 49989625bf633c5fdb3e11140f788f2d config_hash: 930284cfa37f835d949c8a1b124f4807 diff --git a/src/codex/resources/projects/projects.py b/src/codex/resources/projects/projects.py index 3a109ed..f82bcd0 100644 --- a/src/codex/resources/projects/projects.py +++ b/src/codex/resources/projects/projects.py @@ -460,6 +460,7 @@ def validate( quality_preset: Literal["best", "high", "medium", "low", "base"] | NotGiven = NOT_GIVEN, rewritten_question: Optional[str] | NotGiven = NOT_GIVEN, task: Optional[str] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[project_validate_params.Tool]] | NotGiven = NOT_GIVEN, x_client_library_version: str | NotGiven = NOT_GIVEN, x_integration_type: str | NotGiven = NOT_GIVEN, x_source: str | NotGiven = NOT_GIVEN, @@ -504,17 +505,16 @@ def validate( The default values corresponding to each quality preset are: - - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8, - `use_self_reflection` = True. This preset improves LLM responses. - - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8, - `use_self_reflection` = True. This preset improves LLM responses. - - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8, - `use_self_reflection` = True. - - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4, - `use_self_reflection` = True. - - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0, - `use_self_reflection` = False. When using `get_trustworthiness_score()` on - "base" preset, a faster self-reflection is employed. 
+ - **best:** `num_consistency_samples` = 8, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **high:** `num_consistency_samples` = 4, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **medium:** `num_consistency_samples` = 0, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **low:** `num_consistency_samples` = 0, `num_self_reflections` = 3, + `reasoning_effort` = `"none"`. + - **base:** `num_consistency_samples` = 0, `num_self_reflections` = 1, + `reasoning_effort` = `"none"`. By default, TLM uses the: "medium" `quality_preset`, "gpt-4.1-mini" base `model`, and `max_tokens` is set to 512. You can set custom values for these @@ -550,12 +550,11 @@ def validate( strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. TLM measures consistency via the degree of contradiction between sampled responses that the model considers plausible. - use_self_reflection (bool, default = `True`): whether the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. - Setting this False disables reflection and will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. - Reflection helps quantify aleatoric uncertainty associated with challenging prompts - and catches responses that are noticeably incorrect/bad upon further analysis. + num_self_reflections(int, default = 3): the number of self-reflections to perform where the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. + The maximum number of self-reflections currently supported is 3. Lower values will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. + Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. - similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "semantic"): how the + similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "discrepancy"): how the trustworthiness scoring's consistency algorithm measures similarity between alternative responses considered plausible by the model. Supported similarity measures include - "semantic" (based on natural language inference), "embedding" (based on vector embedding similarity), "embedding_large" (based on a larger embedding model), @@ -574,6 +573,8 @@ def validate( - name: Name of the evaluation criteria. - criteria: Instructions specifying the evaluation criteria. + use_self_reflection (bool, default = `True`): deprecated. Use `num_self_reflections` instead. + prompt: The prompt to use for the TLM call. If not provided, the prompt will be generated from the messages. @@ -582,6 +583,9 @@ def validate( rewritten_question: The re-written query if it was provided by the client to Codex from a user to be used instead of the original query. + tools: Tools to use for the LLM call. If not provided, it is assumed no tools were + provided to the LLM. 
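A sketch of how the re-introduced tools argument might be passed along with options; the validate() arguments not shown in this patch (the project identifier, the query/response being validated, and so on) are passed through as **required rather than guessed.

# Hypothetical wrapper; `client` is assumed to be a Codex instance and `tools` a list shaped
# like the test fixtures elsewhere in this series.
def validate_with_tools(client, tools, **required):
    return client.projects.validate(
        **required,                               # required arguments are not shown in this diff
        quality_preset="medium",
        options={"num_consistency_samples": 2},   # invented override on top of the preset defaults
        tools=tools,
    )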
+ extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -620,6 +624,7 @@ def validate( "quality_preset": quality_preset, "rewritten_question": rewritten_question, "task": task, + "tools": tools, }, project_validate_params.ProjectValidateParams, ), @@ -1028,6 +1033,7 @@ async def validate( quality_preset: Literal["best", "high", "medium", "low", "base"] | NotGiven = NOT_GIVEN, rewritten_question: Optional[str] | NotGiven = NOT_GIVEN, task: Optional[str] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[project_validate_params.Tool]] | NotGiven = NOT_GIVEN, x_client_library_version: str | NotGiven = NOT_GIVEN, x_integration_type: str | NotGiven = NOT_GIVEN, x_source: str | NotGiven = NOT_GIVEN, @@ -1072,17 +1078,16 @@ async def validate( The default values corresponding to each quality preset are: - - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8, - `use_self_reflection` = True. This preset improves LLM responses. - - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8, - `use_self_reflection` = True. This preset improves LLM responses. - - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8, - `use_self_reflection` = True. - - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4, - `use_self_reflection` = True. - - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0, - `use_self_reflection` = False. When using `get_trustworthiness_score()` on - "base" preset, a faster self-reflection is employed. + - **best:** `num_consistency_samples` = 8, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **high:** `num_consistency_samples` = 4, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **medium:** `num_consistency_samples` = 0, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **low:** `num_consistency_samples` = 0, `num_self_reflections` = 3, + `reasoning_effort` = `"none"`. + - **base:** `num_consistency_samples` = 0, `num_self_reflections` = 1, + `reasoning_effort` = `"none"`. By default, TLM uses the: "medium" `quality_preset`, "gpt-4.1-mini" base `model`, and `max_tokens` is set to 512. You can set custom values for these @@ -1118,12 +1123,11 @@ async def validate( strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. TLM measures consistency via the degree of contradiction between sampled responses that the model considers plausible. - use_self_reflection (bool, default = `True`): whether the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. - Setting this False disables reflection and will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. - Reflection helps quantify aleatoric uncertainty associated with challenging prompts - and catches responses that are noticeably incorrect/bad upon further analysis. + num_self_reflections(int, default = 3): the number of self-reflections to perform where the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. + The maximum number of self-reflections currently supported is 3. Lower values will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. + Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. 
- similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "semantic"): how the + similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "discrepancy"): how the trustworthiness scoring's consistency algorithm measures similarity between alternative responses considered plausible by the model. Supported similarity measures include - "semantic" (based on natural language inference), "embedding" (based on vector embedding similarity), "embedding_large" (based on a larger embedding model), @@ -1142,6 +1146,8 @@ async def validate( - name: Name of the evaluation criteria. - criteria: Instructions specifying the evaluation criteria. + use_self_reflection (bool, default = `True`): deprecated. Use `num_self_reflections` instead. + prompt: The prompt to use for the TLM call. If not provided, the prompt will be generated from the messages. @@ -1150,6 +1156,9 @@ async def validate( rewritten_question: The re-written query if it was provided by the client to Codex from a user to be used instead of the original query. + tools: Tools to use for the LLM call. If not provided, it is assumed no tools were + provided to the LLM. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -1188,6 +1197,7 @@ async def validate( "quality_preset": quality_preset, "rewritten_question": rewritten_question, "task": task, + "tools": tools, }, project_validate_params.ProjectValidateParams, ), diff --git a/src/codex/resources/tlm.py b/src/codex/resources/tlm.py index 12ff6c0..c6064ed 100644 --- a/src/codex/resources/tlm.py +++ b/src/codex/resources/tlm.py @@ -79,17 +79,16 @@ def prompt( The default values corresponding to each quality preset are: - - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8, - `use_self_reflection` = True. This preset improves LLM responses. - - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8, - `use_self_reflection` = True. This preset improves LLM responses. - - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8, - `use_self_reflection` = True. - - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4, - `use_self_reflection` = True. - - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0, - `use_self_reflection` = False. When using `get_trustworthiness_score()` on - "base" preset, a faster self-reflection is employed. + - **best:** `num_consistency_samples` = 8, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **high:** `num_consistency_samples` = 4, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **medium:** `num_consistency_samples` = 0, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **low:** `num_consistency_samples` = 0, `num_self_reflections` = 3, + `reasoning_effort` = `"none"`. + - **base:** `num_consistency_samples` = 0, `num_self_reflections` = 1, + `reasoning_effort` = `"none"`. By default, TLM uses the: "medium" `quality_preset`, "gpt-4.1-mini" base `model`, and `max_tokens` is set to 512. You can set custom values for these @@ -125,12 +124,11 @@ def prompt( strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. TLM measures consistency via the degree of contradiction between sampled responses that the model considers plausible. 
- use_self_reflection (bool, default = `True`): whether the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. - Setting this False disables reflection and will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. - Reflection helps quantify aleatoric uncertainty associated with challenging prompts - and catches responses that are noticeably incorrect/bad upon further analysis. + num_self_reflections(int, default = 3): the number of self-reflections to perform where the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. + The maximum number of self-reflections currently supported is 3. Lower values will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. + Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. - similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "semantic"): how the + similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "discrepancy"): how the trustworthiness scoring's consistency algorithm measures similarity between alternative responses considered plausible by the model. Supported similarity measures include - "semantic" (based on natural language inference), "embedding" (based on vector embedding similarity), "embedding_large" (based on a larger embedding model), @@ -149,6 +147,8 @@ def prompt( - name: Name of the evaluation criteria. - criteria: Instructions specifying the evaluation criteria. + use_self_reflection (bool, default = `True`): deprecated. Use `num_self_reflections` instead. + quality_preset: The quality preset to use for the TLM or Trustworthy RAG API. extra_headers: Send extra headers @@ -217,17 +217,16 @@ def score( The default values corresponding to each quality preset are: - - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8, - `use_self_reflection` = True. This preset improves LLM responses. - - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8, - `use_self_reflection` = True. This preset improves LLM responses. - - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8, - `use_self_reflection` = True. - - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4, - `use_self_reflection` = True. - - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0, - `use_self_reflection` = False. When using `get_trustworthiness_score()` on - "base" preset, a faster self-reflection is employed. + - **best:** `num_consistency_samples` = 8, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **high:** `num_consistency_samples` = 4, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **medium:** `num_consistency_samples` = 0, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **low:** `num_consistency_samples` = 0, `num_self_reflections` = 3, + `reasoning_effort` = `"none"`. + - **base:** `num_consistency_samples` = 0, `num_self_reflections` = 1, + `reasoning_effort` = `"none"`. By default, TLM uses the: "medium" `quality_preset`, "gpt-4.1-mini" base `model`, and `max_tokens` is set to 512. You can set custom values for these @@ -263,12 +262,11 @@ def score( strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. 
TLM measures consistency via the degree of contradiction between sampled responses that the model considers plausible. - use_self_reflection (bool, default = `True`): whether the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. - Setting this False disables reflection and will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. - Reflection helps quantify aleatoric uncertainty associated with challenging prompts - and catches responses that are noticeably incorrect/bad upon further analysis. + num_self_reflections(int, default = 3): the number of self-reflections to perform where the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. + The maximum number of self-reflections currently supported is 3. Lower values will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. + Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. - similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "semantic"): how the + similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "discrepancy"): how the trustworthiness scoring's consistency algorithm measures similarity between alternative responses considered plausible by the model. Supported similarity measures include - "semantic" (based on natural language inference), "embedding" (based on vector embedding similarity), "embedding_large" (based on a larger embedding model), @@ -287,6 +285,8 @@ def score( - name: Name of the evaluation criteria. - criteria: Instructions specifying the evaluation criteria. + use_self_reflection (bool, default = `True`): deprecated. Use `num_self_reflections` instead. + quality_preset: The quality preset to use for the TLM or Trustworthy RAG API. extra_headers: Send extra headers @@ -371,17 +371,16 @@ async def prompt( The default values corresponding to each quality preset are: - - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8, - `use_self_reflection` = True. This preset improves LLM responses. - - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8, - `use_self_reflection` = True. This preset improves LLM responses. - - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8, - `use_self_reflection` = True. - - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4, - `use_self_reflection` = True. - - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0, - `use_self_reflection` = False. When using `get_trustworthiness_score()` on - "base" preset, a faster self-reflection is employed. + - **best:** `num_consistency_samples` = 8, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **high:** `num_consistency_samples` = 4, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **medium:** `num_consistency_samples` = 0, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **low:** `num_consistency_samples` = 0, `num_self_reflections` = 3, + `reasoning_effort` = `"none"`. + - **base:** `num_consistency_samples` = 0, `num_self_reflections` = 1, + `reasoning_effort` = `"none"`. By default, TLM uses the: "medium" `quality_preset`, "gpt-4.1-mini" base `model`, and `max_tokens` is set to 512. 
You can set custom values for these @@ -417,12 +416,11 @@ async def prompt( strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. TLM measures consistency via the degree of contradiction between sampled responses that the model considers plausible. - use_self_reflection (bool, default = `True`): whether the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. - Setting this False disables reflection and will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. - Reflection helps quantify aleatoric uncertainty associated with challenging prompts - and catches responses that are noticeably incorrect/bad upon further analysis. + num_self_reflections(int, default = 3): the number of self-reflections to perform where the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. + The maximum number of self-reflections currently supported is 3. Lower values will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. + Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. - similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "semantic"): how the + similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "discrepancy"): how the trustworthiness scoring's consistency algorithm measures similarity between alternative responses considered plausible by the model. Supported similarity measures include - "semantic" (based on natural language inference), "embedding" (based on vector embedding similarity), "embedding_large" (based on a larger embedding model), @@ -441,6 +439,8 @@ async def prompt( - name: Name of the evaluation criteria. - criteria: Instructions specifying the evaluation criteria. + use_self_reflection (bool, default = `True`): deprecated. Use `num_self_reflections` instead. + quality_preset: The quality preset to use for the TLM or Trustworthy RAG API. extra_headers: Send extra headers @@ -509,17 +509,16 @@ async def score( The default values corresponding to each quality preset are: - - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8, - `use_self_reflection` = True. This preset improves LLM responses. - - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8, - `use_self_reflection` = True. This preset improves LLM responses. - - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8, - `use_self_reflection` = True. - - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4, - `use_self_reflection` = True. - - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0, - `use_self_reflection` = False. When using `get_trustworthiness_score()` on - "base" preset, a faster self-reflection is employed. + - **best:** `num_consistency_samples` = 8, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **high:** `num_consistency_samples` = 4, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **medium:** `num_consistency_samples` = 0, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **low:** `num_consistency_samples` = 0, `num_self_reflections` = 3, + `reasoning_effort` = `"none"`. 
+ - **base:** `num_consistency_samples` = 0, `num_self_reflections` = 1, + `reasoning_effort` = `"none"`. By default, TLM uses the: "medium" `quality_preset`, "gpt-4.1-mini" base `model`, and `max_tokens` is set to 512. You can set custom values for these @@ -555,12 +554,11 @@ async def score( strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. TLM measures consistency via the degree of contradiction between sampled responses that the model considers plausible. - use_self_reflection (bool, default = `True`): whether the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. - Setting this False disables reflection and will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. - Reflection helps quantify aleatoric uncertainty associated with challenging prompts - and catches responses that are noticeably incorrect/bad upon further analysis. + num_self_reflections(int, default = 3): the number of self-reflections to perform where the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. + The maximum number of self-reflections currently supported is 3. Lower values will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. + Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. - similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "semantic"): how the + similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "discrepancy"): how the trustworthiness scoring's consistency algorithm measures similarity between alternative responses considered plausible by the model. Supported similarity measures include - "semantic" (based on natural language inference), "embedding" (based on vector embedding similarity), "embedding_large" (based on a larger embedding model), @@ -579,6 +577,8 @@ async def score( - name: Name of the evaluation criteria. - criteria: Instructions specifying the evaluation criteria. + use_self_reflection (bool, default = `True`): deprecated. Use `num_self_reflections` instead. + quality_preset: The quality preset to use for the TLM or Trustworthy RAG API. extra_headers: Send extra headers diff --git a/src/codex/types/project_validate_params.py b/src/codex/types/project_validate_params.py index 081dd2a..6231367 100644 --- a/src/codex/types/project_validate_params.py +++ b/src/codex/types/project_validate_params.py @@ -54,6 +54,8 @@ "MessageChatCompletionDeveloperMessageParam", "MessageChatCompletionDeveloperMessageParamContentUnionMember1", "Options", + "Tool", + "ToolFunction", ] @@ -106,17 +108,16 @@ class ProjectValidateParams(TypedDict, total=False): The default values corresponding to each quality preset are: - - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8, - `use_self_reflection` = True. This preset improves LLM responses. - - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8, - `use_self_reflection` = True. This preset improves LLM responses. - - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8, - `use_self_reflection` = True. - - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4, - `use_self_reflection` = True. 
- - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0, - `use_self_reflection` = False. When using `get_trustworthiness_score()` on - "base" preset, a faster self-reflection is employed. + - **best:** `num_consistency_samples` = 8, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **high:** `num_consistency_samples` = 4, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **medium:** `num_consistency_samples` = 0, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **low:** `num_consistency_samples` = 0, `num_self_reflections` = 3, + `reasoning_effort` = `"none"`. + - **base:** `num_consistency_samples` = 0, `num_self_reflections` = 1, + `reasoning_effort` = `"none"`. By default, TLM uses the: "medium" `quality_preset`, "gpt-4.1-mini" base `model`, and `max_tokens` is set to 512. You can set custom values for these @@ -152,12 +153,11 @@ class ProjectValidateParams(TypedDict, total=False): strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. TLM measures consistency via the degree of contradiction between sampled responses that the model considers plausible. - use_self_reflection (bool, default = `True`): whether the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. - Setting this False disables reflection and will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. - Reflection helps quantify aleatoric uncertainty associated with challenging prompts - and catches responses that are noticeably incorrect/bad upon further analysis. + num_self_reflections(int, default = 3): the number of self-reflections to perform where the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. + The maximum number of self-reflections currently supported is 3. Lower values will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. + Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. - similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "semantic"): how the + similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "discrepancy"): how the trustworthiness scoring's consistency algorithm measures similarity between alternative responses considered plausible by the model. Supported similarity measures include - "semantic" (based on natural language inference), "embedding" (based on vector embedding similarity), "embedding_large" (based on a larger embedding model), @@ -175,6 +175,8 @@ class ProjectValidateParams(TypedDict, total=False): The expected input format is a list of dictionaries, where each dictionary has the following keys: - name: Name of the evaluation criteria. - criteria: Instructions specifying the evaluation criteria. + + use_self_reflection (bool, default = `True`): deprecated. Use `num_self_reflections` instead. """ prompt: Optional[str] @@ -194,6 +196,12 @@ class ProjectValidateParams(TypedDict, total=False): task: Optional[str] + tools: Optional[Iterable[Tool]] + """Tools to use for the LLM call. + + If not provided, it is assumed no tools were provided to the LLM. 
+ """ + x_client_library_version: Annotated[str, PropertyInfo(alias="x-client-library-version")] x_integration_type: Annotated[str, PropertyInfo(alias="x-integration-type")] @@ -649,8 +657,26 @@ class Options(TypedDict, total=False): num_consistency_samples: int + num_self_reflections: int + reasoning_effort: str similarity_measure: str use_self_reflection: bool + + +class ToolFunction(TypedDict, total=False): + name: Required[str] + + description: str + + parameters: object + + strict: Optional[bool] + + +class Tool(TypedDict, total=False): + function: Required[ToolFunction] + + type: Required[Literal["function"]] diff --git a/src/codex/types/projects/query_log_list_by_group_response.py b/src/codex/types/projects/query_log_list_by_group_response.py index ae49b95..b3c774b 100644 --- a/src/codex/types/projects/query_log_list_by_group_response.py +++ b/src/codex/types/projects/query_log_list_by_group_response.py @@ -16,6 +16,8 @@ "QueryLogsByGroupQueryLogFormattedNonGuardrailEvalScores", "QueryLogsByGroupQueryLogContext", "QueryLogsByGroupQueryLogDeterministicGuardrailsResults", + "QueryLogsByGroupQueryLogEvaluatedResponseToolCall", + "QueryLogsByGroupQueryLogEvaluatedResponseToolCallFunction", "QueryLogsByGroupQueryLogMessage", "QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutput", "QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputAudio", @@ -41,6 +43,8 @@ "QueryLogsByGroupQueryLogMessageChatCompletionFunctionMessageParam", "QueryLogsByGroupQueryLogMessageChatCompletionDeveloperMessageParam", "QueryLogsByGroupQueryLogMessageChatCompletionDeveloperMessageParamContentUnionMember1", + "QueryLogsByGroupQueryLogTool", + "QueryLogsByGroupQueryLogToolFunction", ] @@ -93,6 +97,20 @@ class QueryLogsByGroupQueryLogDeterministicGuardrailsResults(BaseModel): matches: Optional[List[str]] = None +class QueryLogsByGroupQueryLogEvaluatedResponseToolCallFunction(BaseModel): + arguments: str + + name: str + + +class QueryLogsByGroupQueryLogEvaluatedResponseToolCall(BaseModel): + id: str + + function: QueryLogsByGroupQueryLogEvaluatedResponseToolCallFunction + + type: Literal["function"] + + class QueryLogsByGroupQueryLogMessageChatCompletionAssistantMessageParamOutputAudio(BaseModel): id: str @@ -287,6 +305,22 @@ class QueryLogsByGroupQueryLogMessageChatCompletionDeveloperMessageParam(BaseMod ] +class QueryLogsByGroupQueryLogToolFunction(BaseModel): + name: str + + description: Optional[str] = None + + parameters: Optional[object] = None + + strict: Optional[bool] = None + + +class QueryLogsByGroupQueryLogTool(BaseModel): + function: QueryLogsByGroupQueryLogToolFunction + + type: Literal["function"] + + class QueryLogsByGroupQueryLog(BaseModel): id: str @@ -357,6 +391,12 @@ class QueryLogsByGroupQueryLog(BaseModel): evaluated_response: Optional[str] = None """The response being evaluated from the RAG system (before any remediation)""" + evaluated_response_tool_calls: Optional[List[QueryLogsByGroupQueryLogEvaluatedResponseToolCall]] = None + """Tool calls from the evaluated response, if any. + + Used to log tool calls in the query log. + """ + guardrail_evals: Optional[List[str]] = None """Evals that should trigger guardrail""" @@ -383,6 +423,12 @@ class QueryLogsByGroupQueryLog(BaseModel): primary_eval_issue_score: Optional[float] = None """Score of the primary eval issue""" + tools: Optional[List[QueryLogsByGroupQueryLogTool]] = None + """Tools to use for the LLM call. + + If not provided, it is assumed no tools were provided to the LLM. 
+ """ + class QueryLogsByGroup(BaseModel): query_logs: List[QueryLogsByGroupQueryLog] diff --git a/src/codex/types/projects/query_log_list_groups_response.py b/src/codex/types/projects/query_log_list_groups_response.py index cc3b208..6ed4d14 100644 --- a/src/codex/types/projects/query_log_list_groups_response.py +++ b/src/codex/types/projects/query_log_list_groups_response.py @@ -14,6 +14,8 @@ "FormattedNonGuardrailEvalScores", "Context", "DeterministicGuardrailsResults", + "EvaluatedResponseToolCall", + "EvaluatedResponseToolCallFunction", "Message", "MessageChatCompletionAssistantMessageParamOutput", "MessageChatCompletionAssistantMessageParamOutputAudio", @@ -39,6 +41,8 @@ "MessageChatCompletionFunctionMessageParam", "MessageChatCompletionDeveloperMessageParam", "MessageChatCompletionDeveloperMessageParamContentUnionMember1", + "Tool", + "ToolFunction", ] @@ -91,6 +95,20 @@ class DeterministicGuardrailsResults(BaseModel): matches: Optional[List[str]] = None +class EvaluatedResponseToolCallFunction(BaseModel): + arguments: str + + name: str + + +class EvaluatedResponseToolCall(BaseModel): + id: str + + function: EvaluatedResponseToolCallFunction + + type: Literal["function"] + + class MessageChatCompletionAssistantMessageParamOutputAudio(BaseModel): id: str @@ -277,6 +295,22 @@ class MessageChatCompletionDeveloperMessageParam(BaseModel): ] +class ToolFunction(BaseModel): + name: str + + description: Optional[str] = None + + parameters: Optional[object] = None + + strict: Optional[bool] = None + + +class Tool(BaseModel): + function: ToolFunction + + type: Literal["function"] + + class QueryLogListGroupsResponse(BaseModel): id: str @@ -347,6 +381,12 @@ class QueryLogListGroupsResponse(BaseModel): evaluated_response: Optional[str] = None """The response being evaluated from the RAG system (before any remediation)""" + evaluated_response_tool_calls: Optional[List[EvaluatedResponseToolCall]] = None + """Tool calls from the evaluated response, if any. + + Used to log tool calls in the query log. + """ + guardrail_evals: Optional[List[str]] = None """Evals that should trigger guardrail""" @@ -372,3 +412,9 @@ class QueryLogListGroupsResponse(BaseModel): primary_eval_issue_score: Optional[float] = None """Score of the primary eval issue""" + + tools: Optional[List[Tool]] = None + """Tools to use for the LLM call. + + If not provided, it is assumed no tools were provided to the LLM. 
+ """ diff --git a/src/codex/types/projects/query_log_list_response.py b/src/codex/types/projects/query_log_list_response.py index 0778898..c6737b2 100644 --- a/src/codex/types/projects/query_log_list_response.py +++ b/src/codex/types/projects/query_log_list_response.py @@ -14,6 +14,8 @@ "FormattedNonGuardrailEvalScores", "Context", "DeterministicGuardrailsResults", + "EvaluatedResponseToolCall", + "EvaluatedResponseToolCallFunction", "Message", "MessageChatCompletionAssistantMessageParamOutput", "MessageChatCompletionAssistantMessageParamOutputAudio", @@ -39,6 +41,8 @@ "MessageChatCompletionFunctionMessageParam", "MessageChatCompletionDeveloperMessageParam", "MessageChatCompletionDeveloperMessageParamContentUnionMember1", + "Tool", + "ToolFunction", ] @@ -91,6 +95,20 @@ class DeterministicGuardrailsResults(BaseModel): matches: Optional[List[str]] = None +class EvaluatedResponseToolCallFunction(BaseModel): + arguments: str + + name: str + + +class EvaluatedResponseToolCall(BaseModel): + id: str + + function: EvaluatedResponseToolCallFunction + + type: Literal["function"] + + class MessageChatCompletionAssistantMessageParamOutputAudio(BaseModel): id: str @@ -277,6 +295,22 @@ class MessageChatCompletionDeveloperMessageParam(BaseModel): ] +class ToolFunction(BaseModel): + name: str + + description: Optional[str] = None + + parameters: Optional[object] = None + + strict: Optional[bool] = None + + +class Tool(BaseModel): + function: ToolFunction + + type: Literal["function"] + + class QueryLogListResponse(BaseModel): id: str @@ -341,6 +375,12 @@ class QueryLogListResponse(BaseModel): evaluated_response: Optional[str] = None """The response being evaluated from the RAG system (before any remediation)""" + evaluated_response_tool_calls: Optional[List[EvaluatedResponseToolCall]] = None + """Tool calls from the evaluated response, if any. + + Used to log tool calls in the query log. + """ + guardrail_evals: Optional[List[str]] = None """Evals that should trigger guardrail""" @@ -366,3 +406,9 @@ class QueryLogListResponse(BaseModel): primary_eval_issue_score: Optional[float] = None """Score of the primary eval issue""" + + tools: Optional[List[Tool]] = None + """Tools to use for the LLM call. + + If not provided, it is assumed no tools were provided to the LLM. 
+ """ diff --git a/src/codex/types/projects/query_log_retrieve_response.py b/src/codex/types/projects/query_log_retrieve_response.py index 2751ef2..8fd8662 100644 --- a/src/codex/types/projects/query_log_retrieve_response.py +++ b/src/codex/types/projects/query_log_retrieve_response.py @@ -14,6 +14,8 @@ "FormattedNonGuardrailEvalScores", "Context", "DeterministicGuardrailsResults", + "EvaluatedResponseToolCall", + "EvaluatedResponseToolCallFunction", "Message", "MessageChatCompletionAssistantMessageParamOutput", "MessageChatCompletionAssistantMessageParamOutputAudio", @@ -39,6 +41,8 @@ "MessageChatCompletionFunctionMessageParam", "MessageChatCompletionDeveloperMessageParam", "MessageChatCompletionDeveloperMessageParamContentUnionMember1", + "Tool", + "ToolFunction", ] @@ -91,6 +95,20 @@ class DeterministicGuardrailsResults(BaseModel): matches: Optional[List[str]] = None +class EvaluatedResponseToolCallFunction(BaseModel): + arguments: str + + name: str + + +class EvaluatedResponseToolCall(BaseModel): + id: str + + function: EvaluatedResponseToolCallFunction + + type: Literal["function"] + + class MessageChatCompletionAssistantMessageParamOutputAudio(BaseModel): id: str @@ -277,6 +295,22 @@ class MessageChatCompletionDeveloperMessageParam(BaseModel): ] +class ToolFunction(BaseModel): + name: str + + description: Optional[str] = None + + parameters: Optional[object] = None + + strict: Optional[bool] = None + + +class Tool(BaseModel): + function: ToolFunction + + type: Literal["function"] + + class QueryLogRetrieveResponse(BaseModel): id: str @@ -345,6 +379,12 @@ class QueryLogRetrieveResponse(BaseModel): evaluated_response: Optional[str] = None """The response being evaluated from the RAG system (before any remediation)""" + evaluated_response_tool_calls: Optional[List[EvaluatedResponseToolCall]] = None + """Tool calls from the evaluated response, if any. + + Used to log tool calls in the query log. + """ + guardrail_evals: Optional[List[str]] = None """Evals that should trigger guardrail""" @@ -370,3 +410,9 @@ class QueryLogRetrieveResponse(BaseModel): primary_eval_issue_score: Optional[float] = None """Score of the primary eval issue""" + + tools: Optional[List[Tool]] = None + """Tools to use for the LLM call. + + If not provided, it is assumed no tools were provided to the LLM. 
+ """ diff --git a/src/codex/types/projects/remediation_list_resolved_logs_response.py b/src/codex/types/projects/remediation_list_resolved_logs_response.py index d56f9a4..567a086 100644 --- a/src/codex/types/projects/remediation_list_resolved_logs_response.py +++ b/src/codex/types/projects/remediation_list_resolved_logs_response.py @@ -15,6 +15,8 @@ "QueryLogFormattedNonGuardrailEvalScores", "QueryLogContext", "QueryLogDeterministicGuardrailsResults", + "QueryLogEvaluatedResponseToolCall", + "QueryLogEvaluatedResponseToolCallFunction", "QueryLogMessage", "QueryLogMessageChatCompletionAssistantMessageParamOutput", "QueryLogMessageChatCompletionAssistantMessageParamOutputAudio", @@ -40,6 +42,8 @@ "QueryLogMessageChatCompletionFunctionMessageParam", "QueryLogMessageChatCompletionDeveloperMessageParam", "QueryLogMessageChatCompletionDeveloperMessageParamContentUnionMember1", + "QueryLogTool", + "QueryLogToolFunction", ] @@ -92,6 +96,20 @@ class QueryLogDeterministicGuardrailsResults(BaseModel): matches: Optional[List[str]] = None +class QueryLogEvaluatedResponseToolCallFunction(BaseModel): + arguments: str + + name: str + + +class QueryLogEvaluatedResponseToolCall(BaseModel): + id: str + + function: QueryLogEvaluatedResponseToolCallFunction + + type: Literal["function"] + + class QueryLogMessageChatCompletionAssistantMessageParamOutputAudio(BaseModel): id: str @@ -284,6 +302,22 @@ class QueryLogMessageChatCompletionDeveloperMessageParam(BaseModel): ] +class QueryLogToolFunction(BaseModel): + name: str + + description: Optional[str] = None + + parameters: Optional[object] = None + + strict: Optional[bool] = None + + +class QueryLogTool(BaseModel): + function: QueryLogToolFunction + + type: Literal["function"] + + class QueryLog(BaseModel): id: str @@ -348,6 +382,12 @@ class QueryLog(BaseModel): evaluated_response: Optional[str] = None """The response being evaluated from the RAG system (before any remediation)""" + evaluated_response_tool_calls: Optional[List[QueryLogEvaluatedResponseToolCall]] = None + """Tool calls from the evaluated response, if any. + + Used to log tool calls in the query log. + """ + guardrail_evals: Optional[List[str]] = None """Evals that should trigger guardrail""" @@ -374,6 +414,12 @@ class QueryLog(BaseModel): primary_eval_issue_score: Optional[float] = None """Score of the primary eval issue""" + tools: Optional[List[QueryLogTool]] = None + """Tools to use for the LLM call. + + If not provided, it is assumed no tools were provided to the LLM. + """ + class RemediationListResolvedLogsResponse(BaseModel): query_logs: List[QueryLog] diff --git a/src/codex/types/tlm_prompt_params.py b/src/codex/types/tlm_prompt_params.py index 3c04bfc..8749c5a 100644 --- a/src/codex/types/tlm_prompt_params.py +++ b/src/codex/types/tlm_prompt_params.py @@ -30,17 +30,16 @@ class TlmPromptParams(TypedDict, total=False): The default values corresponding to each quality preset are: - - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8, - `use_self_reflection` = True. This preset improves LLM responses. - - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8, - `use_self_reflection` = True. This preset improves LLM responses. - - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8, - `use_self_reflection` = True. - - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4, - `use_self_reflection` = True. - - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0, - `use_self_reflection` = False. 
When using `get_trustworthiness_score()` on - "base" preset, a faster self-reflection is employed. + - **best:** `num_consistency_samples` = 8, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **high:** `num_consistency_samples` = 4, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **medium:** `num_consistency_samples` = 0, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **low:** `num_consistency_samples` = 0, `num_self_reflections` = 3, + `reasoning_effort` = `"none"`. + - **base:** `num_consistency_samples` = 0, `num_self_reflections` = 1, + `reasoning_effort` = `"none"`. By default, TLM uses the: "medium" `quality_preset`, "gpt-4.1-mini" base `model`, and `max_tokens` is set to 512. You can set custom values for these @@ -76,12 +75,11 @@ class TlmPromptParams(TypedDict, total=False): strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. TLM measures consistency via the degree of contradiction between sampled responses that the model considers plausible. - use_self_reflection (bool, default = `True`): whether the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. - Setting this False disables reflection and will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. - Reflection helps quantify aleatoric uncertainty associated with challenging prompts - and catches responses that are noticeably incorrect/bad upon further analysis. + num_self_reflections(int, default = 3): the number of self-reflections to perform where the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. + The maximum number of self-reflections currently supported is 3. Lower values will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. + Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. - similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "semantic"): how the + similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "discrepancy"): how the trustworthiness scoring's consistency algorithm measures similarity between alternative responses considered plausible by the model. Supported similarity measures include - "semantic" (based on natural language inference), "embedding" (based on vector embedding similarity), "embedding_large" (based on a larger embedding model), @@ -99,6 +97,8 @@ class TlmPromptParams(TypedDict, total=False): The expected input format is a list of dictionaries, where each dictionary has the following keys: - name: Name of the evaluation criteria. - criteria: Instructions specifying the evaluation criteria. + + use_self_reflection (bool, default = `True`): deprecated. Use `num_self_reflections` instead. 
""" quality_preset: Literal["best", "high", "medium", "low", "base"] @@ -120,6 +120,8 @@ class Options(TypedDict, total=False): num_consistency_samples: int + num_self_reflections: int + reasoning_effort: str similarity_measure: str diff --git a/src/codex/types/tlm_score_params.py b/src/codex/types/tlm_score_params.py index 95bcc4c..4a0a32a 100644 --- a/src/codex/types/tlm_score_params.py +++ b/src/codex/types/tlm_score_params.py @@ -32,17 +32,16 @@ class TlmScoreParams(TypedDict, total=False): The default values corresponding to each quality preset are: - - **best:** `num_candidate_responses` = 6, `num_consistency_samples` = 8, - `use_self_reflection` = True. This preset improves LLM responses. - - **high:** `num_candidate_responses` = 4, `num_consistency_samples` = 8, - `use_self_reflection` = True. This preset improves LLM responses. - - **medium:** `num_candidate_responses` = 1, `num_consistency_samples` = 8, - `use_self_reflection` = True. - - **low:** `num_candidate_responses` = 1, `num_consistency_samples` = 4, - `use_self_reflection` = True. - - **base:** `num_candidate_responses` = 1, `num_consistency_samples` = 0, - `use_self_reflection` = False. When using `get_trustworthiness_score()` on - "base" preset, a faster self-reflection is employed. + - **best:** `num_consistency_samples` = 8, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **high:** `num_consistency_samples` = 4, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **medium:** `num_consistency_samples` = 0, `num_self_reflections` = 3, + `reasoning_effort` = `"high"`. + - **low:** `num_consistency_samples` = 0, `num_self_reflections` = 3, + `reasoning_effort` = `"none"`. + - **base:** `num_consistency_samples` = 0, `num_self_reflections` = 1, + `reasoning_effort` = `"none"`. By default, TLM uses the: "medium" `quality_preset`, "gpt-4.1-mini" base `model`, and `max_tokens` is set to 512. You can set custom values for these @@ -78,12 +77,11 @@ class TlmScoreParams(TypedDict, total=False): strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. TLM measures consistency via the degree of contradiction between sampled responses that the model considers plausible. - use_self_reflection (bool, default = `True`): whether the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. - Setting this False disables reflection and will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. - Reflection helps quantify aleatoric uncertainty associated with challenging prompts - and catches responses that are noticeably incorrect/bad upon further analysis. + num_self_reflections(int, default = 3): the number of self-reflections to perform where the LLM is asked to reflect on the given response and directly evaluate correctness/confidence. + The maximum number of self-reflections currently supported is 3. Lower values will reduce runtimes/costs, but potentially also the reliability of trustworthiness scores. + Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. 
- similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "semantic"): how the + similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "discrepancy"): how the trustworthiness scoring's consistency algorithm measures similarity between alternative responses considered plausible by the model. Supported similarity measures include - "semantic" (based on natural language inference), "embedding" (based on vector embedding similarity), "embedding_large" (based on a larger embedding model), @@ -101,6 +99,8 @@ class TlmScoreParams(TypedDict, total=False): The expected input format is a list of dictionaries, where each dictionary has the following keys: - name: Name of the evaluation criteria. - criteria: Instructions specifying the evaluation criteria. + + use_self_reflection (bool, default = `True`): deprecated. Use `num_self_reflections` instead. """ quality_preset: Literal["best", "high", "medium", "low", "base"] @@ -122,6 +122,8 @@ class Options(TypedDict, total=False): num_consistency_samples: int + num_self_reflections: int + reasoning_effort: str similarity_measure: str diff --git a/tests/api_resources/test_projects.py b/tests/api_resources/test_projects.py index ae3f4f0..7884db0 100644 --- a/tests/api_resources/test_projects.py +++ b/tests/api_resources/test_projects.py @@ -622,6 +622,7 @@ def test_method_validate_with_all_params(self, client: Codex) -> None: "model": "model", "num_candidate_responses": 0, "num_consistency_samples": 0, + "num_self_reflections": 0, "reasoning_effort": "reasoning_effort", "similarity_measure": "similarity_measure", "use_self_reflection": True, @@ -630,6 +631,17 @@ def test_method_validate_with_all_params(self, client: Codex) -> None: quality_preset="best", rewritten_question="rewritten_question", task="task", + tools=[ + { + "function": { + "name": "name", + "description": "description", + "parameters": {}, + "strict": True, + }, + "type": "function", + } + ], x_client_library_version="x-client-library-version", x_integration_type="x-integration-type", x_source="x-source", @@ -1284,6 +1296,7 @@ async def test_method_validate_with_all_params(self, async_client: AsyncCodex) - "model": "model", "num_candidate_responses": 0, "num_consistency_samples": 0, + "num_self_reflections": 0, "reasoning_effort": "reasoning_effort", "similarity_measure": "similarity_measure", "use_self_reflection": True, @@ -1292,6 +1305,17 @@ async def test_method_validate_with_all_params(self, async_client: AsyncCodex) - quality_preset="best", rewritten_question="rewritten_question", task="task", + tools=[ + { + "function": { + "name": "name", + "description": "description", + "parameters": {}, + "strict": True, + }, + "type": "function", + } + ], x_client_library_version="x-client-library-version", x_integration_type="x-integration-type", x_source="x-source", diff --git a/tests/api_resources/test_tlm.py b/tests/api_resources/test_tlm.py index 41376a4..da0a9ad 100644 --- a/tests/api_resources/test_tlm.py +++ b/tests/api_resources/test_tlm.py @@ -38,6 +38,7 @@ def test_method_prompt_with_all_params(self, client: Codex) -> None: "model": "model", "num_candidate_responses": 0, "num_consistency_samples": 0, + "num_self_reflections": 0, "reasoning_effort": "reasoning_effort", "similarity_measure": "similarity_measure", "use_self_reflection": True, @@ -96,6 +97,7 @@ def test_method_score_with_all_params(self, client: Codex) -> None: "model": "model", "num_candidate_responses": 0, 
"num_consistency_samples": 0, + "num_self_reflections": 0, "reasoning_effort": "reasoning_effort", "similarity_measure": "similarity_measure", "use_self_reflection": True, @@ -160,6 +162,7 @@ async def test_method_prompt_with_all_params(self, async_client: AsyncCodex) -> "model": "model", "num_candidate_responses": 0, "num_consistency_samples": 0, + "num_self_reflections": 0, "reasoning_effort": "reasoning_effort", "similarity_measure": "similarity_measure", "use_self_reflection": True, @@ -218,6 +221,7 @@ async def test_method_score_with_all_params(self, async_client: AsyncCodex) -> N "model": "model", "num_candidate_responses": 0, "num_consistency_samples": 0, + "num_self_reflections": 0, "reasoning_effort": "reasoning_effort", "similarity_measure": "similarity_measure", "use_self_reflection": True, From 8b2ae15a6976beeb10f3cbdf7ef9f9adb95b238c Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Mon, 28 Jul 2025 17:18:19 +0000 Subject: [PATCH 20/20] release: 0.1.0-alpha.24 --- .release-please-manifest.json | 2 +- CHANGELOG.md | 33 +++++++++++++++++++++++++++++++++ pyproject.toml | 2 +- src/codex/_version.py | 2 +- 4 files changed, 36 insertions(+), 3 deletions(-) diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 1c0bb88..380b6f9 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "0.1.0-alpha.23" + ".": "0.1.0-alpha.24" } \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 131d328..e4f0a42 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,38 @@ # Changelog +## 0.1.0-alpha.24 (2025-07-28) + +Full Changelog: [v0.1.0-alpha.23...v0.1.0-alpha.24](https://github.com/cleanlab/codex-python/compare/v0.1.0-alpha.23...v0.1.0-alpha.24) + +### Features + +* **api:** api update ([ce6d89f](https://github.com/cleanlab/codex-python/commit/ce6d89f3c885765b21c6ba43b1b7b9a1ebf8a61e)) +* **api:** api update ([1a06cfc](https://github.com/cleanlab/codex-python/commit/1a06cfc7c19943ac468b2ec9f2787215363cf77e)) +* **api:** api update ([2ee8095](https://github.com/cleanlab/codex-python/commit/2ee809593ddb15c4de776a2048883287ec5c0cdb)) +* **api:** api update ([6992031](https://github.com/cleanlab/codex-python/commit/6992031e6aa610031f24d818040050b0fc185c34)) +* **api:** api update ([7e7caf9](https://github.com/cleanlab/codex-python/commit/7e7caf9a3ad214c5df3686122e4f26b850dcb8b0)) +* **api:** api update ([0a33c47](https://github.com/cleanlab/codex-python/commit/0a33c4710d4890d17ddd973ba4a2ed183e45e4c7)) +* **api:** api update ([575d190](https://github.com/cleanlab/codex-python/commit/575d1901319984fea901ce216323a5259e17f98c)) +* **api:** api update ([f55f4b7](https://github.com/cleanlab/codex-python/commit/f55f4b768f8c1d00bdf61e56b0a7227c8424c5b6)) +* **api:** api update ([b956ce0](https://github.com/cleanlab/codex-python/commit/b956ce083ef3c507a7649577724f337a562c427a)) +* **api:** remove deprecated endpoint increment_queries ([6b52a98](https://github.com/cleanlab/codex-python/commit/6b52a985af9df1b6618d0685fafee2bae7e98566)) + + +### Bug Fixes + +* **client:** don't send Content-Type header on GET requests ([4732aae](https://github.com/cleanlab/codex-python/commit/4732aaeb03872abffb4e13df6dd1994711bd4268)) +* **parsing:** correctly handle nested discriminated unions ([b374589](https://github.com/cleanlab/codex-python/commit/b374589baf01ca1236cf0823305e6bca037cf12b)) +* **parsing:** ignore empty metadata 
([1cdf391](https://github.com/cleanlab/codex-python/commit/1cdf391742b196d5a723307e8c202a69e00b371d)) +* **parsing:** parse extra field types ([3c74ca0](https://github.com/cleanlab/codex-python/commit/3c74ca0f1a913bed65cc4c6580dda25a07a90b74)) + + +### Chores + +* **internal:** bump pinned h11 dep ([7ce51e9](https://github.com/cleanlab/codex-python/commit/7ce51e93023f66f3e343e379fc1930ddba335e9b)) +* **package:** mark python 3.13 as supported ([5cba949](https://github.com/cleanlab/codex-python/commit/5cba94956fff8ca4de99426a20e5c67f0ce6a2ac)) +* **project:** add settings file for vscode ([00df8ec](https://github.com/cleanlab/codex-python/commit/00df8ec35d44e5bdc6e68661a92d9d21905222c7)) +* **readme:** fix version rendering on pypi ([d05336d](https://github.com/cleanlab/codex-python/commit/d05336d89f5a49b09d7b1f85e7cb3ed74035157a)) + ## 0.1.0-alpha.23 (2025-07-07) Full Changelog: [v0.1.0-alpha.22...v0.1.0-alpha.23](https://github.com/cleanlab/codex-python/compare/v0.1.0-alpha.22...v0.1.0-alpha.23) diff --git a/pyproject.toml b/pyproject.toml index 964b48a..a061100 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "codex-sdk" -version = "0.1.0-alpha.23" +version = "0.1.0-alpha.24" description = "Internal SDK used within cleanlab-codex package. Refer to https://pypi.org/project/cleanlab-codex/ instead." dynamic = ["readme"] license = "MIT" diff --git a/src/codex/_version.py b/src/codex/_version.py index 18f2dcb..e020cb9 100644 --- a/src/codex/_version.py +++ b/src/codex/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. __title__ = "codex" -__version__ = "0.1.0-alpha.23" # x-release-please-version +__version__ = "0.1.0-alpha.24" # x-release-please-version
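
Taken together, the docstring updates in this patch replace the boolean `use_self_reflection` flag with an integer `num_self_reflections` count, re-describe the quality presets in terms of `num_consistency_samples`, `num_self_reflections`, and `reasoning_effort`, and change the documented default `similarity_measure` to "discrepancy". The sketch below mirrors the updated preset table and test fixtures; the concrete values are illustrative, and the call sites that would consume this mapping (for example `client.tlm.score(...)`) are assumed rather than taken verbatim from this changeset.

    # Minimal sketch of the updated TLM `options` mapping, following the "high"
    # preset documented above: 4 consistency samples, 3 self-reflections, and
    # "high" reasoning effort.
    options = {
        "model": "gpt-4.1-mini",              # documented default base model
        "num_consistency_samples": 4,
        "num_self_reflections": 3,            # replaces the deprecated use_self_reflection flag
        "reasoning_effort": "high",
        "similarity_measure": "discrepancy",  # new documented default
    }

This mapping is what the `options=` keyword on `tlm.prompt`, `tlm.score`, and `projects.validate` accepts after this change; `use_self_reflection` remains in the `Options` TypedDict but is documented as deprecated in favor of `num_self_reflections`.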
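
The new `tools` parameter uses the familiar function-tool shape: each entry is `{"type": "function", "function": {...}}`, where the inner object carries `name`, an optional `description`, JSON-schema `parameters`, and `strict`, and any tool calls in the evaluated response are surfaced on query logs as `evaluated_response_tool_calls`. Below is a hedged sketch of building such a list for `projects.validate(..., tools=...)`; the `get_weather` tool and its schema are hypothetical placeholders, not something defined in this diff.

    from typing import Any, Dict, List

    # Hypothetical function tool, shaped like the Tool / ToolFunction TypedDicts
    # added in project_validate_params.py.
    tools: List[Dict[str, Any]] = [
        {
            "type": "function",
            "function": {
                "name": "get_weather",          # hypothetical tool name
                "description": "Look up the current weather for a city.",
                "parameters": {                 # JSON schema for the tool's arguments
                    "type": "object",
                    "properties": {"city": {"type": "string"}},
                    "required": ["city"],
                },
                "strict": True,
            },
        }
    ]

    # Passed as `tools=tools` to projects.validate(); when omitted, the API
    # assumes no tools were provided to the LLM, per the new field docstring.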