diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fc62f784..c02247a3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -29,3 +29,27 @@ jobs: - name: Run lints run: ./scripts/lint + + upload: + if: github.repository == 'stainless-sdks/codex-python' + timeout-minutes: 10 + name: upload + permissions: + contents: read + id-token: write + runs-on: depot-ubuntu-24.04 + steps: + - uses: actions/checkout@v4 + + - name: Get GitHub OIDC Token + id: github-oidc + uses: actions/github-script@v6 + with: + script: core.setOutput('github_token', await core.getIDToken()); + + - name: Upload tarball + env: + URL: https://pkg.stainless.com/s + AUTH: ${{ steps.github-oidc.outputs.github_token }} + SHA: ${{ github.sha }} + run: ./scripts/utils/upload-artifact.sh diff --git a/.release-please-manifest.json b/.release-please-manifest.json index b386befd..fac14074 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "0.1.0-alpha.19" + ".": "0.1.0-alpha.20" } \ No newline at end of file diff --git a/.stats.yml b/.stats.yml index f01e1b9f..12a0365a 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,3 +1,3 @@ configured_endpoints: 44 -openapi_spec_hash: 97719fe7ae4c641a5a020dd21f2978dd +openapi_spec_hash: 9d81a4b0eca6d3629ba9d5432a65655c config_hash: 659f65b6ccf5612986f920f7f9abbcb5 diff --git a/CHANGELOG.md b/CHANGELOG.md index 3032a1a6..f151d604 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,26 @@ # Changelog +## 0.1.0-alpha.20 (2025-05-15) + +Full Changelog: [v0.1.0-alpha.19...v0.1.0-alpha.20](https://github.com/cleanlab/codex-python/compare/v0.1.0-alpha.19...v0.1.0-alpha.20) + +### Features + +* **api:** api update ([2e74162](https://github.com/cleanlab/codex-python/commit/2e741628a380d0fefe117f80bb3796b111575df3)) +* **api:** api update ([9e85827](https://github.com/cleanlab/codex-python/commit/9e85827e0b1a58011a8ead15c695cb175744325a)) + + +### Bug Fixes + +* **package:** support direct 
resource imports ([09066c8](https://github.com/cleanlab/codex-python/commit/09066c8bf38b23fd3d902b42c4f4f769161b0e2e)) + + +### Chores + +* **ci:** upload sdks to package manager ([6594b48](https://github.com/cleanlab/codex-python/commit/6594b48736ea79e7f9457cb3b47abfa17618565b)) +* **internal:** avoid errors for isinstance checks on proxies ([a1d7faf](https://github.com/cleanlab/codex-python/commit/a1d7fafa46e9100a4d29c46b48919025b26a0cfa)) +* **internal:** version bump ([971e28d](https://github.com/cleanlab/codex-python/commit/971e28dd483b3f2d38094f368baebd5eb0906e2c)) + ## 0.1.0-alpha.19 (2025-05-07) Full Changelog: [v0.1.0-alpha.18...v0.1.0-alpha.19](https://github.com/cleanlab/codex-python/compare/v0.1.0-alpha.18...v0.1.0-alpha.19) diff --git a/pyproject.toml b/pyproject.toml index 13866c7e..04d039a2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "codex-sdk" -version = "0.1.0-alpha.19" +version = "0.1.0-alpha.20" description = "Internal SDK used within cleanlab-codex package. Refer to https://pypi.org/project/cleanlab-codex/ instead." dynamic = ["readme"] license = "MIT" diff --git a/scripts/utils/upload-artifact.sh b/scripts/utils/upload-artifact.sh new file mode 100755 index 00000000..ebb04789 --- /dev/null +++ b/scripts/utils/upload-artifact.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash +set -exuo pipefail + +RESPONSE=$(curl -X POST "$URL" \ + -H "Authorization: Bearer $AUTH" \ + -H "Content-Type: application/json") + +SIGNED_URL=$(echo "$RESPONSE" | jq -r '.url') + +if [[ "$SIGNED_URL" == "null" ]]; then + echo -e "\033[31mFailed to get signed URL.\033[0m" + exit 1 +fi + +UPLOAD_RESPONSE=$(tar -cz . 
| curl -v -X PUT \ + -H "Content-Type: application/gzip" \ + --data-binary @- "$SIGNED_URL" 2>&1) + +if echo "$UPLOAD_RESPONSE" | grep -q "HTTP/[0-9.]* 200"; then + echo -e "\033[32mUploaded build to Stainless storage.\033[0m" + echo -e "\033[32mInstallation: pip install 'https://pkg.stainless.com/s/codex-python/$SHA'\033[0m" +else + echo -e "\033[31mFailed to upload artifact.\033[0m" + exit 1 +fi diff --git a/src/codex/__init__.py b/src/codex/__init__.py index d6dffe2c..5c5f678c 100644 --- a/src/codex/__init__.py +++ b/src/codex/__init__.py @@ -1,5 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. +import typing as _t + from . import types from ._types import NOT_GIVEN, Omit, NoneType, NotGiven, Transport, ProxiesTypes from ._utils import file_from_path @@ -80,6 +82,9 @@ "DefaultAsyncHttpxClient", ] +if not _t.TYPE_CHECKING: + from ._utils._resources_proxy import resources as resources + _setup_logging() # Update the __module__ attribute for exported symbols so that diff --git a/src/codex/_utils/_proxy.py b/src/codex/_utils/_proxy.py index ffd883e9..0f239a33 100644 --- a/src/codex/_utils/_proxy.py +++ b/src/codex/_utils/_proxy.py @@ -46,7 +46,10 @@ def __dir__(self) -> Iterable[str]: @property # type: ignore @override def __class__(self) -> type: # pyright: ignore - proxied = self.__get_proxied__() + try: + proxied = self.__get_proxied__() + except Exception: + return type(self) if issubclass(type(proxied), LazyProxy): return type(proxied) return proxied.__class__ diff --git a/src/codex/_utils/_resources_proxy.py b/src/codex/_utils/_resources_proxy.py new file mode 100644 index 00000000..346e8681 --- /dev/null +++ b/src/codex/_utils/_resources_proxy.py @@ -0,0 +1,24 @@ +from __future__ import annotations + +from typing import Any +from typing_extensions import override + +from ._proxy import LazyProxy + + +class ResourcesProxy(LazyProxy[Any]): + """A proxy for the `codex.resources` module. 
+ + This is used so that we can lazily import `codex.resources` only when + needed *and* so that users can just import `codex` and reference `codex.resources` + """ + + @override + def __load__(self) -> Any: + import importlib + + mod = importlib.import_module("codex.resources") + return mod + + +resources = ResourcesProxy().__as_proxied__() diff --git a/src/codex/_version.py b/src/codex/_version.py index 87d42e64..44d6131d 100644 --- a/src/codex/_version.py +++ b/src/codex/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. __title__ = "codex" -__version__ = "0.1.0-alpha.19" # x-release-please-version +__version__ = "0.1.0-alpha.20" # x-release-please-version diff --git a/src/codex/resources/projects/clusters.py b/src/codex/resources/projects/clusters.py index fea7e28e..97124642 100644 --- a/src/codex/resources/projects/clusters.py +++ b/src/codex/resources/projects/clusters.py @@ -50,7 +50,9 @@ def list( self, project_id: str, *, - eval_issue_types: List[Literal["hallucination", "search_failure", "unhelpful", "difficult_query"]] + eval_issue_types: List[ + Literal["hallucination", "search_failure", "unhelpful", "difficult_query", "unsupported"] + ] | NotGiven = NOT_GIVEN, instruction_adherence_failure: Optional[Literal["html_format", "content_structure"]] | NotGiven = NOT_GIVEN, limit: int | NotGiven = NOT_GIVEN, @@ -177,7 +179,9 @@ def list( self, project_id: str, *, - eval_issue_types: List[Literal["hallucination", "search_failure", "unhelpful", "difficult_query"]] + eval_issue_types: List[ + Literal["hallucination", "search_failure", "unhelpful", "difficult_query", "unsupported"] + ] | NotGiven = NOT_GIVEN, instruction_adherence_failure: Optional[Literal["html_format", "content_structure"]] | NotGiven = NOT_GIVEN, limit: int | NotGiven = NOT_GIVEN, diff --git a/src/codex/resources/projects/projects.py b/src/codex/resources/projects/projects.py index b8fbaf7e..6195d1a4 100644 --- 
a/src/codex/resources/projects/projects.py +++ b/src/codex/resources/projects/projects.py @@ -426,8 +426,8 @@ def validate( query: str, response: str, use_llm_matching: bool | NotGiven = NOT_GIVEN, - bad_response_thresholds: project_validate_params.BadResponseThresholds | NotGiven = NOT_GIVEN, constrain_outputs: Optional[List[str]] | NotGiven = NOT_GIVEN, + custom_eval_thresholds: Optional[Dict[str, float]] | NotGiven = NOT_GIVEN, custom_metadata: Optional[object] | NotGiven = NOT_GIVEN, eval_scores: Optional[Dict[str, float]] | NotGiven = NOT_GIVEN, options: Optional[project_validate_params.Options] | NotGiven = NOT_GIVEN, @@ -451,10 +451,13 @@ def validate( query will be recorded in the project for SMEs to answer. Args: + custom_eval_thresholds: Optional custom thresholds for specific evals. Keys should match with the keys + in the `eval_scores` dictionary. + custom_metadata: Arbitrary metadata supplied by the user/system - eval_scores: Evaluation scores to use for flagging a response as bad. If not provided, TLM - will be used to generate scores. + eval_scores: Scores assessing different aspects of the RAG system. If not provided, TLM will + be used to generate scores. options: Typed dict of advanced configuration options for the Trustworthy Language Model. 
Many of these configurations are determined by the quality preset selected @@ -575,8 +578,8 @@ def validate( "prompt": prompt, "query": query, "response": response, - "bad_response_thresholds": bad_response_thresholds, "constrain_outputs": constrain_outputs, + "custom_eval_thresholds": custom_eval_thresholds, "custom_metadata": custom_metadata, "eval_scores": eval_scores, "options": options, @@ -967,8 +970,8 @@ async def validate( query: str, response: str, use_llm_matching: bool | NotGiven = NOT_GIVEN, - bad_response_thresholds: project_validate_params.BadResponseThresholds | NotGiven = NOT_GIVEN, constrain_outputs: Optional[List[str]] | NotGiven = NOT_GIVEN, + custom_eval_thresholds: Optional[Dict[str, float]] | NotGiven = NOT_GIVEN, custom_metadata: Optional[object] | NotGiven = NOT_GIVEN, eval_scores: Optional[Dict[str, float]] | NotGiven = NOT_GIVEN, options: Optional[project_validate_params.Options] | NotGiven = NOT_GIVEN, @@ -992,10 +995,13 @@ async def validate( query will be recorded in the project for SMEs to answer. Args: + custom_eval_thresholds: Optional custom thresholds for specific evals. Keys should match with the keys + in the `eval_scores` dictionary. + custom_metadata: Arbitrary metadata supplied by the user/system - eval_scores: Evaluation scores to use for flagging a response as bad. If not provided, TLM - will be used to generate scores. + eval_scores: Scores assessing different aspects of the RAG system. If not provided, TLM will + be used to generate scores. options: Typed dict of advanced configuration options for the Trustworthy Language Model. 
Many of these configurations are determined by the quality preset selected @@ -1116,8 +1122,8 @@ async def validate( "prompt": prompt, "query": query, "response": response, - "bad_response_thresholds": bad_response_thresholds, "constrain_outputs": constrain_outputs, + "custom_eval_thresholds": custom_eval_thresholds, "custom_metadata": custom_metadata, "eval_scores": eval_scores, "options": options, diff --git a/src/codex/types/project_validate_params.py b/src/codex/types/project_validate_params.py index f6214cbe..a855aa6f 100644 --- a/src/codex/types/project_validate_params.py +++ b/src/codex/types/project_validate_params.py @@ -7,7 +7,7 @@ from .._utils import PropertyInfo -__all__ = ["ProjectValidateParams", "BadResponseThresholds", "Options"] +__all__ = ["ProjectValidateParams", "Options"] class ProjectValidateParams(TypedDict, total=False): @@ -21,15 +21,19 @@ class ProjectValidateParams(TypedDict, total=False): use_llm_matching: bool - bad_response_thresholds: BadResponseThresholds - constrain_outputs: Optional[List[str]] + custom_eval_thresholds: Optional[Dict[str, float]] + """Optional custom thresholds for specific evals. + + Keys should match with the keys in the `eval_scores` dictionary. + """ + custom_metadata: Optional[object] """Arbitrary metadata supplied by the user/system""" eval_scores: Optional[Dict[str, float]] - """Evaluation scores to use for flagging a response as bad. + """Scores assessing different aspects of the RAG system. If not provided, TLM will be used to generate scores. 
""" @@ -139,16 +143,6 @@ class ProjectValidateParams(TypedDict, total=False): x_stainless_package_version: Annotated[str, PropertyInfo(alias="x-stainless-package-version")] -class BadResponseThresholds(TypedDict, total=False): - context_sufficiency: Optional[float] - - query_ease: Optional[float] - - response_helpfulness: Optional[float] - - trustworthiness: Optional[float] - - class Options(TypedDict, total=False): custom_eval_criteria: Iterable[object] diff --git a/src/codex/types/project_validate_response.py b/src/codex/types/project_validate_response.py index e2104360..a88874da 100644 --- a/src/codex/types/project_validate_response.py +++ b/src/codex/types/project_validate_response.py @@ -8,7 +8,7 @@ class EvalScores(BaseModel): - is_bad: bool + failed: bool score: Optional[float] = None @@ -18,7 +18,7 @@ class EvalScores(BaseModel): class ProjectValidateResponse(BaseModel): eval_scores: Dict[str, EvalScores] """ - Evaluation scores for the original response along with a boolean flag, `is_bad`, + Evaluation scores for the original response along with a boolean flag, `failed`, indicating whether the score is below the threshold. 
""" diff --git a/src/codex/types/projects/cluster_list_params.py b/src/codex/types/projects/cluster_list_params.py index cff2b85e..20284d84 100644 --- a/src/codex/types/projects/cluster_list_params.py +++ b/src/codex/types/projects/cluster_list_params.py @@ -9,7 +9,7 @@ class ClusterListParams(TypedDict, total=False): - eval_issue_types: List[Literal["hallucination", "search_failure", "unhelpful", "difficult_query"]] + eval_issue_types: List[Literal["hallucination", "search_failure", "unhelpful", "difficult_query", "unsupported"]] instruction_adherence_failure: Optional[Literal["html_format", "content_structure"]] diff --git a/src/codex/types/projects/entry_notify_sme_params.py b/src/codex/types/projects/entry_notify_sme_params.py index 409f8bc5..3d06d1a6 100644 --- a/src/codex/types/projects/entry_notify_sme_params.py +++ b/src/codex/types/projects/entry_notify_sme_params.py @@ -18,4 +18,13 @@ class EntryNotifySmeParams(TypedDict, total=False): class ViewContext(TypedDict, total=False): page: Required[int] - filter: Literal["unanswered", "answered", "all", "hallucination", "search_failure", "unhelpful", "difficult_query"] + filter: Literal[ + "unanswered", + "answered", + "all", + "hallucination", + "search_failure", + "unhelpful", + "difficult_query", + "unsupported", + ] diff --git a/tests/api_resources/test_projects.py b/tests/api_resources/test_projects.py index f7ca6e01..19e41a0a 100644 --- a/tests/api_resources/test_projects.py +++ b/tests/api_resources/test_projects.py @@ -444,13 +444,8 @@ def test_method_validate_with_all_params(self, client: Codex) -> None: query="query", response="response", use_llm_matching=True, - bad_response_thresholds={ - "context_sufficiency": 0, - "query_ease": 0, - "response_helpfulness": 0, - "trustworthiness": 0, - }, constrain_outputs=["string"], + custom_eval_thresholds={"foo": 0}, custom_metadata={}, eval_scores={"foo": 0}, options={ @@ -944,13 +939,8 @@ async def test_method_validate_with_all_params(self, async_client: 
AsyncCodex) - query="query", response="response", use_llm_matching=True, - bad_response_thresholds={ - "context_sufficiency": 0, - "query_ease": 0, - "response_helpfulness": 0, - "trustworthiness": 0, - }, constrain_outputs=["string"], + custom_eval_thresholds={"foo": 0}, custom_metadata={}, eval_scores={"foo": 0}, options={ diff --git a/tests/test_utils/test_proxy.py b/tests/test_utils/test_proxy.py index 1277e93e..0a34f4d9 100644 --- a/tests/test_utils/test_proxy.py +++ b/tests/test_utils/test_proxy.py @@ -21,3 +21,14 @@ def test_recursive_proxy() -> None: assert dir(proxy) == [] assert type(proxy).__name__ == "RecursiveLazyProxy" assert type(operator.attrgetter("name.foo.bar.baz")(proxy)).__name__ == "RecursiveLazyProxy" + + +def test_isinstance_does_not_error() -> None: + class AlwaysErrorProxy(LazyProxy[Any]): + @override + def __load__(self) -> Any: + raise RuntimeError("Mocking missing dependency") + + proxy = AlwaysErrorProxy() + assert not isinstance(proxy, dict) + assert isinstance(proxy, LazyProxy)