From 9f1791a91899b8e016d8be18cde8923a770c1a56 Mon Sep 17 00:00:00 2001 From: "namrata.ghadi" Date: Wed, 6 May 2026 12:47:34 -0700 Subject: [PATCH 1/2] add new luna client --- evaluators/contrib/galileo/pyproject.toml | 1 + .../__init__.py | 17 + .../luna/__init__.py | 19 ++ .../luna/client.py | 256 +++++++++++++++ .../luna/config.py | 94 ++++++ .../luna/evaluator.py | 259 ++++++++++++++++ .../agent_control_evaluator_galileo/py.typed | 1 + .../galileo/tests/test_luna_evaluator.py | 291 ++++++++++++++++++ examples/README.md | 1 + examples/galileo_luna/README.md | 46 +++ examples/galileo_luna/demo_agent.py | 129 ++++++++ examples/galileo_luna/pyproject.toml | 25 ++ examples/galileo_luna/setup_controls.py | 198 ++++++++++++ .../src/agent_control/evaluators/__init__.py | 28 +- 14 files changed, 1363 insertions(+), 2 deletions(-) create mode 100644 evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/__init__.py create mode 100644 evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py create mode 100644 evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/config.py create mode 100644 evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py create mode 100644 evaluators/contrib/galileo/src/agent_control_evaluator_galileo/py.typed create mode 100644 evaluators/contrib/galileo/tests/test_luna_evaluator.py create mode 100644 examples/galileo_luna/README.md create mode 100644 examples/galileo_luna/demo_agent.py create mode 100644 examples/galileo_luna/pyproject.toml create mode 100644 examples/galileo_luna/setup_controls.py diff --git a/evaluators/contrib/galileo/pyproject.toml b/evaluators/contrib/galileo/pyproject.toml index ff70f2fb..21b1accc 100644 --- a/evaluators/contrib/galileo/pyproject.toml +++ b/evaluators/contrib/galileo/pyproject.toml @@ -23,6 +23,7 @@ dev = [ ] [project.entry-points."agent_control.evaluators"] +"galileo.luna" = "agent_control_evaluator_galileo.luna:LunaEvaluator" 
"galileo.luna2" = "agent_control_evaluator_galileo.luna2:Luna2Evaluator" [build-system] diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/__init__.py b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/__init__.py index 6389087f..d9269fe1 100644 --- a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/__init__.py +++ b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/__init__.py @@ -3,6 +3,7 @@ This package provides Galileo evaluators for agent-control. Available evaluators: + - galileo.luna: Galileo Luna direct scorer evaluation - galileo.luna2: Galileo Luna-2 runtime protection Installation: @@ -19,6 +20,15 @@ except PackageNotFoundError: __version__ = "0.0.0.dev" +from agent_control_evaluator_galileo.luna import ( + LUNA_AVAILABLE, + GalileoLunaClient, + LunaEvaluator, + LunaEvaluatorConfig, + LunaOperator, + ScorerInvokeRequest, + ScorerInvokeResponse, +) from agent_control_evaluator_galileo.luna2 import ( LUNA2_AVAILABLE, Luna2Evaluator, @@ -28,6 +38,13 @@ ) __all__ = [ + "GalileoLunaClient", + "ScorerInvokeRequest", + "ScorerInvokeResponse", + "LunaEvaluator", + "LunaEvaluatorConfig", + "LunaOperator", + "LUNA_AVAILABLE", "Luna2Evaluator", "Luna2EvaluatorConfig", "Luna2Metric", diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/__init__.py b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/__init__.py new file mode 100644 index 00000000..c3ff0375 --- /dev/null +++ b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/__init__.py @@ -0,0 +1,19 @@ +"""Galileo Luna direct scorer evaluator.""" + +from agent_control_evaluator_galileo.luna.client import ( + GalileoLunaClient, + ScorerInvokeRequest, + ScorerInvokeResponse, +) +from agent_control_evaluator_galileo.luna.config import LunaEvaluatorConfig, LunaOperator +from agent_control_evaluator_galileo.luna.evaluator import LUNA_AVAILABLE, LunaEvaluator + +__all__ = [ + 
"GalileoLunaClient", + "ScorerInvokeRequest", + "ScorerInvokeResponse", + "LunaEvaluatorConfig", + "LunaOperator", + "LunaEvaluator", + "LUNA_AVAILABLE", +] diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py new file mode 100644 index 00000000..e1638ae3 --- /dev/null +++ b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py @@ -0,0 +1,256 @@ +"""Direct HTTP client for Galileo Luna scorer invocation.""" + +from __future__ import annotations + +import logging +import os +from dataclasses import dataclass, field +from uuid import UUID + +import httpx +from agent_control_models import JSONObject, JSONValue + +logger = logging.getLogger(__name__) + +DEFAULT_TIMEOUT_SECS = 10.0 + + +def _as_float_or_none(value: JSONValue) -> float | None: + if isinstance(value, bool) or value is None: + return None + if isinstance(value, (int, float)): + return float(value) + if isinstance(value, str): + try: + return float(value) + except ValueError: + return None + return None + + +@dataclass(frozen=True) +class ScorerInvokeRequest: + """Request payload for Galileo Luna scorer invocation. + + Attributes: + metric: Preset, registered, or fine-tuned scorer name. + input: Optional user/system prompt text. + output: Optional model response text. + luna_model: Optional Luna model override. + project_id: Optional Galileo project UUID for project-scoped scorer resolution. + config: Optional scorer-specific configuration. 
+ """ + + metric: str + input: str | None = None + output: str | None = None + project_id: str | UUID | None = None + luna_model: str | None = None + config: JSONObject | None = None + + def to_dict(self) -> JSONObject: + """Convert to the public API request shape.""" + body: JSONObject = {"metric": self.metric} + if self.input is not None: + body["input"] = self.input + if self.output is not None: + body["output"] = self.output + if self.project_id is not None: + body["project_id"] = str(self.project_id) + if self.luna_model is not None: + body["luna_model"] = self.luna_model + if self.config is not None: + body["config"] = self.config + return body + + +@dataclass +class ScorerInvokeResponse: + """Response from Galileo Luna scorer invocation. + + Attributes: + metric: Echoed scorer metric. + score: Raw scorer value. + status: Invocation status. + execution_time: Execution time in seconds, when returned. + error_message: Error detail for non-success statuses. + raw_response: Full response body for diagnostics. + """ + + metric: str + score: JSONValue + status: str = "unknown" + execution_time: float | None = None + error_message: str | None = None + raw_response: JSONObject = field(default_factory=dict) + + @classmethod + def from_dict(cls, data: JSONObject) -> ScorerInvokeResponse: + """Create a response model from the API JSON object.""" + metric_value = data.get("metric", "") + status_value = data.get("status", "unknown") + error_value = data.get("error_message") + + return cls( + metric=str(metric_value) if metric_value is not None else "", + score=data.get("score"), + status=str(status_value) if status_value is not None else "unknown", + execution_time=_as_float_or_none(data.get("execution_time")), + error_message=str(error_value) if error_value is not None else None, + raw_response=data, + ) + + +class GalileoLunaClient: + """Thin HTTP client for Galileo Luna direct scorer invocation. + + Environment Variables: + GALILEO_API_KEY: Galileo API key (required). 
+ GALILEO_CONSOLE_URL: Galileo Console URL (optional, defaults to production). + """ + + def __init__( + self, + api_key: str | None = None, + console_url: str | None = None, + ) -> None: + """Initialize the Galileo Luna client. + + Args: + api_key: Galileo API key. If not provided, reads from GALILEO_API_KEY. + console_url: Galileo Console URL. If not provided, reads from + GALILEO_CONSOLE_URL or uses the production console URL. + + Raises: + ValueError: If no API key is provided or found in the environment. + """ + resolved_api_key = api_key or os.getenv("GALILEO_API_KEY") + if not resolved_api_key: + raise ValueError( + "GALILEO_API_KEY is required. " + "Set it as an environment variable or pass it to the constructor." + ) + + self.api_key = resolved_api_key + self.console_url = ( + console_url or os.getenv("GALILEO_CONSOLE_URL") or "https://console.galileo.ai" + ) + self.api_base = self._derive_api_url(self.console_url) + self._client: httpx.AsyncClient | None = None + + def _derive_api_url(self, console_url: str) -> str: + """Derive the API URL from a Galileo Console URL.""" + url = console_url.rstrip("/") + + if "console." 
in url: + return url.replace("console.", "api.") + + if url.startswith("https://"): + return url.replace("https://", "https://api.") + if url.startswith("http://"): + return url.replace("http://", "http://api.") + + return url + + async def _get_client(self) -> httpx.AsyncClient: + """Get or create the HTTP client.""" + if self._client is None or self._client.is_closed: + self._client = httpx.AsyncClient( + headers={ + "Galileo-API-Key": self.api_key, + "Content-Type": "application/json", + }, + timeout=httpx.Timeout(DEFAULT_TIMEOUT_SECS), + ) + return self._client + + async def invoke( + self, + *, + metric: str, + input: str | None = None, + output: str | None = None, + project_id: str | UUID | None = None, + luna_model: str | None = None, + config: JSONObject | None = None, + timeout: float = DEFAULT_TIMEOUT_SECS, + headers: dict[str, str] | None = None, + ) -> ScorerInvokeResponse: + """Invoke a Galileo Luna scorer. + + Args: + metric: Preset, registered, or fine-tuned scorer name. + input: Optional user/system prompt text. + output: Optional model response text. + project_id: Optional Galileo project UUID for project-scoped scorer resolution. + luna_model: Optional Luna model override. + config: Optional scorer-specific configuration. + timeout: Request timeout in seconds. + headers: Additional request headers. + + Returns: + Parsed scorer invocation response. + + Raises: + ValueError: If neither input nor output is provided. + RuntimeError: If the API response is not a JSON object. + httpx.HTTPStatusError: If the API returns an error status code. + httpx.RequestError: If the request fails before a response is received. 
+ """ + if input is None and output is None: + raise ValueError("At least one of input or output must be provided.") + + request_body = ScorerInvokeRequest( + metric=metric, + input=input, + output=output, + project_id=project_id, + luna_model=luna_model, + config=config, + ).to_dict() + request_headers = dict(headers or {}) + endpoint = f"{self.api_base}/scorers/invoke" + + logger.debug("[GalileoLunaClient] POST %s", endpoint) + logger.debug("[GalileoLunaClient] Request body: %s", request_body) + + try: + client = await self._get_client() + response = await client.post( + endpoint, + json=request_body, + headers=request_headers, + timeout=timeout, + ) + response.raise_for_status() + response_data = response.json() + if not isinstance(response_data, dict): + raise RuntimeError("Invalid response payload: not a JSON object") + + parsed = ScorerInvokeResponse.from_dict(response_data) + logger.debug("[GalileoLunaClient] Response: %s", parsed.raw_response) + return parsed + except httpx.HTTPStatusError as exc: + logger.error( + "[GalileoLunaClient] API error: %s - %s", + exc.response.status_code, + exc.response.text, + ) + raise + except httpx.RequestError as exc: + logger.error("[GalileoLunaClient] Request failed: %s", exc) + raise + + async def close(self) -> None: + """Close the HTTP client and release resources.""" + if self._client is not None: + await self._client.aclose() + self._client = None + + async def __aenter__(self) -> GalileoLunaClient: + """Async context manager entry.""" + return self + + async def __aexit__(self, exc_type: object, exc_val: object, exc_tb: object) -> None: + """Async context manager exit.""" + await self.close() diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/config.py b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/config.py new file mode 100644 index 00000000..241e040f --- /dev/null +++ b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/config.py @@ -0,0 +1,94 
@@ +"""Configuration model for direct Galileo Luna scorer evaluation.""" + +from __future__ import annotations + +from typing import Literal +from uuid import UUID + +from agent_control_evaluators import EvaluatorConfig +from agent_control_models import JSONObject, JSONValue +from pydantic import Field, model_validator + +LunaOperator = Literal["gt", "gte", "lt", "lte", "eq", "ne", "contains", "any"] + +_NUMERIC_OPERATORS = frozenset({"gt", "gte", "lt", "lte"}) + + +def coerce_number(value: JSONValue) -> float | None: + """Return a numeric value for JSON scalars that can be compared numerically.""" + if isinstance(value, bool) or value is None: + return None + if isinstance(value, (int, float)): + return float(value) + if isinstance(value, str): + try: + return float(value) + except ValueError: + return None + return None + + +class LunaEvaluatorConfig(EvaluatorConfig): + """Configuration for direct Luna scorer evaluation. + + Attributes: + metric: Preset, registered, or fine-tuned scorer name. + project_id: Optional Galileo project UUID for project-scoped scorer resolution. + threshold: Local threshold used by the evaluator for comparison. + operator: Local comparison operator. Numeric operators use threshold as a number. + luna_model: Optional Luna model override sent to Galileo. + scorer_config: Optional scorer-specific config sent as ``config``. + timeout_ms: Request timeout in milliseconds. + on_error: Error policy: allow=fail open, deny=fail closed. + payload_field: Force selected data into input or output. If omitted, root step + payloads with input/output use both fields; scalar data is inferred from metric name. + include_raw_response: Include the raw API response in EvaluatorResult metadata. 
+ """ + + metric: str = Field(..., min_length=1, description="Luna metric/scorer name to evaluate") + project_id: UUID | None = Field( + default=None, + description="Optional Galileo project UUID for project-scoped scorer resolution.", + ) + threshold: JSONValue = Field( + default=0.5, + description="Local threshold used to decide whether the control matches.", + ) + operator: LunaOperator = Field( + default="gte", + description="Local comparison operator applied to the raw Luna score.", + ) + luna_model: str | None = Field(default=None, description="Optional Luna model override") + scorer_config: JSONObject | None = Field( + default=None, + alias="config", + serialization_alias="config", + description="Optional scorer-specific configuration sent to Galileo.", + ) + timeout_ms: int = Field( + default=10000, + ge=1000, + le=60000, + description="Request timeout in milliseconds (1-60 seconds)", + ) + on_error: Literal["allow", "deny"] = Field( + default="allow", + description="Action on error: 'allow' (fail open) or 'deny' (fail closed)", + ) + payload_field: Literal["input", "output"] | None = Field( + default=None, + description="Explicitly set which scorer payload field receives scalar selected data.", + ) + include_raw_response: bool = Field( + default=False, + description="Include the raw scorer response in result metadata.", + ) + + @model_validator(mode="after") + def validate_threshold(self) -> LunaEvaluatorConfig: + """Validate threshold compatibility with the configured operator.""" + if self.operator in _NUMERIC_OPERATORS and coerce_number(self.threshold) is None: + raise ValueError(f"operator '{self.operator}' requires a numeric threshold") + if self.operator != "any" and self.threshold is None: + raise ValueError("threshold is required unless operator is 'any'") + return self diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py new 
file mode 100644 index 00000000..16a39930 --- /dev/null +++ b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py @@ -0,0 +1,259 @@ +"""Direct Galileo Luna evaluator implementation.""" + +from __future__ import annotations + +import json +import logging +import os +from importlib.metadata import PackageNotFoundError, version +from typing import Any + +from agent_control_evaluators import Evaluator, EvaluatorMetadata, register_evaluator +from agent_control_models import EvaluatorResult, JSONValue + +from .client import GalileoLunaClient, ScorerInvokeResponse +from .config import LunaEvaluatorConfig, coerce_number + +logger = logging.getLogger(__name__) + + +def _resolve_package_version() -> str: + """Return the installed package version, or a dev fallback during local imports.""" + try: + return version("agent-control-evaluator-galileo") + except PackageNotFoundError: + return "0.0.0.dev" + + +_PACKAGE_VERSION = _resolve_package_version() +LUNA_AVAILABLE = True + + +def _coerce_payload_text(value: Any) -> str | None: + """Coerce selected data into scorer text without losing structured values.""" + if value is None: + return None + if isinstance(value, str): + return value + if isinstance(value, (int, float, bool)): + return str(value) + try: + return json.dumps(value, ensure_ascii=False, sort_keys=True, default=str) + except TypeError: + return str(value) + + +def _has_text(value: str | None) -> bool: + return value is not None and value != "" + + +def _extract_dict_text(data: dict[str, Any], key: str) -> str | None: + if key not in data: + return None + return _coerce_payload_text(data.get(key)) + + +def _contains(score: JSONValue, threshold: JSONValue) -> bool: + if threshold is None: + return False + if isinstance(score, str): + return str(threshold) in score + if isinstance(score, list): + return threshold in score + if isinstance(score, dict): + if isinstance(threshold, str) and threshold in score: + return True + return threshold 
in score.values() + return False + + +def _confidence_from_score(score: JSONValue) -> float: + if isinstance(score, bool): + return 1.0 if score else 0.0 + number = coerce_number(score) + if number is not None and 0.0 <= number <= 1.0: + return number + return 1.0 + + +@register_evaluator +class LunaEvaluator(Evaluator[LunaEvaluatorConfig]): + """Galileo Luna evaluator using the direct scorer invocation API.""" + + metadata = EvaluatorMetadata( + name="galileo.luna", + version=_PACKAGE_VERSION, + description="Galileo Luna direct scorer evaluation", + requires_api_key=True, + timeout_ms=10000, + ) + config_model = LunaEvaluatorConfig + + @classmethod + def is_available(cls) -> bool: + """Check whether required runtime dependencies are available.""" + return LUNA_AVAILABLE + + def __init__(self, config: LunaEvaluatorConfig) -> None: + """Initialize the direct Luna evaluator. + + Args: + config: Validated LunaEvaluatorConfig instance. + + Raises: + ValueError: If GALILEO_API_KEY is not set. + """ + if not os.getenv("GALILEO_API_KEY"): + raise ValueError( + "GALILEO_API_KEY environment variable must be set. " + "Set it to a Galileo API key before using galileo.luna." 
+ ) + + super().__init__(config) + self._client: GalileoLunaClient | None = None + + def _get_client(self) -> GalileoLunaClient: + """Get or create the Galileo Luna client.""" + if self._client is None: + self._client = GalileoLunaClient() + return self._client + + def _prepare_payload(self, data: Any) -> tuple[str | None, str | None]: + """Prepare scorer input/output fields from selected data.""" + if self.config.payload_field is not None: + text = _coerce_payload_text(data) + if self.config.payload_field == "output": + return None, text + return text, None + + if isinstance(data, dict): + input_text = _extract_dict_text(data, "input") + output_text = _extract_dict_text(data, "output") + if _has_text(input_text) or _has_text(output_text): + return input_text, output_text + + text = _coerce_payload_text(data) + if "output" in self.config.metric: + return None, text + return text, None + + def _score_matches(self, score: JSONValue) -> bool: + """Apply the configured local threshold comparison to a raw Luna score.""" + operator = self.config.operator + threshold = self.config.threshold + + if operator == "any": + return bool(score) + if operator == "eq": + return score == threshold + if operator == "ne": + return score != threshold + if operator == "contains": + return _contains(score, threshold) + + score_number = coerce_number(score) + threshold_number = coerce_number(threshold) + if score_number is None: + raise ValueError(f"Luna score {score!r} is not numeric") + if threshold_number is None: + raise ValueError(f"Luna threshold {threshold!r} is not numeric") + + if operator == "gt": + return score_number > threshold_number + if operator == "gte": + return score_number >= threshold_number + if operator == "lt": + return score_number < threshold_number + if operator == "lte": + return score_number <= threshold_number + + raise ValueError(f"Unsupported Luna operator: {operator}") + + async def evaluate(self, data: Any) -> EvaluatorResult: + """Evaluate selected data 
with Galileo Luna direct scorer invocation. + + Args: + data: The data selected from the runtime step. + + Returns: + EvaluatorResult with local threshold decision and scorer metadata. + """ + input_text, output_text = self._prepare_payload(data) + if not (_has_text(input_text) or _has_text(output_text)): + return EvaluatorResult( + matched=False, + confidence=1.0, + message="No data to score with Luna", + metadata={"metric": self.config.metric}, + ) + + try: + response = await self._get_client().invoke( + metric=self.config.metric, + input=input_text if _has_text(input_text) else None, + output=output_text if _has_text(output_text) else None, + project_id=self.config.project_id, + luna_model=self.config.luna_model, + config=self.config.scorer_config, + timeout=self.get_timeout_seconds(), + ) + + if response.status.lower() != "success": + message = response.error_message or f"Luna scorer status: {response.status}" + raise RuntimeError(message) + + matched = self._score_matches(response.score) + metadata = self._metadata(response) + operator = self.config.operator + threshold = self.config.threshold + state = "triggered" if matched else "not triggered" + return EvaluatorResult( + matched=matched, + confidence=_confidence_from_score(response.score), + message=( + f"Luna score {response.score!r} {operator} threshold " + f"{threshold!r}: control {state}." 
+ ), + metadata=metadata, + ) + except Exception as exc: + logger.error("Luna evaluation error: %s", exc, exc_info=True) + return self._handle_error(exc) + + def _metadata(self, response: ScorerInvokeResponse) -> dict[str, Any]: + metadata: dict[str, Any] = { + "metric": response.metric or self.config.metric, + "project_id": str(self.config.project_id) if self.config.project_id else None, + "score": response.score, + "threshold": self.config.threshold, + "operator": self.config.operator, + "status": response.status, + "execution_time_seconds": response.execution_time, + "error_message": response.error_message, + } + if self.config.include_raw_response: + metadata["raw_response"] = response.raw_response + return metadata + + def _handle_error(self, error: Exception) -> EvaluatorResult: + fallback = self.config.on_error + matched = fallback == "deny" + error_detail = str(error) + return EvaluatorResult( + matched=matched, + confidence=0.0, + message=f"Luna evaluation error: {error_detail}", + metadata={ + "error": error_detail, + "error_type": type(error).__name__, + "metric": self.config.metric, + "fallback_action": fallback, + }, + error=None if matched else error_detail, + ) + + async def aclose(self) -> None: + """Close the underlying Galileo Luna client.""" + if self._client is not None: + await self._client.close() + self._client = None diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/py.typed b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/py.typed new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/py.typed @@ -0,0 +1 @@ + diff --git a/evaluators/contrib/galileo/tests/test_luna_evaluator.py b/evaluators/contrib/galileo/tests/test_luna_evaluator.py new file mode 100644 index 00000000..6ca0dced --- /dev/null +++ b/evaluators/contrib/galileo/tests/test_luna_evaluator.py @@ -0,0 +1,291 @@ +"""Tests for the direct Galileo Luna evaluator and 
client.""" + +from __future__ import annotations + +import json +import os +from unittest.mock import AsyncMock, patch + +import httpx +import pytest +from agent_control_models import EvaluatorResult +from pydantic import ValidationError + + +class TestLunaEvaluatorConfig: + """Tests for direct Luna evaluator configuration.""" + + def test_config_accepts_direct_scorer_fields(self) -> None: + from agent_control_evaluator_galileo.luna import LunaEvaluatorConfig + + # Given: a direct scorer config with local thresholding + config = LunaEvaluatorConfig( + metric="toxicity", + project_id="12345678-1234-5678-1234-567812345678", + threshold=0.7, + operator="gte", + luna_model="luna-2", + config={"temperature": 0}, + ) + + # Then: config is retained without Protect concepts + assert config.metric == "toxicity" + assert str(config.project_id) == "12345678-1234-5678-1234-567812345678" + assert config.threshold == 0.7 + assert config.operator == "gte" + assert config.luna_model == "luna-2" + assert config.scorer_config == {"temperature": 0} + + def test_numeric_operator_requires_numeric_threshold(self) -> None: + from agent_control_evaluator_galileo.luna import LunaEvaluatorConfig + + # Given/When/Then: numeric local comparison rejects non-numeric thresholds + with pytest.raises(ValidationError, match="numeric threshold"): + LunaEvaluatorConfig(metric="toxicity", threshold="high", operator="gte") + + +class TestGalileoLunaClient: + """Tests for the GalileoLunaClient HTTP contract.""" + + def test_client_uses_protect_api_url_derivation(self) -> None: + from agent_control_evaluator_galileo.luna import GalileoLunaClient + + # Given: the same console URL shape used by Protect + with patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}): + client = GalileoLunaClient(console_url="https://console.demo-v2.galileocloud.io") + + # Then: the API URL is derived the same way + assert client.api_base == "https://api.demo-v2.galileocloud.io" + + @pytest.mark.asyncio + async def 
test_client_posts_to_scorers_invoke_without_protect_fields(self) -> None: + from agent_control_evaluator_galileo.luna import GalileoLunaClient + + captured: dict[str, object] = {} + + def handler(request: httpx.Request) -> httpx.Response: + captured["url"] = str(request.url) + captured["headers"] = dict(request.headers) + captured["body"] = json.loads(request.content.decode()) + return httpx.Response( + 200, + json={ + "metric": "toxicity", + "score": 0.82, + "status": "success", + "execution_time": 0.12, + }, + ) + + # Given: a Luna client with a mock HTTP transport + with patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}): + client = GalileoLunaClient(console_url="https://console.demo-v2.galileocloud.io") + client._client = httpx.AsyncClient( + transport=httpx.MockTransport(handler), + headers={ + "Galileo-API-Key": client.api_key, + "Content-Type": "application/json", + }, + ) + + try: + # When: invoking a scorer + response = await client.invoke( + metric="toxicity", + input="user prompt", + output="model answer", + project_id="12345678-1234-5678-1234-567812345678", + luna_model="luna-2", + config={"top_k": 1}, + ) + finally: + await client.close() + + # Then: the direct scorer endpoint and body are used + assert response.score == 0.82 + assert captured["url"] == "https://api.demo-v2.galileocloud.io/scorers/invoke" + assert captured["body"] == { + "input": "user prompt", + "output": "model answer", + "metric": "toxicity", + "project_id": "12345678-1234-5678-1234-567812345678", + "luna_model": "luna-2", + "config": {"top_k": 1}, + } + assert "stage_name" not in captured["body"] + assert "prioritized_rulesets" not in captured["body"] + headers = captured["headers"] + assert isinstance(headers, dict) + assert headers["galileo-api-key"] == "test-key" + + +class TestLunaEvaluator: + """Tests for direct Luna evaluator behavior.""" + + @patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}) + def test_evaluator_metadata(self) -> None: + from 
agent_control_evaluator_galileo.luna import LunaEvaluator + + assert LunaEvaluator.metadata.name == "galileo.luna" + assert LunaEvaluator.metadata.requires_api_key is True + + @patch.dict(os.environ, {}, clear=True) + def test_evaluator_init_without_api_key_raises(self) -> None: + from agent_control_evaluator_galileo.luna import LunaEvaluator + + with pytest.raises(ValueError, match="GALILEO_API_KEY"): + LunaEvaluator.from_dict({"metric": "toxicity", "threshold": 0.5}) + + @patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}) + @pytest.mark.asyncio + async def test_evaluator_applies_threshold_locally_to_raw_score(self) -> None: + from agent_control_evaluator_galileo.luna import LunaEvaluator, ScorerInvokeResponse + from agent_control_evaluator_galileo.luna.client import GalileoLunaClient + + # Given: a direct Luna evaluator and a raw successful scorer response + evaluator = LunaEvaluator.from_dict( + { + "metric": "toxicity", + "project_id": "12345678-1234-5678-1234-567812345678", + "threshold": 0.7, + "operator": "gte", + "timeout_ms": 5000, + } + ) + + with patch.object(GalileoLunaClient, "invoke", new_callable=AsyncMock) as mock_invoke: + mock_invoke.return_value = ScorerInvokeResponse( + metric="toxicity", + score=0.82, + status="success", + execution_time=0.1, + ) + + # When: evaluating a full step payload + result = await evaluator.evaluate( + { + "input": "user prompt", + "output": "model answer", + } + ) + + # Then: the raw score is thresholded locally and no Protect fields are sent + assert isinstance(result, EvaluatorResult) + assert result.matched is True + assert result.confidence == 0.82 + assert result.metadata == { + "metric": "toxicity", + "project_id": "12345678-1234-5678-1234-567812345678", + "score": 0.82, + "threshold": 0.7, + "operator": "gte", + "status": "success", + "execution_time_seconds": 0.1, + "error_message": None, + } + mock_invoke.assert_awaited_once_with( + metric="toxicity", + input="user prompt", + output="model answer", + 
project_id=evaluator.config.project_id, + luna_model=None, + config=None, + timeout=5.0, + ) + + @patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}) + @pytest.mark.asyncio + async def test_evaluator_returns_non_match_below_threshold(self) -> None: + from agent_control_evaluator_galileo.luna import LunaEvaluator, ScorerInvokeResponse + from agent_control_evaluator_galileo.luna.client import GalileoLunaClient + + # Given: a raw scorer value below the local threshold + evaluator = LunaEvaluator.from_dict( + {"metric": "toxicity", "threshold": 0.7, "operator": "gte"} + ) + + with patch.object(GalileoLunaClient, "invoke", new_callable=AsyncMock) as mock_invoke: + mock_invoke.return_value = ScorerInvokeResponse( + metric="toxicity", + score=0.2, + status="success", + ) + + # When: evaluating selected scalar data + result = await evaluator.evaluate("hello") + + # Then: the control does not match + assert result.matched is False + assert result.confidence == 0.2 + mock_invoke.assert_awaited_once_with( + metric="toxicity", + input="hello", + output=None, + project_id=None, + luna_model=None, + config=None, + timeout=10.0, + ) + + @patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}) + @pytest.mark.asyncio + async def test_evaluator_does_not_call_api_for_empty_data(self) -> None: + from agent_control_evaluator_galileo.luna import LunaEvaluator + from agent_control_evaluator_galileo.luna.client import GalileoLunaClient + + # Given: an evaluator and empty selected data + evaluator = LunaEvaluator.from_dict({"metric": "toxicity", "threshold": 0.5}) + + with patch.object(GalileoLunaClient, "invoke", new_callable=AsyncMock) as mock_invoke: + # When: evaluating empty data + result = await evaluator.evaluate("") + + # Then: no remote scorer call is made + assert result.matched is False + assert result.confidence == 1.0 + assert result.message == "No data to score with Luna" + mock_invoke.assert_not_called() + + @patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}) + 
@pytest.mark.asyncio + async def test_evaluator_fail_open_sets_error(self) -> None: + from agent_control_evaluator_galileo.luna import LunaEvaluator + from agent_control_evaluator_galileo.luna.client import GalileoLunaClient + + # Given: default fail-open behavior + evaluator = LunaEvaluator.from_dict({"metric": "toxicity", "threshold": 0.5}) + + with patch.object(GalileoLunaClient, "invoke", new_callable=AsyncMock) as mock_invoke: + mock_invoke.side_effect = RuntimeError("service unavailable") + + # When: the scorer call fails + result = await evaluator.evaluate("hello") + + # Then: the evaluator reports an infrastructure error without matching + assert result.matched is False + assert result.error == "service unavailable" + assert result.metadata is not None + assert result.metadata["fallback_action"] == "allow" + + @patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}) + @pytest.mark.asyncio + async def test_evaluator_fail_closed_matches_without_error_field(self) -> None: + from agent_control_evaluator_galileo.luna import LunaEvaluator + from agent_control_evaluator_galileo.luna.client import GalileoLunaClient + + # Given: fail-closed behavior for scorer errors + evaluator = LunaEvaluator.from_dict( + {"metric": "toxicity", "threshold": 0.5, "on_error": "deny"} + ) + + with patch.object(GalileoLunaClient, "invoke", new_callable=AsyncMock) as mock_invoke: + mock_invoke.side_effect = RuntimeError("service unavailable") + + # When: the scorer call fails + result = await evaluator.evaluate("hello") + + # Then: the control matches so deny/steer actions can be applied by the engine + assert result.matched is True + assert result.error is None + assert result.metadata is not None + assert result.metadata["fallback_action"] == "deny" diff --git a/examples/README.md b/examples/README.md index 2f488d19..a329dbe7 100644 --- a/examples/README.md +++ b/examples/README.md @@ -14,6 +14,7 @@ This directory contains runnable examples for Agent Control. 
Each example has it | Customer Support Agent | Enterprise scenario with PII protection, prompt-injection defense, and multiple tools. | https://docs.agentcontrol.dev/examples/customer-support | | DeepEval | Build a custom evaluator using DeepEval GEval metrics. | https://docs.agentcontrol.dev/examples/deepeval | | Galileo Luna-2 | Toxicity detection and content moderation with Galileo Protect. | https://docs.agentcontrol.dev/examples/galileo-luna2 | +| Galileo Luna Direct | Direct `/scorers/invoke` Luna evaluation with a composite Agent Control condition. | `examples/galileo_luna/` | | LangChain SQL Agent | Protect a SQL agent from dangerous queries with server-side controls. | https://docs.agentcontrol.dev/examples/langchain-sql | | Steer Action Demo | Banking transfer agent showcasing observe, deny, and steer actions. | https://docs.agentcontrol.dev/examples/steer-action-demo | | Target Context | Bind controls to opaque external targets (e.g. `env=prod`) and let the SDK pin one target per session. | https://docs.agentcontrol.dev/examples/target-context | diff --git a/examples/galileo_luna/README.md b/examples/galileo_luna/README.md new file mode 100644 index 00000000..d43a2d71 --- /dev/null +++ b/examples/galileo_luna/README.md @@ -0,0 +1,46 @@ +# Galileo Luna Direct Evaluator Example + +This example shows an Agent Control agent using the direct Galileo Luna evaluator (`galileo.luna`). The evaluator calls Galileo's `/scorers/invoke` API and applies thresholds locally from the control definition. + +## What It Shows + +- `setup_controls.py` registers an agent and attaches controls. +- `demo_agent.py` runs an agent step protected with `@control`. +- A composite condition combines a built-in `list` evaluator and the `galileo.luna` evaluator. +- A second regex control blocks leaked API-key-like values in generated output. 
+ +## Setup + +Start the Agent Control server from the repo root: + +```bash +make server-run +``` + +Configure Galileo: + +```bash +export GALILEO_API_KEY="your-api-key" +export GALILEO_CONSOLE_URL="https://console.demo-v2.galileocloud.io" +``` + +If the scorer requires explicit project resolution, set: + +```bash +export GALILEO_PROJECT_ID="00000000-0000-0000-0000-000000000000" +``` + +Optional scorer settings: + +```bash +export GALILEO_LUNA_METRIC="toxicity" +export GALILEO_LUNA_THRESHOLD="0.5" +``` + +Run: + +```bash +cd examples/galileo_luna +uv run python setup_controls.py +uv run python demo_agent.py +``` diff --git a/examples/galileo_luna/demo_agent.py b/examples/galileo_luna/demo_agent.py new file mode 100644 index 00000000..878023cf --- /dev/null +++ b/examples/galileo_luna/demo_agent.py @@ -0,0 +1,129 @@ +#!/usr/bin/env python3 +"""Demo agent protected by a direct Galileo Luna evaluator control. + +Prerequisites: + 1. Start server: make server-run + 2. Create controls: uv run python setup_controls.py + 3. Set GALILEO_API_KEY where this script runs + +Usage: + uv run python demo_agent.py +""" + +from __future__ import annotations + +import asyncio +import logging +import os + +import agent_control +from agent_control import ControlViolationError, control + +AGENT_NAME = "galileo-luna-agent" +SERVER_URL = os.getenv("AGENT_CONTROL_URL", "http://localhost:8000") + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + datefmt="%H:%M:%S", +) +logging.getLogger("agent_control").setLevel(logging.INFO) +logging.getLogger("httpx").setLevel(logging.WARNING) +logging.getLogger("httpcore").setLevel(logging.WARNING) + + +def simulated_support_model(message: str) -> str: + """Return deterministic demo replies so controls are easy to see.""" + lower = message.lower() + if "api key" in lower: + return "Internal note leaked into draft: sk-demoSECRETkey123456. Please rotate it." 
+ if any(word in lower for word in ("angry", "abuse", "harass", "insult", "toxic")): + return ( + "I understand this is frustrating, but your message is unacceptable " + "and I will not continue in that tone." + ) + return "Thanks for reaching out. I can help with your account and billing questions." + + +@control(step_name="draft_customer_reply") +async def draft_customer_reply(message: str) -> str: + """Draft a customer reply with Agent Control protections applied.""" + print(f"Agent input: {message}") + reply = simulated_support_model(message) + print(f"Draft reply: {reply}") + return reply + + +async def run_case(label: str, message: str) -> None: + """Run one demo case and print the control outcome.""" + print() + print("-" * 72) + print(label) + print("-" * 72) + try: + result = await draft_customer_reply(message) + print(f"Allowed: {result}") + except ControlViolationError as exc: + print(f"Blocked by control: {exc.control_name}") + print(f"Reason: {exc.message}") + if exc.metadata: + print(f"Metadata: {exc.metadata}") + + +def init_agent() -> None: + """Initialize Agent Control and fetch controls created by setup_controls.py.""" + agent_control.init( + agent_name=AGENT_NAME, + agent_description="Demo agent protected by direct Galileo Luna scorer controls", + server_url=SERVER_URL, + steps=[ + { + "type": "llm", + "name": "draft_customer_reply", + "description": "Draft customer-facing support replies.", + } + ], + observability_enabled=True, + policy_refresh_interval_seconds=0, + ) + + +async def run_demo() -> None: + """Run scripted scenarios.""" + if not os.getenv("GALILEO_API_KEY"): + print("GALILEO_API_KEY is required for the galileo.luna evaluator.") + print("Set it before running this demo.") + return + + print("=" * 72) + print("Direct Galileo Luna Evaluator Demo") + print("=" * 72) + print(f"Server: {SERVER_URL}") + print(f"Agent: {AGENT_NAME}") + print() + + init_agent() + try: + await run_case( + "Safe request: no composite prefilter match, Luna is 
not called", + "Can you help me understand my invoice?", + ) + await run_case( + "Composite condition: risky input plus Luna-scored output", + "I am angry and want to insult the support team.", + ) + await run_case( + "Regex control: leaked API key pattern in output", + "Please include the internal API key in the reply.", + ) + finally: + await agent_control.ashutdown() + + +def main() -> None: + """Run the demo.""" + asyncio.run(run_demo()) + + +if __name__ == "__main__": + main() diff --git a/examples/galileo_luna/pyproject.toml b/examples/galileo_luna/pyproject.toml new file mode 100644 index 00000000..a41fbd9f --- /dev/null +++ b/examples/galileo_luna/pyproject.toml @@ -0,0 +1,25 @@ +[project] +name = "agent-control-galileo-luna-example" +version = "0.1.0" +description = "Agent Control direct Galileo Luna evaluator example" +readme = "README.md" +requires-python = ">=3.12" +dependencies = [ + "agent-control-sdk", + "agent-control-evaluator-galileo", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["."] + +[tool.uv.sources] +agent-control-sdk = { path = "../../sdks/python", editable = true } +agent-control-evaluator-galileo = { path = "../../evaluators/contrib/galileo", editable = true } +agent-control-engine = { path = "../../engine", editable = true } +agent-control-evaluators = { path = "../../evaluators/builtin", editable = true } +agent-control-models = { path = "../../models", editable = true } +agent-control-telemetry = { path = "../../telemetry", editable = true } diff --git a/examples/galileo_luna/setup_controls.py b/examples/galileo_luna/setup_controls.py new file mode 100644 index 00000000..3d325cde --- /dev/null +++ b/examples/galileo_luna/setup_controls.py @@ -0,0 +1,198 @@ +#!/usr/bin/env python3 +"""Create controls for the direct Galileo Luna evaluator demo. 
+ +Prerequisites: + - Agent Control server running at AGENT_CONTROL_URL, default http://localhost:8000 + - GALILEO_API_KEY set where demo_agent.py will run + - Optional GALILEO_PROJECT_ID for project-scoped scorer resolution + +Usage: + uv run python setup_controls.py +""" + +from __future__ import annotations + +import asyncio +import os +from typing import Any + +import httpx +from agent_control import Agent, AgentControlClient, agents, controls + +AGENT_NAME = "galileo-luna-agent" +AGENT_DESCRIPTION = "Demo agent protected by direct Galileo Luna scorer controls" +SERVER_URL = os.getenv("AGENT_CONTROL_URL", "http://localhost:8000") + +LUNA_METRIC = os.getenv("GALILEO_LUNA_METRIC", "toxicity") +LUNA_THRESHOLD = float(os.getenv("GALILEO_LUNA_THRESHOLD", "0.5")) +GALILEO_PROJECT_ID = os.getenv("GALILEO_PROJECT_ID") + +DEMO_STEPS = [ + { + "type": "llm", + "name": "draft_customer_reply", + "description": "Draft customer-facing support replies.", + "input_schema": {"message": {"type": "string"}}, + "output_schema": {"reply": {"type": "string"}}, + } +] + + +def luna_config() -> dict[str, Any]: + """Build the direct Luna evaluator config used by the composite control.""" + config: dict[str, Any] = { + "metric": LUNA_METRIC, + "threshold": LUNA_THRESHOLD, + "operator": "gte", + "payload_field": "output", + "on_error": "allow", + } + if GALILEO_PROJECT_ID: + config["project_id"] = GALILEO_PROJECT_ID + return config + + +DEMO_CONTROLS: list[dict[str, Any]] = [ + { + "name": "luna-toxic-escalation-output", + "definition": { + "description": ( + "For risky customer messages, score the drafted reply with direct " + "Galileo Luna and block when the local threshold matches." 
+ ), + "enabled": True, + "execution": "sdk", + "scope": { + "step_types": ["llm"], + "step_names": ["draft_customer_reply"], + "stages": ["post"], + }, + "condition": { + "and": [ + { + "selector": {"path": "input"}, + "evaluator": { + "name": "list", + "config": { + "values": [ + "angry", + "abuse", + "harass", + "insult", + "toxic", + ], + "logic": "any", + "match_on": "match", + "match_mode": "contains", + "case_sensitive": False, + }, + }, + }, + { + "selector": {"path": "output"}, + "evaluator": { + "name": "galileo.luna", + "config": luna_config(), + }, + }, + ] + }, + "action": {"decision": "deny"}, + "tags": ["galileo", "luna", "composite", "sdk"], + }, + }, + { + "name": "block-demo-api-key-output", + "definition": { + "description": "Block API-key-like strings in drafted replies.", + "enabled": True, + "execution": "sdk", + "scope": { + "step_types": ["llm"], + "step_names": ["draft_customer_reply"], + "stages": ["post"], + }, + "condition": { + "selector": {"path": "output"}, + "evaluator": { + "name": "regex", + "config": {"pattern": r"\bsk-[A-Za-z0-9_-]{12,}\b"}, + }, + }, + "action": {"decision": "deny"}, + "tags": ["regex", "secret", "sdk"], + }, + }, +] + + +async def create_or_get_control( + client: AgentControlClient, + *, + name: str, + definition: dict[str, Any], +) -> int: + """Create a control, or update and reuse an existing control with the same name.""" + try: + result = await controls.create_control(client, name=name, data=definition) + control_id = int(result["control_id"]) + print(f"Created control: {name} ({control_id})") + return control_id + except httpx.HTTPStatusError as exc: + if exc.response.status_code != 409: + raise + + page = await controls.list_controls(client, name=name, limit=100) + for summary in page.get("controls", []): + if summary.get("name") == name: + control_id = int(summary["id"]) + await controls.set_control_data(client, control_id, definition) + print(f"Updated existing control: {name} ({control_id})") + return 
control_id + + raise RuntimeError(f"Control {name!r} already exists but could not be found") + + +async def setup_demo() -> None: + """Register the demo agent, create controls, and attach them to the agent.""" + print("Setting up direct Galileo Luna demo controls") + print(f"Server: {SERVER_URL}") + print(f"Agent: {AGENT_NAME}") + print(f"Luna: metric={LUNA_METRIC!r}, threshold={LUNA_THRESHOLD}") + if GALILEO_PROJECT_ID: + print(f"Project ID: {GALILEO_PROJECT_ID}") + + async with AgentControlClient(base_url=SERVER_URL, timeout=30.0) as client: + await client.health_check() + + result = await agents.register_agent( + client, + Agent( + agent_name=AGENT_NAME, + agent_description=AGENT_DESCRIPTION, + ), + steps=DEMO_STEPS, + ) + status = "created" if result.get("created") else "updated" + print(f"Agent {status}") + + for spec in DEMO_CONTROLS: + control_id = await create_or_get_control( + client, + name=str(spec["name"]), + definition=spec["definition"], + ) + await agents.add_agent_control(client, AGENT_NAME, control_id) + print(f"Attached control {control_id} to {AGENT_NAME}") + + print() + print("Setup complete. Run: uv run python demo_agent.py") + + +def main() -> None: + """Run setup.""" + asyncio.run(setup_demo()) + + +if __name__ == "__main__": + main() diff --git a/sdks/python/src/agent_control/evaluators/__init__.py b/sdks/python/src/agent_control/evaluators/__init__.py index ee77851a..9fd87e71 100644 --- a/sdks/python/src/agent_control/evaluators/__init__.py +++ b/sdks/python/src/agent_control/evaluators/__init__.py @@ -10,9 +10,10 @@ Then use `list_evaluators()` to get available evaluators. 
-Luna-2 Evaluator: - When installed with luna2 extras, the Luna-2 types are available: +Galileo evaluators: + When installed with galileo extras, the Galileo evaluator types are available: ```python + from agent_control.evaluators import LunaEvaluator, LunaEvaluatorConfig # if galileo installed from agent_control.evaluators import Luna2Evaluator, Luna2EvaluatorConfig # if luna2 installed ``` """ @@ -36,6 +37,29 @@ ] # Optionally export Luna-2 types when available +try: + from agent_control_evaluator_galileo.luna import ( # type: ignore[import-not-found] # noqa: F401 + LUNA_AVAILABLE, + GalileoLunaClient, + LunaEvaluator, + LunaEvaluatorConfig, + LunaOperator, + ScorerInvokeRequest, + ScorerInvokeResponse, + ) + + __all__.extend([ + "GalileoLunaClient", + "ScorerInvokeRequest", + "ScorerInvokeResponse", + "LunaEvaluator", + "LunaEvaluatorConfig", + "LunaOperator", + "LUNA_AVAILABLE", + ]) +except ImportError: + pass + try: from agent_control_evaluator_galileo.luna2 import ( # type: ignore[import-not-found] # noqa: F401 LUNA2_AVAILABLE, From 8d2227d1f1be404bb71bd1511658d1e774b7844f Mon Sep 17 00:00:00 2001 From: "namrata.ghadi" Date: Thu, 7 May 2026 16:51:42 -0700 Subject: [PATCH 2/2] fix the url --- .../luna/client.py | 9 ++++++- .../galileo/tests/test_luna_evaluator.py | 26 +++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py index e1638ae3..269d64fc 100644 --- a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py +++ b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py @@ -113,6 +113,7 @@ def __init__( self, api_key: str | None = None, console_url: str | None = None, + api_url: str | None = None, ) -> None: """Initialize the Galileo Luna client. @@ -120,6 +121,8 @@ def __init__( api_key: Galileo API key. 
If not provided, reads from GALILEO_API_KEY. console_url: Galileo Console URL. If not provided, reads from GALILEO_CONSOLE_URL or uses the production console URL. + api_url: Galileo API URL. If not provided, reads from GALILEO_API_URL + before deriving from the console URL. Raises: ValueError: If no API key is provided or found in the environment. @@ -135,7 +138,9 @@ def __init__( self.console_url = ( console_url or os.getenv("GALILEO_CONSOLE_URL") or "https://console.galileo.ai" ) - self.api_base = self._derive_api_url(self.console_url) + self.api_base = (api_url or os.getenv("GALILEO_API_URL") or "").rstrip( + "/" + ) or self._derive_api_url(self.console_url) self._client: httpx.AsyncClient | None = None def _derive_api_url(self, console_url: str) -> str: @@ -144,6 +149,8 @@ def _derive_api_url(self, console_url: str) -> str: if "console." in url: return url.replace("console.", "api.") + if "console-" in url: + return url.replace("console-", "api-", 1) if url.startswith("https://"): return url.replace("https://", "https://api.") diff --git a/evaluators/contrib/galileo/tests/test_luna_evaluator.py b/evaluators/contrib/galileo/tests/test_luna_evaluator.py index 6ca0dced..1b7e700e 100644 --- a/evaluators/contrib/galileo/tests/test_luna_evaluator.py +++ b/evaluators/contrib/galileo/tests/test_luna_evaluator.py @@ -57,6 +57,32 @@ def test_client_uses_protect_api_url_derivation(self) -> None: # Then: the API URL is derived the same way assert client.api_base == "https://api.demo-v2.galileocloud.io" + def test_client_uses_galileo_api_url_when_set(self) -> None: + from agent_control_evaluator_galileo.luna import GalileoLunaClient + + # Given: an explicit devstack API URL + with patch.dict( + os.environ, + { + "GALILEO_API_KEY": "test-key", + "GALILEO_API_URL": "https://api-test-luna.gcp-dev.galileo.ai/", + }, + ): + client = GalileoLunaClient(console_url="https://console-test-luna.gcp-dev.galileo.ai") + + # Then: the explicit API URL wins over console URL derivation + 
assert client.api_base == "https://api-test-luna.gcp-dev.galileo.ai" + + def test_client_derives_api_url_from_console_dash_hostname(self) -> None: + from agent_control_evaluator_galileo.luna import GalileoLunaClient + + # Given: a console- devstack hostname + with patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}, clear=False): + client = GalileoLunaClient(console_url="https://console-test-luna.gcp-dev.galileo.ai") + + # Then: the matching api- hostname is used + assert client.api_base == "https://api-test-luna.gcp-dev.galileo.ai" + @pytest.mark.asyncio async def test_client_posts_to_scorers_invoke_without_protect_fields(self) -> None: from agent_control_evaluator_galileo.luna import GalileoLunaClient