From a16d44d57844225694926c4f90584cc245dfd219 Mon Sep 17 00:00:00 2001 From: fcogidi <41602287+fcogidi@users.noreply.github.com> Date: Wed, 24 Jun 2026 14:08:55 -0400 Subject: [PATCH 1/6] refactor: rename PROXY_BASE_URL and PROXY_API_KEY to OPENAI_BASE_URL and OPENAI_API_KEY to match remote env vars from secret manager --- .../methods/agentic/agent_factory.py | 44 +++++------ .../forecasting/methods/llm_processes/base.py | 14 ++-- .../llm_processes/binary_probability.py | 4 +- .../llm_processes/categorical_probability.py | 4 +- .../methods/llm_processes/quantile_grid.py | 4 +- .../llm_processes/sampled_trajectory.py | 4 +- .../methods/agentic/test_agent_factory.py | 76 +++++++++---------- .../boc_rate_decisions/rationale_eval.py | 4 +- implementations/sp500_forecasting/README.md | 4 +- planning-docs/vector-llm-proxy.md | 10 +-- playground/proxy_tests/run_matrix.py | 42 +++++----- playground/proxy_tests/run_tests.py | 40 +++++----- .../proxy_tests/test_pdf_file_format.py | 14 ++-- .../proxy_tests/test_pdf_native_blocks.py | 4 +- playground/proxy_tests/test_pdf_raw_http.py | 6 +- playground/proxy_tests/test_pdf_routing.py | 4 +- playground/proxy_tests/test_pdf_upload.py | 24 +++--- scripts/cache_wti_curriculum_news.py | 16 ++-- 18 files changed, 159 insertions(+), 159 deletions(-) diff --git a/aieng-forecasting/aieng/forecasting/methods/agentic/agent_factory.py b/aieng-forecasting/aieng/forecasting/methods/agentic/agent_factory.py index e1b339e1..2fd6047b 100644 --- a/aieng-forecasting/aieng/forecasting/methods/agentic/agent_factory.py +++ b/aieng-forecasting/aieng/forecasting/methods/agentic/agent_factory.py @@ -210,8 +210,8 @@ def _build_automatic_function_calling_config( def _build_search_tool( config: ContextRetrievalConfig, *, - proxy_base_url: str, - proxy_api_key: str | None, + openai_base_url: str, + openai_api_key: str | None, ) -> Callable[..., Any]: """Return an async ``search_web`` FunctionTool backed by the proxy's googleSearch. 
@@ -243,8 +243,8 @@ async def search_web(query: str, cutoff_date: str | None = None) -> str: search_model = f"openai/{search_model}" resp = await litellm.acompletion( model=search_model, - api_base=proxy_base_url, - api_key=proxy_api_key, + api_base=openai_base_url, + api_key=openai_api_key, messages=[ {"role": "system", "content": config.instruction}, {"role": "user", "content": user_content}, @@ -277,16 +277,16 @@ class AgentConfig(BaseModel): model : str | BaseLlm, default=LITE_MODEL (``"gemini-3.1-flash-lite-preview"``) Model name (bare, no provider prefix) or a custom :class:`~google.adk.models.base_llm.BaseLlm` instance. When - ``proxy_base_url`` is set and ``model`` is a plain string, + ``openai_base_url`` is set and ``model`` is a plain string, :func:`build_adk_agent` wraps it in a :class:`~google.adk.models.lite_llm.LiteLlm` instance pointing to the proxy. Pass a ``BaseLlm`` directly to skip automatic wrapping. - proxy_base_url : str | None, default=PROXY_BASE_URL env var + openai_base_url : str | None, default=OPENAI_BASE_URL env var Base URL for the OpenAI-compatible LLM proxy. Defaults to the - ``PROXY_BASE_URL`` environment variable. When set, the agent (and + ``OPENAI_BASE_URL`` environment variable. When set, the agent (and the ``search_web`` tool) route all calls through the proxy. - proxy_api_key : str | None, default=PROXY_API_KEY env var - API key for the proxy. Defaults to the ``PROXY_API_KEY`` + openai_api_key : str | None, default=OPENAI_API_KEY env var + API key for the proxy. Defaults to the ``OPENAI_API_KEY`` environment variable. description : str, default="" Description of the agent. Useful when the agent is used as a sub-agent. 
@@ -342,15 +342,15 @@ class AgentConfig(BaseModel): name: str = "adk_forecasting_agent" model: str | BaseLlm = LITE_MODEL - proxy_base_url: str | None = Field( - default_factory=lambda: os.getenv("PROXY_BASE_URL"), + openai_base_url: str | None = Field( + default_factory=lambda: os.getenv("OPENAI_BASE_URL"), description=( - "Base URL for the OpenAI-compatible LLM proxy. Defaults to the PROXY_BASE_URL environment variable." + "Base URL for the OpenAI-compatible LLM proxy. Defaults to the OPENAI_BASE_URL environment variable." ), ) - proxy_api_key: str | None = Field( - default_factory=lambda: os.getenv("PROXY_API_KEY"), - description="API key for the proxy. Defaults to the PROXY_API_KEY environment variable.", + openai_api_key: str | None = Field( + default_factory=lambda: os.getenv("OPENAI_API_KEY"), + description="API key for the proxy. Defaults to the OPENAI_API_KEY environment variable.", ) description: str = "" instruction: str = "" @@ -404,7 +404,7 @@ def build_adk_agent( Code execution (E2B) and the web-search context-retrieval tool are wired only when the corresponding capability blocks in ``config`` are enabled. - When ``config.proxy_base_url`` is set and ``config.model`` is a plain + When ``config.openai_base_url`` is set and ``config.model`` is a plain string, the model is automatically wrapped in a :class:`~google.adk.models.lite_llm.LiteLlm` instance that routes all calls through the proxy. Pass a ``BaseLlm`` instance directly to bypass @@ -452,7 +452,7 @@ def build_adk_agent( """ # Resolve model: wrap bare string in LiteLlm when proxy is configured. model: str | BaseLlm = config.model - if isinstance(model, str) and config.proxy_base_url: + if isinstance(model, str) and config.openai_base_url: from google.adk.models.lite_llm import LiteLlm # noqa: PLC0415 # Prefix with "openai/" so LiteLLM uses the OpenAI-compatible path. 
@@ -461,8 +461,8 @@ def build_adk_agent( litellm_model = model if model.startswith("openai/") else f"openai/{model}" model = LiteLlm( model=litellm_model, - api_base=config.proxy_base_url, - api_key=config.proxy_api_key, + api_base=config.openai_base_url, + api_key=config.openai_api_key, ) # Configure tools @@ -478,12 +478,12 @@ def build_adk_agent( ) if config.context_retrieval.enabled: - proxy_base_url = config.proxy_base_url or os.getenv("PROXY_BASE_URL") or "" + openai_base_url = config.openai_base_url or os.getenv("OPENAI_BASE_URL") or "" tools.append( _build_search_tool( config.context_retrieval, - proxy_base_url=proxy_base_url, - proxy_api_key=config.proxy_api_key, + openai_base_url=openai_base_url, + openai_api_key=config.openai_api_key, ) ) diff --git a/aieng-forecasting/aieng/forecasting/methods/llm_processes/base.py b/aieng-forecasting/aieng/forecasting/methods/llm_processes/base.py index 548802de..8ebe0c5a 100644 --- a/aieng-forecasting/aieng/forecasting/methods/llm_processes/base.py +++ b/aieng-forecasting/aieng/forecasting/methods/llm_processes/base.py @@ -42,22 +42,22 @@ class LLMPredictorConfig(BaseModel): description=( "Model name as expected by the proxy (bare, no provider prefix), " "e.g. 'gemini-3.1-flash-lite-preview', 'gpt-4o-mini'. " - "When proxy_base_url is set, LiteLLM routes this to the proxy via " + "When openai_base_url is set, LiteLLM routes this to the proxy via " "custom_llm_provider='openai'." ), ) - proxy_base_url: str | None = Field( - default_factory=lambda: os.getenv("PROXY_BASE_URL"), + openai_base_url: str | None = Field( + default_factory=lambda: os.getenv("OPENAI_BASE_URL"), description=( "Base URL for an OpenAI-compatible LLM proxy. Defaults to the " - "``PROXY_BASE_URL`` environment variable. When set, all completions " + "``OPENAI_BASE_URL`` environment variable. When set, all completions " "are routed through the proxy using ``api_base`` + " "``custom_llm_provider='openai'``." 
), ) - proxy_api_key: str | None = Field( - default_factory=lambda: os.getenv("PROXY_API_KEY"), - description=("API key for the proxy. Defaults to the ``PROXY_API_KEY`` environment variable."), + openai_api_key: str | None = Field( + default_factory=lambda: os.getenv("OPENAI_API_KEY"), + description=("API key for the proxy. Defaults to the ``OPENAI_API_KEY`` environment variable."), ) temperature: float = Field(default=1.0, ge=0.0, le=2.0, description="Sampling temperature.") max_tokens: int = Field( diff --git a/aieng-forecasting/aieng/forecasting/methods/llm_processes/binary_probability.py b/aieng-forecasting/aieng/forecasting/methods/llm_processes/binary_probability.py index b3feb006..c68f1648 100644 --- a/aieng-forecasting/aieng/forecasting/methods/llm_processes/binary_probability.py +++ b/aieng-forecasting/aieng/forecasting/methods/llm_processes/binary_probability.py @@ -219,8 +219,8 @@ def _sample_probability( max_tokens=cfg.max_tokens, timeout_s=cfg.timeout_s, reasoning_effort=cfg.reasoning_effort, - api_base=cfg.proxy_base_url, - api_key=cfg.proxy_api_key, + api_base=cfg.openai_base_url, + api_key=cfg.openai_api_key, ), ) if not parsed: diff --git a/aieng-forecasting/aieng/forecasting/methods/llm_processes/categorical_probability.py b/aieng-forecasting/aieng/forecasting/methods/llm_processes/categorical_probability.py index 6e7dfcd2..bfe5be11 100644 --- a/aieng-forecasting/aieng/forecasting/methods/llm_processes/categorical_probability.py +++ b/aieng-forecasting/aieng/forecasting/methods/llm_processes/categorical_probability.py @@ -303,8 +303,8 @@ def _sample_distribution( max_tokens=cfg.max_tokens, timeout_s=cfg.timeout_s, reasoning_effort=cfg.reasoning_effort, - api_base=cfg.proxy_base_url, - api_key=cfg.proxy_api_key, + api_base=cfg.openai_base_url, + api_key=cfg.openai_api_key, ), ) if not parsed: diff --git a/aieng-forecasting/aieng/forecasting/methods/llm_processes/quantile_grid.py 
b/aieng-forecasting/aieng/forecasting/methods/llm_processes/quantile_grid.py index ac0c7cd5..19a02ecd 100644 --- a/aieng-forecasting/aieng/forecasting/methods/llm_processes/quantile_grid.py +++ b/aieng-forecasting/aieng/forecasting/methods/llm_processes/quantile_grid.py @@ -239,8 +239,8 @@ def _sample_quantile_grid( max_tokens=cfg.max_tokens, timeout_s=cfg.timeout_s, reasoning_effort=cfg.reasoning_effort, - api_base=cfg.proxy_base_url, - api_key=cfg.proxy_api_key, + api_base=cfg.openai_base_url, + api_key=cfg.openai_api_key, ), ) if not parsed: diff --git a/aieng-forecasting/aieng/forecasting/methods/llm_processes/sampled_trajectory.py b/aieng-forecasting/aieng/forecasting/methods/llm_processes/sampled_trajectory.py index 280c257b..67650cdc 100644 --- a/aieng-forecasting/aieng/forecasting/methods/llm_processes/sampled_trajectory.py +++ b/aieng-forecasting/aieng/forecasting/methods/llm_processes/sampled_trajectory.py @@ -287,8 +287,8 @@ def _sample_trajectories( max_tokens=cfg.max_tokens, timeout_s=cfg.timeout_s, reasoning_effort=cfg.reasoning_effort, - api_base=cfg.proxy_base_url, - api_key=cfg.proxy_api_key, + api_base=cfg.openai_base_url, + api_key=cfg.openai_api_key, ), ) return result diff --git a/aieng-forecasting/tests/aieng/forecasting/methods/agentic/test_agent_factory.py b/aieng-forecasting/tests/aieng/forecasting/methods/agentic/test_agent_factory.py index 3046fee7..af9619bc 100644 --- a/aieng-forecasting/tests/aieng/forecasting/methods/agentic/test_agent_factory.py +++ b/aieng-forecasting/tests/aieng/forecasting/methods/agentic/test_agent_factory.py @@ -58,24 +58,24 @@ def test_skill_dirs_must_resolve_to_real_directories(self, tmp_path: Path) -> No AgentConfig(instruction="Forecast.", skills_dirs=[missing]) def test_proxy_fields_default_from_env(self, monkeypatch: pytest.MonkeyPatch) -> None: - """proxy_base_url and proxy_api_key pick up environment variables.""" - monkeypatch.setenv("PROXY_BASE_URL", "https://proxy.example.com/v1") - 
monkeypatch.setenv("PROXY_API_KEY", "test-key-123") + """openai_base_url and openai_api_key pick up environment variables.""" + monkeypatch.setenv("OPENAI_BASE_URL", "https://proxy.example.com/v1") + monkeypatch.setenv("OPENAI_API_KEY", "test-key-123") config = AgentConfig(instruction="Forecast.") - assert config.proxy_base_url == "https://proxy.example.com/v1" - assert config.proxy_api_key == "test-key-123" + assert config.openai_base_url == "https://proxy.example.com/v1" + assert config.openai_api_key == "test-key-123" def test_proxy_fields_none_when_env_absent(self, monkeypatch: pytest.MonkeyPatch) -> None: """Proxy fields are None when the env vars are unset.""" - monkeypatch.delenv("PROXY_BASE_URL", raising=False) - monkeypatch.delenv("PROXY_API_KEY", raising=False) + monkeypatch.delenv("OPENAI_BASE_URL", raising=False) + monkeypatch.delenv("OPENAI_API_KEY", raising=False) config = AgentConfig(instruction="Forecast.") - assert config.proxy_base_url is None - assert config.proxy_api_key is None + assert config.openai_base_url is None + assert config.openai_api_key is None class TestBuildAdkAgent: @@ -94,11 +94,11 @@ def test_output_schema_retained_with_skills(self, tmp_path: Path) -> None: assert agent.output_schema is ContinuousAgentForecastOutput def test_string_model_wrapped_in_litellm_when_proxy_set(self) -> None: - """A plain model string is wrapped in LiteLlm when proxy_base_url is set.""" + """A plain model string is wrapped in LiteLlm when openai_base_url is set.""" config = AgentConfig( instruction="Forecast.", - proxy_base_url="https://proxy.example.com/v1", - proxy_api_key="test-key", + openai_base_url="https://proxy.example.com/v1", + openai_api_key="test-key", ) agent = build_adk_agent(config) @@ -110,13 +110,13 @@ def test_string_model_wrapped_in_litellm_when_proxy_set(self) -> None: def test_string_model_kept_as_string_without_proxy(self, monkeypatch: pytest.MonkeyPatch) -> None: """Without a proxy URL the model is passed as a plain string to 
LlmAgent.""" - monkeypatch.delenv("PROXY_BASE_URL", raising=False) - monkeypatch.delenv("PROXY_API_KEY", raising=False) + monkeypatch.delenv("OPENAI_BASE_URL", raising=False) + monkeypatch.delenv("OPENAI_API_KEY", raising=False) config = AgentConfig( instruction="Forecast.", - proxy_base_url=None, - proxy_api_key=None, + openai_base_url=None, + openai_api_key=None, ) agent = build_adk_agent(config) @@ -129,7 +129,7 @@ def test_baselm_instance_bypasses_wrapping(self) -> None: config = AgentConfig( instruction="Forecast.", model=custom_model, - proxy_base_url="https://proxy.example.com/v1", + openai_base_url="https://proxy.example.com/v1", ) agent = build_adk_agent(config) @@ -143,8 +143,8 @@ def test_tools_auto_disable_automatic_function_calling(self) -> None: enabled=True, instruction="Search for market news before the cutoff date.", ), - proxy_base_url="https://proxy.example.com/v1", - proxy_api_key="test-key", + openai_base_url="https://proxy.example.com/v1", + openai_api_key="test-key", ) agent = build_adk_agent(config, output_schema=ContinuousAgentForecastOutput) @@ -156,14 +156,14 @@ def test_instruction_only_agent_leaves_automatic_function_calling_unset( self, monkeypatch: pytest.MonkeyPatch ) -> None: """Minimal interactive agents keep genai AFC at provider defaults.""" - monkeypatch.delenv("PROXY_BASE_URL", raising=False) - agent = build_adk_agent(AgentConfig(instruction="You are a helpful analyst.", proxy_base_url=None)) + monkeypatch.delenv("OPENAI_BASE_URL", raising=False) + agent = build_adk_agent(AgentConfig(instruction="You are a helpful analyst.", openai_base_url=None)) assert agent.generate_content_config.automatic_function_calling is None def test_function_tools_are_attached(self, monkeypatch: pytest.MonkeyPatch) -> None: """Conventional function tools in the config are appended to the agent.""" - monkeypatch.delenv("PROXY_BASE_URL", raising=False) + monkeypatch.delenv("OPENAI_BASE_URL", raising=False) def my_tool(x: str) -> str: """Echo the input. 
Args: x: anything. Returns: the same string.""" @@ -173,7 +173,7 @@ def my_tool(x: str) -> str: AgentConfig( instruction="Forecast the supplied series.", function_tools=[my_tool], - proxy_base_url=None, + openai_base_url=None, ) ) @@ -199,8 +199,8 @@ def test_smr_shim_registered_and_output_schema_cleared_on_litellm_path(self) -> enabled=True, instruction="Search for market news before the cutoff date.", ), - proxy_base_url="https://proxy.example.com/v1", - proxy_api_key="test-key", + openai_base_url="https://proxy.example.com/v1", + openai_api_key="test-key", ) agent = build_adk_agent(config, output_schema=ContinuousAgentForecastOutput) @@ -218,11 +218,11 @@ def test_smr_shim_registered_without_tools_on_litellm_path(self, monkeypatch: py without the shim, ADK would send the Pydantic schema as Gemini's response_schema and 400 on $defs/$ref/additionalProperties through the proxy. """ - monkeypatch.delenv("PROXY_BASE_URL", raising=False) + monkeypatch.delenv("OPENAI_BASE_URL", raising=False) config = AgentConfig( instruction="Forecast.", - proxy_base_url="https://proxy.example.com/v1", - proxy_api_key="test-key", + openai_base_url="https://proxy.example.com/v1", + openai_api_key="test-key", ) agent = build_adk_agent(config, output_schema=ContinuousAgentForecastOutput) @@ -232,8 +232,8 @@ def test_smr_shim_registered_without_tools_on_litellm_path(self, monkeypatch: py def test_output_schema_retained_on_direct_gemini(self, monkeypatch: pytest.MonkeyPatch) -> None: """No proxy (direct Gemini): native output_schema enforcement is preserved.""" - monkeypatch.delenv("PROXY_BASE_URL", raising=False) - config = AgentConfig(instruction="Forecast.") # no proxy_base_url → model stays a plain string + monkeypatch.delenv("OPENAI_BASE_URL", raising=False) + config = AgentConfig(instruction="Forecast.") # no openai_base_url → model stays a plain string agent = build_adk_agent(config, output_schema=ContinuousAgentForecastOutput) assert agent.output_schema is 
ContinuousAgentForecastOutput @@ -252,8 +252,8 @@ def test_returns_callable_with_expected_signature(self) -> None: ) tool = _build_search_tool( config, - proxy_base_url="https://proxy.example.com/v1", - proxy_api_key="test-key", + openai_base_url="https://proxy.example.com/v1", + openai_api_key="test-key", ) assert callable(tool) @@ -273,8 +273,8 @@ async def test_cutoff_date_appended_when_enforce_cutoff_true(self) -> None: ) tool = _build_search_tool( config, - proxy_base_url="https://proxy.example.com/v1", - proxy_api_key="test-key", + openai_base_url="https://proxy.example.com/v1", + openai_api_key="test-key", ) captured: list[dict] = [] @@ -303,8 +303,8 @@ async def test_cutoff_not_appended_when_enforce_cutoff_false(self) -> None: ) tool = _build_search_tool( config, - proxy_base_url="https://proxy.example.com/v1", - proxy_api_key="test-key", + openai_base_url="https://proxy.example.com/v1", + openai_api_key="test-key", ) captured: list[dict] = [] @@ -331,8 +331,8 @@ async def test_source_urls_appended_from_grounding_metadata(self) -> None: ) tool = _build_search_tool( config, - proxy_base_url="https://proxy.example.com/v1", - proxy_api_key="test-key", + openai_base_url="https://proxy.example.com/v1", + openai_api_key="test-key", ) async def _fake_acompletion(**kwargs): # type: ignore[override] diff --git a/implementations/boc_rate_decisions/rationale_eval.py b/implementations/boc_rate_decisions/rationale_eval.py index e9613701..7dfde84f 100644 --- a/implementations/boc_rate_decisions/rationale_eval.py +++ b/implementations/boc_rate_decisions/rationale_eval.py @@ -187,8 +187,8 @@ def judge_rationale_alignment( max_tokens=max_tokens, timeout_s=timeout_s, reasoning_effort=reasoning_effort, - api_base=os.getenv("PROXY_BASE_URL"), - api_key=os.getenv("PROXY_API_KEY"), + api_base=os.getenv("OPENAI_BASE_URL"), + api_key=os.getenv("OPENAI_API_KEY"), ), ) if not parsed: diff --git a/implementations/sp500_forecasting/README.md 
b/implementations/sp500_forecasting/README.md index 3560afda..53745d74 100644 --- a/implementations/sp500_forecasting/README.md +++ b/implementations/sp500_forecasting/README.md @@ -208,12 +208,12 @@ uv run python scripts/fetch_fred.py # macro covariates (FRED `fetch_fred.py` requires a **FRED API key** in your repo-root `.env` (`FRED_API_KEY=...`). FRED keys are free but must be requested individually — **we cannot provide one for you**. -Request yours at https://fred.stlouisfed.org/docs/api/api_key.html (approval is usually +Request yours at (approval is usually quick, but allow some time). A description like "Requesting an API key to explore the effectiveness of various forecasting techniques on economic data." works well. The `llmp_*` rows call the Vector proxy, so a populated repo-root `.env` (with -`PROXY_BASE_URL` / `PROXY_API_KEY`) is required when those rows are enabled. +`OPENAI_BASE_URL` / `OPENAI_API_KEY`) is required when those rows are enabled. **How to run:** open `01_sp500_multivariate_backtest.ipynb` and **Run All**. The `EXPERIMENT_CONFIG` cell selects the 2025 comparison spec (`"smoke"` by default; diff --git a/planning-docs/vector-llm-proxy.md b/planning-docs/vector-llm-proxy.md index 9b3ed52b..c13092ef 100644 --- a/planning-docs/vector-llm-proxy.md +++ b/planning-docs/vector-llm-proxy.md @@ -11,8 +11,8 @@ Vector runs a shared LLM gateway at `proxy.vectorinstitute.ai`. It is OpenAI-API ## How it is wired in - **All model strings are bare** (e.g. `gemini-3.1-flash-lite-preview`). No `gemini/` or `openai/` prefix in user-facing config. Internally, the library prepends `openai/` before passing to LiteLLM so it routes via the OpenAI-compatible path; LiteLLM strips the prefix before sending to the proxy. -- **LLMP predictors** (`SampledTrajectoryLLMPredictor`, `QuantileGridLLMPredictor`): `LLMPredictorConfig` reads `PROXY_BASE_URL` and `PROXY_API_KEY` from the environment and passes them as `api_base`/`api_key` to `litellm.acompletion`. 
-- **ADK agents** (`build_adk_agent`): `AgentConfig` reads the same env vars. When `proxy_base_url` is set and `model` is a plain string, the factory automatically wraps it in `LiteLlm(model="openai/", api_base=..., api_key=...)`. +- **LLMP predictors** (`SampledTrajectoryLLMPredictor`, `QuantileGridLLMPredictor`): `LLMPredictorConfig` reads `OPENAI_BASE_URL` and `OPENAI_API_KEY` from the environment and passes them as `api_base`/`api_key` to `litellm.acompletion`. +- **ADK agents** (`build_adk_agent`): `AgentConfig` reads the same env vars. When `openai_base_url` is set and `model` is a plain string, the factory automatically wraps it in `LiteLlm(model="openai/", api_base=..., api_key=...)`. - **Web search / context retrieval**: replaced the Gemini-native `google_search` sub-agent with a `search_web` FunctionTool backed by the proxy's `{"googleSearch": {}}` server-side extension. Grounding metadata (source URLs) is extracted from `choices[0].provider_specific_fields["grounding_metadata"]`. - **Default model everywhere**: the lite model (`gemini-3.1-flash-lite-preview`). See the project model convention below. @@ -30,8 +30,8 @@ Both are defined once in `aieng.forecasting.models` as `LITE_MODEL` and `ADVANCE ## Required environment variables ``` -PROXY_BASE_URL=https://proxy.vectorinstitute.ai/v1 -PROXY_API_KEY=your_proxy_api_key +OPENAI_BASE_URL=https://proxy.vectorinstitute.ai/v1 +OPENAI_API_KEY=your_api_key ``` Both are read via `os.getenv(...)` with `None` as the fallback. If neither is set, callers fall back to direct provider routing via LiteLLM's standard env vars (`GEMINI_API_KEY`, `ANTHROPIC_API_KEY`, etc.). @@ -46,7 +46,7 @@ Both are read via `os.getenv(...)` with `None` as the fallback. 
If neither is se | Need | Route | | --- | --- | -| LLMP forecasting calls | Proxy — `LLMPredictorConfig` with `proxy_base_url`/`proxy_api_key` | +| LLMP forecasting calls | Proxy — `LLMPredictorConfig` with `openai_base_url`/`openai_api_key` | | ADK analyst/reasoning agent | Proxy — `AgentConfig` auto-wraps model in `LiteLlm` | | Web search / context retrieval | Proxy — `search_web` tool uses `{"googleSearch": {}}` extension | | Code execution | E2B sandbox (`CodeExecutionConfig(enabled=True)`) | diff --git a/playground/proxy_tests/run_matrix.py b/playground/proxy_tests/run_matrix.py index ed9cf214..7e307f90 100644 --- a/playground/proxy_tests/run_matrix.py +++ b/playground/proxy_tests/run_matrix.py @@ -72,8 +72,8 @@ load_dotenv(REPO_ROOT / ".env") -PROXY_BASE_URL = os.environ.get("PROXY_BASE_URL", "https://proxy.vectorinstitute.ai/v1") -PROXY_API_KEY = os.environ.get("PROXY_API_KEY", "") +OPENAI_BASE_URL = os.environ.get("OPENAI_BASE_URL", "https://proxy.vectorinstitute.ai/v1") +OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "") # =========================================================================== @@ -189,8 +189,8 @@ async def cap_json_schema(model: str) -> CapabilityResult: } resp = await litellm.acompletion( model=_proxy_model(model), - api_base=PROXY_BASE_URL, - api_key=PROXY_API_KEY, + api_base=OPENAI_BASE_URL, + api_key=OPENAI_API_KEY, messages=[ { "role": "user", @@ -218,8 +218,8 @@ async def cap_adk_text(model: str) -> CapabilityResult: factory.AgentConfig( name="matrix_text", model=model, - proxy_base_url=PROXY_BASE_URL, - proxy_api_key=PROXY_API_KEY, + openai_base_url=OPENAI_BASE_URL, + openai_api_key=OPENAI_API_KEY, instruction="You are a concise assistant.", ) ) @@ -249,7 +249,7 @@ def get_commodity_price(commodity: str) -> str: agent = llm_agent_cls( name="matrix_tool", - model=lite_llm_cls(model=_proxy_model(model), api_base=PROXY_BASE_URL, api_key=PROXY_API_KEY), + model=lite_llm_cls(model=_proxy_model(model), api_base=OPENAI_BASE_URL, 
api_key=OPENAI_API_KEY), instruction="Use the tool to answer commodity price questions.", tools=[get_commodity_price], ) @@ -293,8 +293,8 @@ class SimpleForecast(pydantic_mod.BaseModel): # type: ignore[misc, valid-type] factory.AgentConfig( name="matrix_schema", model=model, - proxy_base_url=PROXY_BASE_URL, - proxy_api_key=PROXY_API_KEY, + openai_base_url=OPENAI_BASE_URL, + openai_api_key=OPENAI_API_KEY, instruction=instruction, context_retrieval=factory.ContextRetrievalConfig( enabled=True, @@ -337,8 +337,8 @@ async def cap_smr_probe(model: str) -> CapabilityResult: try: resp = await litellm.acompletion( model=_proxy_model(model), - api_base=PROXY_BASE_URL, - api_key=PROXY_API_KEY, + api_base=OPENAI_BASE_URL, + api_key=OPENAI_API_KEY, messages=[{"role": "user", "content": "Reply with a tiny forecast object."}], response_format={"type": "json_schema", "json_schema": {"name": "forecast", "schema": schema}}, tools=[tool], @@ -372,7 +372,7 @@ def lookup(symbol: str) -> str: agent = llm_agent_cls( name="matrix_multiturn", - model=lite_llm_cls(model=_proxy_model(model), api_base=PROXY_BASE_URL, api_key=PROXY_API_KEY), + model=lite_llm_cls(model=_proxy_model(model), api_base=OPENAI_BASE_URL, api_key=OPENAI_API_KEY), instruction="Look up each symbol with the tool, one call per symbol, then summarise.", tools=[lookup], ) @@ -399,8 +399,8 @@ async def cap_reasoning_effort(model: str) -> CapabilityResult: litellm = _litellm() resp = await litellm.acompletion( model=_proxy_model(model), - api_base=PROXY_BASE_URL, - api_key=PROXY_API_KEY, + api_base=OPENAI_BASE_URL, + api_key=OPENAI_API_KEY, messages=[{"role": "user", "content": "Think step by step: what is 17*23? 
Give only the number."}], timeout=REQUEST_TIMEOUT, extra_body={"reasoning_effort": "high"}, @@ -426,8 +426,8 @@ async def cap_search_grounding(model: str) -> CapabilityResult: litellm = _litellm() resp = await litellm.acompletion( model=_proxy_model(SEARCH_MODEL), - api_base=PROXY_BASE_URL, - api_key=PROXY_API_KEY, + api_base=OPENAI_BASE_URL, + api_key=OPENAI_API_KEY, messages=[{"role": "user", "content": "Current WTI crude oil price? Search for it."}], tools=[{"googleSearch": {}}], timeout=REQUEST_TIMEOUT, @@ -456,7 +456,7 @@ async def cap_cutoff_probe(model: str) -> CapabilityResult: instruction="You are a web search assistant. Return a one-line summary with sources.", enforce_cutoff=True, ) - search_web = factory._build_search_tool(cfg, proxy_base_url=PROXY_BASE_URL, proxy_api_key=PROXY_API_KEY) + search_web = factory._build_search_tool(cfg, openai_base_url=OPENAI_BASE_URL, openai_api_key=OPENAI_API_KEY) out = await search_web(query="latest OPEC+ production decision", cutoff_date="2020-01-01") n_sources = out.count("http") return _ok(f"ran with cutoff=2020-01-01; sources_returned~={n_sources} (audit text manually for leakage)") @@ -574,7 +574,7 @@ def render_markdown(state: RunState, models: list[str], tier: str) -> str: lines.append(f"# Proxy capability matrix — tier `{tier}`") lines.append("") lines.append(f"- Generated: {time.strftime('%Y-%m-%d %H:%M:%S')}") - lines.append(f"- Proxy: `{PROXY_BASE_URL}`") + lines.append(f"- Proxy: `{OPENAI_BASE_URL}`") lines.append("") # Grid @@ -633,8 +633,8 @@ def _print_config() -> None: async def _amain(args: argparse.Namespace) -> int: - if not PROXY_API_KEY: - print("ERROR: PROXY_API_KEY not set. Check your .env file.") + if not OPENAI_API_KEY: + print("ERROR: OPENAI_API_KEY not set. 
Check your .env file.") return 1 models = [m.strip() for m in args.models.split(",") if m.strip()] or MODELS @@ -644,7 +644,7 @@ async def _amain(args: argparse.Namespace) -> int: bootstrap_litellm() - print(f"Proxy : {PROXY_BASE_URL}") + print(f"Proxy : {OPENAI_BASE_URL}") print(f"Tier : {args.tier}") print(f"Models : {', '.join(models)}") diff --git a/playground/proxy_tests/run_tests.py b/playground/proxy_tests/run_tests.py index 5fe51c33..3f9e2e84 100644 --- a/playground/proxy_tests/run_tests.py +++ b/playground/proxy_tests/run_tests.py @@ -44,8 +44,8 @@ load_dotenv(REPO_ROOT / ".env") -PROXY_BASE_URL = "https://proxy.vectorinstitute.ai/v1" -PROXY_API_KEY = os.environ.get("PROXY_API_KEY", "") +OPENAI_BASE_URL = "https://proxy.vectorinstitute.ai/v1" +OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "") # gpt-4o-mini: reliably handles JSON schema and function calling on the proxy. PROXY_MODEL_LITELLM = "openai/gpt-4o-mini" # LiteLLM provider/model string @@ -113,8 +113,8 @@ async def test_t1_llmp_basic() -> None: litellm = _litellm() resp = await litellm.acompletion( model=PROXY_MODEL_LITELLM, - api_base=PROXY_BASE_URL, - api_key=PROXY_API_KEY, + api_base=OPENAI_BASE_URL, + api_key=OPENAI_API_KEY, messages=[ { "role": "user", @@ -152,8 +152,8 @@ async def test_t2_adk_basic() -> None: model = lite_llm_cls( model=PROXY_MODEL_LITELLM, - api_base=PROXY_BASE_URL, - api_key=PROXY_API_KEY, + api_base=OPENAI_BASE_URL, + api_key=OPENAI_API_KEY, ) agent = llm_agent_cls( name="proxy_test_agent", @@ -197,8 +197,8 @@ def get_commodity_price(commodity: str) -> str: model = lite_llm_cls( model=PROXY_MODEL_LITELLM, - api_base=PROXY_BASE_URL, - api_key=PROXY_API_KEY, + api_base=OPENAI_BASE_URL, + api_key=OPENAI_API_KEY, ) agent = llm_agent_cls( name="proxy_tool_agent", @@ -237,8 +237,8 @@ class SimpleForecast(base_model_cls): model = lite_llm_cls( model=PROXY_MODEL_LITELLM, - api_base=PROXY_BASE_URL, - api_key=PROXY_API_KEY, + api_base=OPENAI_BASE_URL, + api_key=OPENAI_API_KEY, ) 
agent = llm_agent_cls( name="proxy_schema_agent", @@ -271,8 +271,8 @@ async def test_t5_google_search_raw() -> None: litellm = _litellm() resp = await litellm.acompletion( model=model, - api_base=PROXY_BASE_URL, - api_key=PROXY_API_KEY, + api_base=OPENAI_BASE_URL, + api_key=OPENAI_API_KEY, messages=[ { "role": "user", @@ -332,8 +332,8 @@ async def search_web(query: str) -> str: """ resp = await litellm.acompletion( model="openai/gemini-2.5-flash", - api_base=PROXY_BASE_URL, - api_key=PROXY_API_KEY, + api_base=OPENAI_BASE_URL, + api_key=OPENAI_API_KEY, messages=[{"role": "user", "content": query}], tools=[{"googleSearch": {}}], max_tokens=512, @@ -356,8 +356,8 @@ async def search_web(query: str) -> str: model = lite_llm_cls( model=PROXY_MODEL_LITELLM, - api_base=PROXY_BASE_URL, - api_key=PROXY_API_KEY, + api_base=OPENAI_BASE_URL, + api_key=OPENAI_API_KEY, ) agent = llm_agent_cls( name="proxy_search_agent", @@ -385,13 +385,13 @@ async def search_web(query: str) -> str: async def main() -> None: """Run all proxy integration checks in sequence.""" - if not PROXY_API_KEY: - print("ERROR: PROXY_API_KEY not set. Check your .env file.") + if not OPENAI_API_KEY: + print("ERROR: OPENAI_API_KEY not set. 
Check your .env file.") sys.exit(1) - print(f"Proxy URL : {PROXY_BASE_URL}") + print(f"Proxy URL : {OPENAI_BASE_URL}") print(f"Model : {PROXY_MODEL_LITELLM}") - print(f"API key : {PROXY_API_KEY[:12]}...") + print(f"API key : {OPENAI_API_KEY[:12]}...") await test_t1_llmp_basic() await test_t2_adk_basic() diff --git a/playground/proxy_tests/test_pdf_file_format.py b/playground/proxy_tests/test_pdf_file_format.py index 2bcce858..d2989cc1 100644 --- a/playground/proxy_tests/test_pdf_file_format.py +++ b/playground/proxy_tests/test_pdf_file_format.py @@ -36,8 +36,8 @@ from aieng.forecasting.models import ADVANCED_MODEL, LITE_MODEL -PROXY_BASE_URL = os.environ.get("PROXY_BASE_URL", "https://proxy.vectorinstitute.ai/v1") -PROXY_API_KEY = os.environ.get("PROXY_API_KEY", "") +OPENAI_BASE_URL = os.environ.get("OPENAI_BASE_URL", "https://proxy.vectorinstitute.ai/v1") +OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "") DEFAULT_PDF = REPO_ROOT / "data" / "reports" / "cfpr" / "2021_en.pdf" MINIMAL_PROMPT = ( @@ -89,8 +89,8 @@ async def _call(label: str, model: str, content_parts: list, *, extra_body=None) kwargs = { "model": f"openai/{model}", - "api_base": PROXY_BASE_URL, - "api_key": PROXY_API_KEY, + "api_base": OPENAI_BASE_URL, + "api_key": OPENAI_API_KEY, "messages": [{"role": "user", "content": content_parts}], "max_tokens": 128, "timeout": 90, @@ -106,15 +106,15 @@ async def _call(label: str, model: str, content_parts: list, *, extra_body=None) async def main() -> None: - if not PROXY_API_KEY: - print("ERROR: PROXY_API_KEY not set.") + if not OPENAI_API_KEY: + print("ERROR: OPENAI_API_KEY not set.") sys.exit(1) pdf_path = Path(os.environ.get("TEST_PDF_PATH", DEFAULT_PDF)) if not pdf_path.exists(): print(f"ERROR: PDF not found at {pdf_path}") sys.exit(1) - print(f"Proxy : {PROXY_BASE_URL}") + print(f"Proxy : {OPENAI_BASE_URL}") print(f"PDF : {pdf_path} ({pdf_path.stat().st_size:,} bytes)") b64 = _pdf_b64(pdf_path) data_uri = f"data:application/pdf;base64,{b64}" diff 
--git a/playground/proxy_tests/test_pdf_native_blocks.py b/playground/proxy_tests/test_pdf_native_blocks.py index 4c74b955..64ae3d19 100644 --- a/playground/proxy_tests/test_pdf_native_blocks.py +++ b/playground/proxy_tests/test_pdf_native_blocks.py @@ -24,8 +24,8 @@ REPO_ROOT = Path(__file__).resolve().parents[2] load_dotenv(REPO_ROOT / ".env") -BASE = os.environ.get("PROXY_BASE_URL", "https://proxy.vectorinstitute.ai/v1") -KEY = os.environ.get("PROXY_API_KEY", "") +BASE = os.environ.get("OPENAI_BASE_URL", "https://proxy.vectorinstitute.ai/v1") +KEY = os.environ.get("OPENAI_API_KEY", "") PDF = REPO_ROOT / "data" / "reports" / "cfpr" / "2021_en.pdf" Q = "What edition number is printed on the title page? Answer with just the number." diff --git a/playground/proxy_tests/test_pdf_raw_http.py b/playground/proxy_tests/test_pdf_raw_http.py index 762529d5..1ad1b759 100644 --- a/playground/proxy_tests/test_pdf_raw_http.py +++ b/playground/proxy_tests/test_pdf_raw_http.py @@ -24,8 +24,8 @@ REPO_ROOT = Path(__file__).resolve().parents[2] load_dotenv(REPO_ROOT / ".env") -BASE = os.environ.get("PROXY_BASE_URL", "https://proxy.vectorinstitute.ai/v1") -KEY = os.environ.get("PROXY_API_KEY", "") +BASE = os.environ.get("OPENAI_BASE_URL", "https://proxy.vectorinstitute.ai/v1") +KEY = os.environ.get("OPENAI_API_KEY", "") PDF = REPO_ROOT / "data" / "reports" / "cfpr" / "2021_en.pdf" PROMPT = "What edition number is printed on the title page of this document? Answer with just the number." 
MODEL = os.environ.get("TEST_MODEL", "gemini-3.5-flash") @@ -75,7 +75,7 @@ def _shape(body: dict) -> object: def main() -> None: if not KEY: - print("PROXY_API_KEY not set") + print("OPENAI_API_KEY not set") sys.exit(1) b64 = base64.b64encode(PDF.read_bytes()).decode() data_uri = f"data:application/pdf;base64,{b64}" diff --git a/playground/proxy_tests/test_pdf_routing.py b/playground/proxy_tests/test_pdf_routing.py index 83ff7988..57d0879a 100644 --- a/playground/proxy_tests/test_pdf_routing.py +++ b/playground/proxy_tests/test_pdf_routing.py @@ -27,8 +27,8 @@ REPO_ROOT = Path(__file__).resolve().parents[2] load_dotenv(REPO_ROOT / ".env") -BASE = os.environ.get("PROXY_BASE_URL", "https://proxy.vectorinstitute.ai/v1") -KEY = os.environ.get("PROXY_API_KEY", "") +BASE = os.environ.get("OPENAI_BASE_URL", "https://proxy.vectorinstitute.ai/v1") +KEY = os.environ.get("OPENAI_API_KEY", "") PDF = REPO_ROOT / "data" / "reports" / "cfpr" / "2021_en.pdf" # 1x1 red PNG — a trivially describable image to confirm image routing. 
diff --git a/playground/proxy_tests/test_pdf_upload.py b/playground/proxy_tests/test_pdf_upload.py index 8cdf3c3c..d2785990 100644 --- a/playground/proxy_tests/test_pdf_upload.py +++ b/playground/proxy_tests/test_pdf_upload.py @@ -43,8 +43,8 @@ bootstrap_litellm() init_langfuse_tracing() -PROXY_BASE_URL = "https://proxy.vectorinstitute.ai/v1" -PROXY_API_KEY = os.environ.get("PROXY_API_KEY", "") +OPENAI_BASE_URL = "https://proxy.vectorinstitute.ai/v1" +OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "") MODEL_ADVANCED = ADVANCED_MODEL # "gemini-3.5-flash" MODEL_LITE = LITE_MODEL # "gemini-3.1-flash-lite-preview" @@ -156,8 +156,8 @@ async def test_t7a_openai_data_uri(pdf_b64: str) -> None: resp = await litellm.acompletion( model=f"openai/{MODEL_ADVANCED}", - api_base=PROXY_BASE_URL, - api_key=PROXY_API_KEY, + api_base=OPENAI_BASE_URL, + api_key=OPENAI_API_KEY, messages=messages, max_tokens=128, timeout=60, @@ -192,8 +192,8 @@ async def test_t7b_extra_body_inline_data(pdf_b64: str) -> None: resp = await litellm.acompletion( model=f"openai/{MODEL_ADVANCED}", - api_base=PROXY_BASE_URL, - api_key=PROXY_API_KEY, + api_base=OPENAI_BASE_URL, + api_key=OPENAI_API_KEY, messages=[{"role": "user", "content": MINIMAL_PROMPT}], max_tokens=128, timeout=60, @@ -254,8 +254,8 @@ async def test_t7d_lite_model(pdf_b64: str) -> None: resp = await litellm.acompletion( model=f"openai/{MODEL_LITE}", - api_base=PROXY_BASE_URL, - api_key=PROXY_API_KEY, + api_base=OPENAI_BASE_URL, + api_key=OPENAI_API_KEY, messages=messages, max_tokens=128, timeout=60, @@ -331,8 +331,8 @@ async def test_t7c_gemini_native_direct(pdf_path: Path) -> None: async def main() -> None: """Run all PDF upload integration checks.""" - if not PROXY_API_KEY: - print("ERROR: PROXY_API_KEY not set. Check your .env file.") + if not OPENAI_API_KEY: + print("ERROR: OPENAI_API_KEY not set. 
Check your .env file.") sys.exit(1) pdf_path = Path(os.environ.get("TEST_PDF_PATH", DEFAULT_PDF)) @@ -340,11 +340,11 @@ async def main() -> None: print(f"ERROR: test PDF not found at {pdf_path}") sys.exit(1) - print(f"Proxy URL : {PROXY_BASE_URL}") + print(f"Proxy URL : {OPENAI_BASE_URL}") print(f"Advanced : {MODEL_ADVANCED}") print(f"Lite : {MODEL_LITE}") print(f"Test PDF : {pdf_path} ({pdf_path.stat().st_size:,} bytes)") - print(f"API key : {PROXY_API_KEY[:12]}...") + print(f"API key : {OPENAI_API_KEY[:12]}...") pdf_b64 = _pdf_base64(pdf_path) print(f"Base64 : {len(pdf_b64):,} chars") diff --git a/scripts/cache_wti_curriculum_news.py b/scripts/cache_wti_curriculum_news.py index e56c1912..b91cc1b6 100644 --- a/scripts/cache_wti_curriculum_news.py +++ b/scripts/cache_wti_curriculum_news.py @@ -20,7 +20,7 @@ Environment ----------- -Requires ``PROXY_BASE_URL`` and ``PROXY_API_KEY`` environment variables (or a +Requires ``OPENAI_BASE_URL`` and ``OPENAI_API_KEY`` environment variables (or a ``.env`` file at the repo root). These are used by the Vector LLM proxy to route the Google Search calls. @@ -49,7 +49,7 @@ _REPO_ROOT = Path(__file__).resolve().parent.parent sys.path.insert(0, str(_REPO_ROOT / "aieng-forecasting")) -# Load .env if present (for PROXY_BASE_URL / PROXY_API_KEY) +# Load .env if present (for OPENAI_BASE_URL / OPENAI_API_KEY) try: from dotenv import load_dotenv @@ -152,12 +152,12 @@ async def main( *, dry_run: bool = False, ) -> None: - proxy_base_url = os.getenv("PROXY_BASE_URL", "") - proxy_api_key = os.getenv("PROXY_API_KEY") + openai_base_url = os.getenv("OPENAI_BASE_URL", "") + openai_api_key = os.getenv("OPENAI_API_KEY") - if not proxy_base_url and not dry_run: + if not openai_base_url and not dry_run: print( - "ERROR: PROXY_BASE_URL is not set. Export it or add it to your .env file.", + "ERROR: OPENAI_BASE_URL is not set. 
Export it or add it to your .env file.", file=sys.stderr, ) sys.exit(1) @@ -170,8 +170,8 @@ async def main( ) search_web = _build_search_tool( config, - proxy_base_url=proxy_base_url, - proxy_api_key=proxy_api_key, + openai_base_url=openai_base_url, + openai_api_key=openai_api_key, ) _OUTPUT_DIR.mkdir(parents=True, exist_ok=True) From 33b080b7db3f3f79fde9aa69811ccc3cffa4f005 Mon Sep 17 00:00:00 2001 From: fcogidi <41602287+fcogidi@users.noreply.github.com> Date: Wed, 24 Jun 2026 14:31:56 -0400 Subject: [PATCH 2/6] feat(tests): add test script for onboarding --- pyproject.toml | 2 + tests/conftest.py | 36 ++++++++++++++++ tests/test_integration.py | 88 +++++++++++++++++++++++++++++++++++++++ uv.lock | 8 ++-- 4 files changed, 130 insertions(+), 4 deletions(-) create mode 100644 tests/conftest.py create mode 100644 tests/test_integration.py diff --git a/pyproject.toml b/pyproject.toml index dbdc784b..6dc56514 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -120,6 +120,8 @@ ignore = [ # Playground: throwaway exploration/diagnostics. Same relaxations as scripts, # plus PLC0415 (optional deps are imported inside try/except by design). "playground/**" = ["D1", "E402", "W505", "ERA001", "PLC0415"] +# Repo-root integration tests: optional extras imported inside try/except. +"tests/**" = ["D1", "E402", "W505", "ERA001", "PLC0415"] [tool.ruff.lint.pep8-naming] ignore-names = ["X*", "setUp"] diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 00000000..83740c86 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,36 @@ +"""Shared fixtures for repo-root integration tests.""" + +import os +from pathlib import Path + +import pytest +from dotenv import load_dotenv + + +ROOT = Path(__file__).resolve().parents[1] + +# Optional local keys (e.g. FRED) only. Never overrides onboard-injected shell env. 
+load_dotenv(ROOT / ".env", override=False) + + +def _is_placeholder(value: str) -> bool: + s = value.strip() + return (not s) or s.startswith("your_") or s.endswith("...") + + +def env(key: str) -> str: + """Return a stripped env value, or '' if missing/placeholder.""" + raw = os.environ.get(key, "").strip() + return "" if _is_placeholder(raw) else raw + + +def require_env(*keys: str) -> None: + """Fail the test if any required key is missing or still a placeholder.""" + missing = [key for key in keys if not bool(env(key))] + if missing: + pytest.fail( + f"Required environment variable(s) not configured: {', '.join(missing)}. " + "On Coder workspaces, bootcamp keys are injected by onboarding into your shell. " + 'Locally, run eval "$(onboard --bootcamp-name agentic-forecasting --skip-test)" ' + "or set the variables in your environment." + ) diff --git a/tests/test_integration.py b/tests/test_integration.py new file mode 100644 index 00000000..80bd8db1 --- /dev/null +++ b/tests/test_integration.py @@ -0,0 +1,88 @@ +"""Onboarding gate: verify bootcamp API keys against live services. + +Run automatically on Coder workspace startup, or manually with +``onboard --bootcamp-name agentic-forecasting --test-script tests/test_integration.py``. + +Every variable in ``.env.example`` is checked except ``FRED_API_KEY``. +""" + +import json + +import pytest +from conftest import env, require_env + + +pytestmark = pytest.mark.integration_test + + +def test_vector_proxy_llm() -> None: + """LLM inference via the Vector proxy.""" + require_env("OPENAI_BASE_URL", "OPENAI_API_KEY") + + try: + import litellm + from aieng.forecasting.models import LITE_MODEL + except ImportError as exc: + pytest.fail(f"Missing llm extra: {exc}. 
Run: uv sync --all-extras --dev --all-packages") + + resp = litellm.completion( + model=f"openai/{LITE_MODEL}", + api_base=env("OPENAI_BASE_URL"), + api_key=env("OPENAI_API_KEY"), + messages=[{"role": "user", "content": "Reply with exactly: OK"}], + max_tokens=16, + temperature=0, + ) + text = (resp.choices[0].message.content or "").strip() + assert text, "Proxy returned an empty completion" + + +def test_langfuse_auth() -> None: + """Langfuse tracing credentials.""" + require_env("LANGFUSE_PUBLIC_KEY", "LANGFUSE_SECRET_KEY") + + try: + from aieng.forecasting.langfuse_tracing import init_langfuse_tracing + from langfuse import get_client + except ImportError as exc: + pytest.fail(f"Missing langfuse dependencies: {exc}. Run: uv sync --all-extras --dev --all-packages") + + init_langfuse_tracing() + client = get_client() + assert client.auth_check(), ( + "Langfuse auth_check() returned False. Re-check LANGFUSE_PUBLIC_KEY, LANGFUSE_SECRET_KEY, and LANGFUSE_HOST." + ) + + +@pytest.mark.asyncio +async def test_e2b_code_execution() -> None: + """E2B code execution sandbox.""" + require_env("E2B_API_KEY") + + try: + from aieng.agents.tools.code_interpreter import CodeInterpreter + from aieng.forecasting.methods.agentic.agent_factory import CodeExecutionConfig + except ImportError as exc: + pytest.fail(f"Missing agentic extra: {exc}. Run: uv sync --all-extras --dev --all-packages") + + template_name = CodeExecutionConfig().template_name + ci = CodeInterpreter(template_name=template_name) + + try: + raw = await ci.run_code("print(1 + 1)") + except Exception as exc: + msg = str(exc).lower() + if "template" in msg and ("not found" in msg or "does not exist" in msg or "notfound" in msg): + pytest.fail( + f"The sandbox template {template_name!r} has not been built yet. " + "Build it once (admin): uv run scripts/build_e2b_template.py " + 'after eval "$(onboard --bootcamp-name agentic-forecasting --skip-test)".' 
+ ) + raise + + out = json.loads(raw) + stdout = "".join(out.get("stdout", [])) + if out.get("error"): + err = out["error"] + pytest.fail(f"Sandbox raised: {err.get('name')}: {err.get('value')}") + assert "2" in stdout, f"Expected '2' in stdout, got: {stdout!r}" diff --git a/uv.lock b/uv.lock index 6e9f352b..e4d730f4 100644 --- a/uv.lock +++ b/uv.lock @@ -179,7 +179,7 @@ dev = [ [package.metadata] requires-dist = [ - { name = "aieng-agents", extras = ["code-interpreter"], marker = "extra == 'agentic'", specifier = ">=0.3.0" }, + { name = "aieng-agents", extras = ["code-interpreter"], marker = "extra == 'agentic'", specifier = ">=0.3.1" }, { name = "darts", marker = "extra == 'numerical'", specifier = ">=0.44.1" }, { name = "fredapi", specifier = ">=0.5.2" }, { name = "google-adk", marker = "extra == 'agentic'", specifier = ">=2.2.0" }, @@ -1779,11 +1779,11 @@ wheels = [ [[package]] name = "griffelib" -version = "2.0.2" +version = "2.1.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/9d/82/74f4a3310cdabfbb10da554c3a672847f1ed33c6f61dd472681ce7f1fe67/griffelib-2.0.2.tar.gz", hash = "sha256:3cf20b3bc470e83763ffbf236e0076b1211bac1bc67de13daf494640f2de707e", size = 166461, upload-time = "2026-03-27T11:34:51.091Z" } +sdist = { url = "https://files.pythonhosted.org/packages/33/e4/8d187ea29c2e30b3a09505c567513077d6117861bde1fbd997a167f262ec/griffelib-2.1.0.tar.gz", hash = "sha256:762a186d2c6fd6794d4ea20d428d597ffb857cb56b66421651cbba15bdd5e813", size = 216234, upload-time = "2026-06-19T12:05:42.278Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/11/8c/c9138d881c79aa0ea9ed83cbd58d5ca75624378b38cee225dcf5c42cc91f/griffelib-2.0.2-py3-none-any.whl", hash = "sha256:925c857658fb1ba40c0772c37acbc2ab650bd794d9c1b9726922e36ea4117ea1", size = 142357, upload-time = "2026-03-27T11:34:46.275Z" }, + { url = 
"https://files.pythonhosted.org/packages/e4/d3/5268aeabf2ad82658c4e2ff3a060648d0f02f3926cb53247c0e4d0dab49e/griffelib-2.1.0-py3-none-any.whl", hash = "sha256:cc7b3d2d2865ad0b909fcc38086e3f554b5ea7acbaa7bbb7ecaa3f5dfb7d9f00", size = 142560, upload-time = "2026-06-19T12:05:38.742Z" }, ] [[package]] From f74a6dd8318baf79112d3c3f03a47646fefdedce Mon Sep 17 00:00:00 2001 From: fcogidi <41602287+fcogidi@users.noreply.github.com> Date: Wed, 24 Jun 2026 14:32:24 -0400 Subject: [PATCH 3/6] chore: update .env.example to reflect new environment variable names and add usage notes --- .env.example | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/.env.example b/.env.example index f6a0fa2d..a0021519 100644 --- a/.env.example +++ b/.env.example @@ -1,9 +1,13 @@ +# In the Coder environment bootcamp keys (OPENAI_*, E2B_*, LANGFUSE_*) are injected +# into your shell environment — do NOT copy those into .env on Coder workspaces. +# Optional personal keys (e.g. FRED_API_KEY) still need to be set in .env. + # E2B Code Execution Service E2B_API_KEY=your_e2b_api_key # Vector LLM Proxy Service -PROXY_BASE_URL=https://proxy.vectorinstitute.ai/v1 -PROXY_API_KEY=your_proxy_api_key +OPENAI_BASE_URL=https://proxy.vectorinstitute.ai/v1 +OPENAI_API_KEY=your_api_key # Langfuse — required for trace logging in playground/news_search and misalignment_qa LANGFUSE_PUBLIC_KEY=pk-lf-... @@ -12,11 +16,3 @@ LANGFUSE_HOST=https://us.cloud.langfuse.com # Optional keys FRED_API_KEY=your_fred_api_key # You need to request this from FRED if you want to use it - -# Claude Code setup — route Claude Code through the Vector proxy. -# Set ANTHROPIC_AUTH_TOKEN to your Vector proxy API key (same key as PROXY_API_KEY). 
-ANTHROPIC_BASE_URL="https://proxy.vectorinstitute.ai" -ANTHROPIC_AUTH_TOKEN="your_vector_proxy_api_key" -ANTHROPIC_MODEL="Qwen3-Coder-Next" -ANTHROPIC_CUSTOM_MODEL_OPTION="Qwen3-Coder-Next" -ANTHROPIC_CUSTOM_MODEL_OPTION_NAME="Qwen3-Coder-Next" From ff741b1639d46fc5c0988c26bc2c17fa0934c681 Mon Sep 17 00:00:00 2001 From: fcogidi <41602287+fcogidi@users.noreply.github.com> Date: Wed, 24 Jun 2026 14:34:23 -0400 Subject: [PATCH 4/6] docs: update README to clarify mechanisms by which environment variables are set, especially in the coder environment --- README.md | 45 +++++++++++++++++++++-- implementations/getting_started/README.md | 2 +- 2 files changed, 42 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 59012064..a2a8e6f9 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,6 @@ Each is independent and self-contained — pick the one that matches the problem **Start here → #0 [`getting_started/`](implementations/getting_started/)** — one CPI series, one month ahead. The smallest end-to-end loop: a `Predictor`, a `BacktestSpec` and `EvalSpec`, naive + AutoARIMA baselines, CRPS scoring. The place to learn the evaluation framework before picking a domain below. - | # | Implementation | The problem | Concepts & techniques it demonstrates | | --- | -------------------------------------------------------------------- | ------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | 1 | [`sp500_forecasting/`](implementations/sp500_forecasting/) | S&P 500 returns under a macro/market covariate panel. 
| A head-to-head of conventional numerical methods (naive, ETS, Kalman, AutoARIMA, linear regression, LightGBM) plus a covariate-aware LLM-Process, all reading the same leak-safe covariate panel. Cumulative-return targets at 1/5/21-business-day horizons, CRPS + direction metrics, config-driven specs. | @@ -35,7 +34,6 @@ Each is independent and self-contained — pick the one that matches the problem | 3 | [`energy_oil_forecasting/`](implementations/energy_oil_forecasting/) | Daily WTI crude-oil price under regime-breaking news. | A capability progression — Prophet → LLM-Process → news-grounded agent → code-executing agent — plus an adaptive agent that learns a strategy from data and is scored before vs after. Continuous trajectories, a binary up-shock task, and interactive scenario analysis. | | 4 | [`boc_rate_decisions/`](implementations/boc_rate_decisions/) | Will the Bank of Canada cut, hold, or hike at its next meeting? | Discrete-event forecasting: ordered-categorical outcomes on an irregular calendar, RPS scoring and one-vs-rest calibration (instead of CRPS), a binary (Brier) special case, cutoff-aware document ingestion, and an LLM-as-judge that scores an agent's reasoning against the official rationale. | - **Not sure where to start building?** Each of the four domain implementations above ends with a `99_starter_agent.ipynb` — a fresh, hackable **starter agent** (a `starter_agent/` module) with toggleable news search and code execution, two lightweight tool-usage skills, an interactive cell, and one scored forecast. It's the consistent "continue from here" entry point for taking any reference use case in an agentic direction, and a quick end-to-end test of that use case's agent stack. ## Time Series Data sources @@ -62,12 +60,15 @@ Once you have the key, add it to your repo-root `.env`: FRED_API_KEY=your_fred_api_key ``` +On Coder workspaces, bootcamp keys (`OPENAI_*`, `E2B_*`, `LANGFUSE_*`) live in your shell environment — **not** in repo `.env`. 
See [Coder Workspaces](#coder-workspaces). + ## Repository layout ```text aieng-forecasting/ # Installable library: import as aieng.forecasting implementations/ # Self-contained reference implementations + co-located specs scripts/ # Data-fetch scripts + E2B template builder +tests/ # Onboarding integration tests (not run in CI) planning-docs/ # Architecture notes and the extension/roadmap catalog playground/ # Exploration and archived demos (not reference implementations) ``` @@ -79,7 +80,7 @@ Install dependencies from the repo root: ```bash git clone . # If running locally. Coder environment setup clones repo automatically. cd agentic-forecasting -uv sync +uv sync --dev ``` **macOS — LightGBM and OpenMP.** The library depends on **LightGBM** (used by `DartsLightGBMPredictor` and some notebooks). The PyPI wheel expects **OpenMP** at runtime. If you see `Library not loaded: @rpath/libomp.dylib` when importing or training, install Homebrew's OpenMP once and restart your shell or Jupyter kernel: @@ -90,6 +91,39 @@ brew install libomp On Apple Silicon the dylib is typically under `/opt/homebrew/opt/libomp/lib/`; on Intel Homebrew, `/usr/local/opt/libomp/lib/`. +### Coder Workspaces + +When you open a **Coder workspace**, startup runs automatically in the background. By the time you connect you should have: + +- The repo cloned, a Python venv, and dependencies installed +- Bootcamp API keys (`OPENAI_*`, `E2B_*`, `LANGFUSE_*`) available in your shell (not in `.env`) +- A shell that opens in the repo with the venv activated + +**Your next step:** run [`00_environment_check.ipynb`](implementations/getting_started/00_environment_check.ipynb) top to bottom. That notebook will confirm that startup succeeded. + +On first boot, keys are verified against live services and your onboarding status is recorded. Workspace restarts reload keys without re-running the full test suite. 
+ +**Local machine or troubleshooting** — fetch and verify keys manually: + +```bash +eval "$(onboard --bootcamp-name agentic-forecasting --test-script tests/test_integration.py)" +``` + +Reload keys in a new shell without re-testing: + +```bash +eval "$(onboard --bootcamp-name agentic-forecasting --skip-test)" +``` + +Headless verification (same checks as first-boot onboarding): + +```bash +uv sync --all-extras --dev --all-packages +uv run pytest tests/test_integration.py -v +``` + +**Credential model:** bootcamp keys live in your shell environment. Optional personal keys (e.g. `FRED_API_KEY`) go in a `.env` only — see [`.env.example`](.env.example). + ### Verify your environment first New to the project? Open [`implementations/getting_started/00_environment_check.ipynb`](implementations/getting_started/00_environment_check.ipynb) and run it top to bottom. It's a self-guided preflight that checks every major capability — proxy LLM inference, Langfuse, E2B code execution, StatCan/FRED data access, and a full end-to-end mini backtest — one cell at a time, and tells you exactly what to fix when something isn't set up (most often a missing or placeholder key in your `.env`). It's the fastest way to confirm setup before working through the reference implementations. @@ -110,10 +144,13 @@ If this was unsuccessful, or if you prefer to run with E2B in an alternative env 1. Create a free account at [e2b.dev](https://e2b.dev) and copy your API key. 2. Add it to your `.env` file alongside the other keys (see `.env.example`): + ``` E2B_API_KEY=your_e2b_api_key ``` -3. Build the template (takes a few minutes on first run): + +1. 
Build the template (takes a few minutes on first run): + ```bash uv run --env-file .env scripts/build_e2b_template.py ``` diff --git a/implementations/getting_started/README.md b/implementations/getting_started/README.md index 9367e31c..f548cfb3 100644 --- a/implementations/getting_started/README.md +++ b/implementations/getting_started/README.md @@ -59,7 +59,7 @@ before opening the notebooks below. The FRED check is optional for `getting_started` itself, but required by the S&P 500 reference implementation and useful for the BoC rate decisions one. FRED API keys are free but must be requested individually — **we cannot provide one -for you**. Request yours early at https://fred.stlouisfed.org/docs/api/api_key.html +for you**. Request yours early at <https://fred.stlouisfed.org/docs/api/api_key.html> (approval is usually quick but can take some time). A description like "Requesting an API key to explore the effectiveness of various forecasting techniques on economic data." works well. Once approved, add `FRED_API_KEY=your_key` to your From 27df83542319ea2e9f2ad3d9ba43587d5686882c Mon Sep 17 00:00:00 2001 From: fcogidi <41602287+fcogidi@users.noreply.github.com> Date: Wed, 24 Jun 2026 14:35:17 -0400 Subject: [PATCH 5/6] docs: enhance environment check notebook with setup instructions and clarify API key handling --- .../00_environment_check.ipynb | 1435 +++++++++-------- 1 file changed, 721 insertions(+), 714 deletions(-) diff --git a/implementations/getting_started/00_environment_check.ipynb b/implementations/getting_started/00_environment_check.ipynb index 17878527..1564db1c 100644 --- a/implementations/getting_started/00_environment_check.ipynb +++ b/implementations/getting_started/00_environment_check.ipynb @@ -1,717 +1,724 @@ { - "cells": [ - { - "cell_type": "markdown", - "id": "0be1ce82", - "metadata": {}, - "source": [ - "# 00 · Environment Check — start here\n", - "\n", - "Welcome! This notebook is a **self-guided preflight** for the agentic-forecasting\n", - "project. 
It checks every major capability you'll need — one cell at a time — and\n", - "tells you in plain language what (if anything) is wrong and how to fix it.\n", - "\n", - "**How to use it**\n", - "\n", - "1. Run the cells top to bottom (`Run All` is safe — nothing here changes your data).\n", - "2. Read each result:\n", - " - ✅ **PASS** — that capability works.\n", - " - ⚠️ **WARN** — optional or degraded; you can usually proceed, but read the note.\n", - " - ❌ **FAIL** — something needs fixing before the forecasting notebooks will work.\n", - "3. The final cell gives you a single verdict and a prioritized to-do list.\n", - "\n", - "**The most common cause of a ❌ is a missing or placeholder API key.** Your\n", - "environment is set up from a `.env` file at the repository root (copied from\n", - "`.env.example`). If a key wasn't filled in correctly during setup, the relevant\n", - "check below will tell you exactly which variable to fix.\n", - "\n", - "When everything is green, continue to\n", - "[`01_cpi_data_exploration.ipynb`](01_cpi_data_exploration.ipynb) and\n", - "[`02_cpi_backtest_demo.ipynb`](02_cpi_backtest_demo.ipynb)." - ] + "cells": [ + { + "cell_type": "markdown", + "id": "0be1ce82", + "metadata": {}, + "source": [ + "# 00 · Environment Check — start here\n", + "\n", + "Welcome! This notebook is a **self-guided preflight** for the agentic-forecasting\n", + "project. It checks every major capability you'll need — one cell at a time — and\n", + "tells you in plain language what (if anything) is wrong and how to fix it.\n", + "\n", + "**How to use it**\n", + "\n", + "1. Run the cells top to bottom (`Run All` is safe — nothing here changes your data).\n", + "2. Read each result:\n", + " - ✅ **PASS** — that capability works.\n", + " - ⚠️ **WARN** — optional or degraded; you can usually proceed, but read the note.\n", + " - ❌ **FAIL** — something needs fixing before the forecasting notebooks will work.\n", + "3. 
The final cell gives you a single verdict and a prioritized to-do list.\n", + "\n", + "**The most common cause of a ❌ is a missing or placeholder API key.**\n", + "\n", + "On **Coder workspaces**, bootcamp keys (`OPENAI_*`, `E2B_*`, `LANGFUSE_*`) are\n", + "injected into your shell at startup. You do not need those in a repo `.env`.\n", + "Optional personal keys (e.g. `FRED_API_KEY`) go in `.env` only. The inventory\n", + "below reads the live environment, so bootcamp keys may show as ✅ even when\n", + "`.env` is absent. If a key wasn't filled in correctly during setup, the relevant\n", + "check below will tell you exactly which variable to fix.\n", + "\n", + "When everything is green, continue to\n", + "[`01_cpi_data_exploration.ipynb`](01_cpi_data_exploration.ipynb) and\n", + "[`02_cpi_backtest_demo.ipynb`](02_cpi_backtest_demo.ipynb)." + ] + }, + { + "cell_type": "markdown", + "id": "9f8a87ef", + "metadata": {}, + "source": [ + "## Setup\n", + "\n", + "This cell optionally loads a repo `.env` (for personal keys like FRED_API_KEY), locates\n", + "the repository root, and defines the small helpers used by every check below.\n", + "Bootcamp keys come from your shell environment and are never overwritten by\n", + "`.env`. It imports nothing from the project yet, so it should always succeed." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d3f3d463", + "metadata": {}, + "outputs": [], + "source": [ + "from __future__ import annotations\n", + "\n", + "import asyncio\n", + "import contextvars\n", + "import os\n", + "from concurrent.futures import ThreadPoolExecutor\n", + "from pathlib import Path\n", + "\n", + "from dotenv import load_dotenv\n", + "\n", + "\n", + "# --- Locate the repo root robustly (works regardless of the kernel's cwd) ----\n", + "def find_repo_root(start: Path | None = None) -> Path:\n", + " \"\"\"Walk upward until we find the workspace root (has pyproject + aieng-forecasting).\"\"\"\n", + " here = (start or Path.cwd()).resolve()\n", + " for cand in (here, *here.parents):\n", + " if (cand / \"pyproject.toml\").exists() and (cand / \"aieng-forecasting\").is_dir():\n", + " return cand\n", + " # Fallback: this notebook lives two levels under the root.\n", + " return Path.cwd().resolve().parents[1]\n", + "\n", + "\n", + "ROOT = find_repo_root()\n", + "load_dotenv(ROOT / \".env\", override=False) # optional FRED etc.; shell env wins\n", + "print(f\"Repository root: {ROOT}\")\n", + "print(f\".env present: {(ROOT / '.env').exists()}\")\n", + "\n", + "# --- Result tracking + uniform reporting ------------------------------------\n", + "RESULTS: list[dict[str, str]] = []\n", + "_ICONS = {\"PASS\": \"✅\", \"WARN\": \"⚠️\", \"FAIL\": \"❌\"}\n", + "\n", + "\n", + "def report(name: str, status: str, detail: str = \"\", fix: str = \"\") -> str:\n", + " \"\"\"Print a uniform check result and record it for the final summary.\"\"\"\n", + " RESULTS.append({\"name\": name, \"status\": status, \"detail\": detail})\n", + " print(f\"{_ICONS[status]} {status} — {name}\")\n", + " for line in str(detail).splitlines():\n", + " print(f\" {line}\")\n", + " if fix:\n", + " print(\" ── How to fix ─────────────────────────────\")\n", + " for line in fix.strip(\"\\n\").splitlines():\n", + " print(f\" {line}\")\n", + " return status\n", + "\n", 
+ "\n", + "def ok(name: str, detail: str = \"\") -> str:\n", + " return report(name, \"PASS\", detail)\n", + "\n", + "\n", + "def warn(name: str, detail: str = \"\", fix: str = \"\") -> str:\n", + " return report(name, \"WARN\", detail, fix)\n", + "\n", + "\n", + "def fail(name: str, detail: str = \"\", fix: str = \"\") -> str:\n", + " return report(name, \"FAIL\", detail, fix)\n", + "\n", + "\n", + "# --- Environment-variable helpers -------------------------------------------\n", + "def _is_placeholder(value: str) -> bool:\n", + " s = value.strip()\n", + " return (not s) or s.startswith(\"your_\") or s.endswith(\"...\")\n", + "\n", + "\n", + "def env(key: str) -> str:\n", + " \"\"\"Return a stripped env value, or '' if missing/placeholder.\"\"\"\n", + " raw = os.environ.get(key, \"\").strip()\n", + " return \"\" if _is_placeholder(raw) else raw\n", + "\n", + "\n", + "def env_ok(key: str) -> bool:\n", + " return bool(env(key))\n", + "\n", + "\n", + "def mask(value: str) -> str:\n", + " \"\"\"Show only the last 4 characters of a secret (never echo it in full).\"\"\"\n", + " v = (value or \"\").strip()\n", + " if not v:\n", + " return \"(not set)\"\n", + " return v if len(v) <= 4 else \"…\" + v[-4:]\n", + "\n", + "\n", + "# --- Run an async coroutine from a notebook cell ----------------------------\n", + "def run_async(coro):\n", + " \"\"\"Run a coroutine whether or not an event loop is already running (Jupyter).\"\"\"\n", + " try:\n", + " asyncio.get_running_loop()\n", + " except RuntimeError:\n", + " return asyncio.run(coro)\n", + " ctx = contextvars.copy_context()\n", + " with ThreadPoolExecutor(max_workers=1) as pool:\n", + " return pool.submit(ctx.run, asyncio.run, coro).result()\n", + "\n", + "\n", + "print(\"Helpers ready.\")" + ] + }, + { + "cell_type": "markdown", + "id": "edcb21f4", + "metadata": {}, + "source": [ + "## 1 · API key inventory\n", + "\n", + "A quick look at which environment variables are present in your **shell\n", + "environment** (and 
optional `.env`), missing, or still hold a placeholder value.\n", + "On Coder, bootcamp keys mostly come from onboarding — not from `.env`. This is\n", + "**informational** — it doesn't pass or fail on its own, but it explains most of\n", + "the results further down.\n", + "\n", + "| Tier | Variable | Used for |\n", + "|---|---|---|\n", + "| Required | `OPENAI_BASE_URL`, `OPENAI_API_KEY` | LLM inference via the Vector proxy |\n", + "| Required | `E2B_API_KEY` | Sandboxed code execution for agents |\n", + "| Recommended | `LANGFUSE_PUBLIC_KEY`, `LANGFUSE_SECRET_KEY`, `LANGFUSE_HOST` | Trace logging |\n", + "| Optional | `FRED_API_KEY` | FRED data (apply for a free key if you want it) |" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "268ed976", + "metadata": {}, + "outputs": [], + "source": [ + "_INVENTORY = [\n", + " (\"Required\", \"OPENAI_BASE_URL\"),\n", + " (\"Required\", \"OPENAI_API_KEY\"),\n", + " (\"Required\", \"E2B_API_KEY\"),\n", + " (\"Recommended\", \"LANGFUSE_PUBLIC_KEY\"),\n", + " (\"Recommended\", \"LANGFUSE_SECRET_KEY\"),\n", + " (\"Recommended\", \"LANGFUSE_HOST\"),\n", + " (\"Optional\", \"FRED_API_KEY\"),\n", + "]\n", + "\n", + "\n", + "def _status_symbol(key: str) -> str:\n", + " raw = os.environ.get(key, \"\").strip()\n", + " if not raw:\n", + " return \"❌ missing\"\n", + " if _is_placeholder(raw):\n", + " return \"⚠️ placeholder\"\n", + " return \"✅ set\"\n", + "\n", + "\n", + "print(f\"{'Tier':<12} {'Variable':<22} {'Status':<16} Value\")\n", + "print(\"-\" * 70)\n", + "for tier, key in _INVENTORY:\n", + " # OPENAI_BASE_URL / LANGFUSE_HOST are URLs — fine to show; secrets are masked.\n", + " show = os.environ.get(key, \"\") if key.endswith((\"_URL\", \"_HOST\")) else mask(os.environ.get(key, \"\"))\n", + " print(f\"{tier:<12} {key:<22} {_status_symbol(key):<16} {show or '(not set)'}\")\n", + "\n", + "print()\n", + "print(\"Legend: ✅ set ⚠️ still a placeholder from .env.example ❌ not set\")" + ] + }, + { + "cell_type": 
"markdown", + "id": "11f149e9", + "metadata": {}, + "source": [ + "## 2 · Package imports & native libraries\n", + "\n", + "Confirms the project packages import cleanly and that LightGBM's native\n", + "dependency (OpenMP) loads. The most common snag here is on macOS, where the\n", + "LightGBM wheel needs Homebrew's `libomp`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2a74283d", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " # Import the project and LightGBM; LightGBM's import triggers the native\n", + " # OpenMP (libomp) load that is the usual macOS setup snag.\n", + " import aieng.forecasting # noqa: F401\n", + " import lightgbm # noqa: F401\n", + " from aieng.forecasting.data import DataService, SeriesMetadata # noqa: F401\n", + " from aieng.forecasting.evaluation import BacktestSpec, backtest # noqa: F401\n", + " from aieng.forecasting.methods import LastValuePredictor # noqa: F401\n", + " from aieng.forecasting.models import LITE_MODEL\n", + "\n", + " ok(\n", + " \"Package imports & LightGBM/OpenMP\",\n", + " f\"aieng.forecasting, LightGBM {lightgbm.__version__}, default model {LITE_MODEL!r}.\",\n", + " )\n", + "except Exception as exc: # noqa: BLE001\n", + " msg = str(exc)\n", + " if \"libomp\" in msg or \"Library not loaded\" in msg:\n", + " fail(\n", + " \"Package imports & LightGBM/OpenMP\",\n", + " f\"LightGBM could not load OpenMP: {msg}\",\n", + " fix=(\n", + " \"macOS only — install Homebrew's OpenMP, then restart the Jupyter kernel:\\n\"\n", + " \" brew install libomp\\n\"\n", + " \"On Apple Silicon the dylib lives under /opt/homebrew/opt/libomp/lib/.\"\n", + " ),\n", + " )\n", + " else:\n", + " fail(\n", + " \"Package imports & LightGBM/OpenMP\",\n", + " f\"Import failed: {type(exc).__name__}: {msg}\",\n", + " fix=(\n", + " \"Reinstall the workspace from the repo root:\\n\"\n", + " \" uv sync\\n\"\n", + " \"Then restart the Jupyter kernel and re-run this cell.\"\n", + " ),\n", + " )" + ] + }, + { + 
"cell_type": "markdown", + "id": "ae84abd8", + "metadata": {}, + "source": [ + "## 3 · LLM inference via the Vector proxy\n", + "\n", + "Sends one tiny completion to the **default model** through the proxy. This is the\n", + "single most important check — almost every notebook depends on it. It routes\n", + "exactly the way the library does (`openai/` + `api_base`)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6b959bbc", + "metadata": {}, + "outputs": [], + "source": [ + "_NAME = \"LLM inference via proxy\"\n", + "\n", + "if not env_ok(\"OPENAI_BASE_URL\") or not env_ok(\"OPENAI_API_KEY\"):\n", + " missing = [k for k in (\"OPENAI_BASE_URL\", \"OPENAI_API_KEY\") if not env_ok(k)]\n", + " fail(\n", + " _NAME,\n", + " f\"Required proxy setting(s) not configured: {', '.join(missing)}.\",\n", + " fix=(\n", + " \"Set these in your .env at the repository root (see .env.example):\\n\"\n", + " \" OPENAI_BASE_URL=https://proxy.vectorinstitute.ai/v1\\n\"\n", + " \" OPENAI_API_KEY=\\n\"\n", + " \"If they look set but this still fails, check for a leftover placeholder value.\"\n", + " ),\n", + " )\n", + "else:\n", + " try:\n", + " import litellm\n", + " from aieng.forecasting.models import LITE_MODEL\n", + "\n", + " resp = litellm.completion(\n", + " model=f\"openai/{LITE_MODEL}\",\n", + " api_base=env(\"OPENAI_BASE_URL\"),\n", + " api_key=env(\"OPENAI_API_KEY\"),\n", + " messages=[{\"role\": \"user\", \"content\": \"Reply with exactly: OK\"}],\n", + " max_tokens=16,\n", + " temperature=0,\n", + " )\n", + " text = (resp.choices[0].message.content or \"\").strip()\n", + " ok(_NAME, f\"Model {LITE_MODEL!r} responded: {text!r}\")\n", + " except Exception as exc: # noqa: BLE001\n", + " msg = str(exc)\n", + " low = msg.lower()\n", + " if any(t in low for t in (\"auth\", \"401\", \"403\", \"api key\", \"unauthorized\", \"forbidden\")):\n", + " fix = (\n", + " \"Your OPENAI_API_KEY was rejected. 
Re-check it in .env — copy it again from \"\n", + " \"your setup credentials, with no surrounding quotes or whitespace.\"\n", + " )\n", + " elif any(t in low for t in (\"connect\", \"timeout\", \"resolve\", \"connection\", \"getaddrinfo\", \"name or service\")):\n", + " fix = (\n", + " f\"Could not reach the proxy at {env('OPENAI_BASE_URL')!r}. Check your network/VPN \"\n", + " \"and that OPENAI_BASE_URL is correct (it should end in /v1).\"\n", + " )\n", + " else:\n", + " fix = (\n", + " \"Verify OPENAI_BASE_URL and OPENAI_API_KEY in .env, then restart the kernel. \"\n", + " \"The full error above usually names the cause.\"\n", + " )\n", + " fail(_NAME, f\"{type(exc).__name__}: {msg}\", fix=fix)" + ] + }, + { + "cell_type": "markdown", + "id": "71466114", + "metadata": {}, + "source": [ + "## 4 · Langfuse tracing connection\n", + "\n", + "Langfuse records traces of LLM and agent runs so you can inspect them in the UI.\n", + "It's **recommended but optional** — the forecasting notebooks run without it, you\n", + "just won't get trace links." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2010eb9a", + "metadata": {}, + "outputs": [], + "source": [ + "_NAME = \"Langfuse tracing\"\n", + "\n", + "if not (env_ok(\"LANGFUSE_PUBLIC_KEY\") and env_ok(\"LANGFUSE_SECRET_KEY\")):\n", + " warn(\n", + " _NAME,\n", + " \"Langfuse credentials are not set — tracing will be skipped (this is OK to proceed).\",\n", + " fix=(\n", + " \"To enable trace logging, set these in .env (from your Langfuse project settings):\\n\"\n", + " \" LANGFUSE_PUBLIC_KEY=pk-lf-...\\n\"\n", + " \" LANGFUSE_SECRET_KEY=sk-lf-...\\n\"\n", + " \" LANGFUSE_HOST=https://us.cloud.langfuse.com\"\n", + " ),\n", + " )\n", + "else:\n", + " try:\n", + " from aieng.forecasting.langfuse_tracing import init_langfuse_tracing\n", + " from langfuse import get_client\n", + "\n", + " init_langfuse_tracing()\n", + " client = get_client()\n", + " if client.auth_check():\n", + " host = env(\"LANGFUSE_HOST\") or \"https://cloud.langfuse.com\"\n", + " ok(_NAME, f\"Authenticated to {host} (public key {mask(env('LANGFUSE_PUBLIC_KEY'))}).\")\n", + " else:\n", + " fail(\n", + " _NAME,\n", + " \"Credentials are set but Langfuse auth_check() returned False.\",\n", + " fix=(\n", + " \"Re-check LANGFUSE_PUBLIC_KEY / LANGFUSE_SECRET_KEY and that LANGFUSE_HOST \"\n", + " \"matches your project's region (e.g. https://us.cloud.langfuse.com).\"\n", + " ),\n", + " )\n", + " except Exception as exc: # noqa: BLE001\n", + " fail(\n", + " _NAME,\n", + " f\"{type(exc).__name__}: {exc}\",\n", + " fix=(\n", + " \"Confirm the three LANGFUSE_* variables in .env and that LANGFUSE_HOST is reachable, \"\n", + " \"then restart the kernel.\"\n", + " ),\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "98e557ee", + "metadata": {}, + "source": [ + "## 5 · E2B code execution sandbox\n", + "\n", + "Agentic forecasters run code in an E2B cloud sandbox. This runs a trivial snippet\n", + "(`print(1 + 1)`) end-to-end. 
A failure here is usually either a missing\n", + "`E2B_API_KEY` or a sandbox **template that hasn't been built yet** — the messages\n", + "below distinguish the two." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0a5ec930", + "metadata": {}, + "outputs": [], + "source": [ + "_NAME = \"E2B code execution\"\n", + "\n", + "if not env_ok(\"E2B_API_KEY\"):\n", + " fail(\n", + " _NAME,\n", + " \"E2B_API_KEY is not set — code execution cannot run.\",\n", + " fix=(\n", + " \"1. Create a free account at https://e2b.dev and copy your API key.\\n\"\n", + " \"2. Add it to .env at the repository root:\\n\"\n", + " \" E2B_API_KEY=\\n\"\n", + " \"3. Restart the kernel and re-run this cell.\"\n", + " ),\n", + " )\n", + "else:\n", + " # Mirror the project default; fall back to the literal if the agentic extra\n", + " # (which pulls in google-adk) is not importable in this kernel.\n", + " try:\n", + " from aieng.forecasting.methods.agentic.agent_factory import CodeExecutionConfig\n", + "\n", + " template_name = CodeExecutionConfig().template_name\n", + " except Exception: # noqa: BLE001\n", + " template_name = \"agentic-forecasting-bootcamp\"\n", + "\n", + " try:\n", + " import json\n", + "\n", + " from aieng.agents.tools.code_interpreter import CodeInterpreter\n", + "\n", + " ci = CodeInterpreter(template_name=template_name)\n", + " raw = run_async(ci.run_code(\"print(1 + 1)\"))\n", + " out = json.loads(raw)\n", + " stdout = \"\".join(out.get(\"stdout\", []))\n", + " if out.get(\"error\"):\n", + " err = out[\"error\"]\n", + " fail(_NAME, f\"Sandbox ran but raised: {err.get('name')}: {err.get('value')}\")\n", + " elif \"2\" in stdout:\n", + " ok(_NAME, f\"Sandbox (template {template_name!r}) executed code and returned: {stdout.strip()!r}\")\n", + " else:\n", + " warn(_NAME, f\"Sandbox ran but produced unexpected output: {stdout!r}\")\n", + " except Exception as exc: # noqa: BLE001\n", + " msg = str(exc)\n", + " low = msg.lower()\n", + " if \"template\" in 
low and (\"not found\" in low or \"does not exist\" in low or \"notfound\" in low):\n", + " fix = (\n", + " f\"The sandbox template {template_name!r} hasn't been built yet. Build it once \"\n", + " \"(takes a few minutes):\\n\"\n", + " \" uv run --env-file .env scripts/build_e2b_template.py\"\n", + " )\n", + " elif any(t in low for t in (\"auth\", \"401\", \"403\", \"api key\", \"unauthorized\", \"invalid\")):\n", + " fix = (\n", + " \"Your E2B_API_KEY was rejected. Re-copy it from https://e2b.dev into .env, \"\n", + " \"with no surrounding quotes or whitespace.\"\n", + " )\n", + " else:\n", + " fix = (\n", + " \"Check that E2B_API_KEY is valid and the template has been built \"\n", + " \"(uv run --env-file .env scripts/build_e2b_template.py). The error above names the cause.\"\n", + " )\n", + " fail(_NAME, f\"{type(exc).__name__}: {msg}\", fix=fix)" + ] + }, + { + "cell_type": "markdown", + "id": "30f2226b", + "metadata": {}, + "source": [ + "## 6 · StatCan data access\n", + "\n", + "Pulls one real CPI series (Canada gasoline) from Statistics Canada. The first run\n", + "downloads and caches the table under `data/statcan/`; later runs read the cache.\n", + "If you're offline but the cache already exists, this degrades to a ⚠️." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f4fd163a", + "metadata": {}, + "outputs": [], + "source": [ + "_NAME = \"StatCan data pull\"\n", + "\n", + "try:\n", + " from aieng.forecasting.data.adapters import StatCanAdapter\n", + "\n", + " adapter = StatCanAdapter(\n", + " table_id=\"18-10-0004-11\",\n", + " member_filter={\"GEO\": \"Canada\", \"Products and product groups\": \"Gasoline\"},\n", + " cache_dir=ROOT / \"data\" / \"statcan\",\n", + " )\n", + " df = adapter.fetch()\n", + " start = df[\"timestamp\"].min().strftime(\"%Y-%m\")\n", + " end = df[\"timestamp\"].max().strftime(\"%Y-%m\")\n", + " ok(_NAME, f\"Fetched cpi_gasoline_canada: {len(df)} rows, {start} → {end}.\")\n", + "except Exception as exc: # noqa: BLE001\n", + " cache_file_exists = (ROOT / \"data\" / \"statcan\").exists() and any((ROOT / \"data\" / \"statcan\").glob(\"*.zip\"))\n", + " if cache_file_exists:\n", + " warn(\n", + " _NAME,\n", + " f\"Live fetch failed ({type(exc).__name__}: {exc}) but a local StatCan cache exists.\",\n", + " fix=\"Likely a transient network issue. The cached data is usable; re-run later to refresh.\",\n", + " )\n", + " else:\n", + " fail(\n", + " _NAME,\n", + " f\"{type(exc).__name__}: {exc}\",\n", + " fix=(\n", + " \"Populate the local data cache once from the repo root:\\n\"\n", + " \" uv run python scripts/fetch_cpi.py\\n\"\n", + " \"This needs network access to Statistics Canada the first time.\"\n", + " ),\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "9ddb7840", + "metadata": {}, + "source": [ + "## 7 · FRED data access (optional)\n", + "\n", + "FRED (US Federal Reserve Economic Data) needs a **free API key**. It's optional —\n", + "only some implementations use it. If you don't have a key, this is a ⚠️ with\n", + "instructions, not a failure. If you do, we validate it with a live fetch." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d74f5ea7", + "metadata": {}, + "outputs": [], + "source": [ + "_NAME = \"FRED data pull\"\n", + "\n", + "if not env_ok(\"FRED_API_KEY\"):\n", + " warn(\n", + " _NAME,\n", + " \"FRED_API_KEY is not set. This is optional — skip it unless you need FRED series.\",\n", + " fix=(\n", + " \"FRED requires a free API key. To get one:\\n\"\n", + " \" 1. Request it at https://fred.stlouisfed.org/docs/api/api_key.html\\n\"\n", + " \" 2. Add it to .env at the repository root:\\n\"\n", + " \" FRED_API_KEY=\\n\"\n", + " \" 3. Restart the kernel and re-run this cell.\"\n", + " ),\n", + " )\n", + "else:\n", + " try:\n", + " from aieng.forecasting.data.adapters import FREDAdapter\n", + "\n", + " # refresh=True forces a live API call so we actually validate the key.\n", + " adapter = FREDAdapter(\"EXCAUS\", cache_dir=ROOT / \"data\" / \"fred\", refresh=True)\n", + " df = adapter.fetch()\n", + " latest = df.iloc[-1]\n", + " ok(\n", + " _NAME,\n", + " f\"Validated FRED key — fetched EXCAUS (CAD/USD): {len(df)} rows, \"\n", + " f\"latest {latest['timestamp'].strftime('%Y-%m')} = {latest['value']:.4f}.\",\n", + " )\n", + " except Exception as exc: # noqa: BLE001\n", + " fail(\n", + " _NAME,\n", + " f\"{type(exc).__name__}: {exc}\",\n", + " fix=(\n", + " \"Your FRED_API_KEY may be invalid. Re-copy it from \"\n", + " \"https://fred.stlouisfed.org/docs/api/api_key.html into .env, then restart the kernel.\"\n", + " ),\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "b0eaa759", + "metadata": {}, + "source": [ + "## 8 · End-to-end mini forecast\n", + "\n", + "The real thing in miniature: load the getting-started backtest spec, register the\n", + "gasoline series, run a `LastValuePredictor` backtest, and score it (CRPS). This\n", + "proves the whole **data → predictor → backtest → score** loop works — not just the\n", + "individual services. It uses only the StatCan cache (no LLM/network)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "59a46292", + "metadata": {}, + "outputs": [], + "source": [ + "_NAME = \"End-to-end mini forecast\"\n", + "\n", + "try:\n", + " import yaml\n", + " from aieng.forecasting.data import DataService, SeriesMetadata\n", + " from aieng.forecasting.data.adapters import StatCanAdapter\n", + " from aieng.forecasting.evaluation import BacktestSpec, backtest\n", + " from aieng.forecasting.methods import LastValuePredictor\n", + "\n", + " spec_path = ROOT / \"implementations\" / \"getting_started\" / \"specs\" / \"cpi_gasoline_1m.yaml\"\n", + " spec = BacktestSpec.model_validate(yaml.safe_load(spec_path.read_text()))\n", + "\n", + " svc = DataService()\n", + " svc.register(\n", + " \"cpi_gasoline_canada\",\n", + " StatCanAdapter(\n", + " table_id=\"18-10-0004-11\",\n", + " member_filter={\"GEO\": \"Canada\", \"Products and product groups\": \"Gasoline\"},\n", + " cache_dir=ROOT / \"data\" / \"statcan\",\n", + " ),\n", + " SeriesMetadata(\n", + " series_id=\"cpi_gasoline_canada\",\n", + " description=\"CPI Gasoline, Canada (2002=100)\",\n", + " source=\"StatCan\",\n", + " units=\"Index 2002=100\",\n", + " frequency=\"MS\",\n", + " table_id=\"18-10-0004-11\",\n", + " ),\n", + " )\n", + "\n", + " result = backtest(LastValuePredictor(), spec, svc)\n", + " ok(\n", + " _NAME,\n", + " f\"Ran {result.predictor_id} over the gasoline backtest — \"\n", + " f\"mean {result.metric.upper()} = {result.mean_score:.4f}.\",\n", + " )\n", + "except Exception as exc: # noqa: BLE001\n", + " fail(\n", + " _NAME,\n", + " f\"{type(exc).__name__}: {exc}\",\n", + " fix=(\n", + " \"This depends on the StatCan check above. 
If that failed, fix it first \"\n", + " \"(uv run python scripts/fetch_cpi.py), then restart the kernel and re-run.\"\n", + " ),\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "6e319070", + "metadata": {}, + "source": [ + "## Summary\n", + "\n", + "A single verdict and, if needed, a prioritized list of what to fix." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3d141741", + "metadata": {}, + "outputs": [], + "source": [ + "_passed = [r for r in RESULTS if r[\"status\"] == \"PASS\"]\n", + "_warned = [r for r in RESULTS if r[\"status\"] == \"WARN\"]\n", + "_failed = [r for r in RESULTS if r[\"status\"] == \"FAIL\"]\n", + "\n", + "print(\"=\" * 64)\n", + "print(f\" Checks run: {len(RESULTS)} ✅ {len(_passed)} ⚠️ {len(_warned)} ❌ {len(_failed)}\")\n", + "print(\"=\" * 64)\n", + "\n", + "if _failed:\n", + " print(\"\\n❌ Fix these before continuing (most are missing/placeholder keys in .env):\")\n", + " for r in _failed:\n", + " print(f\" • {r['name']}\")\n", + "if _warned:\n", + " print(\"\\n⚠️ Optional / heads-up (you can usually proceed):\")\n", + " for r in _warned:\n", + " print(f\" • {r['name']}\")\n", + "\n", + "print()\n", + "if not _failed:\n", + " print(\"🎉 You're ready! 
Open 01_cpi_data_exploration.ipynb to begin.\")\n", + " if _warned:\n", + " print(\" (The ⚠️ items above are optional — enable them later if you need them.)\")\n", + "else:\n", + " print(\"Re-run this notebook after editing .env and restarting the kernel.\")\n", + " print(\"Most ❌ items are a key that wasn't filled in during setup — scroll up for the exact fix.\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.12" + } }, - { - "cell_type": "markdown", - "id": "9f8a87ef", - "metadata": {}, - "source": [ - "## Setup\n", - "\n", - "This cell loads your `.env`, locates the repository root, and defines the small\n", - "helpers used by every check below. It imports nothing from the project yet, so it\n", - "should always succeed." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d3f3d463", - "metadata": {}, - "outputs": [], - "source": [ - "from __future__ import annotations\n", - "\n", - "import asyncio\n", - "import contextvars\n", - "import os\n", - "from concurrent.futures import ThreadPoolExecutor\n", - "from pathlib import Path\n", - "\n", - "from dotenv import load_dotenv\n", - "\n", - "\n", - "# --- Locate the repo root robustly (works regardless of the kernel's cwd) ----\n", - "def find_repo_root(start: Path | None = None) -> Path:\n", - " \"\"\"Walk upward until we find the workspace root (has pyproject + aieng-forecasting).\"\"\"\n", - " here = (start or Path.cwd()).resolve()\n", - " for cand in (here, *here.parents):\n", - " if (cand / \"pyproject.toml\").exists() and (cand / \"aieng-forecasting\").is_dir():\n", - " return cand\n", - " # Fallback: this notebook lives two levels under the root.\n", - " return Path.cwd().resolve().parents[1]\n", - "\n", - "\n", - "ROOT = find_repo_root()\n", - "load_dotenv(ROOT / \".env\")\n", - "print(f\"Repository root: {ROOT}\")\n", - "print(f\".env present: {(ROOT / '.env').exists()}\")\n", - "\n", - "# --- Result tracking + uniform reporting ------------------------------------\n", - "RESULTS: list[dict[str, str]] = []\n", - "_ICONS = {\"PASS\": \"✅\", \"WARN\": \"⚠️\", \"FAIL\": \"❌\"}\n", - "\n", - "\n", - "def report(name: str, status: str, detail: str = \"\", fix: str = \"\") -> str:\n", - " \"\"\"Print a uniform check result and record it for the final summary.\"\"\"\n", - " RESULTS.append({\"name\": name, \"status\": status, \"detail\": detail})\n", - " print(f\"{_ICONS[status]} {status} — {name}\")\n", - " for line in str(detail).splitlines():\n", - " print(f\" {line}\")\n", - " if fix:\n", - " print(\" ── How to fix ─────────────────────────────\")\n", - " for line in fix.strip(\"\\n\").splitlines():\n", - " print(f\" {line}\")\n", - " return status\n", - "\n", - "\n", - "def ok(name: str, detail: str = \"\") -> 
str:\n", - " return report(name, \"PASS\", detail)\n", - "\n", - "\n", - "def warn(name: str, detail: str = \"\", fix: str = \"\") -> str:\n", - " return report(name, \"WARN\", detail, fix)\n", - "\n", - "\n", - "def fail(name: str, detail: str = \"\", fix: str = \"\") -> str:\n", - " return report(name, \"FAIL\", detail, fix)\n", - "\n", - "\n", - "# --- Environment-variable helpers -------------------------------------------\n", - "def _is_placeholder(value: str) -> bool:\n", - " s = value.strip()\n", - " return (not s) or s.startswith(\"your_\") or s.endswith(\"...\")\n", - "\n", - "\n", - "def env(key: str) -> str:\n", - " \"\"\"Return a stripped env value, or '' if missing/placeholder.\"\"\"\n", - " raw = os.environ.get(key, \"\").strip()\n", - " return \"\" if _is_placeholder(raw) else raw\n", - "\n", - "\n", - "def env_ok(key: str) -> bool:\n", - " return bool(env(key))\n", - "\n", - "\n", - "def mask(value: str) -> str:\n", - " \"\"\"Show only the last 4 characters of a secret (never echo it in full).\"\"\"\n", - " v = (value or \"\").strip()\n", - " if not v:\n", - " return \"(not set)\"\n", - " return v if len(v) <= 4 else \"…\" + v[-4:]\n", - "\n", - "\n", - "# --- Run an async coroutine from a notebook cell ----------------------------\n", - "def run_async(coro):\n", - " \"\"\"Run a coroutine whether or not an event loop is already running (Jupyter).\"\"\"\n", - " try:\n", - " asyncio.get_running_loop()\n", - " except RuntimeError:\n", - " return asyncio.run(coro)\n", - " ctx = contextvars.copy_context()\n", - " with ThreadPoolExecutor(max_workers=1) as pool:\n", - " return pool.submit(ctx.run, asyncio.run, coro).result()\n", - "\n", - "\n", - "print(\"Helpers ready.\")" - ] - }, - { - "cell_type": "markdown", - "id": "edcb21f4", - "metadata": {}, - "source": [ - "## 1 · `.env` key inventory\n", - "\n", - "A quick look at which environment variables are present, missing, or still hold a\n", - "placeholder value from `.env.example`. 
This is **informational** — it doesn't pass\n", - "or fail on its own, but it explains most of the results further down.\n", - "\n", - "| Tier | Variable | Used for |\n", - "|---|---|---|\n", - "| Required | `PROXY_BASE_URL`, `PROXY_API_KEY` | LLM inference via the Vector proxy |\n", - "| Required | `E2B_API_KEY` | Sandboxed code execution for agents |\n", - "| Recommended | `LANGFUSE_PUBLIC_KEY`, `LANGFUSE_SECRET_KEY`, `LANGFUSE_HOST` | Trace logging |\n", - "| Optional | `FRED_API_KEY` | FRED data (apply for a free key if you want it) |" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "268ed976", - "metadata": {}, - "outputs": [], - "source": [ - "_INVENTORY = [\n", - " (\"Required\", \"PROXY_BASE_URL\"),\n", - " (\"Required\", \"PROXY_API_KEY\"),\n", - " (\"Required\", \"E2B_API_KEY\"),\n", - " (\"Recommended\", \"LANGFUSE_PUBLIC_KEY\"),\n", - " (\"Recommended\", \"LANGFUSE_SECRET_KEY\"),\n", - " (\"Recommended\", \"LANGFUSE_HOST\"),\n", - " (\"Optional\", \"FRED_API_KEY\"),\n", - "]\n", - "\n", - "\n", - "def _status_symbol(key: str) -> str:\n", - " raw = os.environ.get(key, \"\").strip()\n", - " if not raw:\n", - " return \"❌ missing\"\n", - " if _is_placeholder(raw):\n", - " return \"⚠️ placeholder\"\n", - " return \"✅ set\"\n", - "\n", - "\n", - "print(f\"{'Tier':<12} {'Variable':<22} {'Status':<16} Value\")\n", - "print(\"-\" * 70)\n", - "for tier, key in _INVENTORY:\n", - " # PROXY_BASE_URL / LANGFUSE_HOST are URLs — fine to show; secrets are masked.\n", - " show = os.environ.get(key, \"\") if key.endswith((\"_URL\", \"_HOST\")) else mask(os.environ.get(key, \"\"))\n", - " print(f\"{tier:<12} {key:<22} {_status_symbol(key):<16} {show or '(not set)'}\")\n", - "\n", - "print()\n", - "print(\"Legend: ✅ set ⚠️ still a placeholder from .env.example ❌ not set\")" - ] - }, - { - "cell_type": "markdown", - "id": "11f149e9", - "metadata": {}, - "source": [ - "## 2 · Package imports & native libraries\n", - "\n", - "Confirms the project 
packages import cleanly and that LightGBM's native\n", - "dependency (OpenMP) loads. The most common snag here is on macOS, where the\n", - "LightGBM wheel needs Homebrew's `libomp`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2a74283d", - "metadata": {}, - "outputs": [], - "source": [ - "try:\n", - " # Import the project and LightGBM; LightGBM's import triggers the native\n", - " # OpenMP (libomp) load that is the usual macOS setup snag.\n", - " import aieng.forecasting # noqa: F401\n", - " import lightgbm # noqa: F401\n", - " from aieng.forecasting.data import DataService, SeriesMetadata # noqa: F401\n", - " from aieng.forecasting.evaluation import BacktestSpec, backtest # noqa: F401\n", - " from aieng.forecasting.methods import LastValuePredictor # noqa: F401\n", - " from aieng.forecasting.models import LITE_MODEL\n", - "\n", - " ok(\n", - " \"Package imports & LightGBM/OpenMP\",\n", - " f\"aieng.forecasting, LightGBM {lightgbm.__version__}, default model {LITE_MODEL!r}.\",\n", - " )\n", - "except Exception as exc: # noqa: BLE001\n", - " msg = str(exc)\n", - " if \"libomp\" in msg or \"Library not loaded\" in msg:\n", - " fail(\n", - " \"Package imports & LightGBM/OpenMP\",\n", - " f\"LightGBM could not load OpenMP: {msg}\",\n", - " fix=(\n", - " \"macOS only — install Homebrew's OpenMP, then restart the Jupyter kernel:\\n\"\n", - " \" brew install libomp\\n\"\n", - " \"On Apple Silicon the dylib lives under /opt/homebrew/opt/libomp/lib/.\"\n", - " ),\n", - " )\n", - " else:\n", - " fail(\n", - " \"Package imports & LightGBM/OpenMP\",\n", - " f\"Import failed: {type(exc).__name__}: {msg}\",\n", - " fix=(\n", - " \"Reinstall the workspace from the repo root:\\n\"\n", - " \" uv sync\\n\"\n", - " \"Then restart the Jupyter kernel and re-run this cell.\"\n", - " ),\n", - " )" - ] - }, - { - "cell_type": "markdown", - "id": "ae84abd8", - "metadata": {}, - "source": [ - "## 3 · LLM inference via the Vector proxy\n", - "\n", - "Sends one 
tiny completion to the **default model** through the proxy. This is the\n", - "single most important check — almost every notebook depends on it. It routes\n", - "exactly the way the library does (`openai/` + `api_base`)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6b959bbc", - "metadata": {}, - "outputs": [], - "source": [ - "_NAME = \"LLM inference via proxy\"\n", - "\n", - "if not env_ok(\"PROXY_BASE_URL\") or not env_ok(\"PROXY_API_KEY\"):\n", - " missing = [k for k in (\"PROXY_BASE_URL\", \"PROXY_API_KEY\") if not env_ok(k)]\n", - " fail(\n", - " _NAME,\n", - " f\"Required proxy setting(s) not configured: {', '.join(missing)}.\",\n", - " fix=(\n", - " \"Set these in your .env at the repository root (see .env.example):\\n\"\n", - " \" PROXY_BASE_URL=https://proxy.vectorinstitute.ai/v1\\n\"\n", - " \" PROXY_API_KEY=\\n\"\n", - " \"If they look set but this still fails, check for a leftover placeholder value.\"\n", - " ),\n", - " )\n", - "else:\n", - " try:\n", - " import litellm\n", - " from aieng.forecasting.models import LITE_MODEL\n", - "\n", - " resp = litellm.completion(\n", - " model=f\"openai/{LITE_MODEL}\",\n", - " api_base=env(\"PROXY_BASE_URL\"),\n", - " api_key=env(\"PROXY_API_KEY\"),\n", - " messages=[{\"role\": \"user\", \"content\": \"Reply with exactly: OK\"}],\n", - " max_tokens=16,\n", - " temperature=0,\n", - " )\n", - " text = (resp.choices[0].message.content or \"\").strip()\n", - " ok(_NAME, f\"Model {LITE_MODEL!r} responded: {text!r}\")\n", - " except Exception as exc: # noqa: BLE001\n", - " msg = str(exc)\n", - " low = msg.lower()\n", - " if any(t in low for t in (\"auth\", \"401\", \"403\", \"api key\", \"unauthorized\", \"forbidden\")):\n", - " fix = (\n", - " \"Your PROXY_API_KEY was rejected. 
Re-check it in .env — copy it again from \"\n", - " \"your setup credentials, with no surrounding quotes or whitespace.\"\n", - " )\n", - " elif any(t in low for t in (\"connect\", \"timeout\", \"resolve\", \"connection\", \"getaddrinfo\", \"name or service\")):\n", - " fix = (\n", - " f\"Could not reach the proxy at {env('PROXY_BASE_URL')!r}. Check your network/VPN \"\n", - " \"and that PROXY_BASE_URL is correct (it should end in /v1).\"\n", - " )\n", - " else:\n", - " fix = (\n", - " \"Verify PROXY_BASE_URL and PROXY_API_KEY in .env, then restart the kernel. \"\n", - " \"The full error above usually names the cause.\"\n", - " )\n", - " fail(_NAME, f\"{type(exc).__name__}: {msg}\", fix=fix)" - ] - }, - { - "cell_type": "markdown", - "id": "71466114", - "metadata": {}, - "source": [ - "## 4 · Langfuse tracing connection\n", - "\n", - "Langfuse records traces of LLM and agent runs so you can inspect them in the UI.\n", - "It's **recommended but optional** — the forecasting notebooks run without it, you\n", - "just won't get trace links." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2010eb9a", - "metadata": {}, - "outputs": [], - "source": [ - "_NAME = \"Langfuse tracing\"\n", - "\n", - "if not (env_ok(\"LANGFUSE_PUBLIC_KEY\") and env_ok(\"LANGFUSE_SECRET_KEY\")):\n", - " warn(\n", - " _NAME,\n", - " \"Langfuse credentials are not set — tracing will be skipped (this is OK to proceed).\",\n", - " fix=(\n", - " \"To enable trace logging, set these in .env (from your Langfuse project settings):\\n\"\n", - " \" LANGFUSE_PUBLIC_KEY=pk-lf-...\\n\"\n", - " \" LANGFUSE_SECRET_KEY=sk-lf-...\\n\"\n", - " \" LANGFUSE_HOST=https://us.cloud.langfuse.com\"\n", - " ),\n", - " )\n", - "else:\n", - " try:\n", - " from aieng.forecasting.langfuse_tracing import init_langfuse_tracing\n", - " from langfuse import get_client\n", - "\n", - " init_langfuse_tracing()\n", - " client = get_client()\n", - " if client.auth_check():\n", - " host = env(\"LANGFUSE_HOST\") or \"https://cloud.langfuse.com\"\n", - " ok(_NAME, f\"Authenticated to {host} (public key {mask(env('LANGFUSE_PUBLIC_KEY'))}).\")\n", - " else:\n", - " fail(\n", - " _NAME,\n", - " \"Credentials are set but Langfuse auth_check() returned False.\",\n", - " fix=(\n", - " \"Re-check LANGFUSE_PUBLIC_KEY / LANGFUSE_SECRET_KEY and that LANGFUSE_HOST \"\n", - " \"matches your project's region (e.g. https://us.cloud.langfuse.com).\"\n", - " ),\n", - " )\n", - " except Exception as exc: # noqa: BLE001\n", - " fail(\n", - " _NAME,\n", - " f\"{type(exc).__name__}: {exc}\",\n", - " fix=(\n", - " \"Confirm the three LANGFUSE_* variables in .env and that LANGFUSE_HOST is reachable, \"\n", - " \"then restart the kernel.\"\n", - " ),\n", - " )" - ] - }, - { - "cell_type": "markdown", - "id": "98e557ee", - "metadata": {}, - "source": [ - "## 5 · E2B code execution sandbox\n", - "\n", - "Agentic forecasters run code in an E2B cloud sandbox. This runs a trivial snippet\n", - "(`print(1 + 1)`) end-to-end. 
A failure here is usually either a missing\n", - "`E2B_API_KEY` or a sandbox **template that hasn't been built yet** — the messages\n", - "below distinguish the two." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0a5ec930", - "metadata": {}, - "outputs": [], - "source": [ - "_NAME = \"E2B code execution\"\n", - "\n", - "if not env_ok(\"E2B_API_KEY\"):\n", - " fail(\n", - " _NAME,\n", - " \"E2B_API_KEY is not set — code execution cannot run.\",\n", - " fix=(\n", - " \"1. Create a free account at https://e2b.dev and copy your API key.\\n\"\n", - " \"2. Add it to .env at the repository root:\\n\"\n", - " \" E2B_API_KEY=\\n\"\n", - " \"3. Restart the kernel and re-run this cell.\"\n", - " ),\n", - " )\n", - "else:\n", - " # Mirror the project default; fall back to the literal if the agentic extra\n", - " # (which pulls in google-adk) is not importable in this kernel.\n", - " try:\n", - " from aieng.forecasting.methods.agentic.agent_factory import CodeExecutionConfig\n", - "\n", - " template_name = CodeExecutionConfig().template_name\n", - " except Exception: # noqa: BLE001\n", - " template_name = \"agentic-forecasting-bootcamp\"\n", - "\n", - " try:\n", - " import json\n", - "\n", - " from aieng.agents.tools.code_interpreter import CodeInterpreter\n", - "\n", - " ci = CodeInterpreter(template_name=template_name)\n", - " raw = run_async(ci.run_code(\"print(1 + 1)\"))\n", - " out = json.loads(raw)\n", - " stdout = \"\".join(out.get(\"stdout\", []))\n", - " if out.get(\"error\"):\n", - " err = out[\"error\"]\n", - " fail(_NAME, f\"Sandbox ran but raised: {err.get('name')}: {err.get('value')}\")\n", - " elif \"2\" in stdout:\n", - " ok(_NAME, f\"Sandbox (template {template_name!r}) executed code and returned: {stdout.strip()!r}\")\n", - " else:\n", - " warn(_NAME, f\"Sandbox ran but produced unexpected output: {stdout!r}\")\n", - " except Exception as exc: # noqa: BLE001\n", - " msg = str(exc)\n", - " low = msg.lower()\n", - " if \"template\" in 
low and (\"not found\" in low or \"does not exist\" in low or \"notfound\" in low):\n", - " fix = (\n", - " f\"The sandbox template {template_name!r} hasn't been built yet. Build it once \"\n", - " \"(takes a few minutes):\\n\"\n", - " \" uv run --env-file .env scripts/build_e2b_template.py\"\n", - " )\n", - " elif any(t in low for t in (\"auth\", \"401\", \"403\", \"api key\", \"unauthorized\", \"invalid\")):\n", - " fix = (\n", - " \"Your E2B_API_KEY was rejected. Re-copy it from https://e2b.dev into .env, \"\n", - " \"with no surrounding quotes or whitespace.\"\n", - " )\n", - " else:\n", - " fix = (\n", - " \"Check that E2B_API_KEY is valid and the template has been built \"\n", - " \"(uv run --env-file .env scripts/build_e2b_template.py). The error above names the cause.\"\n", - " )\n", - " fail(_NAME, f\"{type(exc).__name__}: {msg}\", fix=fix)" - ] - }, - { - "cell_type": "markdown", - "id": "30f2226b", - "metadata": {}, - "source": [ - "## 6 · StatCan data access\n", - "\n", - "Pulls one real CPI series (Canada gasoline) from Statistics Canada. The first run\n", - "downloads and caches the table under `data/statcan/`; later runs read the cache.\n", - "If you're offline but the cache already exists, this degrades to a ⚠️." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f4fd163a", - "metadata": {}, - "outputs": [], - "source": [ - "_NAME = \"StatCan data pull\"\n", - "\n", - "try:\n", - " from aieng.forecasting.data.adapters import StatCanAdapter\n", - "\n", - " adapter = StatCanAdapter(\n", - " table_id=\"18-10-0004-11\",\n", - " member_filter={\"GEO\": \"Canada\", \"Products and product groups\": \"Gasoline\"},\n", - " cache_dir=ROOT / \"data\" / \"statcan\",\n", - " )\n", - " df = adapter.fetch()\n", - " start = df[\"timestamp\"].min().strftime(\"%Y-%m\")\n", - " end = df[\"timestamp\"].max().strftime(\"%Y-%m\")\n", - " ok(_NAME, f\"Fetched cpi_gasoline_canada: {len(df)} rows, {start} → {end}.\")\n", - "except Exception as exc: # noqa: BLE001\n", - " cache_file_exists = (ROOT / \"data\" / \"statcan\").exists() and any((ROOT / \"data\" / \"statcan\").glob(\"*.zip\"))\n", - " if cache_file_exists:\n", - " warn(\n", - " _NAME,\n", - " f\"Live fetch failed ({type(exc).__name__}: {exc}) but a local StatCan cache exists.\",\n", - " fix=\"Likely a transient network issue. The cached data is usable; re-run later to refresh.\",\n", - " )\n", - " else:\n", - " fail(\n", - " _NAME,\n", - " f\"{type(exc).__name__}: {exc}\",\n", - " fix=(\n", - " \"Populate the local data cache once from the repo root:\\n\"\n", - " \" uv run python scripts/fetch_cpi.py\\n\"\n", - " \"This needs network access to Statistics Canada the first time.\"\n", - " ),\n", - " )" - ] - }, - { - "cell_type": "markdown", - "id": "9ddb7840", - "metadata": {}, - "source": [ - "## 7 · FRED data access (optional)\n", - "\n", - "FRED (US Federal Reserve Economic Data) needs a **free API key**. It's optional —\n", - "only some implementations use it. If you don't have a key, this is a ⚠️ with\n", - "instructions, not a failure. If you do, we validate it with a live fetch." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d74f5ea7", - "metadata": {}, - "outputs": [], - "source": [ - "_NAME = \"FRED data pull\"\n", - "\n", - "if not env_ok(\"FRED_API_KEY\"):\n", - " warn(\n", - " _NAME,\n", - " \"FRED_API_KEY is not set. This is optional — skip it unless you need FRED series.\",\n", - " fix=(\n", - " \"FRED requires a free API key. To get one:\\n\"\n", - " \" 1. Request it at https://fred.stlouisfed.org/docs/api/api_key.html\\n\"\n", - " \" 2. Add it to .env at the repository root:\\n\"\n", - " \" FRED_API_KEY=\\n\"\n", - " \" 3. Restart the kernel and re-run this cell.\"\n", - " ),\n", - " )\n", - "else:\n", - " try:\n", - " from aieng.forecasting.data.adapters import FREDAdapter\n", - "\n", - " # refresh=True forces a live API call so we actually validate the key.\n", - " adapter = FREDAdapter(\"EXCAUS\", cache_dir=ROOT / \"data\" / \"fred\", refresh=True)\n", - " df = adapter.fetch()\n", - " latest = df.iloc[-1]\n", - " ok(\n", - " _NAME,\n", - " f\"Validated FRED key — fetched EXCAUS (CAD/USD): {len(df)} rows, \"\n", - " f\"latest {latest['timestamp'].strftime('%Y-%m')} = {latest['value']:.4f}.\",\n", - " )\n", - " except Exception as exc: # noqa: BLE001\n", - " fail(\n", - " _NAME,\n", - " f\"{type(exc).__name__}: {exc}\",\n", - " fix=(\n", - " \"Your FRED_API_KEY may be invalid. Re-copy it from \"\n", - " \"https://fred.stlouisfed.org/docs/api/api_key.html into .env, then restart the kernel.\"\n", - " ),\n", - " )" - ] - }, - { - "cell_type": "markdown", - "id": "b0eaa759", - "metadata": {}, - "source": [ - "## 8 · End-to-end mini forecast\n", - "\n", - "The real thing in miniature: load the getting-started backtest spec, register the\n", - "gasoline series, run a `LastValuePredictor` backtest, and score it (CRPS). This\n", - "proves the whole **data → predictor → backtest → score** loop works — not just the\n", - "individual services. It uses only the StatCan cache (no LLM/network)." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "59a46292", - "metadata": {}, - "outputs": [], - "source": [ - "_NAME = \"End-to-end mini forecast\"\n", - "\n", - "try:\n", - " import yaml\n", - " from aieng.forecasting.data import DataService, SeriesMetadata\n", - " from aieng.forecasting.data.adapters import StatCanAdapter\n", - " from aieng.forecasting.evaluation import BacktestSpec, backtest\n", - " from aieng.forecasting.methods import LastValuePredictor\n", - "\n", - " spec_path = ROOT / \"implementations\" / \"getting_started\" / \"specs\" / \"cpi_gasoline_1m.yaml\"\n", - " spec = BacktestSpec.model_validate(yaml.safe_load(spec_path.read_text()))\n", - "\n", - " svc = DataService()\n", - " svc.register(\n", - " \"cpi_gasoline_canada\",\n", - " StatCanAdapter(\n", - " table_id=\"18-10-0004-11\",\n", - " member_filter={\"GEO\": \"Canada\", \"Products and product groups\": \"Gasoline\"},\n", - " cache_dir=ROOT / \"data\" / \"statcan\",\n", - " ),\n", - " SeriesMetadata(\n", - " series_id=\"cpi_gasoline_canada\",\n", - " description=\"CPI Gasoline, Canada (2002=100)\",\n", - " source=\"StatCan\",\n", - " units=\"Index 2002=100\",\n", - " frequency=\"MS\",\n", - " table_id=\"18-10-0004-11\",\n", - " ),\n", - " )\n", - "\n", - " result = backtest(LastValuePredictor(), spec, svc)\n", - " ok(\n", - " _NAME,\n", - " f\"Ran {result.predictor_id} over the gasoline backtest — \"\n", - " f\"mean {result.metric.upper()} = {result.mean_score:.4f}.\",\n", - " )\n", - "except Exception as exc: # noqa: BLE001\n", - " fail(\n", - " _NAME,\n", - " f\"{type(exc).__name__}: {exc}\",\n", - " fix=(\n", - " \"This depends on the StatCan check above. 
If that failed, fix it first \"\n", - " \"(uv run python scripts/fetch_cpi.py), then restart the kernel and re-run.\"\n", - " ),\n", - " )" - ] - }, - { - "cell_type": "markdown", - "id": "6e319070", - "metadata": {}, - "source": [ - "## Summary\n", - "\n", - "A single verdict and, if needed, a prioritized list of what to fix." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3d141741", - "metadata": {}, - "outputs": [], - "source": [ - "_passed = [r for r in RESULTS if r[\"status\"] == \"PASS\"]\n", - "_warned = [r for r in RESULTS if r[\"status\"] == \"WARN\"]\n", - "_failed = [r for r in RESULTS if r[\"status\"] == \"FAIL\"]\n", - "\n", - "print(\"=\" * 64)\n", - "print(f\" Checks run: {len(RESULTS)} ✅ {len(_passed)} ⚠️ {len(_warned)} ❌ {len(_failed)}\")\n", - "print(\"=\" * 64)\n", - "\n", - "if _failed:\n", - " print(\"\\n❌ Fix these before continuing (most are missing/placeholder keys in .env):\")\n", - " for r in _failed:\n", - " print(f\" • {r['name']}\")\n", - "if _warned:\n", - " print(\"\\n⚠️ Optional / heads-up (you can usually proceed):\")\n", - " for r in _warned:\n", - " print(f\" • {r['name']}\")\n", - "\n", - "print()\n", - "if not _failed:\n", - " print(\"🎉 You're ready! 
Open 01_cpi_data_exploration.ipynb to begin.\")\n", - " if _warned:\n", - " print(\" (The ⚠️ items above are optional — enable them later if you need them.)\")\n", - "else:\n", - " print(\"Re-run this notebook after editing .env and restarting the kernel.\")\n", - " print(\"Most ❌ items are a key that wasn't filled in during setup — scroll up for the exact fix.\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.12" - } - }, - "nbformat": 4, - "nbformat_minor": 5 + "nbformat": 4, + "nbformat_minor": 5 } From cbfe00bbb32b00d63a55680083a4b0623e345f76 Mon Sep 17 00:00:00 2001 From: fcogidi <41602287+fcogidi@users.noreply.github.com> Date: Wed, 24 Jun 2026 14:35:54 -0400 Subject: [PATCH 6/6] deps: update aieng-agents[code-interpreter] to version 0.3.1 (minimum) --- aieng-forecasting/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aieng-forecasting/pyproject.toml b/aieng-forecasting/pyproject.toml index f87d68bb..11c07640 100644 --- a/aieng-forecasting/pyproject.toml +++ b/aieng-forecasting/pyproject.toml @@ -25,7 +25,7 @@ documents = [ "pyyaml>=6.0", ] agentic = [ - "aieng-agents[code-interpreter]>=0.3.0", + "aieng-agents[code-interpreter]>=0.3.1", "google-adk>=2.2.0", "google-cloud-storage>=2.18,<4", "langfuse>=4.5.1",