From 6f39930b6dae4df2eaae6e15c1e16b4995d17687 Mon Sep 17 00:00:00 2001 From: Bob MacCallum Date: Thu, 26 Mar 2026 13:24:35 +0000 Subject: [PATCH 1/3] add beta-sites.yaml --- .../integrations/veupathdb/beta-sites.yaml | 111 ++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 apps/api/src/veupath_chatbot/integrations/veupathdb/beta-sites.yaml diff --git a/apps/api/src/veupath_chatbot/integrations/veupathdb/beta-sites.yaml b/apps/api/src/veupath_chatbot/integrations/veupathdb/beta-sites.yaml new file mode 100644 index 00000000..5c9bacc9 --- /dev/null +++ b/apps/api/src/veupath_chatbot/integrations/veupathdb/beta-sites.yaml @@ -0,0 +1,111 @@ +# VEuPathDB Beta Sites Configuration +# Base URLs for beta/staging versions of all VEuPathDB resources. +# Select this file at runtime with env: VEUPATHDB_SITES_CONFIG=/path/to/beta-sites.yaml + +sites: + # Portal (queries across all component sites) + veupathdb: + name: VEuPathDB + display_name: VEuPathDB Portal (All organisms) + base_url: https://beta.veupathdb.org/veupathdb.beta/service + project_id: EuPathDB + is_portal: true + + # Apicomplexan parasites + plasmodb: + name: PlasmoDB + display_name: PlasmoDB (Plasmodium) + base_url: https://beta.plasmodb.org/plasmo.beta/service + project_id: PlasmoDB + is_portal: false + + toxodb: + name: ToxoDB + display_name: ToxoDB (Toxoplasma) + base_url: https://beta.toxodb.org/toxo.beta/service + project_id: ToxoDB + is_portal: false + + cryptodb: + name: CryptoDB + display_name: CryptoDB (Cryptosporidium) + base_url: https://beta.cryptodb.org/cryptodb.beta/service + project_id: CryptoDB + is_portal: false + + piroplasmadb: + name: PiroplasmaDB + display_name: PiroplasmaDB (Piroplasma) + base_url: https://beta.piroplasmadb.org/piro.beta/service + project_id: PiroplasmaDB + is_portal: false + + # Other protozoan parasites + giardiadb: + name: GiardiaDB + display_name: GiardiaDB (Giardia) + base_url: https://beta.giardiadb.org/giardiadb.beta/service + project_id: GiardiaDB + 
is_portal: false + + amoebadb: + name: AmoebaDB + display_name: AmoebaDB (Amoeba) + base_url: https://beta.amoebadb.org/amoeba.beta/service + project_id: AmoebaDB + is_portal: false + + microsporidiadb: + name: MicrosporidiaDB + display_name: MicrosporidiaDB (Microsporidia) + base_url: https://beta.microsporidiadb.org/micro.beta/service + project_id: MicrosporidiaDB + is_portal: false + + tritrypdb: + name: TriTrypDB + display_name: TriTrypDB (Kinetoplastids) + base_url: https://beta.tritrypdb.org/tritrypdb.beta/service + project_id: TriTrypDB + is_portal: false + + # Fungi + fungidb: + name: FungiDB + display_name: FungiDB (Fungi) + base_url: https://beta.fungidb.org/fungidb.beta/service + project_id: FungiDB + is_portal: false + + # Hosts and vectors + hostdb: + name: HostDB + display_name: HostDB (Hosts) + base_url: https://beta.hostdb.org/hostdb.beta/service + project_id: HostDB + is_portal: false + + vectorbase: + name: VectorBase + display_name: VectorBase (Vectors) + base_url: https://beta.vectorbase.org/vectorbase.beta/service + project_id: VectorBase + is_portal: false + + # Orthology + orthomcl: + name: OrthoMCL + display_name: OrthoMCL (Orthologs) + base_url: https://beta.orthomcl.org/orthomcl.beta/service + project_id: OrthoMCL + is_portal: false + +# Default site to use +default_site: veupathdb + +# Routing (timeouts for WDK client) +routing: + # Portal timeout (portal can be slow due to fan-out) + portal_timeout: 120 + # Component site timeout + component_timeout: 30 From 16f19c5344297d79bae49981666728fbd06c7fce Mon Sep 17 00:00:00 2001 From: Bob MacCallum Date: Thu, 26 Mar 2026 14:39:32 +0000 Subject: [PATCH 2/3] fix readme/docs and docker-compose for qdrant reindexing --- README-podman-quadlets.md | 10 ++++++++-- docker-compose.yml | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/README-podman-quadlets.md b/README-podman-quadlets.md index b98586de..648d639a 100644 --- a/README-podman-quadlets.md +++ b/README-podman-quadlets.md @@ 
-184,8 +184,14 @@ needed when you want to **reset and fully rebuild** the Qdrant collections. Requires `pathfinder-qdrant` to be running. Run from the project root so the report output path resolves correctly: +```bash +systemctl --user status pathfinder-qdrant +# if required: +systemctl --user start pathfinder-qdrant +``` ```bash +# make sure this is user-owned, not root: mkdir -p apps/api/ingest_reports podman run --rm \ @@ -196,8 +202,8 @@ podman run --rm \ -v "$PWD/apps/api/ingest_reports:/reports:Z" \ -w /app/apps/api \ localhost/pathfinder-api:latest \ - /bin/sh -lc "uv run python -m veupath_chatbot.services.vectorstore.ingest.wdk_catalog --sites all --reset && \ - uv run python -m veupath_chatbot.services.vectorstore.ingest.public_strategies --sites all --reset --report-path /reports/ingest_public_strategies_report.jsonl" + /bin/sh -lc "uv run python -m veupath_chatbot.integrations.vectorstore.ingest.wdk_catalog --sites all --reset && \ + uv run python -m veupath_chatbot.integrations.vectorstore.ingest.public_strategies --sites all --reset --report-path /reports/ingest_public_strategies_report.jsonl" ``` Both jobs require `OPENAI_API_KEY` (used for embeddings). 
The second job writes diff --git a/docker-compose.yml b/docker-compose.yml index 3ded0bca..7b126560 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -138,7 +138,7 @@ services: [ "/bin/sh", "-lc", - "uv run python -m veupath_chatbot.services.vectorstore.ingest.wdk_catalog --sites all --reset && uv run python -m veupath_chatbot.services.vectorstore.ingest.public_strategies --sites all --reset --report-path /reports/ingest_public_strategies_report.jsonl" + "uv run python -m veupath_chatbot.integrations.vectorstore.ingest.wdk_catalog --sites all --reset && uv run python -m veupath_chatbot.integrations.vectorstore.ingest.public_strategies --sites all --reset --report-path /reports/ingest_public_strategies_report.jsonl" ] restart: "no" From 1126b84f4ed92990fb0a71c24f99080ec0a9c37d Mon Sep 17 00:00:00 2001 From: Bob MacCallum Date: Thu, 26 Mar 2026 18:15:59 +0000 Subject: [PATCH 3/3] add beta-sites.yaml help re auth keys --- README-podman-quadlets.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/README-podman-quadlets.md b/README-podman-quadlets.md index 648d639a..8a45d4b3 100644 --- a/README-podman-quadlets.md +++ b/README-podman-quadlets.md @@ -209,6 +209,21 @@ podman run --rm \ Both jobs require `OPENAI_API_KEY` (used for embeddings). The second job writes a JSONL report to `apps/api/ingest_reports/` (gitignored). +**Beta sites:** if your `VEUPATHDB_SITES_CONFIG` points at `beta-sites.yaml`, +many WDK endpoints require authentication. Add both variables to +`~/.config/pathfinder/.env`: + +```bash +# Note: path is inside the container, not on the host +VEUPATHDB_SITES_CONFIG=/app/apps/api/src/veupath_chatbot/integrations/veupathdb/beta-sites.yaml +VEUPATHDB_AUTH_TOKEN=your_api_key_here +``` + +Logged-in users can find their API key at their VEuPathDB profile page under +**Service Access** — for example: +`https://beta.plasmodb.org/plasmo.beta/app/user/profile#serviceAccess` +(any component site works). 
+ ## Services overview | Service | Image | Published port | Depends on |