diff --git a/Makefile b/Makefile index 1be7ac12..15596cd4 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ test-a2a: done # Directories under mcp/ to skip in test-mcp -TEST_MCP_SKIP := mcp/exgentic_benchmarks +TEST_MCP_SKIP := mcp/exgentic_benchmarks mcp/wiki_memory_tool # Verify all of the MCP example Docker images can be built # (Optional KAGENTI_DOCKER_FLAGS for docker build, e.g. --no-cache or --load) diff --git a/mcp/wiki_memory_tool/.gitignore b/mcp/wiki_memory_tool/.gitignore new file mode 100644 index 00000000..505a3b1c --- /dev/null +++ b/mcp/wiki_memory_tool/.gitignore @@ -0,0 +1,10 @@ +# Python-generated files +__pycache__/ +*.py[oc] +build/ +dist/ +wheels/ +*.egg-info + +# Virtual environments +.venv diff --git a/mcp/wiki_memory_tool/.python-version b/mcp/wiki_memory_tool/.python-version new file mode 100644 index 00000000..6324d401 --- /dev/null +++ b/mcp/wiki_memory_tool/.python-version @@ -0,0 +1 @@ +3.14 diff --git a/mcp/wiki_memory_tool/DEMO.md b/mcp/wiki_memory_tool/DEMO.md new file mode 100644 index 00000000..90d4ecd2 --- /dev/null +++ b/mcp/wiki_memory_tool/DEMO.md @@ -0,0 +1,154 @@ +# Wiki Memory Tool Demo + +A quick walkthrough of the wiki CLI — login, query, discover, and verify. + +## 1. Login + +```bash +$ kwiki login +================================================== + GitHub Device Authorization +================================================== + + 1. Open this URL in your browser: + + https://github.com/login/device + + 2. Enter this code: + + ABCD-1234 + + Code expires in 15 minutes. +================================================== + +Waiting for authorization..... + +Logged in as aslom +Groups: kaslomorg/ml-writers, kaslomorg/platform-admins +``` + +## 2. 
Check Identity + +```bash +$ kwiki whoami +User: aslom +Status: valid (expires in 6d 23h) +Groups: kaslomorg/ml-writers, kaslomorg/platform-admins +Server: https://wiki-memory-service-team1.apps.ykt1.hcp.res.ibm.com + +Access: + ai: read, write, admin + read <- * + write <- github:team:kaslomorg/ml-writers + admin <- github:user:aslom + security: read, write + read <- github:org:kaslomorg + write <- github:team:kaslomorg/platform-admins +``` + +## 3. Query Existing Pages + +```bash +$ kwiki query list-topics + ai (7 pages) + security (0 pages) + ml (0 pages) + +$ kwiki query list-pages ai + evaluation.md + fine-tuning.md + rag-patterns.md + transformers.md + +$ kwiki query search ai "attention mechanism" + ai/transformers.md (score=0.1652) + Self-attention mechanisms for parallel sequence processing. + +$ kwiki query read ai transformers.md +--- +tags: [paper, architecture] +--- +# Transformer Architecture + +Self-attention mechanisms for parallel sequence processing. +... +``` + +## 4. Discover — Write a New Page + +```bash +$ kwiki discover novelty ai "Mixture of Experts" "Sparse gating for efficient model scaling" +NOVEL: No sufficiently similar content found + +$ kwiki discover write ai mixture-of-experts.md --content "$(cat <<'EOF' +--- +tags: [paper, architecture, scaling] +--- +# Mixture of Experts + +Source: [Switch Transformers](https://arxiv.org/abs/2101.03961) (Fedus et al., 2021) + +## Summary + +Mixture of Experts routes each token to a subset of specialist sub-networks, +enabling model scaling without proportional compute increase. 
+ +## Key Ideas + +- Sparse gating: each token activates only top-k experts +- Load balancing loss prevents routing collapse +- Capacity factor limits tokens per expert + +## Results + +- Switch Transformer: 7x speedup over T5-Base at same compute +- Mixtral 8x7B: competitive with GPT-3.5 using 2 active experts per token +EOF +)" +Written: ai/mixture-of-experts.md by discovery-agent +Suggested links: + ai/transformers.md (score=0.0923) + ai/fine-tuning.md (score=0.0412) +``` + +## 5. Verify It Was Added + +```bash +$ kwiki query search ai "mixture of experts sparse gating" + ai/mixture-of-experts.md (score=0.2841) + Mixture of Experts routes each token to a subset of specialist sub-networks. + +$ kwiki query read ai mixture-of-experts.md +--- +tags: [paper, architecture, scaling] +--- +# Mixture of Experts + +Source: [Switch Transformers](https://arxiv.org/abs/2101.03961) (Fedus et al., 2021) +... + +$ kwiki query tags ai + paper: 3 pages + architecture: 2 pages + scaling: 1 pages + retrieval: 1 pages + technique: 1 pages + +$ kwiki query backlinks ai mixture-of-experts.md + (none yet) +``` + +## 6. 
Optional — Initialize GitHub Pages + +```bash +$ kwiki admin init-pages +GitHub Pages initialized (6 files): + _config.yml + index.md + _layouts/default.html + _layouts/page.html + _includes/nav.html + assets/css/style.css +``` + +After ~60 seconds, browse: https://kaslom.github.io/kagenti-wiki-research/ diff --git a/mcp/wiki_memory_tool/Dockerfile b/mcp/wiki_memory_tool/Dockerfile new file mode 100644 index 00000000..49bd7d17 --- /dev/null +++ b/mcp/wiki_memory_tool/Dockerfile @@ -0,0 +1,25 @@ +FROM quay.io/fedora/python-314@sha256:381b2b1135f4c506cc1287223eff3ec6ff585638297ecb141667203a89116375 + +USER root +RUN dnf install -y git && dnf clean all +COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv + +WORKDIR /app +COPY pyproject.toml /app/ +COPY wiki_service.py /app/ +COPY run_local.py /app/ +COPY mcp_server.py /app/ +RUN uv sync --no-dev && chmod -R 755 /app/.venv + +RUN mkdir -p /tmp/uv-cache && chmod 755 /tmp/uv-cache +USER 1001 + +ENV UV_CACHE_DIR=/tmp/uv-cache +ENV UV_LINK_MODE=copy +ENV WIKI_ROOT=/data/wiki +ENV ACL_FILE=/config/acl.yaml +ENV SPIFFE_TRUST_DOMAIN=kagenti.example.com + +EXPOSE 8000 + +CMD ["/app/.venv/bin/uvicorn", "wiki_service:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/mcp/wiki_memory_tool/README.md b/mcp/wiki_memory_tool/README.md new file mode 100644 index 00000000..5ddb0f3f --- /dev/null +++ b/mcp/wiki_memory_tool/README.md @@ -0,0 +1,1130 @@ +# Wiki Memory Tool + +Multi-agent wiki memory service for the Kagenti Research Wiki use case. +Provides persistent, git-backed knowledge storage with per-topic access control, GitHub OAuth login, and SPIFFE workload identity. + +## Status + +This is a **standalone MCP service** — a self-contained wiki with REST + MCP APIs, +git-backed storage, and identity-aware access control. 
+ +**What this is:** +- A working memory/knowledge store for multi-agent systems +- SPIFFE-based agent identity (simulated via headers today) +- GitHub OAuth for human users +- Deployable to any Kubernetes cluster (Kind, OpenShift, etc.) + +**What this is not (yet):** +- Not wired into the Kagenti operator (no AgentService CR) +- No real SPIRE SVID verification (header-based, not mTLS) +- No Keycloak federation +- These are tracked as follow-up in [kagenti#1461](https://github.com/kagenti/kagenti/issues/1461) + +## Quick Start (Local) + +### Prerequisites + +- Python 3.14+ +- [uv](https://docs.astral.sh/uv/) package manager +- git + +### Install and Run + +```bash +cd mcp/wiki_memory_tool +uv sync + +# Terminal 1 — start the service (local-only, no GitHub push) +uv run python run_local.py --clean + +# Terminal 2 — run the test agents +uv run python test_agents.py +``` + +### Run with GitHub Remote Storage + +```bash +export WIKI_REMOTE_URL="https://x-access-token:<github-pat>@github.com/<org>/<repo>.git" + +# Start service — clones from GitHub, pushes after each write +uv run python run_local.py --remote --clean + +# Run test agents +uv run python test_agents.py +``` + +## GitHub OAuth Setup + +### Step 1: Create a GitHub OAuth App + +1. Go to **GitHub Settings > Developer settings > OAuth Apps > New OAuth App** + (or for an org: **Organization Settings > Developer settings > OAuth Apps**) +2. Fill in: + - **Application name**: `Wiki Memory Service` + - **Homepage URL**: `https://<wiki-host>` (e.g. `https://wiki-memory-service-team1.apps.ykt1.hcp.res.ibm.com`) + - **Authorization callback URL**: `https://<wiki-host>/auth/github/callback` +3. Click **Register application** +4. Note the **Client ID** (e.g. `Ov23liMX4KIYA2hFFhuL`) +5. Generate a **Client secret** and save it securely + +### Step 2: Enable Device Flow + +1. In the OAuth App settings, check **Enable Device Flow** +2. This allows CLI login without a browser redirect + +### Step 3: Create GitHub Organization and Teams + +1. 
Create an organization (e.g. `kaslomorg`) at https://github.com/organizations/new +2. Create teams under the org at `https://github.com/orgs/<org>/teams`: + - `ml-team` — ML researchers (read access to ml topic) + - `ml-writers` — ML content creators (write access to ai/ml topics) + - `platform-admins` — Platform administrators (admin access to all topics) + - `security-team` — Security researchers (read/write access to security topic) +3. Add members to each team + +### Step 4: Configure the Service + +Set these environment variables (or K8s secrets): + +```bash +export GITHUB_CLIENT_ID="Ov23liMX4KIYA2hFFhuL" +export GITHUB_CLIENT_SECRET="<client-secret>" +export JWT_SECRET_KEY="$(openssl rand -hex 32)" +``` + +For Kubernetes deployment, create the secret: + +```bash +kubectl create secret generic wiki-github-oauth \ + --from-literal=GITHUB_CLIENT_ID=Ov23liMX4KIYA2hFFhuL \ + --from-literal=GITHUB_CLIENT_SECRET=<client-secret> \ + --from-literal=JWT_SECRET_KEY=$(openssl rand -hex 32) \ + -n <namespace> +``` + +### Step 5: Configure ACL with GitHub Identities + +Edit `test_acl.yaml` (local) or the `wiki-memory-acl` ConfigMap (K8s) to map teams to topics: + +```yaml +topics: + ai: + writers: + - "github:team:kaslomorg/ml-writers" + - "github:team:kaslomorg/platform-admins" + readers: + - "github:org:kaslomorg" # all org members can read + - "*" # or all authenticated users + admins: + - "github:user:aslom" + - "github:team:kaslomorg/platform-admins" +``` + +Identity prefix reference: + +| Prefix | Matches | +|--------|---------| +| `spiffe://...` | Workload identity (agents) | +| `github:user:<username>` | Individual GitHub user | +| `github:team:<org>/<team>` | GitHub team membership | +| `github:org:<org>` | Any member of GitHub org | +| `*` | Any authenticated identity | + +### Step 6: Remove GitHub Org OAuth App Access Restrictions + +By default, GitHub organizations restrict third-party OAuth app access. This blocks the wiki service from reading team memberships via `/user/teams`. You must remove (or approve) the restriction: + +1. 
Go to **Organization Settings > Third-party access > OAuth application policy** + (`https://github.com/organizations/<org>/settings/oauth_application_policy`) +2. Either: + - **Remove restrictions** entirely (any OAuth app that an org member authorizes can then read org data), or + - **Approve** the Wiki Memory Service app specifically +3. Verify by logging in again — `kwiki whoami` should show your groups + +Without this, `GET /user/teams` returns an empty list even though the user belongs to teams, because GitHub blocks the OAuth app from seeing org data. + +## CLI Usage + +### Shell Alias (recommended) + +Add this to your `~/.zshrc` (or `~/.bashrc`) so you can use `kwiki` from anywhere: + +```bash +alias kwiki='uv run --directory ~/sandbox/kagenti-mvp/agent-examples/mcp/wiki_memory_tool python wiki_cli.py --base-url https://wiki-memory-service-team1.apps.ykt1.hcp.res.ibm.com' +``` + +Then reload: `source ~/.zshrc` + +### Login via GitHub Device Flow + +```bash +kwiki login +``` + +Output: +``` +================================================== + GitHub Device Authorization +================================================== + + 1. Open this URL in your browser: + + https://github.com/login/device + + 2. Enter this code: + + ABCD-1234 + + Code expires in 15 minutes. +================================================== + +Waiting for authorization..... 
+ +Logged in as aslom +Groups: kaslomorg/ml-writers, kaslomorg/platform-admins +``` + +### Check Identity + +```bash +kwiki whoami +``` + +Output: +``` +User: aslom +Status: valid (expires in 6d 23h) +Groups: kaslomorg/ml-writers, kaslomorg/platform-admins +Server: https://wiki-memory-service-team1.apps.ykt1.hcp.res.ibm.com + +Access: + ai: read, write, admin + read <- github:team:kaslomorg/platform-admins + write <- github:team:kaslomorg/platform-admins + admin <- github:user:aslom + security: read, write + read <- github:team:kaslomorg/platform-admins + write <- github:team:kaslomorg/platform-admins +``` + +### Renew Token + +Renew your JWT token without going through the full device flow again (works within the 7-day grace window): + +```bash +kwiki renew +``` + +Output: +``` +Token renewed for aslom (expires in 168h) +``` + +If the token is too old or the server doesn't support renewal, `kwiki renew` falls back to the device flow login automatically. + +### Write a Wiki Page (as authenticated user) + +```bash +kwiki discover write ai my-notes.md --content "# My Notes\n\nContent here..." +``` + +### Query Wiki Pages + +```bash +kwiki query list-topics +kwiki query search ai "transformer architecture" +kwiki query read ai transformers.md +``` + +### Logout + +```bash +kwiki logout +``` + +### Full CLI Reference + +```bash +kwiki login # GitHub device flow login +kwiki logout # Remove cached token +kwiki whoami # Show current identity, groups, and permissions +kwiki renew # Renew token (7-day grace window) + +kwiki discover write <topic> <path> --content "..." # Write a page +kwiki discover write <topic> <path> --file ./doc.md # Write from file +kwiki discover write <topic> <path> --draft --content "..." 
# Submit as draft +kwiki discover novelty <topic> "Title" "Abstract" # Check novelty +kwiki discover template # List available templates +kwiki discover template paper-summary # Get a specific template + +kwiki query list-topics # List all topics +kwiki query list-pages <topic> # List pages in a topic +kwiki query search <topic> "query" # TF-IDF search within a topic +kwiki query search-all "query" # Search across ALL topics +kwiki query read <topic> <path> # Read a page (with frontmatter) +kwiki query activity [topic] # Recent changes (topic or global) +kwiki query backlinks <topic> <path> # Find pages linking to a page +kwiki query tags <topic> # List all tags with page counts +kwiki query tag <topic> <tag> # List pages with a specific tag +kwiki query graph <topic> # Page link graph (nodes + edges) +kwiki query drafts <topic> # List pending drafts + +kwiki admin approve <topic> <path> # Approve a draft +kwiki admin reject <topic> <path> --reason "..." # Reject a draft +kwiki admin init-pages # Initialize GitHub Pages layout +``` + +### Examples: Querying the AI Topic + +The `ai` topic contains pages on transformer architecture, RAG patterns, fine-tuning, and evaluation: + +```bash +# What topics are available? +kwiki query list-topics + +# What pages exist in the ai topic? +kwiki query list-pages ai + +# Search for specific concepts +kwiki query search ai "attention mechanism" +kwiki query search ai "LoRA fine-tuning" +kwiki query search ai "retrieval augmented generation" +kwiki query search ai "evaluation metrics BLEU" + +# Read a specific page +kwiki query read ai transformers.md +kwiki query read ai rag-patterns.md +kwiki query read ai fine-tuning.md +``` + +### Examples: Writing a New Page from an arXiv Paper + +Use an AI agent (Claude, etc.) 
to summarize a paper and write it to the wiki: + +````bash +# Step 1: Check if similar content already exists +kwiki discover novelty ai "Mixture of Experts" "Sparse MoE architectures for scaling LLMs efficiently" + +# Step 2: If novel, have your agent generate markdown and write it +kwiki discover write ai mixture-of-experts.md --content "$(cat <<'EOF' +# Mixture of Experts (MoE) + +Source: [Switch Transformers](https://arxiv.org/abs/2101.03961) (Fedus et al., 2021) + +## Summary + +Mixture of Experts routes each token to a subset of specialist +sub-networks (experts), enabling model scaling without proportional +compute increase. + +## Key Ideas + +- **Sparse gating**: each token activates only top-k experts (typically k=1 or k=2) +- **Load balancing loss**: auxiliary loss prevents all tokens routing to one expert +- **Capacity factor**: limits tokens per expert to prevent memory overflow + +## Architecture + +``` +Input → Router (softmax) → Top-K experts → Weighted sum → Output +``` + +## Results + +- Switch Transformer: 7x speedup over T5-Base at same compute budget +- Mixtral 8x7B: competitive with GPT-3.5 using only 2 active experts per token + +## References + +- Fedus et al. "Switch Transformers" (2021) — https://arxiv.org/abs/2101.03961 +- Jiang et al. "Mixtral of Experts" (2024) — https://arxiv.org/abs/2401.04088 +EOF +)" +```` + +### Workflow: Agent-Assisted Paper Summarization + +For a more automated workflow, use Claude Code or another agent to fetch, summarize, and write: + +```bash +# 1. Agent fetches and summarizes the paper (example using Claude Code) +# Ask: "Summarize https://arxiv.org/abs/2305.10601 as a wiki page in markdown" + +# 2. Save agent output to a file +# (agent writes summary to /tmp/paper-summary.md) + +# 3. Check novelty before writing +kwiki discover novelty ai "Tree of Thoughts" "Deliberate problem solving with LLMs using tree search" + +# 4. 
Write the page from the file +kwiki discover write ai tree-of-thoughts.md --file /tmp/paper-summary.md \ + --message "Add Tree of Thoughts paper summary (Yao et al. 2023)" + +# 5. Verify it's searchable +kwiki query search ai "tree of thoughts deliberation" +kwiki query read ai tree-of-thoughts.md +``` + +### Page Content Guidelines + +When writing wiki pages (manually or via agent), follow this structure: + +```markdown +# Title + +Source: [Paper Name](https://arxiv.org/abs/XXXX.XXXXX) (Authors, Year) + +## Summary +2-3 sentence overview of the key contribution. + +## Key Ideas +- Bullet points of main concepts +- Include formulas or pseudocode if relevant + +## Architecture / Method +Describe the approach with diagrams or code blocks. + +## Results +Key experimental findings and comparisons. + +## References +- Links to paper, code, related work +``` + +## MCP Integration + +### Local Mode (stdio) + +Add to your Claude Code MCP config: + +```json +{ + "mcpServers": { + "wiki-memory": { + "command": "uv", + "args": ["run", "python", "mcp_server.py"], + "cwd": "/path/to/mcp/wiki_memory_tool" + } + } +} +``` + +Tools available: +- `wiki_list_topics` — List all topics and page counts +- `wiki_query` — Search a topic (TF-IDF) +- `wiki_search_all` — Search across all topics +- `wiki_read` — Read a page (includes frontmatter) +- `wiki_write` — Write/update a page (supports `draft=True`) +- `wiki_check_novelty` — Check if content is novel +- `wiki_activity` — Recent changes (git log) +- `wiki_backlinks` — Find pages linking to a page +- `wiki_list_tags` — List all tags in a topic +- `wiki_pages_by_tag` — Find pages by tag +- `wiki_graph` — Page link graph (nodes + edges) +- `wiki_get_template` — Get page templates +- `wiki_list_drafts` — List pending drafts +- `wiki_approve_draft` — Approve a draft (admin) + +### Remote Mode (with auth) + +Set `WIKI_SERVICE_URL` to connect to the remote wiki service with your cached token: + +```json +{ + "mcpServers": { + "wiki-memory": 
{ + "command": "uv", + "args": ["run", "python", "mcp_server.py"], + "cwd": "/path/to/mcp/wiki_memory_tool", + "env": { + "WIKI_SERVICE_URL": "https://wiki-memory-service-team1.apps.ykt1.hcp.res.ibm.com" + } + } + } +} +``` + +Login first with `wiki_cli.py login` — the MCP server reads the cached token from `~/.wiki-memory/token.json`. + +## Claude Code Skills + +The wiki memory tool provides 6 skills that can be invoked directly in Claude Code with `/kwiki:*` commands. + +### Install Skills + +```bash +# Install into current project (symlinks) +uv run python install_kwiki_skills.py .claude/skills + +# Install into user-global skills +uv run python install_kwiki_skills.py --global + +# Copy instead of symlink (for distribution) +uv run python install_kwiki_skills.py --copy /path/to/.claude/skills +``` + +### Available Skills + +| Skill | Description | +|-------|-------------| +| `/kwiki:query-cli` | Search and read wiki pages using the `kwiki` CLI | +| `/kwiki:query-api` | Search and read wiki pages via REST API (curl) | +| `/kwiki:query-mcp` | Search and read wiki pages using MCP tools | +| `/kwiki:discover-cli` | Write pages using the `kwiki` CLI after novelty check | +| `/kwiki:discover-api` | Write pages via REST API (curl) after novelty check | +| `/kwiki:discover-mcp` | Write pages using MCP tools after novelty check | + +### Example: Using /kwiki:query-cli + +Type `/kwiki:query-cli` in Claude Code, then ask it to search, read, or explore: + +``` +> /kwiki:query-cli +> Search the ai topic for "attention" and show backlinks for transformers.md + +$ kwiki query list-topics + ai (3 pages) + security (0 pages) + ml (0 pages) + +$ kwiki query search ai "attention mechanism" + ai/transformers.md (score=0.1652) + tags: [paper, attention, transformer] + +$ kwiki query backlinks ai transformers.md + rag-patterns.md + fine-tuning.md + +$ kwiki query tags ai + attention (1 pages) + fine-tuning (1 pages) + paper (2 pages) + rag (1 pages) + transformer (1 pages) + +$ 
kwiki query graph ai +Nodes (3): + rag-patterns.md: RAG Patterns [paper, rag, retrieval] + transformers.md: Attention Is All You Need [paper, attention, transformer] + fine-tuning.md: Fine-Tuning Guide [guide, fine-tuning, lora] + +Edges (5): + rag-patterns.md -> transformers.md + transformers.md -> rag-patterns.md + transformers.md -> fine-tuning.md + fine-tuning.md -> transformers.md + fine-tuning.md -> rag-patterns.md +``` + +### Example: Using /kwiki:discover-cli + +Type `/kwiki:discover-cli` in Claude Code to write new knowledge: + +``` +> /kwiki:discover-cli +> Write a summary of LoRA to the ai topic + +$ kwiki discover template paper-summary +--- Paper Summary --- + --- + tags: [paper, summary] + --- + # {Title} + Source: [{Paper Name}]({URL}) (Authors, Year) + ... + +$ kwiki discover novelty ai "LoRA Fine-Tuning" "Low-rank adaptation for efficient model tuning" +NOVEL: No sufficiently similar content found + +$ kwiki discover write ai lora.md --content "---\ntags: [paper, fine-tuning]\n---\n# LoRA..." +Written: ai/lora.md by discovery-agent +Suggested links: + ai/transformers.md (score=0.1538) + +$ kwiki discover write ai experimental.md --draft --content "# Experimental\n..." +Draft: ai/experimental.md by discovery-agent + +$ kwiki query drafts ai + experimental.md +``` + +### Example: Using /kwiki:query-api + +Type `/kwiki:query-api` for curl-based access: + +``` +> /kwiki:query-api +> List topics, search for "retrieval", and show the graph + +$ curl -s http://localhost:8321/topics -H "X-Spiffe-Id: ..." -H "X-Original-Subject: ..." 
+→ {"topics": [{"topic_id": "ai", "page_count": 2}, {"topic_id": "security", "page_count": 0}]} + +$ curl -s -X POST http://localhost:8321/search -d '{"query": "retrieval"}' +→ {"results": [{"path": "ai/rag-patterns.md", "score": 0.1171, "snippet": "Retrieval augmented generation.", "topic_id": "ai"}]} + +$ curl -s http://localhost:8321/topics/ai/tags +→ {"topic": "ai", "tags": {"paper": 2, "rag": 1, "attention": 1}} + +$ curl -s http://localhost:8321/topics/ai/graph +→ {"topic": "ai", "nodes": [{"id": "rag-patterns.md", "title": "RAG Patterns", "tags": ["paper","rag"]}, ...], "edges": [{"source": "rag-patterns.md", "target": "transformers.md"}, ...]} + +$ curl -s http://localhost:8321/templates +→ {"templates": [{"id": "paper-summary", "name": "Paper Summary", ...}, {"id": "concept-overview", ...}, {"id": "how-to-guide", ...}, {"id": "comparison", ...}]} +``` + +### Example: Using /kwiki:discover-api + +Type `/kwiki:discover-api` to write via REST API with full control: + +``` +> /kwiki:discover-api +> Check novelty for "LoRA" and write it as a draft + +$ curl -s http://localhost:8321/templates/paper-summary +→ {"id": "paper-summary", "name": "Paper Summary", "content": "---\ntags: [paper, summary]\n---\n# {Title}\n..."} + +$ curl -s -X POST http://localhost:8321/topics/ai/check-novelty -d '{"title": "LoRA", "abstract": "Low-rank adaptation"}' +→ {"novel": true, "reason": "No sufficiently similar content found"} + +$ curl -s -X POST "http://localhost:8321/topics/ai/pages/lora.md?draft=true" -d '{"content": "---\ntags: [paper]\n---\n# LoRA\n..."}' +→ {"status": "draft", "path": "ai/lora.md", "author": "discovery-agent"} + +$ curl -s http://localhost:8321/topics/ai/drafts +→ {"topic": "ai", "drafts": ["lora.md"]} +``` + +### Example: Using /kwiki:query-mcp and /kwiki:discover-mcp + +These skills use MCP tools directly (requires MCP server configured): + +``` +> /kwiki:query-mcp +> What topics exist, search for "attention", and show backlinks + +>>> 
wiki_list_topics() +Topics: +- ai (2 pages) +- security (0 pages) +- ml (0 pages) + +>>> wiki_query(topic_id="ai", query="attention") +Search results for 'attention' in 'ai': +- ai/transformers.md (score=0.2555) + tags: [paper, attention] + +>>> wiki_search_all(query="retrieval") +Global search results for 'retrieval': +- [ai] ai/rag-patterns.md (score=0.1171) + Retrieval augmented generation. + +>>> wiki_backlinks(topic_id="ai", path="transformers.md") +Pages linking to ai/transformers.md: +- rag-patterns.md + +>>> wiki_list_tags(topic_id="ai") +Tags in 'ai': +- attention (1 pages) +- paper (2 pages) +- rag (1 pages) + +>>> wiki_graph(topic_id="ai") +Nodes: 2, Edges: 2 + rag-patterns.md: RAG Patterns ['paper', 'rag'] + transformers.md: Transformers ['paper', 'attention'] + rag-patterns.md -> transformers.md + transformers.md -> rag-patterns.md +``` + +``` +> /kwiki:discover-mcp +> Check novelty for LoRA and write it as a draft + +>>> wiki_get_template() +Available templates: +- paper-summary: Paper Summary — Summarize an academic paper or technical report +- concept-overview: Concept Overview — Explain a technical concept or method +- how-to-guide: How-To Guide — Step-by-step practical guide +- comparison: Comparison — Compare approaches, tools, or methods + +>>> wiki_check_novelty(topic_id="ai", title="LoRA", abstract="Low-rank adaptation") +{"novel": true, "reason": "No sufficiently similar content found"} + +>>> wiki_write(topic_id="ai", path="lora.md", content="---\ntags: [paper]\n---\n# LoRA\n...", draft=True) +Draft: ai/lora.md + +>>> wiki_list_drafts(topic_id="ai") +Drafts in 'ai': +- lora.md +``` + +### Skill Workflow: Login → Query → Discover + +The recommended workflow when using skills: + +1. **Login first** (one-time): `kwiki login` +2. **Query** existing knowledge: `/kwiki:query-cli` or `/kwiki:query-mcp` +3. 
**Discover** and write new knowledge: `/kwiki:discover-cli` or `/kwiki:discover-mcp` + +Skills automatically follow best practices: +- Always check novelty before writing (avoids duplicates) +- Use structured markdown format with YAML frontmatter and tags +- Return suggested links to related content after writing +- Support draft mode for content requiring human review +- Respect ACL (returns 403 if you lack write access) + +## REST API + +### Authentication Options + +**Option A: GitHub OAuth Token** (for users) +```bash +curl -H "Authorization: Bearer <token>" https://wiki-service/topics +``` + +**Option B: SPIFFE Headers** (for agents) +```bash +curl -H "X-Spiffe-Id: spiffe://kagenti.example.com/ns/topic-ai/sa/discovery-agent" \ + https://wiki-service/topics/ai/pages/doc.md +``` + +### Endpoints + +| Endpoint | Method | Auth | Description | +|----------|--------|------|-------------| +| `/healthz` | GET | none | Health check | +| `/topics` | GET | read | List all topics | +| `/topics/{id}/pages` | GET | read | List pages in topic | +| `/topics/{id}/pages/{path}` | GET | read | Read a page (includes frontmatter) | +| `/topics/{id}/pages/{path}` | POST | write | Write a page (returns suggested links) | +| `/topics/{id}/pages/{path}?draft=true` | POST | write | Submit as draft for review | +| `/topics/{id}/query` | POST | read | Search a topic | +| `/topics/{id}/check-novelty` | POST | read | Check content novelty | +| `/topics/{id}/activity` | GET | read | Recent changes for a topic | +| `/activity` | GET | any | Recent changes across all topics | +| `/search` | POST | read | Global search across all topics | +| `/topics/{id}/backlinks/{path}` | GET | read | Find pages linking to a page | +| `/topics/{id}/tags` | GET | read | List all tags with page counts | +| `/topics/{id}/tags/{tag}` | GET | read | List pages with a specific tag | +| `/topics/{id}/graph` | GET | read | Page link graph (nodes + edges) | +| `/topics/{id}/drafts` | GET | write | List pending drafts | +| 
`/topics/{id}/drafts/{path}/approve` | POST | admin | Approve a draft | +| `/topics/{id}/drafts/{path}/reject` | POST | admin | Reject a draft | +| `/templates` | GET | none | List page templates | +| `/templates/{id}` | GET | none | Get a page template | +| `/topics/{id}/pages/{path}` | DELETE | admin | Delete a page | +| `/auth/github/login` | GET | none | Start OAuth browser flow | +| `/auth/github/callback` | GET | none | OAuth callback | +| `/auth/github/device` | POST | none | Start device flow | +| `/auth/github/device/token` | POST | none | Poll device flow | +| `/auth/whoami` | GET | any | Show current identity | +| `/auth/permissions` | GET | any | Show per-topic permissions with explanations | +| `/auth/renew` | POST | any | Renew JWT token (within 7-day grace window) | +| `/admin/reload-acl` | POST | admin | Reload ACL config | +| `/admin/init-pages` | POST | admin | Initialize GitHub Pages layout and fix page front-matter/links | + +## Versioning + +The service version is defined in **`pyproject.toml`** (single source of truth): + +```toml +[project] +version = "0.2.0" +``` + +### How version flows + +1. **`pyproject.toml`** → `version = "X.Y.Z"` — edit this to bump +2. **`wiki_service.py`** → reads version from `pyproject.toml` at startup, exposes in `/healthz` and OpenAPI +3. **`deploy.py`** → reads version from `pyproject.toml`, tags docker image as `quay.io/aslomnet/wiki-memory-service:X.Y.Z` +4. **`k8s/deployment.yaml`** → references the image tag (update manually or use `deploy.py --build`) + +### Bumping the version + +```bash +# 1. Edit pyproject.toml +# version = "0.3.0" + +# 2. Update k8s/deployment.yaml image tag to match +# image: quay.io/aslomnet/wiki-memory-service:0.3.0 + +# 3. Build, push, and deploy +uv run deploy.py --build +``` + +The `/healthz` endpoint returns the running version for verification. 
+ +## Kubernetes Deployment + +### Build and Push + +```bash +cd mcp/wiki_memory_tool + +# Read version from pyproject.toml and tag accordingly +VERSION=$(grep '^version' pyproject.toml | cut -d'"' -f2) +docker build --platform linux/amd64 -t quay.io/aslomnet/wiki-memory-service:$VERSION . +docker push quay.io/aslomnet/wiki-memory-service:$VERSION + +# Also push :latest for convenience +docker tag quay.io/aslomnet/wiki-memory-service:$VERSION quay.io/aslomnet/wiki-memory-service:latest +docker push quay.io/aslomnet/wiki-memory-service:latest +``` + +### Deploy + +```bash +# Create namespace +kubectl create ns wiki-memory-service + +# Apply manifests +kubectl apply -f k8s/serviceaccount.yaml +kubectl apply -f k8s/acl-configmap.yaml +kubectl apply -f k8s/deployment.yaml + +# Create secrets +kubectl create secret generic wiki-github-pat \ + --from-literal=WIKI_REMOTE_URL="https://x-access-token:<github-pat>@github.com/<org>/<repo>.git" \ + -n wiki-memory-service + +kubectl create secret generic wiki-github-oauth \ + --from-literal=GITHUB_CLIENT_ID=<client-id> \ + --from-literal=GITHUB_CLIENT_SECRET=<client-secret> \ + --from-literal=JWT_SECRET_KEY=$(openssl rand -hex 32) \ + -n wiki-memory-service +``` + +### Verify + +```bash +curl https://<wiki-host>/healthz +curl -X POST https://<wiki-host>/auth/github/device +``` + +## UX/DX Features + +### Activity Feed + +Track recent changes across the wiki: + +```bash +# Global activity +kwiki query activity + +# Topic-specific activity +kwiki query activity ai +``` + +Output: +``` + 2024-01-15 14:30:00 +0000 discovery-agent: ingest: ai/transformers.md + 2024-01-15 14:29:00 +0000 discovery-agent: ingest: ai/rag-patterns.md +``` + +### Tags & Frontmatter + +Pages can include YAML frontmatter with tags: + +```markdown +--- +tags: [paper, attention, transformer] +--- +# Transformers + +Content here... +``` + +Query by tags: + +```bash +kwiki query tags ai # List all tags: paper (4), attention (2), ... 
+kwiki query tag ai paper # Pages with "paper" tag +``` + +### Backlinks + +Find pages that reference a given page: + +```bash +kwiki query backlinks ai transformers.md +``` + +Output: +``` + rag-patterns.md + fine-tuning.md +``` + +### Global Search + +Search across all accessible topics simultaneously: + +```bash +kwiki query search-all "attention mechanism" +``` + +Output: +``` + [ai] ai/transformers.md (score=0.1572) + The transformer model uses self-attention mechanisms... + [ml] ml/bert.md (score=0.0834) + BERT uses bidirectional attention... +``` + +### Page Templates + +Get structured templates for new pages: + +```bash +kwiki discover template # List all templates +kwiki discover template paper-summary # Get paper summary template +kwiki discover template concept-overview # Get concept overview template +kwiki discover template how-to-guide # Get how-to guide template +kwiki discover template comparison # Get comparison template +``` + +### Draft/Review Queue + +Submit pages for human review before publishing: + +```bash +# Write as draft +kwiki discover write ai new-concept.md --draft --content "# Draft\n..." 
+ +# List pending drafts +kwiki query drafts ai + +# Approve (requires admin access) +kwiki admin approve ai new-concept.md + +# Reject with reason +kwiki admin reject ai new-concept.md --reason "needs more references" +``` + +### Page Graph + +Visualize relationships between pages: + +```bash +kwiki query graph ai +``` + +Output: +``` +Nodes (4): + transformers.md: Transformers [paper, attention] + rag-patterns.md: RAG Patterns [paper, rag] + fine-tuning.md: Fine-Tuning [guide] + evaluation.md: Evaluation Metrics [metrics] + +Edges (5): + transformers.md -> rag-patterns.md + transformers.md -> fine-tuning.md + rag-patterns.md -> transformers.md + fine-tuning.md -> evaluation.md + evaluation.md -> transformers.md +``` + +### Suggested Links on Write + +When writing a page, the service automatically suggests related pages: + +```bash +kwiki discover write ai moe.md --content "# Mixture of Experts\n..." +``` + +Output: +``` +Written: ai/moe.md by discovery-agent +Suggested links: + ai/transformers.md (score=0.0923) + ai/fine-tuning.md (score=0.0412) +``` + +### GitHub Pages Initialization + +The wiki content is stored in a git repo that doubles as a GitHub Pages site. The `init-pages` command sets up the Jekyll scaffolding and fixes existing pages for proper rendering. + +```bash +kwiki admin init-pages +``` + +Output: +``` +GitHub Pages initialized (6 files): + _config.yml + index.md + _layouts/default.html + _layouts/page.html + _includes/nav.html + assets/css/style.css +``` + +**Requires:** `platform-admins` team membership (admin access to `_system` topic). + +#### What it does + +1. 
**Writes Jekyll scaffold files** to the wiki repo root: + - `_config.yml` — Jekyll configuration with `kramdown` markdown, baseurl, and defaults that auto-apply `layout: page` to all files in topic directories + - `index.md` — Front page that lists all wiki pages with links and tags + - `_layouts/default.html` — Base HTML layout with sidebar navigation + - `_layouts/page.html` — Page layout showing title, tags, and content + - `_includes/nav.html` — Navigation listing all pages + - `assets/css/style.css` — Light/dark mode CSS using `prefers-color-scheme` + +2. **Adds YAML front-matter** to every existing `.md` file that needs it: + - `layout: page` — tells Jekyll which layout to use + - `title: "..."` — extracted from the first `# Heading` in the file + - Preserves existing `tags` and other metadata + +3. **Converts internal links to Jekyll `{% link %}` syntax:** + + | Before | After | + |--------|-------| + | `[RAG](rag-patterns.md)` | `[RAG]({% link ai/rag-patterns.md %})` | + | `[[transformers]]` | `[Transformers]({% link ai/transformers.md %})` | + + This ensures Jekyll validates links at build time — if a linked page is missing, the build fails instead of producing a broken link. + +4. **Commits and pushes** all changes to the remote repository, triggering a GitHub Pages rebuild. + +#### Light/Dark Mode + +The CSS uses `@media (prefers-color-scheme: dark)` — no JavaScript required. The site automatically matches the user's browser/OS preference. + +#### Re-running + +Safe to run multiple times. It overwrites scaffold files with the latest version and re-processes all `.md` files to fix any new pages that were added since the last run. 
+ +## Design Decisions + +| Decision | Rationale | +|----------|-----------| +| **Per-topic directories** | Topics are the isolation boundary, agents are actors with scoped access | +| **Git storage** | Full audit trail, commits attributed per agent, GitHub as persistent backend | +| **TF-IDF search** | Zero external deps for MVP; add vector search later | +| **SPIFFE + GitHub OAuth** | Workload identity for agents, GitHub identity for humans | +| **GitHub teams -> ACL** | Org structure maps directly to wiki topic access | +| **HMAC-SHA256 JWT** | Self-issued tokens, no external auth server needed | +| **uv package manager** | Fast, reproducible builds in both local dev and container | +| **Immediate push strategy** | Each write is pushed to GitHub immediately for durability | + +## Project Files + +``` +wiki_memory_tool/ +├── wiki_service.py # FastAPI service (OAuth, ACL, git, search) +├── wiki_cli.py # CLI tool (login, discover, query) +├── mcp_server.py # MCP server (local + remote modes) +├── run_local.py # Local runner (--clean, --remote flags) +├── test_agents.py # Integration test agents +├── install_kwiki_skills.py # Skill installer (symlink/copy into .claude/skills) +├── test_acl.yaml # Local ACL config +├── pyproject.toml # uv-managed deps +├── Dockerfile # Container build (uv + Python 3.14) +├── deploy.py # Deployment automation script +├── skills/ # Skill documentation (6 kwiki skills) +│ ├── wiki-discovery-api/ +│ ├── wiki-discovery-cli/ +│ ├── wiki-discovery-mcp/ +│ ├── wiki-query-api/ +│ ├── wiki-query-cli/ +│ └── wiki-query-mcp/ +└── k8s/ + ├── deployment.yaml + ├── acl-configmap.yaml + └── serviceaccount.yaml +``` + +## Environment Variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `WIKI_ROOT` | `/data/wiki` | Local git repo path for wiki pages | +| `ACL_FILE` | `/config/acl.yaml` | Path to per-topic ACL YAML | +| `SPIFFE_TRUST_DOMAIN` | `kagenti.example.com` | SPIFFE trust domain | +| `WIKI_REMOTE_URL` | 
*(empty)* | Git remote URL (enables clone + push) | +| `WIKI_PUSH_STRATEGY` | `immediate` | Push strategy: `immediate` | +| `GITHUB_CLIENT_ID` | *(empty)* | GitHub OAuth App client ID | +| `GITHUB_CLIENT_SECRET` | *(empty)* | GitHub OAuth App client secret | +| `JWT_SECRET_KEY` | *(empty)* | HMAC-SHA256 key for signing wiki JWTs | +| `JWT_EXPIRY_HOURS` | `168` | JWT token expiry in hours (default 7 days) | +| `WIKI_GITHUB_ORG` | `kaslomorg` | GitHub org for team resolution | +| `WIKI_SERVICE_URL` | *(empty)* | Remote wiki URL (for MCP remote mode) | +| `WIKI_INSECURE_TLS` | `0` | Set to `1` to disable TLS certificate verification (dev only) | + +## Troubleshooting Authorization + +### Groups showing as empty after login + +If `kwiki whoami` shows `Groups: (none)` but the user belongs to GitHub teams: + +1. **Check org OAuth app restrictions** — the most common cause. Go to: + `https://github.com/organizations/<org>/settings/oauth_application_policy` + + If the org has third-party access restrictions enabled, the wiki service's OAuth app cannot read team memberships. Either remove restrictions or approve the app. + +2. **Verify the OAuth app has `read:org` scope** — the device flow requests this scope. If the user denied it during authorization, teams won't resolve. + +3. **Check pod logs** for team resolution output: + ```bash + oc logs deployment/wiki-memory-service -n <namespace> | grep -i team + ``` + +### Debugging a deployed service + +Use this workflow to diagnose authorization issues on the cluster: + +```bash +# 1. Set KUBECONFIG if needed +export KUBECONFIG=~/.kube/config-kagenti-eventing + +# 2. Check the pod is running the expected image +oc get pods -n <namespace> -o jsonpath='{.items[0].status.containerStatuses[0].imageID}' + +# 3. Force a fresh rollout after rebuilding the image +oc rollout restart deployment/wiki-memory-service -n <namespace> + +# 4. Watch rollout progress +oc rollout status deployment/wiki-memory-service -n <namespace> + +# 5. 
Tail logs for auth-related output +oc logs -f deployment/wiki-memory-service -n <namespace> | grep -E '\[teams\]|\[auth\]|401|403' + +# 6. Re-login from CLI and check logs for team resolution +kwiki login +kwiki whoami +``` + +### ACL identity format + +The service normalizes identity formats. These all match in ACL rules: + +| JWT subject | ACL entry | Match? | +|-------------|-----------|--------| +| `github:aslom` | `github:user:aslom` | Yes (normalized) | +| `github:aslom` | `github:team:kaslomorg/ml-writers` | Yes (if user is in team) | +| `github:aslom` | `github:org:kaslomorg` | Yes (if user is org member) | + +### Token renewal issues + +- `kwiki renew` returns 404 — the server doesn't have the `/auth/renew` endpoint yet. Redeploy with the latest image. +- `kwiki renew` fails with "token too old" — the token is past the 7-day grace window. Use `kwiki login` instead. +- After renewal, `kwiki whoami` still shows old expiry — the CLI reads the locally cached token. Renewal updates the cache automatically. + +### Rebuilding and deploying + +```bash +cd mcp/wiki_memory_tool + +# Build with version from pyproject.toml +VERSION=$(grep '^version' pyproject.toml | cut -d'"' -f2) +docker build --platform linux/amd64 -t quay.io/aslomnet/wiki-memory-service:$VERSION . +docker push quay.io/aslomnet/wiki-memory-service:$VERSION + +# Deploy (rollout picks up the new image due to imagePullPolicy: Always) +oc rollout restart deployment/wiki-memory-service -n <namespace> +oc rollout status deployment/wiki-memory-service -n <namespace> + +# Verify — response includes version +curl -s https://<wiki-host>/healthz +``` diff --git a/mcp/wiki_memory_tool/deploy.py b/mcp/wiki_memory_tool/deploy.py new file mode 100644 index 00000000..021b02b0 --- /dev/null +++ b/mcp/wiki_memory_tool/deploy.py @@ -0,0 +1,274 @@ +# /// script +# requires-python = ">=3.11" +# dependencies = ["httpx"] +# /// +""" +Deploy wiki-memory-service to Kubernetes. 
+ +Usage: + export WIKI_REMOTE_URL="https://x-access-token:@github.com//.git" + uv run deploy.py + + # Or pass as argument: + uv run deploy.py --url "https://x-access-token:@github.com//.git" +""" + +import os +import re +import subprocess +import sys +import time +from pathlib import Path + +NAMESPACE = "wiki-memory-service" +SCRIPT_DIR = Path(__file__).parent.resolve() +K8S_DIR = SCRIPT_DIR / "k8s" +SECRET_NAME = "wiki-github-pat" +IMAGE_REPO = "quay.io/aslomnet/wiki-memory-service" + + +def get_version() -> str: + """Read version from pyproject.toml (single source of truth).""" + import tomllib + + pyproject = SCRIPT_DIR / "pyproject.toml" + data = tomllib.loads(pyproject.read_text()) + version = data.get("project", {}).get("version") + if not version: + print("ERROR: Could not read version from pyproject.toml") + sys.exit(1) + return version + + +def run(cmd: list[str], check: bool = True, capture: bool = False) -> subprocess.CompletedProcess: + result = subprocess.run(cmd, capture_output=capture, text=True) + if check and result.returncode != 0: + stderr = result.stderr if capture else "" + safe_cmd = re.sub(r"x-access-token:[^@\s]+", "x-access-token:***", " ".join(cmd)) + print(f" FAILED: {safe_cmd}") + if stderr: + print(f" {stderr.strip()}") + sys.exit(1) + return result + + +def parse_remote_url(url: str) -> tuple[str, str]: + """Extract PAT and clean repo URL from WIKI_REMOTE_URL.""" + match = re.match(r"https://x-access-token:([^@]+)@(.+)", url) + if not match: + print("ERROR: WIKI_REMOTE_URL must be in format:") + print(" https://x-access-token:@github.com//.git") + sys.exit(1) + pat = match.group(1) + repo_url = f"https://{match.group(2)}" + return pat, repo_url + + +def ensure_namespace(): + """Create namespace if it does not exist.""" + print(f"[1/6] Checking namespace '{NAMESPACE}'...") + result = run(["kubectl", "get", "namespace", NAMESPACE], check=False, capture=True) + if result.returncode != 0: + print(f" Creating namespace '{NAMESPACE}'...") + 
run(["kubectl", "create", "namespace", NAMESPACE]) + else: + print(f" Namespace '{NAMESPACE}' exists.") + + +def apply_manifests(): + """Apply all k8s manifests in order.""" + print("[2/6] Applying Kubernetes manifests...") + manifests = ["serviceaccount.yaml", "acl-configmap.yaml", "deployment.yaml"] + for manifest in manifests: + path = K8S_DIR / manifest + if not path.exists(): + print(f" ERROR: {path} not found") + sys.exit(1) + print(f" Applying {manifest}...") + run(["kubectl", "apply", "-f", str(path)]) + + +def create_or_update_secret(remote_url: str): + """Create or update the wiki-github-pat secret.""" + print(f"[3/6] Creating/updating secret '{SECRET_NAME}'...") + result = run( + ["kubectl", "create", "secret", "generic", SECRET_NAME, + f"--from-literal=WIKI_REMOTE_URL={remote_url}", + "-n", NAMESPACE, "--dry-run=client", "-o", "yaml"], + capture=True, + ) + pipe = subprocess.run( + ["kubectl", "apply", "-f", "-"], + input=result.stdout, text=True, capture_output=True, + ) + if pipe.returncode != 0: + print(f" FAILED: {pipe.stderr.strip()}") + sys.exit(1) + print(" Secret configured.") + + +def restart_and_wait(): + """Set image tag from pyproject.toml version and wait for rollout.""" + version = get_version() + image = f"{IMAGE_REPO}:{version}" + print(f"[4/6] Setting image to {image} and restarting...") + run(["kubectl", "set", "image", "deployment/wiki-memory-service", + f"wiki-memory={image}", "-n", NAMESPACE]) + print(" Waiting for rollout to complete...") + run(["kubectl", "rollout", "status", "deployment/wiki-memory-service", + "-n", NAMESPACE, "--timeout=90s"]) + print(" Deployment ready.") + + +def run_validation(): + """Port-forward and run basic validation tests.""" + print("[5/6] Running validation tests...") + + import httpx + + port = 18321 + pf_cmd = ["kubectl", "port-forward", "svc/wiki-memory-service", + f"{port}:8000", "-n", NAMESPACE] + pf_proc = subprocess.Popen(pf_cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + + try: + 
base = f"http://localhost:{port}" + client = httpx.Client(timeout=30) + + # Wait for port-forward to be ready + for i in range(15): + try: + resp = client.get(f"{base}/healthz") + if resp.status_code == 200: + break + except httpx.ConnectError: + pass + time.sleep(2) + else: + print(" FAILED: Service not reachable via port-forward") + sys.exit(1) + + health = resp.json() + print(f" Health check: OK (topics={health['topics']}, root={health['root']})") + + # Test topic listing + headers = { + "X-Spiffe-Id": "spiffe://kagenti.example.com/ns/wiki-system/sa/query-agent", + "X-Original-Subject": "alice@example.com", + } + resp = client.get(f"{base}/topics", headers=headers) + assert resp.status_code == 200, f"GET /topics returned {resp.status_code}" + topics = resp.json()["topics"] + print(f" Topic listing: OK ({len(topics)} topics visible)") + + # Test novelty check (exercises search + ACL) + resp = client.post( + f"{base}/topics/ai/check-novelty", + json={"title": "Test", "abstract": "deployment validation check"}, + headers=headers, + ) + assert resp.status_code == 200, f"POST check-novelty returned {resp.status_code}" + print(f" Novelty check: OK (novel={resp.json()['novel']})") + + # Test write (discovery agent) + write_headers = { + "X-Spiffe-Id": "spiffe://kagenti.example.com/ns/topic-ai/sa/discovery-agent", + } + test_page = "_deploy-validation-test.md" + resp = client.post( + f"{base}/topics/ai/pages/{test_page}", + json={"content": "# Deploy validation\nThis page verifies the deployment works.", + "message": "deploy: validation test page"}, + headers=write_headers, + ) + if resp.status_code == 200: + print(f" Write test: OK (wrote {test_page})") + # Read it back + resp = client.get(f"{base}/topics/ai/pages/{test_page}", headers=headers) + assert resp.status_code == 200, f"GET page returned {resp.status_code}" + print(" Read test: OK") + else: + print(f" Write test: SKIPPED ({resp.status_code} — may need git push fix)") + + finally: + pf_proc.terminate() + 
pf_proc.wait() + + +def build_and_push(version: str, skip_build: bool = False): + """Build and push container image tagged with version.""" + tag = f"{IMAGE_REPO}:{version}" + latest = f"{IMAGE_REPO}:latest" + if skip_build: + print(f"[build] Skipping build (--no-build), using image {tag}") + return + print(f"[build] Building {tag}...") + run(["docker", "build", "-t", tag, "-t", latest, str(SCRIPT_DIR)]) + print(f"[build] Pushing {tag} and :latest...") + run(["docker", "push", tag]) + run(["docker", "push", latest]) + + +def print_summary(repo_url: str): + """Print deployment summary.""" + version = get_version() + print("[6/6] Deployment complete!") + print() + print(" Summary:") + print(f" Namespace: {NAMESPACE}") + print(" Service: wiki-memory-service:8000") + print(f" Image: {IMAGE_REPO}:{version}") + if repo_url: + print(f" Git remote: {repo_url}") + print(f" Secret: {SECRET_NAME}") + else: + print(" Git remote: (local-only, no remote configured)") + print() + print(" To run full tests:") + print(f" kubectl port-forward svc/wiki-memory-service 8321:8000 -n {NAMESPACE} &") + print(" WIKI_SERVICE_URL=http://localhost:8321 uv run python test_agents.py") + + +def main(): + version = get_version() + print("=" * 60) + print(f" Wiki Memory Service v{version} — Kubernetes Deployment") + print("=" * 60) + print() + + # Parse flags + remote_url = os.environ.get("WIKI_REMOTE_URL", "") + do_build = "--build" in sys.argv + for arg in sys.argv[1:]: + if arg.startswith("--url="): + remote_url = arg.split("=", 1)[1] + elif arg.startswith("https://"): + remote_url = arg + + if remote_url: + pat, repo_url = parse_remote_url(remote_url) + print(f" Git repo: {repo_url}") + print(f" PAT: {pat[:8]}...{pat[-4:]}") + else: + repo_url = "" + print(" Mode: local-only (no WIKI_REMOTE_URL — wiki will use local git init)") + + print(f" Version: {version}") + print() + + if do_build: + build_and_push(version) + + ensure_namespace() + apply_manifests() + if remote_url: + 
create_or_update_secret(remote_url) + else: + print("[3/6] Skipping secret (no WIKI_REMOTE_URL — local-only mode).") + restart_and_wait() + run_validation() + print_summary(repo_url) + + +if __name__ == "__main__": + main() diff --git a/mcp/wiki_memory_tool/install_kwiki_skills.py b/mcp/wiki_memory_tool/install_kwiki_skills.py new file mode 100644 index 00000000..7c282dc3 --- /dev/null +++ b/mcp/wiki_memory_tool/install_kwiki_skills.py @@ -0,0 +1,165 @@ +# /// script +# requires-python = ">=3.11" +# dependencies = [] +# /// +""" +Install wiki-memory-tool skills into a Claude Code skills directory. + +Creates symlinks (default) or copies skill directories from +mcp/wiki_memory_tool/skills/ into the target .claude/skills/ directory. + +Usage: + # Symlink into a project's .claude/skills/ + uv run python install_skills.py /path/to/project/.claude/skills + + # Symlink into ~/.claude/skills/ (user-global) + uv run python install_skills.py --global + + # Copy instead of symlink + uv run python install_skills.py --copy /path/to/project/.claude/skills + uv run python install_skills.py --global --copy + + # Remove previously installed skills + uv run python install_skills.py --uninstall /path/to/project/.claude/skills +""" + +import argparse +import shutil +import sys +from pathlib import Path + +SCRIPT_DIR = Path(__file__).parent.resolve() +SKILLS_SOURCE = SCRIPT_DIR / "skills" + +SKILL_NAMES = [ + "wiki-discovery-api", + "wiki-discovery-cli", + "wiki-discovery-mcp", + "wiki-query-api", + "wiki-query-cli", + "wiki-query-mcp", +] + +SKILL_NAME_MAP = { + "wiki-discovery-api": "kwiki:discover-api", + "wiki-discovery-cli": "kwiki:discover-cli", + "wiki-discovery-mcp": "kwiki:discover-mcp", + "wiki-query-api": "kwiki:query-api", + "wiki-query-cli": "kwiki:query-cli", + "wiki-query-mcp": "kwiki:query-mcp", +} + + +def install_skills(target_dir: Path, copy: bool = False): + if not target_dir.exists(): + print(f"ERROR: Target directory does not exist: {target_dir}", 
file=sys.stderr) + print("Create it first or check the path.", file=sys.stderr) + sys.exit(1) + + if not SKILLS_SOURCE.exists(): + print(f"ERROR: Source skills directory not found: {SKILLS_SOURCE}", file=sys.stderr) + sys.exit(1) + + installed = [] + for src_name in SKILL_NAMES: + src = SKILLS_SOURCE / src_name + if not src.exists(): + print(f" SKIP: {src_name} (source not found)") + continue + + dest_name = SKILL_NAME_MAP.get(src_name, src_name) + dest = target_dir / dest_name + + if dest.exists() or dest.is_symlink(): + if dest.is_symlink(): + dest.unlink() + else: + shutil.rmtree(dest) + + if copy: + shutil.copytree(src, dest) + print(f" COPY: {dest_name}/ <- {src}") + else: + dest.symlink_to(src) + print(f" LINK: {dest_name}/ -> {src}") + + installed.append(dest_name) + + print(f"\nInstalled {len(installed)} skills into {target_dir}") + print("Available as: " + ", ".join(f"/{name}" for name in installed)) + + +def uninstall_skills(target_dir: Path): + if not target_dir.exists(): + print(f"ERROR: Target directory does not exist: {target_dir}", file=sys.stderr) + sys.exit(1) + + removed = [] + for src_name in SKILL_NAMES: + dest_name = SKILL_NAME_MAP.get(src_name, src_name) + dest = target_dir / dest_name + + if dest.exists() or dest.is_symlink(): + if dest.is_symlink(): + dest.unlink() + else: + shutil.rmtree(dest) + print(f" REMOVED: {dest_name}/") + removed.append(dest_name) + + if removed: + print(f"\nRemoved {len(removed)} skills from {target_dir}") + else: + print("No wiki skills found to remove.") + + +def main(): + parser = argparse.ArgumentParser( + description="Install wiki-memory-tool skills into a Claude Code skills directory" + ) + parser.add_argument( + "target", + nargs="?", + help="Target .claude/skills/ directory path", + ) + parser.add_argument( + "--global", + dest="global_install", + action="store_true", + help="Install into ~/.claude/skills/", + ) + parser.add_argument( + "--copy", + action="store_true", + help="Copy files instead of creating 
symlinks", + ) + parser.add_argument( + "--uninstall", + action="store_true", + help="Remove previously installed wiki skills", + ) + + args = parser.parse_args() + + if args.global_install: + target = Path.home() / ".claude" / "skills" + elif args.target: + target = Path(args.target).resolve() + else: + parser.error("Provide a target directory or use --global") + + if not target.exists(): + print(f"ERROR: Skills directory does not exist: {target}", file=sys.stderr) + print("Ensure Claude Code is initialized in the target project.", file=sys.stderr) + sys.exit(1) + + if args.uninstall: + uninstall_skills(target) + else: + mode = "copy" if args.copy else "symlink" + print(f"Installing wiki skills ({mode}) into: {target}\n") + install_skills(target, copy=args.copy) + + +if __name__ == "__main__": + main() diff --git a/mcp/wiki_memory_tool/k8s/acl-configmap.yaml b/mcp/wiki_memory_tool/k8s/acl-configmap.yaml new file mode 100644 index 00000000..e446be5b --- /dev/null +++ b/mcp/wiki_memory_tool/k8s/acl-configmap.yaml @@ -0,0 +1,78 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: wiki-memory-acl + namespace: wiki-memory-service +data: + acl.yaml: | + # Per-topic ACL for Wiki Memory Service + # Identity prefixes: + # spiffe:// — workload identity (agents) + # github:user: — individual GitHub user + # github:team:/ — GitHub team membership + # github:org: — any member of GitHub org + # * — any authenticated identity + + topics: + # AI/ML research topic — org members can read, ml-writers can write + ai: + writers: + - "spiffe://kagenti.example.com/ns/topic-ai/sa/discovery-agent" + - "github:team:kaslomorg/ml-writers" + - "github:team:kaslomorg/platform-admins" + readers: + - "spiffe://kagenti.example.com/ns/wiki-system/sa/query-agent" + - "github:org:kaslomorg" + - "*" + admins: + - "github:user:aslom" + - "github:team:kaslomorg/platform-admins" + + # Security research — restricted to security team + security: + writers: + - 
"spiffe://kagenti.example.com/ns/topic-security/sa/discovery-agent" + - "github:team:kaslomorg/security-team" + - "github:team:kaslomorg/platform-admins" + readers: + - "spiffe://kagenti.example.com/ns/wiki-system/sa/query-agent" + - "github:team:kaslomorg/security-team" + - "github:org:kaslomorg" + admins: + - "github:user:aslom" + - "github:team:kaslomorg/platform-admins" + + # ML research — ml-team can read and write + ml: + writers: + - "spiffe://kagenti.example.com/ns/topic-ml/sa/discovery-agent" + - "github:team:kaslomorg/ml-team" + - "github:team:kaslomorg/ml-writers" + readers: + - "spiffe://kagenti.example.com/ns/wiki-system/sa/query-agent" + - "github:team:kaslomorg/ml-team" + - "github:org:kaslomorg" + admins: + - "github:user:aslom" + - "github:team:kaslomorg/platform-admins" + + # Internal design — platform admins only + internal-design: + writers: + - "spiffe://kagenti.example.com/ns/topic-internal-design/sa/discovery-agent" + - "github:team:kaslomorg/platform-admins" + readers: + - "spiffe://kagenti.example.com/ns/wiki-system/sa/query-agent" + - "github:team:kaslomorg/platform-admins" + - "github:user:aslom" + admins: + - "github:user:aslom" + - "github:team:kaslomorg/platform-admins" + + # System-level (for /admin endpoints) + _system: + writers: [] + readers: [] + admins: + - "github:user:aslom" + - "github:team:kaslomorg/platform-admins" diff --git a/mcp/wiki_memory_tool/k8s/deployment.yaml b/mcp/wiki_memory_tool/k8s/deployment.yaml new file mode 100644 index 00000000..74176162 --- /dev/null +++ b/mcp/wiki_memory_tool/k8s/deployment.yaml @@ -0,0 +1,112 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: wiki-memory-service + namespace: wiki-memory-service + labels: + app: wiki-memory-service +spec: + replicas: 1 + selector: + matchLabels: + app: wiki-memory-service + template: + metadata: + labels: + app: wiki-memory-service + spec: + serviceAccountName: wiki-memory-service + containers: + - name: wiki-memory + image: 
quay.io/aslomnet/wiki-memory-service:0.2.0 + imagePullPolicy: Always + ports: + - containerPort: 8000 + name: http + env: + - name: WIKI_ROOT + value: /data/wiki + - name: ACL_FILE + value: /config/acl.yaml + - name: SPIFFE_TRUST_DOMAIN + value: kagenti.example.com + - name: WIKI_REMOTE_URL + valueFrom: + secretKeyRef: + name: wiki-github-pat + key: WIKI_REMOTE_URL + optional: true + - name: GITHUB_CLIENT_ID + valueFrom: + secretKeyRef: + name: wiki-github-oauth + key: GITHUB_CLIENT_ID + optional: true + - name: GITHUB_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: wiki-github-oauth + key: GITHUB_CLIENT_SECRET + optional: true + - name: JWT_SECRET_KEY + valueFrom: + secretKeyRef: + name: wiki-github-oauth + key: JWT_SECRET_KEY + - name: JWT_EXPIRY_HOURS + value: "168" + volumeMounts: + - name: wiki-data + mountPath: /data/wiki + - name: acl-config + mountPath: /config + readinessProbe: + httpGet: + path: /healthz + port: 8000 + initialDelaySeconds: 3 + securityContext: + runAsNonRoot: true + runAsUser: 1001 + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + capabilities: + drop: [ALL] + resources: + requests: + memory: "128Mi" + cpu: "100m" + limits: + memory: "512Mi" + cpu: "500m" + volumes: + - name: wiki-data + persistentVolumeClaim: + claimName: wiki-memory-pvc + - name: acl-config + configMap: + name: wiki-memory-acl +--- +apiVersion: v1 +kind: Service +metadata: + name: wiki-memory-service + namespace: wiki-memory-service +spec: + selector: + app: wiki-memory-service + ports: + - port: 8000 + targetPort: 8000 + name: http +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: wiki-memory-pvc + namespace: wiki-memory-service +spec: + accessModes: [ReadWriteOnce] + resources: + requests: + storage: 5Gi diff --git a/mcp/wiki_memory_tool/k8s/serviceaccount.yaml b/mcp/wiki_memory_tool/k8s/serviceaccount.yaml new file mode 100644 index 00000000..9f19503e --- /dev/null +++ b/mcp/wiki_memory_tool/k8s/serviceaccount.yaml @@ -0,0 +1,5 
@@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: wiki-memory-service + namespace: wiki-memory-service diff --git a/mcp/wiki_memory_tool/main.py b/mcp/wiki_memory_tool/main.py new file mode 100644 index 00000000..cf81870c --- /dev/null +++ b/mcp/wiki_memory_tool/main.py @@ -0,0 +1,6 @@ +def main(): + print("Hello from wiki-memory-service!") + + +if __name__ == "__main__": + main() diff --git a/mcp/wiki_memory_tool/mcp_server.py b/mcp/wiki_memory_tool/mcp_server.py new file mode 100644 index 00000000..a7687f62 --- /dev/null +++ b/mcp/wiki_memory_tool/mcp_server.py @@ -0,0 +1,471 @@ +""" +MCP Server for Wiki Memory Service. + +Exposes wiki operations as MCP tools for Claude Code and other MCP clients. + +Two modes: + - Local (default): imports wiki_service.py directly, no ACL enforcement + - Remote (WIKI_SERVICE_URL set): calls wiki service HTTP API with cached auth token + +Supports two transports: + - stdio: for Claude Code subprocess integration (default) + - streamable-http: for remote MCP clients (set MCP_TRANSPORT=streamable-http) +""" + +import json +import logging +import os +from pathlib import Path + +import httpx +from mcp.server.fastmcp import FastMCP + +logger = logging.getLogger(__name__) + +MCP_TRANSPORT = os.environ.get("MCP_TRANSPORT", "stdio") +MCP_PORT = int(os.environ.get("MCP_PORT", "8322")) +MCP_HOST = os.environ.get("MCP_HOST", "0.0.0.0") +WIKI_SERVICE_URL = os.environ.get("WIKI_SERVICE_URL", "") + +mcp = FastMCP("wiki-memory", host=MCP_HOST, port=MCP_PORT) + +TOKEN_FILE = Path.home() / ".wiki-memory" / "token.json" + + +def _load_token() -> str | None: + if TOKEN_FILE.exists(): + data = json.loads(TOKEN_FILE.read_text()) + return data.get("token") + return None + + +def _remote_client() -> httpx.Client: + headers = {} + token = _load_token() + if token: + headers["Authorization"] = f"Bearer {token}" + insecure = os.environ.get("WIKI_INSECURE_TLS") == "1" + if insecure: + logger.warning("WIKI_INSECURE_TLS=1 — TLS verification disabled 
(dev only)") + return httpx.Client(base_url=WIKI_SERVICE_URL, headers=headers, timeout=30, verify=not insecure) + + +def _get_service(): + """Lazy import wiki_service to ensure env vars are set before import.""" + import wiki_service as ws + return ws + + +@mcp.tool( + name="wiki_list_topics", + description="List all available wiki topics and their page counts.", +) +async def wiki_list_topics() -> str: + if WIKI_SERVICE_URL: + client = _remote_client() + resp = client.get("/topics") + if resp.status_code != 200: + return f"Error: {resp.status_code} {resp.text}" + topics = resp.json().get("topics", []) + if not topics: + return "No topics found." + return "Topics:\n" + "\n".join(f"- {t['topic_id']} ({t['page_count']} pages)" for t in topics) + + ws = _get_service() + topics = [] + for topic_id in ws._acl_cache: + if topic_id.startswith("_"): + continue + page_count = len(list(ws._topic_dir(topic_id).rglob("*.md"))) + topics.append(f"- {topic_id} ({page_count} pages)") + if not topics: + return "No topics found." + return "Topics:\n" + "\n".join(topics) + + +@mcp.tool( + name="wiki_query", + description="Search a wiki topic for pages matching a query. Returns ranked results with snippets.", +) +async def wiki_query(topic_id: str, query: str, limit: int = 10) -> str: + if WIKI_SERVICE_URL: + client = _remote_client() + resp = client.post(f"/topics/{topic_id}/query", json={"query": query, "limit": limit}) + if resp.status_code != 200: + return f"Error: {resp.status_code} {resp.text}" + results = resp.json().get("results", []) + if not results: + return f"No results for '{query}' in topic '{topic_id}'." 
+ lines = [f"Search results for '{query}' in '{topic_id}':"] + for r in results: + lines.append(f"- {r['path']} (score={r['score']})") + if r.get("snippet"): + lines.append(f" {r['snippet'][:150]}") + return "\n".join(lines) + + ws = _get_service() + results = ws.search_topic(topic_id, query, limit) + if not results: + return f"No results for '{query}' in topic '{topic_id}'." + lines = [f"Search results for '{query}' in '{topic_id}':"] + for r in results: + lines.append(f"- {r['path']} (score={r['score']})") + if r["snippet"]: + lines.append(f" {r['snippet'][:150]}") + return "\n".join(lines) + + +@mcp.tool( + name="wiki_read", + description="Read the full content of a wiki page. Provide topic_id and the page path (e.g. 'transformers.md').", +) +async def wiki_read(topic_id: str, path: str) -> str: + if WIKI_SERVICE_URL: + client = _remote_client() + resp = client.get(f"/topics/{topic_id}/pages/{path}") + if resp.status_code != 200: + return f"Error: {resp.status_code} {resp.text}" + return resp.json().get("content", "") + + ws = _get_service() + full = ws._topic_dir(topic_id) / path + if not full.exists(): + return f"Page not found: {topic_id}/{path}" + return full.read_text() + + +@mcp.tool( + name="wiki_write", + description="Write or update a wiki page. Commits to git (and pushes to remote if configured). 
Set draft=True to submit for review.", +) +async def wiki_write(topic_id: str, path: str, content: str, message: str = "", draft: bool = False) -> str: + if WIKI_SERVICE_URL: + client = _remote_client() + body = {"content": content, "message": message or f"mcp-write: {topic_id}/{path}"} + url = f"/topics/{topic_id}/pages/{path}" + if draft: + url += "?draft=true" + resp = client.post(url, json=body) + if resp.status_code != 200: + return f"Error: {resp.status_code} {resp.text}" + data = resp.json() + result = f"{'Draft' if draft else 'Written'}: {topic_id}/{path}" + if data.get("suggested_links"): + result += "\nSuggested links:\n" + "\n".join(f"- {s['path']} (score={s['score']})" for s in data["suggested_links"]) + return result + + ws = _get_service() + topic_dir = ws._topic_dir(topic_id) + if draft: + full = topic_dir / "_drafts" / path + else: + full = topic_dir / path + full.parent.mkdir(parents=True, exist_ok=True) + full.write_text(content) + + rel = str(full.relative_to(ws.WIKI_ROOT)) + msg = message or f"mcp-write: {topic_id}/{path}" + ws._commit(rel, msg, "mcp-client") + return f"{'Draft' if draft else 'Written'}: {topic_id}/{path}" + + +@mcp.tool( + name="wiki_check_novelty", + description="Check if content is novel relative to existing wiki pages in a topic. 
Returns whether similar content already exists.",
+)
+async def wiki_check_novelty(topic_id: str, title: str, abstract: str) -> str:
+    """Return a JSON string saying whether title+abstract looks new in the topic."""
+    if WIKI_SERVICE_URL:
+        client = _remote_client()
+        resp = client.post(f"/topics/{topic_id}/check-novelty", json={"title": title, "abstract": abstract})
+        if resp.status_code != 200:
+            return f"Error: {resp.status_code} {resp.text}"
+        return json.dumps(resp.json())
+
+    ws = _get_service()
+    # Local mode: search the topic with title and abstract joined into one query.
+    combined = f"{title} {abstract}"
+    results = ws.search_topic(topic_id, combined, limit=3)
+
+    # Only the first hit's score is compared against the 0.15 cut-off.
+    # NOTE(review): presumably search_topic returns hits best-first — confirm.
+    if results and results[0]["score"] > 0.15:
+        similar = [r["path"] for r in results[:3]]
+        return json.dumps({"novel": False, "reason": "Similar content exists", "similar": similar})
+    return json.dumps({"novel": True, "reason": "No sufficiently similar content found"})
+
+
+@mcp.tool(
+    name="wiki_activity",
+    description="Get recent changes (git log) for a topic or globally. Returns commit history.",
+)
+async def wiki_activity(topic_id: str = "", limit: int = 20) -> str:
+    """List recent activity entries for one topic, or for all topics when topic_id is empty."""
+    if WIKI_SERVICE_URL:
+        client = _remote_client()
+        # An empty topic_id selects the global activity endpoint.
+        url = f"/topics/{topic_id}/activity?limit={limit}" if topic_id else f"/activity?limit={limit}"
+        resp = client.get(url)
+        if resp.status_code != 200:
+            return f"Error: {resp.status_code} {resp.text}"
+        entries = resp.json().get("entries", [])
+        if not entries:
+            return "No recent activity."
+        lines = ["Recent activity:"]
+        for e in entries:
+            lines.append(f"- {e['timestamp']} {e['author']}: {e['message']}")
+        return "\n".join(lines)
+
+    ws = _get_service()
+    # The local service receives None (not "") to mean "no topic filter".
+    entries = ws.get_activity(topic_id=topic_id or None, limit=limit)
+    if not entries:
+        return "No recent activity."
+    lines = ["Recent activity:"]
+    for e in entries:
+        lines.append(f"- {e['timestamp']} {e['author']}: {e['message']}")
+    return "\n".join(lines)
+
+
+@mcp.tool(
+    name="wiki_backlinks",
+    description="Find pages that link to a given page (backlinks/inbound references).",
+)
+async def wiki_backlinks(topic_id: str, path: str) -> str:
+    """List the pages that link to topic_id/path, as a bulleted text block."""
+    if WIKI_SERVICE_URL:
+        client = _remote_client()
+        resp = client.get(f"/topics/{topic_id}/backlinks/{path}")
+        if resp.status_code != 200:
+            return f"Error: {resp.status_code} {resp.text}"
+        backlinks = resp.json().get("backlinks", [])
+        if not backlinks:
+            return f"No pages link to {topic_id}/{path}."
+        return f"Pages linking to {topic_id}/{path}:\n" + "\n".join(f"- {b}" for b in backlinks)
+
+    # Local mode produces the same text as remote mode, using the in-process service.
+    ws = _get_service()
+    backlinks = ws.find_backlinks(topic_id, path)
+    if not backlinks:
+        return f"No pages link to {topic_id}/{path}."
+    return f"Pages linking to {topic_id}/{path}:\n" + "\n".join(f"- {b}" for b in backlinks)
+
+
+@mcp.tool(
+    name="wiki_search_all",
+    description="Search across all wiki topics for pages matching a query. Returns results from all accessible topics.",
+)
+async def wiki_search_all(query: str, limit: int = 10) -> str:
+    """Search every topic for `query` and return the top `limit` hits as text."""
+    if WIKI_SERVICE_URL:
+        client = _remote_client()
+        resp = client.post("/search", json={"query": query, "limit": limit})
+        if resp.status_code != 200:
+            return f"Error: {resp.status_code} {resp.text}"
+        results = resp.json().get("results", [])
+        if not results:
+            return f"No results for '{query}' across all topics."
+        lines = [f"Global search results for '{query}':"]
+        for r in results:
+            # Remote hits may lack topic_id; "?" is shown in that case.
+            lines.append(f"- [{r.get('topic_id', '?')}] {r['path']} (score={r['score']})")
+            if r.get("snippet"):
+                lines.append(f" {r['snippet'][:150]}")
+        return "\n".join(lines)
+
+    ws = _get_service()
+    all_results = []
+    # Local mode: search each topic known to the ACL cache, skipping "_"-prefixed ones.
+    for topic_id in ws._acl_cache:
+        if topic_id.startswith("_"):
+            continue
+        results = ws.search_topic(topic_id, query, limit)
+        for r in results:
+            # Tag each hit with its topic so merged results stay attributable.
+            r["topic_id"] = topic_id
+        all_results.extend(results)
+    # Merge, rank by score (highest first), then keep the overall top `limit`.
+    all_results.sort(key=lambda r: r["score"], reverse=True)
+    all_results = all_results[:limit]
+    if not all_results:
+        return f"No results for '{query}' across all topics."
+    lines = [f"Global search results for '{query}':"]
+    for r in all_results:
+        lines.append(f"- [{r['topic_id']}] {r['path']} (score={r['score']})")
+        if r.get("snippet"):
+            lines.append(f" {r['snippet'][:150]}")
+    return "\n".join(lines)
+
+
+@mcp.tool(
+    name="wiki_get_template",
+    description="Get a page template for structured content creation. Available: paper-summary, concept-overview, how-to-guide, comparison. Omit template_id to list all.",
+)
+async def wiki_get_template(template_id: str = "") -> str:
+    """Return one template's content, or the list of templates when template_id is empty."""
+    if WIKI_SERVICE_URL:
+        client = _remote_client()
+        url = f"/templates/{template_id}" if template_id else "/templates"
+        resp = client.get(url)
+        if resp.status_code != 200:
+            return f"Error: {resp.status_code} {resp.text}"
+        data = resp.json()
+        # A "templates" key marks the listing response; otherwise it is a single template.
+        if "templates" in data:
+            lines = ["Available templates:"]
+            for t in data["templates"]:
+                lines.append(f"- {t['id']}: {t['name']} — {t['description']}")
+            return "\n".join(lines)
+        return f"Template: {data['name']}\n\n{data['content']}"
+
+    # Local mode reads the template table straight from the service module.
+    from wiki_service import _TEMPLATES
+    if not template_id:
+        lines = ["Available templates:"]
+        for tid, t in _TEMPLATES.items():
+            lines.append(f"- {tid}: {t['name']} — {t['description']}")
+        return "\n".join(lines)
+    t = _TEMPLATES.get(template_id)
+    if not t:
+        return f"Template '{template_id}' not found.
Available: {list(_TEMPLATES.keys())}"
+    return f"Template: {t['name']}\n\n{t['content']}"
+
+
+@mcp.tool(
+    name="wiki_list_drafts",
+    description="List pending drafts in a topic that need review/approval.",
+)
+async def wiki_list_drafts(topic_id: str) -> str:
+    """List the draft pages waiting in a topic, as a bulleted text block."""
+    if WIKI_SERVICE_URL:
+        client = _remote_client()
+        resp = client.get(f"/topics/{topic_id}/drafts")
+        if resp.status_code != 200:
+            return f"Error: {resp.status_code} {resp.text}"
+        drafts = resp.json().get("drafts", [])
+        if not drafts:
+            return f"No pending drafts in '{topic_id}'."
+        return f"Drafts in '{topic_id}':\n" + "\n".join(f"- {d}" for d in drafts)
+
+    ws = _get_service()
+    # Local mode: drafts are the *.md files under the topic's "_drafts" directory.
+    drafts_dir = ws._topic_dir(topic_id) / "_drafts"
+    if not drafts_dir.exists():
+        return f"No pending drafts in '{topic_id}'."
+    # Paths are reported relative to "_drafts", searched recursively.
+    pages = [str(f.relative_to(drafts_dir)) for f in drafts_dir.rglob("*.md")]
+    if not pages:
+        return f"No pending drafts in '{topic_id}'."
+    return f"Drafts in '{topic_id}':\n" + "\n".join(f"- {p}" for p in sorted(pages))
+
+
+@mcp.tool(
+    name="wiki_approve_draft",
+    description="Approve a draft and publish it as a live wiki page.
Requires admin access.",
+)
+async def wiki_approve_draft(topic_id: str, path: str) -> str:
+    """Publish a draft: copy it to the live location, delete the draft, commit.
+
+    Args:
+        topic_id: Topic that owns the draft.
+        path: Draft path relative to the topic's "_drafts" directory; the
+            live page is written to the same relative path in the topic.
+
+    Returns:
+        "Approved: ..." on success, otherwise an "Error: ..." or
+        "Draft not found: ..." message.
+    """
+    if WIKI_SERVICE_URL:
+        client = _remote_client()
+        resp = client.post(f"/topics/{topic_id}/drafts/{path}/approve")
+        if resp.status_code != 200:
+            return f"Error: {resp.status_code} {resp.text}"
+        return f"Approved: {topic_id}/{path}"
+
+    ws = _get_service()
+    topic_dir = ws._topic_dir(topic_id)
+    drafts_dir = topic_dir / "_drafts"
+    draft_file = drafts_dir / path
+    live_file = topic_dir / path
+    # `path` is caller-supplied: both the file that is read/deleted and the
+    # file that is written must stay inside their intended directories.
+    if not (
+        draft_file.resolve().is_relative_to(drafts_dir.resolve())
+        and live_file.resolve().is_relative_to(topic_dir.resolve())
+    ):
+        return f"Error: invalid path '{path}' (must stay inside topic '{topic_id}')"
+    # is_file() rather than exists(): a directory is not an approvable draft.
+    if not draft_file.is_file():
+        return f"Draft not found: {topic_id}/_drafts/{path}"
+    live_file.parent.mkdir(parents=True, exist_ok=True)
+    live_file.write_text(draft_file.read_text())
+    draft_file.unlink()
+    rel_live = str(live_file.relative_to(ws.WIKI_ROOT))
+    # NOTE(review): only the live path is passed to _commit; whether the draft
+    # deletion is also recorded depends on _commit's staging — confirm.
+    ws._commit(rel_live, f"approve: {topic_id}/{path}", "mcp-admin")
+    return f"Approved: {topic_id}/{path}"
+
+
+@mcp.tool(
+    name="wiki_list_tags",
+    description="List all tags in a topic with page counts.",
+)
+async def wiki_list_tags(topic_id: str) -> str:
+    """Count how many live pages carry each frontmatter tag in a topic."""
+    if WIKI_SERVICE_URL:
+        client = _remote_client()
+        resp = client.get(f"/topics/{topic_id}/tags")
+        if resp.status_code != 200:
+            return f"Error: {resp.status_code} {resp.text}"
+        tags = resp.json().get("tags", {})
+        if not tags:
+            return f"No tags in '{topic_id}'."
+        lines = [f"Tags in '{topic_id}':"]
+        for tag, count in sorted(tags.items()):
+            lines.append(f"- {tag} ({count} pages)")
+        return "\n".join(lines)
+
+    ws = _get_service()
+    topic_dir = ws._topic_dir(topic_id)
+    tag_counts: dict[str, int] = {}
+    for f in topic_dir.rglob("*.md"):
+        # Drafts are not live pages and do not contribute to tag counts.
+        if "_drafts" in f.parts:
+            continue
+        content = f.read_text(errors="replace")
+        meta, _ = ws.parse_frontmatter(content)
+        # `or []`: a "tags" key present with a None value must not break iteration.
+        for tag in meta.get("tags") or []:
+            tag_counts[tag] = tag_counts.get(tag, 0) + 1
+    if not tag_counts:
+        return f"No tags in '{topic_id}'."
+    lines = [f"Tags in '{topic_id}':"]
+    for tag, count in sorted(tag_counts.items()):
+        lines.append(f"- {tag} ({count} pages)")
+    return "\n".join(lines)
+
+
+@mcp.tool(
+    name="wiki_pages_by_tag",
+    description="List all pages in a topic that have a specific tag.",
+)
+async def wiki_pages_by_tag(topic_id: str, tag: str) -> str:
+    """List the live pages in a topic whose frontmatter tags include `tag`.
+
+    Args:
+        topic_id: Topic to scan.
+        tag: Tag to match exactly against each page's "tags" list.
+
+    Returns:
+        A bulleted list of matching page paths, a "No pages with tag ..."
+        message, or "Error: ..." on a non-200 remote response.
+    """
+    if WIKI_SERVICE_URL:
+        client = _remote_client()
+        resp = client.get(f"/topics/{topic_id}/tags/{tag}")
+        if resp.status_code != 200:
+            return f"Error: {resp.status_code} {resp.text}"
+        pages = resp.json().get("pages", [])
+        if not pages:
+            return f"No pages with tag '{tag}' in '{topic_id}'."
+        return f"Pages tagged '{tag}' in '{topic_id}':\n" + "\n".join(f"- {p}" for p in pages)
+
+    ws = _get_service()
+    topic_dir = ws._topic_dir(topic_id)
+    pages = []
+    for f in topic_dir.rglob("*.md"):
+        # Drafts are not live pages and are never reported.
+        if "_drafts" in f.parts:
+            continue
+        content = f.read_text(errors="replace")
+        meta, _ = ws.parse_frontmatter(content)
+        # `or []`: a "tags" key present with a None value would otherwise make
+        # the membership test raise TypeError and abort the whole listing.
+        if tag in (meta.get("tags") or []):
+            pages.append(str(f.relative_to(topic_dir)))
+    if not pages:
+        return f"No pages with tag '{tag}' in '{topic_id}'."
+    return f"Pages tagged '{tag}' in '{topic_id}':\n" + "\n".join(f"- {p}" for p in sorted(pages))
+
+
+@mcp.tool(
+    name="wiki_graph",
+    description="Get the page link graph for a topic.
Returns nodes (pages) and edges (links between them).",
+)
+async def wiki_graph(topic_id: str) -> str:
+    """Return the topic's page link graph as a JSON string.
+
+    Args:
+        topic_id: Topic whose live pages are scanned.
+
+    Returns:
+        JSON with "topic", "nodes" (id, title, tags per page) and "edges"
+        (source page -> link target), or "Error: ..." on a non-200 remote
+        response.
+    """
+    if WIKI_SERVICE_URL:
+        client = _remote_client()
+        resp = client.get(f"/topics/{topic_id}/graph")
+        if resp.status_code != 200:
+            return f"Error: {resp.status_code} {resp.text}"
+        return json.dumps(resp.json())
+
+    ws = _get_service()
+    import re as _re
+    topic_dir = ws._topic_dir(topic_id)
+    nodes = []
+    edges = []
+    for f in topic_dir.rglob("*.md"):
+        # Drafts are not part of the published graph.
+        if "_drafts" in f.parts:
+            continue
+        rel = str(f.relative_to(topic_dir))
+        content = f.read_text(errors="replace")
+        meta, body = ws.parse_frontmatter(content)
+        # MULTILINE lets "^" match at any line start, so the first heading is
+        # found even when blank lines or text precede it in the body.
+        title_match = _re.search(r"^#\s+(.+)", body, _re.MULTILINE)
+        # Pages without a heading fall back to their relative path as title.
+        title = title_match.group(1) if title_match else rel
+        nodes.append({"id": rel, "title": title, "tags": meta.get("tags", [])})
+        for link in ws.extract_links(content):
+            edges.append({"source": rel, "target": link})
+    return json.dumps({"topic": topic_id, "nodes": nodes, "edges": edges})
+
+
+def run_mcp_server():
+    """Start the MCP server with configured transport."""
+    mcp.run(transport=MCP_TRANSPORT)
+
+
+if __name__ == "__main__":
+    run_mcp_server()
diff --git a/mcp/wiki_memory_tool/pyproject.toml b/mcp/wiki_memory_tool/pyproject.toml
new file mode 100644
index 00000000..2a6def21
--- /dev/null
+++ b/mcp/wiki_memory_tool/pyproject.toml
@@ -0,0 +1,17 @@
+[project]
+name = "wiki-memory-service"
+version = "0.2.0"
+description = "Multi-agent wiki memory service with git-backed storage and per-topic ACL"
+requires-python = ">=3.14"
+dependencies = [
+    "fastapi>=0.136.1",
+    "httpx>=0.28.1",
+    "mcp[cli]>=1.27.0",
+    "pyyaml>=6.0.3",
+    "uvicorn>=0.47.0",
+]
+
+[dependency-groups]
+dev = [
+    "pyright>=1.1.409",
+]
diff --git a/mcp/wiki_memory_tool/run_local.py b/mcp/wiki_memory_tool/run_local.py
new file mode 100644
index 00000000..364064bf
--- /dev/null
+++ b/mcp/wiki_memory_tool/run_local.py
@@ -0,0 +1,53 @@
+"""
+Local runner for Wiki Memory Service — no container, no k8s.
+Starts both the REST API (FastAPI on :8321) and MCP server (streamable-http on :8322).
+
+Usage: python run_local.py [--clean] [--remote]
+"""
+
+import os
+import shutil
+import sys
+import threading
+from pathlib import Path
+
+SCRIPT_DIR = Path(__file__).parent
+DATA_DIR = SCRIPT_DIR / "data" / "wiki"
+
+os.environ["WIKI_ROOT"] = str(DATA_DIR)
+os.environ["ACL_FILE"] = str(SCRIPT_DIR / "test_acl.yaml")
+os.environ["SPIFFE_TRUST_DOMAIN"] = "kagenti.example.com"
+os.environ.setdefault("JWT_SECRET_KEY", "local-dev-secret-do-not-use-in-production")
+os.environ["MCP_TRANSPORT"] = "streamable-http"
+os.environ["MCP_PORT"] = "8322"
+
+if "--clean" in sys.argv:
+    if DATA_DIR.exists():
+        shutil.rmtree(DATA_DIR)
+        print(f"Cleaned {DATA_DIR}")
+
+if "--remote" in sys.argv:
+    remote_url = os.environ.get("WIKI_REMOTE_URL", "")
+    if not remote_url:
+        print("ERROR: --remote requires WIKI_REMOTE_URL env var to be set")
+        sys.exit(1)
+    print(f"Remote: {'***@' if '@' in remote_url else ''}{remote_url.rsplit('@', 1)[-1]}")  # hide credentials (before '@'), show host
+else:
+    DATA_DIR.mkdir(parents=True, exist_ok=True)
+
+if __name__ == "__main__":
+    import uvicorn
+
+    print(f"Wiki root: {DATA_DIR}")
+    print(f"ACL file: {os.environ['ACL_FILE']}")
+    print("Starting wiki-memory-service REST API on http://localhost:8321")
+    print("Starting wiki-memory-service MCP server on http://localhost:8322/mcp")
+
+    def start_mcp():
+        from mcp_server import run_mcp_server
+        run_mcp_server()
+
+    mcp_thread = threading.Thread(target=start_mcp, daemon=True)
+    mcp_thread.start()
+
+    uvicorn.run("wiki_service:app", host="0.0.0.0", port=8321, reload=False)
diff --git a/mcp/wiki_memory_tool/skills/wiki-discovery-api/SKILL.md b/mcp/wiki_memory_tool/skills/wiki-discovery-api/SKILL.md
new file mode 100644
index 00000000..104429cf
--- /dev/null
+++ b/mcp/wiki_memory_tool/skills/wiki-discovery-api/SKILL.md
@@ -0,0 +1,106 @@
+# kwiki Discovery Agent (REST API)
+
+Write new knowledge to the wiki memory service via REST API after checking novelty.
+ +## Prerequisites + +The wiki service must be running on `http://localhost:8321`. + +## Authentication + +### Option A: GitHub OAuth Token + +``` +Authorization: Bearer <github-token> +``` + +### Option B: SPIFFE Headers (agent-to-agent) + +``` +X-Spiffe-Id: spiffe://kagenti.example.com/ns/topic-{topic_id}/sa/discovery-agent +``` + +## Procedure + +### 1. Get a Template (optional) + +```bash +curl -s http://localhost:8321/templates +curl -s http://localhost:8321/templates/paper-summary +``` + +### 2. Check Novelty First + +```bash +curl -s -X POST http://localhost:8321/topics/{topic_id}/check-novelty \ + -H "X-Spiffe-Id: spiffe://kagenti.example.com/ns/topic-{topic_id}/sa/discovery-agent" \ + -H "Content-Type: application/json" \ + -d '{"title": "Page Title", "abstract": "Brief summary"}' +``` + +If `"novel": false`, do NOT write. + +### 3. Write the Page + +```bash +curl -s -X POST http://localhost:8321/topics/{topic_id}/pages/{path} \ + -H "X-Spiffe-Id: spiffe://kagenti.example.com/ns/topic-{topic_id}/sa/discovery-agent" \ + -H "Content-Type: application/json" \ + -d '{"content": "---\ntags: [paper]\n---\n# Title\n\nContent...", "message": "commit message"}' +``` + +Response includes `suggested_links` to related pages. + +### 4. Write as Draft + +```bash +curl -s -X POST "http://localhost:8321/topics/{topic_id}/pages/{path}?draft=true" \ + -H "X-Spiffe-Id: spiffe://kagenti.example.com/ns/topic-{topic_id}/sa/discovery-agent" \ + -H "Content-Type: application/json" \ + -d '{"content": "# Draft\n\nPending review..."}' +``` + +### 5.
Approve/Reject Drafts (admin) + +```bash +# List drafts +curl -s http://localhost:8321/topics/{topic_id}/drafts \ + -H "Authorization: Bearer <github-token>" + +# Approve +curl -s -X POST http://localhost:8321/topics/{topic_id}/drafts/{path}/approve \ + -H "Authorization: Bearer <github-token>" + +# Reject +curl -s -X POST http://localhost:8321/topics/{topic_id}/drafts/{path}/reject \ + -H "Authorization: Bearer <github-token>" \ + -H "Content-Type: application/json" \ + -d '{"reason": "needs more references"}' +``` + +## Example Flow + +```bash +# Get template +curl -s http://localhost:8321/templates/paper-summary + +# Check novelty +curl -s -X POST http://localhost:8321/topics/ai/check-novelty \ + -H "X-Spiffe-Id: spiffe://kagenti.example.com/ns/topic-ai/sa/discovery-agent" \ + -H "Content-Type: application/json" \ + -d '{"title": "LoRA", "abstract": "Low-rank adaptation for efficient fine-tuning"}' + +# Write if novel (with tags) +curl -s -X POST http://localhost:8321/topics/ai/pages/lora.md \ + -H "X-Spiffe-Id: spiffe://kagenti.example.com/ns/topic-ai/sa/discovery-agent" \ + -H "Content-Type: application/json" \ + -d '{"content": "---\ntags: [paper, fine-tuning]\n---\n# LoRA\n\nLow-Rank Adaptation...", "message": "Add LoRA overview"}' +``` + +## Notes + +- The Discovery Agent SPIFFE ID must be in the topic's `writers` list +- Writing to a different topic returns 403 +- Response includes `suggested_links` for related content +- Use `?draft=true` to submit for review instead of publishing directly +- Use YAML frontmatter with tags for better discoverability diff --git a/mcp/wiki_memory_tool/skills/wiki-discovery-cli/SKILL.md b/mcp/wiki_memory_tool/skills/wiki-discovery-cli/SKILL.md new file mode 100644 index 00000000..dd11a582 --- /dev/null +++ b/mcp/wiki_memory_tool/skills/wiki-discovery-cli/SKILL.md @@ -0,0 +1,92 @@ +# kwiki Discovery Agent (CLI) + +Write new knowledge to the wiki memory service using the `wiki_cli.py` command-line tool.
+ +## Prerequisites + +The wiki service must be running on `http://localhost:8321`. Run from the `wiki_memory_tool/` directory. + +## Procedure + +### 1. Check Novelty First + +```bash +uv run python wiki_cli.py discover novelty {topic_id} "Page Title" "Brief summary of content" +``` + +Output: `NOVEL: ...` or `NOT NOVEL: ...` with similar pages listed. + +If NOT NOVEL, do not write. + +### 2. Get a Template (optional) + +```bash +uv run python wiki_cli.py discover template # List all +uv run python wiki_cli.py discover template paper-summary # Get specific +``` + +Available templates: `paper-summary`, `concept-overview`, `how-to-guide`, `comparison`. + +### 3. Write a Page + +```bash +uv run python wiki_cli.py discover write {topic_id} {path} --content "# Title\n\nContent" +uv run python wiki_cli.py discover write {topic_id} {path} --file content.md +``` + +Options: +- `--message "commit message"` — customize git commit message +- `--draft` — submit as draft for review instead of publishing immediately + +On success, the CLI shows suggested links to related pages. + +### 4. Submit as Draft (optional) + +```bash +uv run python wiki_cli.py discover write {topic_id} {path} --draft --content "..." +``` + +Drafts are stored pending review. An admin can approve or reject them. + +## Authentication + +### GitHub Login (recommended) + +```bash +uv run python wiki_cli.py --base-url https://wiki-service.example.com login +uv run python wiki_cli.py whoami +``` + +### SPIFFE Headers (agent mode) + +Without a cached token, the CLI uses simulated SPIFFE headers. 
+ +## Options + +| Flag | Default | Description | +|------|---------|-------------| +| `--base-url` | `http://localhost:8321` | Service URL | +| `--topic` | `ai` | Default topic (used for SPIFFE ID) | +| `--trust-domain` | `kagenti.example.com` | SPIFFE trust domain | + +## Example Flow + +```bash +# Get template +uv run python wiki_cli.py discover template paper-summary + +# Check novelty +uv run python wiki_cli.py discover novelty ai "LoRA" "Low-rank adaptation for fine-tuning" + +# Write if novel +uv run python wiki_cli.py discover write ai lora.md --content "# LoRA\n\n..." + +# Write as draft for review +uv run python wiki_cli.py discover write ai experimental.md --draft --file ./notes/draft.md +``` + +## Notes + +- Returns exit code 1 on errors (403 if agent lacks write access to topic) +- Content is committed to git and pushed to remote if configured +- Suggested links are shown after writing to help connect related content diff --git a/mcp/wiki_memory_tool/skills/wiki-discovery-mcp/SKILL.md b/mcp/wiki_memory_tool/skills/wiki-discovery-mcp/SKILL.md new file mode 100644 index 00000000..3b478454 --- /dev/null +++ b/mcp/wiki_memory_tool/skills/wiki-discovery-mcp/SKILL.md @@ -0,0 +1,75 @@ +# kwiki Discovery Agent (MCP) + +Write new knowledge to the wiki memory service after checking for novelty. + +## Prerequisites + +The wiki-memory MCP server must be registered and running (stdio or streamable-http on port 8322). + +## Procedure + +### 1. Get a Template (optional) + +Call `wiki_get_template` to get a structured starting point: +- No args: list available templates +- `template_id`: get specific template (paper-summary, concept-overview, how-to-guide, comparison) + +### 2. Check Novelty First + +Call `wiki_check_novelty` with: +- `topic_id`: target topic (e.g. "ai") +- `title`: title of the new content +- `abstract`: brief summary or first paragraph + +If `"novel": false`, do NOT write. + +### 3. 
Write the Page + +Call `wiki_write` with: +- `topic_id`: target topic +- `path`: filename (e.g. "transformers.md") +- `content`: full markdown content (use frontmatter with tags) +- `message`: commit message (optional) +- `draft`: set to `true` to submit for review instead of publishing + +Returns suggested links to related pages. + +### 4. Submit as Draft (optional) + +Call `wiki_write` with `draft=True` to submit for review: +``` +wiki_write(topic_id="ai", path="draft.md", content="...", draft=True) +→ "Draft: ai/draft.md" +``` + +### 5. Approve/Reject Drafts (admin) + +- `wiki_list_drafts(topic_id)` — list pending drafts +- `wiki_approve_draft(topic_id, path)` — approve and publish + +## Example Flow + +``` +wiki_get_template(template_id="paper-summary") → template markdown + +wiki_check_novelty(topic_id="ai", title="LoRA", abstract="Low-rank adaptation...") + → {"novel": true} + +wiki_write(topic_id="ai", path="lora.md", content="---\ntags: [paper, fine-tuning]\n---\n# LoRA\n\n...") + → "Written: ai/lora.md\nSuggested links:\n- ai/fine-tuning.md (score=0.08)" +``` + +## Content Guidelines + +- Use YAML frontmatter with tags: `---\ntags: [paper, topic]\n---` +- Include a top-level `# Title` heading +- Use `## Sections` for organization +- Link to related pages with `[[page-name]]` or `[text](page.md)` +- Keep pages focused on one concept + +## Notes + +- **Local mode** (default): No authentication needed +- **Remote mode** (`WIKI_SERVICE_URL` set): Uses cached GitHub token +- Always check novelty to avoid duplicates +- Use drafts when content needs human review before publishing diff --git a/mcp/wiki_memory_tool/skills/wiki-query-api/SKILL.md b/mcp/wiki_memory_tool/skills/wiki-query-api/SKILL.md new file mode 100644 index 00000000..59c7bbc4 --- /dev/null +++ b/mcp/wiki_memory_tool/skills/wiki-query-api/SKILL.md @@ -0,0 +1,124 @@ +# kwiki Query Agent (REST API) + +Query the wiki memory service via REST API on behalf of a user. 
+ +## Prerequisites + +The wiki service must be running on `http://localhost:8321`. + +## Authentication + +### Option A: GitHub OAuth Token + +``` +Authorization: Bearer <github-token> +``` + +### Option B: SPIFFE Headers (agent OBO user) + +``` +X-Spiffe-Id: spiffe://kagenti.example.com/ns/wiki-system/sa/query-agent +X-Original-Subject: alice@example.com +``` + +## Procedure + +### 1. List Topics + +```bash +curl -s http://localhost:8321/topics \ + -H "X-Spiffe-Id: spiffe://kagenti.example.com/ns/wiki-system/sa/query-agent" \ + -H "X-Original-Subject: alice@example.com" +``` + +### 2. Search a Topic + +```bash +curl -s -X POST http://localhost:8321/topics/{topic_id}/query \ + -H "X-Spiffe-Id: spiffe://kagenti.example.com/ns/wiki-system/sa/query-agent" \ + -H "X-Original-Subject: alice@example.com" \ + -H "Content-Type: application/json" \ + -d '{"query": "search terms", "limit": 10}' +``` + +### 3. Global Search (across all topics) + +```bash +curl -s -X POST http://localhost:8321/search \ + -H "X-Spiffe-Id: spiffe://kagenti.example.com/ns/wiki-system/sa/query-agent" \ + -H "X-Original-Subject: alice@example.com" \ + -H "Content-Type: application/json" \ + -d '{"query": "search terms", "limit": 10}' +``` + +### 4. Read a Page + +```bash +curl -s http://localhost:8321/topics/{topic_id}/pages/{path} \ + -H "X-Spiffe-Id: spiffe://kagenti.example.com/ns/wiki-system/sa/query-agent" \ + -H "X-Original-Subject: alice@example.com" +``` + +### 5. Activity Feed + +```bash +# Global +curl -s http://localhost:8321/activity \ + -H "X-Spiffe-Id: spiffe://kagenti.example.com/ns/wiki-system/sa/query-agent" \ + -H "X-Original-Subject: alice@example.com" + +# Topic-specific +curl -s "http://localhost:8321/topics/{topic_id}/activity?limit=10" \ + -H "X-Spiffe-Id: spiffe://kagenti.example.com/ns/wiki-system/sa/query-agent" \ + -H "X-Original-Subject: alice@example.com" +``` + +### 6.
Backlinks + +```bash +curl -s http://localhost:8321/topics/{topic_id}/backlinks/{path} \ + -H "X-Spiffe-Id: spiffe://kagenti.example.com/ns/wiki-system/sa/query-agent" \ + -H "X-Original-Subject: alice@example.com" +``` + +### 7. Tags + +```bash +# List tags +curl -s http://localhost:8321/topics/{topic_id}/tags \ + -H "X-Spiffe-Id: spiffe://kagenti.example.com/ns/wiki-system/sa/query-agent" \ + -H "X-Original-Subject: alice@example.com" + +# Pages by tag +curl -s http://localhost:8321/topics/{topic_id}/tags/{tag} \ + -H "X-Spiffe-Id: spiffe://kagenti.example.com/ns/wiki-system/sa/query-agent" \ + -H "X-Original-Subject: alice@example.com" +``` + +### 8. Graph + +```bash +curl -s http://localhost:8321/topics/{topic_id}/graph \ + -H "X-Spiffe-Id: spiffe://kagenti.example.com/ns/wiki-system/sa/query-agent" \ + -H "X-Original-Subject: alice@example.com" +``` + +### 9. Templates + +```bash +curl -s http://localhost:8321/templates +curl -s http://localhost:8321/templates/paper-summary +``` + +### 10. Drafts + +```bash +curl -s http://localhost:8321/topics/{topic_id}/drafts \ + -H "X-Spiffe-Id: spiffe://kagenti.example.com/ns/topic-ai/sa/discovery-agent" +``` + +## Notes + +- The Query Agent SPIFFE ID must be in the topic's `readers` list in the ACL +- Wildcard `*` in readers allows all authenticated users +- Returns 403 if identity lacks access diff --git a/mcp/wiki_memory_tool/skills/wiki-query-cli/SKILL.md b/mcp/wiki_memory_tool/skills/wiki-query-cli/SKILL.md new file mode 100644 index 00000000..0dd11dba --- /dev/null +++ b/mcp/wiki_memory_tool/skills/wiki-query-cli/SKILL.md @@ -0,0 +1,120 @@ +# kwiki Query Agent (CLI) + +Query the wiki memory service using the `wiki_cli.py` command-line tool. + +## Prerequisites + +The wiki service must be running on `http://localhost:8321`. Run from the `wiki_memory_tool/` directory. + +## Procedure + +### 1. List Topics + +```bash +uv run python wiki_cli.py query list-topics +``` + +### 2. 
List Pages in a Topic + +```bash +uv run python wiki_cli.py query list-pages {topic_id} +``` + +### 3. Search a Topic + +```bash +uv run python wiki_cli.py query search {topic_id} "search terms" +``` + +Optional: `--limit N` to control result count. + +### 4. Search All Topics (Global) + +```bash +uv run python wiki_cli.py query search-all "search terms" +``` + +### 5. Read a Page + +```bash +uv run python wiki_cli.py query read {topic_id} {path} +``` + +Returns page content with frontmatter metadata if present. + +### 6. Activity Feed + +```bash +uv run python wiki_cli.py query activity # Global +uv run python wiki_cli.py query activity {topic_id} # Topic-specific +``` + +### 7. Backlinks + +```bash +uv run python wiki_cli.py query backlinks {topic_id} {path} +``` + +### 8. Tags + +```bash +uv run python wiki_cli.py query tags {topic_id} # List all tags +uv run python wiki_cli.py query tag {topic_id} {tag_name} # Pages by tag +``` + +### 9. Page Graph + +```bash +uv run python wiki_cli.py query graph {topic_id} +``` + +### 10. Drafts + +```bash +uv run python wiki_cli.py query drafts {topic_id} +``` + +## Authentication + +### GitHub Login (recommended for users) + +```bash +uv run python wiki_cli.py --base-url https://wiki-service.example.com login +uv run python wiki_cli.py whoami +uv run python wiki_cli.py logout +``` + +Once logged in, the CLI uses your GitHub identity for all requests. Token is cached at `~/.wiki-memory/token.json`. + +### SPIFFE Headers (agent mode) + +Without a cached token, the CLI uses simulated Query Agent SPIFFE headers with OBO user. 
+ +## Options + +| Flag | Default | Description | +|------|---------|-------------| +| `--base-url` | `http://localhost:8321` | Service URL | +| `--user` | `alice@example.com` | User identity for OBO | +| `--trust-domain` | `kagenti.example.com` | SPIFFE trust domain | + +## Example Flow + +```bash +uv run python wiki_cli.py query list-topics +uv run python wiki_cli.py query search ai "attention mechanism" +uv run python wiki_cli.py query search-all "transformer architecture" +uv run python wiki_cli.py query read ai transformers.md +uv run python wiki_cli.py query activity ai +uv run python wiki_cli.py query backlinks ai transformers.md +uv run python wiki_cli.py query tags ai +uv run python wiki_cli.py query tag ai paper +uv run python wiki_cli.py query graph ai +uv run python wiki_cli.py query drafts ai +``` + +## Notes + +- Authentication is handled automatically (Query Agent SPIFFE + user OBO headers) +- Change the user with `--user bob@example.com` +- Returns exit code 1 on errors (403 forbidden, 404 not found) diff --git a/mcp/wiki_memory_tool/skills/wiki-query-mcp/SKILL.md b/mcp/wiki_memory_tool/skills/wiki-query-mcp/SKILL.md new file mode 100644 index 00000000..0f27aeca --- /dev/null +++ b/mcp/wiki_memory_tool/skills/wiki-query-mcp/SKILL.md @@ -0,0 +1,63 @@ +# kwiki Query Agent (MCP) + +Query the wiki memory service to find and read information on behalf of a user. + +## Prerequisites + +The wiki-memory MCP server must be registered and running (stdio or streamable-http on port 8322). + +## Procedure + +1. **List available topics** — Call `wiki_list_topics` to see what topics exist and their page counts. + +2. **Search for information** — Call `wiki_query` with: + - `topic_id`: the topic to search (e.g. "ai", "security") + - `query`: natural language search terms + - `limit`: max results (default 10) + +3. **Search across all topics** — Call `wiki_search_all` with: + - `query`: search terms + - `limit`: max results (default 10) + +4. 
**Read specific pages** — Call `wiki_read` with: + - `topic_id`: the topic + - `path`: page filename (e.g. "transformers.md") + Returns content with frontmatter metadata. + +5. **Activity feed** — Call `wiki_activity` with: + - `topic_id`: (optional) specific topic, or empty for global + - `limit`: max entries (default 20) + +6. **Find backlinks** — Call `wiki_backlinks` with: + - `topic_id`: the topic + - `path`: page to find references to + +7. **Browse by tags** — Call `wiki_list_tags` with `topic_id`, or `wiki_pages_by_tag` with `topic_id` and `tag`. + +8. **Page graph** — Call `wiki_graph` with `topic_id` to get nodes and edges. + +9. **List drafts** — Call `wiki_list_drafts` with `topic_id`. + +10. **Get templates** — Call `wiki_get_template` with optional `template_id`. + +## Example Flow + +``` +wiki_list_topics → "ai (4 pages), security (0 pages)" +wiki_query(topic_id="ai", query="attention mechanism") → ranked results +wiki_search_all(query="transformer") → results across all topics +wiki_read(topic_id="ai", path="transformers.md") → content + frontmatter +wiki_activity(topic_id="ai") → recent commits +wiki_backlinks(topic_id="ai", path="transformers.md") → pages linking here +wiki_list_tags(topic_id="ai") → tags with counts +wiki_pages_by_tag(topic_id="ai", tag="paper") → pages with tag +wiki_graph(topic_id="ai") → {"nodes": [...], "edges": [...]} +wiki_list_drafts(topic_id="ai") → pending drafts +wiki_get_template(template_id="paper-summary") → template content +``` + +## Notes + +- **Local mode** (default): No authentication needed — MCP is a trusted local channel +- **Remote mode** (`WIKI_SERVICE_URL` set): Uses cached GitHub token from `~/.wiki-memory/token.json` +- Search uses TF-IDF ranking over markdown content diff --git a/mcp/wiki_memory_tool/test_acl.yaml b/mcp/wiki_memory_tool/test_acl.yaml new file mode 100644 index 00000000..4b4dcf83 --- /dev/null +++ b/mcp/wiki_memory_tool/test_acl.yaml @@ -0,0 +1,46 @@ +topics: + ai: + writers: + - 
"spiffe://kagenti.example.com/ns/topic-ai/sa/discovery-agent" + - "github:team:kaslomorg/ml-writers" + - "github:team:kaslomorg/platform-admins" + readers: + - "spiffe://kagenti.example.com/ns/wiki-system/sa/query-agent" + - "github:org:kaslomorg" + - "*" + admins: + - "github:user:aslom" + - "github:team:kaslomorg/platform-admins" + + security: + writers: + - "spiffe://kagenti.example.com/ns/topic-security/sa/discovery-agent" + - "github:team:kaslomorg/security-team" + - "github:team:kaslomorg/platform-admins" + readers: + - "spiffe://kagenti.example.com/ns/wiki-system/sa/query-agent" + - "github:team:kaslomorg/security-team" + - "github:org:kaslomorg" + admins: + - "github:user:aslom" + - "github:team:kaslomorg/platform-admins" + + ml: + writers: + - "spiffe://kagenti.example.com/ns/topic-ml/sa/discovery-agent" + - "github:team:kaslomorg/ml-team" + - "github:team:kaslomorg/ml-writers" + readers: + - "spiffe://kagenti.example.com/ns/wiki-system/sa/query-agent" + - "github:team:kaslomorg/ml-team" + - "github:org:kaslomorg" + admins: + - "github:user:aslom" + - "github:team:kaslomorg/platform-admins" + + _system: + writers: [] + readers: [] + admins: + - "github:user:aslom" + - "github:team:kaslomorg/platform-admins" diff --git a/mcp/wiki_memory_tool/test_agents.py b/mcp/wiki_memory_tool/test_agents.py new file mode 100644 index 00000000..4de5923b --- /dev/null +++ b/mcp/wiki_memory_tool/test_agents.py @@ -0,0 +1,239 @@ +""" +Simulated test agents for the Wiki Memory Service. + +Agent 1 — Discovery Agent (writer): writes wiki pages to the "ai" topic. +Agent 2 — Query Agent (reader): searches and reads pages on behalf of a user. 
+ +Usage: + # Local: + uv run python run_local.py --clean + uv run python test_agents.py + + # Against cluster (port-forward): + kubectl port-forward svc/wiki-memory-service 8321:8000 -n wiki-memory-service & + WIKI_SERVICE_URL=http://localhost:8321 uv run python test_agents.py + + # Against cluster (route): + WIKI_SERVICE_URL=https://wiki-memory-service-wiki-memory-service.apps.<cluster-domain> uv run python test_agents.py +""" + +import os +import sys +import time + +import httpx + +BASE = os.environ.get("WIKI_SERVICE_URL", "http://localhost:8321") + +DISCOVERY_AGENT_HEADERS = { + "X-Spiffe-Id": "spiffe://kagenti.example.com/ns/topic-ai/sa/discovery-agent", +} + +QUERY_AGENT_HEADERS = { + "X-Spiffe-Id": "spiffe://kagenti.example.com/ns/wiki-system/sa/query-agent", + "X-Original-Subject": "alice@example.com", +} + +PAGES = { + "transformers.md": { + "content": ( + "# Transformer Architecture\n\n" + "The transformer model uses self-attention mechanisms to process " + "sequential data in parallel. Key components include multi-head " + "attention, positional encoding, and feed-forward layers.\n\n" + "## Key Papers\n- Attention Is All You Need (Vaswani et al., 2017)\n" + "- BERT (Devlin et al., 2019)\n- GPT series (Radford et al.)\n" + ), + "message": "Add transformer architecture overview", + }, + "rag-patterns.md": { + "content": ( + "# RAG Patterns\n\n" + "Retrieval-Augmented Generation combines a retriever with a generator.\n\n" + "## Common Patterns\n" + "1. **Naive RAG** — embed, retrieve top-k, concatenate into prompt\n" + "2. **Agentic RAG** — agent decides when to retrieve\n" + "3. **Graph RAG** — knowledge graph enriched retrieval\n\n" + "## Chunking Strategies\n" + "- Fixed-size with overlap\n- Semantic splitting\n- Document-aware\n" + ), + "message": "Add RAG patterns documentation", + }, + "fine-tuning.md": { + "content": ( + "# Fine-Tuning Techniques\n\n" + "## Full Fine-Tuning\nUpdate all model parameters. 
Expensive.\n\n" + "## LoRA\nLow-Rank Adaptation freezes base weights and trains small " + "rank-decomposition matrices. Memory efficient.\n\n" + "## RLHF\nReinforcement Learning from Human Feedback aligns model " + "outputs with human preferences using a reward model.\n" + ), + "message": "Add fine-tuning techniques page", + }, + "evaluation.md": { + "content": ( + "# LLM Evaluation\n\n" + "## Metrics\n- Perplexity\n- BLEU/ROUGE (generation)\n" + "- Human preference ratings\n- Task-specific benchmarks\n\n" + "## Frameworks\n- lm-eval-harness\n- HELM\n- OpenCompass\n" + ), + "message": "Add evaluation methods page", + }, +} + + +def separator(title: str): + print(f"\n{'='*60}") + print(f" {title}") + print(f"{'='*60}\n") + + +def run_discovery_agent(client: httpx.Client): + separator("DISCOVERY AGENT — Writing pages to 'ai' topic") + + for page_name, page_data in PAGES.items(): + # Check novelty first + title = page_name.replace(".md", "").replace("-", " ").title() + novelty = client.post( + f"{BASE}/topics/ai/check-novelty", + json={"title": title, "abstract": page_data["content"][:100]}, + headers=DISCOVERY_AGENT_HEADERS, + ) + novelty_result = novelty.json() + print(f"[novelty] {page_name}: novel={novelty_result.get('novel')}") + + if not novelty_result.get("novel"): + print(" -> Skipping (similar content exists)") + continue + + # Write the page + resp = client.post( + f"{BASE}/topics/ai/pages/{page_name}", + json={"content": page_data["content"], "message": page_data["message"]}, + headers=DISCOVERY_AGENT_HEADERS, + ) + if resp.status_code == 200: + print(f" -> Written: {resp.json()['path']} by {resp.json()['author']}") + else: + print(f" -> FAILED ({resp.status_code}): {resp.text}") + + print("\nDiscovery Agent: done writing pages.") + + +def run_query_agent(client: httpx.Client): + separator("QUERY AGENT — Reading/searching 'ai' topic (on behalf of alice)") + + # List topics + print("[list topics]") + resp = client.get(f"{BASE}/topics", 
headers=QUERY_AGENT_HEADERS) + for topic in resp.json()["topics"]: + print(f" - {topic['topic_id']} ({topic['page_count']} pages)") + + # List pages in ai topic + print("\n[list pages in 'ai']") + resp = client.get(f"{BASE}/topics/ai/pages", headers=QUERY_AGENT_HEADERS) + for page in resp.json()["pages"]: + print(f" - {page}") + + # Search for "attention transformer" + print("\n[search 'ai' for 'attention transformer']") + resp = client.post( + f"{BASE}/topics/ai/query", + json={"query": "attention transformer", "limit": 5}, + headers=QUERY_AGENT_HEADERS, + ) + for r in resp.json()["results"]: + print(f" - {r['path']} (score={r['score']})") + if r["snippet"]: + print(f" snippet: {r['snippet'][:80]}...") + + # Search for "LoRA fine-tuning" + print("\n[search 'ai' for 'LoRA fine-tuning']") + resp = client.post( + f"{BASE}/topics/ai/query", + json={"query": "LoRA fine-tuning", "limit": 3}, + headers=QUERY_AGENT_HEADERS, + ) + for r in resp.json()["results"]: + print(f" - {r['path']} (score={r['score']})") + + # Read a specific page + print("\n[read page 'rag-patterns.md']") + resp = client.get( + f"{BASE}/topics/ai/pages/rag-patterns.md", + headers=QUERY_AGENT_HEADERS, + ) + content = resp.json()["content"] + print(f" First 150 chars: {content[:150]}...") + + print("\nQuery Agent: done reading/searching.") + + +def run_acl_test(client: httpx.Client): + separator("ACL TEST — Query Agent attempts unauthorized write (expect 403)") + + resp = client.post( + f"{BASE}/topics/ai/pages/hack.md", + json={"content": "# Unauthorized write attempt", "message": "should fail"}, + headers=QUERY_AGENT_HEADERS, + ) + if resp.status_code == 403: + print(f" PASS: Got expected 403 — {resp.json()['detail']}") + else: + print(f" FAIL: Expected 403 but got {resp.status_code}") + + # Discovery agent (scoped to 'ai') tries to write to the security topic (no write access) + print("\n[Discovery Agent (ai) tries to write to 'security' topic — expect 403]") + resp = client.post( + f"{BASE}/topics/security/pages/exploit.md", +
json={"content": "# Cross-topic write", "message": "should fail"}, + headers=DISCOVERY_AGENT_HEADERS, + ) + if resp.status_code == 403: + print(f" PASS: Got expected 403 — {resp.json()['detail']}") + else: + print(f" FAIL: Expected 403 but got {resp.status_code}") + + +def wait_for_service(client: httpx.Client, retries: int = 10): + print(f"Connecting to wiki service at: {BASE}") + for i in range(retries): + try: + resp = client.get(f"{BASE}/healthz") + if resp.status_code == 200: + print(f"Service is up: {resp.json()}") + return + except (httpx.ConnectError, httpx.ReadError): + pass + print(f"Waiting for service... ({i+1}/{retries})") + time.sleep(2) + print("ERROR: Service not reachable at", BASE) + sys.exit(1) + + +def main(): + global BASE + for arg in sys.argv[1:]: + if arg.startswith("--url="): + BASE = arg.split("=", 1)[1] + elif arg.startswith("http"): + BASE = arg + + insecure = os.environ.get("WIKI_INSECURE_TLS", "1") == "1" + client = httpx.Client(timeout=30, verify=not insecure) + + wait_for_service(client) + run_discovery_agent(client) + run_query_agent(client) + run_acl_test(client) + + separator("ALL TESTS COMPLETE") + print("Summary:") + print(f" - Service URL: {BASE}") + print(" - Discovery Agent wrote 4 pages to 'ai' topic") + print(" - Query Agent listed, searched, and read pages") + print(" - ACL enforcement blocked unauthorized writes") + + +if __name__ == "__main__": + main() diff --git a/mcp/wiki_memory_tool/test_user_access.py b/mcp/wiki_memory_tool/test_user_access.py new file mode 100644 index 00000000..590777b7 --- /dev/null +++ b/mcp/wiki_memory_tool/test_user_access.py @@ -0,0 +1,244 @@ +""" +Test wiki access as logged-in user (bob-kagenti) against the live server. + +Tests both query and discovery operations using the user's OAuth token. 
+ +Usage: + kwiki login # login first + uv run python test_user_access.py + # or with explicit URL: + uv run python test_user_access.py --url=https://wiki-memory-service-team1.apps.ykt1.hcp.res.ibm.com +""" + +import json +import os +import sys +from pathlib import Path + +import httpx + +TOKEN_FILE = Path.home() / ".wiki-memory" / "token.json" + + +def load_token() -> tuple[str, str]: + if not TOKEN_FILE.exists(): + print("ERROR: Not logged in. Run: kwiki login", file=sys.stderr) + sys.exit(1) + data = json.loads(TOKEN_FILE.read_text()) + return data["token"], data.get("base_url", "http://localhost:8321") + + +def separator(title: str): + print(f"\n{'='*60}") + print(f" {title}") + print(f"{'='*60}\n") + + +def test_whoami(client: httpx.Client, base: str, headers: dict): + separator("WHOAMI — Verify identity") + resp = client.get(f"{base}/auth/whoami", headers=headers) + if resp.status_code != 200: + print(f"FAIL: whoami returned {resp.status_code}: {resp.text}", file=sys.stderr) + sys.exit(1) + data = resp.json() + print(f" Subject: {data['subject']}") + print(f" Kind: {data['kind']}") + print(f" Groups: {data.get('groups', [])}") + print(" PASS") + + +def test_query_operations(client: httpx.Client, base: str, headers: dict): + separator("QUERY — List topics, pages, search") + + print("[list topics]") + resp = client.get(f"{base}/topics", headers=headers) + if resp.status_code != 200: + print(f" FAIL ({resp.status_code}): {resp.text}") + return + topics = resp.json()["topics"] + for t in topics: + print(f" - {t['topic_id']} ({t['page_count']} pages)") + print(" PASS") + + if not topics: + print("\n No topics found — skipping page/search tests") + return + + topic_id = topics[0]["topic_id"] + + print(f"\n[list pages in '{topic_id}']") + resp = client.get(f"{base}/topics/{topic_id}/pages", headers=headers) + if resp.status_code == 200: + pages = resp.json()["pages"] + for p in pages[:10]: + print(f" - {p}") + if len(pages) > 10: + print(f" ... 
and {len(pages) - 10} more") + print(" PASS") + else: + print(f" FAIL ({resp.status_code}): {resp.text}") + + print(f"\n[search '{topic_id}' for 'transformer']") + resp = client.post( + f"{base}/topics/{topic_id}/query", + json={"query": "transformer", "limit": 3}, + headers=headers, + ) + if resp.status_code == 200: + results = resp.json()["results"] + for r in results: + print(f" - {r['path']} (score={r['score']:.3f})") + print(f" PASS ({len(results)} results)") + else: + print(f" FAIL ({resp.status_code}): {resp.text}") + + if pages: + page_path = pages[0] + print(f"\n[read page '{page_path}']") + resp = client.get(f"{base}/topics/{topic_id}/pages/{page_path}", headers=headers) + if resp.status_code == 200: + content = resp.json()["content"] + print(f" {content[:120]}...") + print(" PASS") + else: + print(f" FAIL ({resp.status_code}): {resp.text}") + + print(f"\n[activity for '{topic_id}']") + resp = client.get(f"{base}/topics/{topic_id}/activity?limit=5", headers=headers) + if resp.status_code == 200: + entries = resp.json().get("entries", []) + for e in entries[:3]: + print(f" - {e.get('message', '?')} by {e.get('author', '?')}") + print(f" PASS ({len(entries)} entries)") + else: + print(f" FAIL ({resp.status_code}): {resp.text}") + + print(f"\n[tags for '{topic_id}']") + resp = client.get(f"{base}/topics/{topic_id}/tags", headers=headers) + if resp.status_code == 200: + tags = resp.json().get("tags", {}) + for tag, count in list(tags.items())[:5]: + print(f" - {tag}: {count} pages") + print(f" PASS ({len(tags)} tags)") + else: + print(f" FAIL ({resp.status_code}): {resp.text}") + + print(f"\n[graph for '{topic_id}']") + resp = client.get(f"{base}/topics/{topic_id}/graph", headers=headers) + if resp.status_code == 200: + graph = resp.json() + print(f" nodes: {len(graph.get('nodes', []))}, edges: {len(graph.get('edges', []))}") + print(" PASS") + else: + print(f" FAIL ({resp.status_code}): {resp.text}") + + print("\n[global search for 'attention']") + resp = 
client.post( + f"{base}/search", + json={"query": "attention", "limit": 5}, + headers=headers, + ) + if resp.status_code == 200: + results = resp.json()["results"] + for r in results[:3]: + print(f" - [{r.get('topic_id')}] {r['path']} (score={r['score']:.3f})") + print(f" PASS ({len(results)} results)") + else: + print(f" FAIL ({resp.status_code}): {resp.text}") + + +def test_discovery_operations(client: httpx.Client, base: str, headers: dict): + separator("DISCOVERY — Templates, write, novelty check") + + print("[list templates]") + resp = client.get(f"{base}/templates", headers=headers) + if resp.status_code == 200: + templates = resp.json()["templates"] + for t in templates: + print(f" - {t['id']}: {t['description']}") + print(" PASS") + else: + print(f" FAIL ({resp.status_code}): {resp.text}") + + print("\n[get template 'paper-summary']") + resp = client.get(f"{base}/templates/paper-summary", headers=headers) + if resp.status_code == 200: + tmpl = resp.json() + print(f" {tmpl['content'][:100]}...") + print(" PASS") + else: + print(f" FAIL ({resp.status_code}): {resp.text}") + + print("\n[check novelty in 'ai']") + resp = client.post( + f"{base}/topics/ai/check-novelty", + json={"title": "Test User Access Page", "abstract": "A test page for verifying user access"}, + headers=headers, + ) + if resp.status_code == 200: + data = resp.json() + print(f" novel: {data.get('novel')}") + print(" PASS") + elif resp.status_code == 403: + print(" No write access (403) — expected if user is reader only") + print(" PASS (access control working)") + else: + print(f" FAIL ({resp.status_code}): {resp.text}") + + print("\n[write test page as draft in 'ai']") + test_content = "---\ntags: [test]\n---\n# Access Test\n\nThis page verifies write access." 
+ resp = client.post( + f"{base}/topics/ai/pages/test-user-access.md?draft=true", + json={"content": test_content, "message": "Test user write access"}, + headers=headers, + ) + if resp.status_code == 200: + data = resp.json() + print(f" Written as draft: {data.get('path')}") + print(" PASS") + elif resp.status_code == 403: + print(" No write access (403) — expected if user is reader only") + print(" PASS (access control working)") + else: + print(f" FAIL ({resp.status_code}): {resp.text}") + + +def test_renew(client: httpx.Client, base: str, headers: dict): + separator("RENEW — Token renewal") + resp = client.post(f"{base}/auth/renew", headers=headers) + if resp.status_code == 200: + data = resp.json() + print(f" Renewed for: {data['github_login']}") + print(f" Expires in: {data['expires_in'] // 3600}h") + print(" PASS") + else: + print(f" FAIL ({resp.status_code}): {resp.text}") + + +def main(): + token, base = load_token() + + for arg in sys.argv[1:]: + if arg.startswith("--url="): + base = arg.split("=", 1)[1] + elif arg.startswith("http"): + base = arg + + headers = {"Authorization": f"Bearer {token}"} + insecure = os.environ.get("WIKI_INSECURE_TLS", "1") == "1" + client = httpx.Client(timeout=30, verify=not insecure) + + print("Testing wiki access as logged-in user") + print(f"Server: {base}") + + test_whoami(client, base, headers) + test_query_operations(client, base, headers) + test_discovery_operations(client, base, headers) + test_renew(client, base, headers) + + separator("ALL TESTS COMPLETE") + print(" All query and discovery operations verified.") + + +if __name__ == "__main__": + main() diff --git a/mcp/wiki_memory_tool/test_user_skills.py b/mcp/wiki_memory_tool/test_user_skills.py new file mode 100644 index 00000000..4977f6e8 --- /dev/null +++ b/mcp/wiki_memory_tool/test_user_skills.py @@ -0,0 +1,360 @@ +""" +Test all wiki skills as user bob-kagenti using FastAPI TestClient. 
+ +Verifies query and discovery operations work correctly with OAuth user identity. + +Run with: uv run --with pytest python -m pytest test_user_skills.py -v +""" + +import os +import shutil +import sys +import tempfile +import time + +import pytest + +os.environ["WIKI_ROOT"] = tempfile.mkdtemp() +os.environ["ACL_FILE"] = os.path.join(os.path.dirname(__file__), "test_acl.yaml") +os.environ["JWT_SECRET_KEY"] = "test-secret-key" +os.environ["JWT_EXPIRY_HOURS"] = "8" + +# Use httpx for TestClient compatibility +from starlette.testclient import TestClient # noqa: E402 +from wiki_service import WIKI_ROOT, _ensure_repo, _sign_jwt, app # noqa: E402 + +client = TestClient(app) + +USER_LOGIN = "bob-kagenti" +USER_TOKEN = _sign_jwt({ + "sub": f"github:{USER_LOGIN}", + "github_login": USER_LOGIN, + "email": "bob@kagenti.io", + "groups": ["kagenti/ml-team"], + "iss": "wiki-memory-service", + "iat": int(time.time()), + "exp": int(time.time()) + 8 * 3600, +}) + +AUTH_HEADERS = {"Authorization": f"Bearer {USER_TOKEN}"} + +DISCOVERY_HEADERS = { + "X-Spiffe-Id": "spiffe://kagenti.example.com/ns/topic-ai/sa/discovery-agent", +} + +@pytest.fixture(autouse=True, scope="session") +def seed_wiki_data(): + """Seed test pages once before the entire test session.""" + setup_test_data() + + +def separator(title: str): + print(f"\n{'='*60}") + print(f" {title}") + print(f"{'='*60}\n") + + +def setup_test_data(): + """Write test pages using discovery agent so user can query them.""" + _ensure_repo() + pages = { + "transformers.md": "---\ntags: [paper, architecture]\n---\n# Transformer Architecture\n\nSelf-attention mechanisms for parallel sequence processing.\n\n## Links\n- See also [rag-patterns.md](rag-patterns.md)\n", + "rag-patterns.md": "---\ntags: [paper, retrieval]\n---\n# RAG Patterns\n\nRetrieval-Augmented Generation combines retrievers with generators.\n\n## Links\n- Based on [[transformers]]\n", + "fine-tuning.md": "---\ntags: [technique, training]\n---\n# Fine-Tuning\n\nLoRA, 
QLoRA, and full fine-tuning approaches.\n", + "evaluation.md": "---\ntags: [metrics]\n---\n# LLM Evaluation\n\nPerplexity, BLEU, ROUGE, and human preference ratings.\n", + } + for path, content in pages.items(): + resp = client.post( + f"/topics/ai/pages/{path}", + json={"content": content, "message": f"Add {path}"}, + headers=DISCOVERY_HEADERS, + ) + assert resp.status_code == 200, f"Setup failed for {path}: {resp.text}" + + +def test_whoami(): + separator("TEST: whoami") + resp = client.get("/auth/whoami", headers=AUTH_HEADERS) + assert resp.status_code == 200 + data = resp.json() + assert data["subject"] == f"github:{USER_LOGIN}" + assert data["kind"] == "user" + print(f" User: {data['subject']}") + print(f" Kind: {data['kind']}") + print(f" Groups: {data['groups']}") + print(" PASS") + + +def test_query_list_topics(): + separator("TEST: query list-topics") + resp = client.get("/topics", headers=AUTH_HEADERS) + assert resp.status_code == 200 + topics = resp.json()["topics"] + print(f" Topics: {[t['topic_id'] for t in topics]}") + assert any(t["topic_id"] == "ai" for t in topics) + print(" PASS") + + +def test_query_list_pages(): + separator("TEST: query list-pages") + resp = client.get("/topics/ai/pages", headers=AUTH_HEADERS) + assert resp.status_code == 200 + pages = resp.json()["pages"] + print(f" Pages: {pages}") + assert "transformers.md" in pages + print(" PASS") + + +def test_query_search(): + separator("TEST: query search") + resp = client.post( + "/topics/ai/query", + json={"query": "attention transformer", "limit": 3}, + headers=AUTH_HEADERS, + ) + assert resp.status_code == 200 + results = resp.json()["results"] + print(f" Results: {len(results)}") + for r in results: + print(f" - {r['path']} (score={r['score']:.3f})") + assert len(results) > 0 + print(" PASS") + + +def test_query_search_all(): + separator("TEST: query search-all (global)") + resp = client.post( + "/search", + json={"query": "retrieval", "limit": 5}, + headers=AUTH_HEADERS, + ) + 
assert resp.status_code == 200 + results = resp.json()["results"] + print(f" Results: {len(results)}") + for r in results: + print(f" - [{r.get('topic_id')}] {r['path']} (score={r['score']:.3f})") + assert len(results) > 0 + print(" PASS") + + +def test_query_read(): + separator("TEST: query read") + resp = client.get("/topics/ai/pages/transformers.md", headers=AUTH_HEADERS) + assert resp.status_code == 200 + data = resp.json() + assert "Transformer" in data["content"] + assert data.get("frontmatter", {}).get("tags") == ["paper", "architecture"] + print(f" Content: {data['content'][:80]}...") + print(f" Frontmatter: {data['frontmatter']}") + print(" PASS") + + +def test_query_activity(): + separator("TEST: query activity") + resp = client.get("/topics/ai/activity?limit=5", headers=AUTH_HEADERS) + assert resp.status_code == 200 + entries = resp.json()["entries"] + print(f" Entries: {len(entries)}") + for e in entries[:3]: + print(f" - {e['message']} by {e['author']}") + assert len(entries) > 0 + print(" PASS") + + +def test_query_backlinks(): + separator("TEST: query backlinks") + resp = client.get("/topics/ai/backlinks/transformers.md", headers=AUTH_HEADERS) + assert resp.status_code == 200 + backlinks = resp.json()["backlinks"] + print(f" Backlinks to transformers.md: {backlinks}") + assert "rag-patterns.md" in backlinks + print(" PASS") + + +def test_query_tags(): + separator("TEST: query tags") + resp = client.get("/topics/ai/tags", headers=AUTH_HEADERS) + assert resp.status_code == 200 + tags = resp.json()["tags"] + print(f" Tags: {tags}") + assert "paper" in tags + print(" PASS") + + print("\n [pages with tag 'paper']") + resp = client.get("/topics/ai/tags/paper", headers=AUTH_HEADERS) + assert resp.status_code == 200 + pages = resp.json()["pages"] + print(f" Pages: {pages}") + assert len(pages) >= 2 + print(" PASS") + + +def test_query_graph(): + separator("TEST: query graph") + resp = client.get("/topics/ai/graph", headers=AUTH_HEADERS) + assert 
resp.status_code == 200 + graph = resp.json() + nodes = graph["nodes"] + edges = graph["edges"] + print(f" Nodes: {len(nodes)}, Edges: {len(edges)}") + for n in nodes[:3]: + print(f" - {n['id']} (tags={n.get('tags', [])})") + for e in edges[:3]: + print(f" - {e['source']} -> {e['target']}") + assert len(nodes) >= 4 + assert len(edges) >= 1 + print(" PASS") + + +def test_discovery_templates(): + separator("TEST: discovery templates") + resp = client.get("/templates", headers=AUTH_HEADERS) + assert resp.status_code == 200 + templates = resp.json()["templates"] + print(f" Templates: {[t['id'] for t in templates]}") + assert len(templates) >= 4 + print(" PASS") + + print("\n [get paper-summary template]") + resp = client.get("/templates/paper-summary", headers=AUTH_HEADERS) + assert resp.status_code == 200 + print(f" Content: {resp.json()['content'][:100]}...") + print(" PASS") + + +def test_discovery_novelty(): + separator("TEST: discovery novelty check") + resp = client.post( + "/topics/ai/check-novelty", + json={"title": "Transformer Architecture", "abstract": "Self-attention for sequences"}, + headers=AUTH_HEADERS, + ) + assert resp.status_code == 200 + data = resp.json() + print(f" Novel: {data['novel']} (existing page should not be novel)") + assert data["novel"] is False + print(" PASS") + + resp = client.post( + "/topics/ai/check-novelty", + json={"title": "Quantum Computing for ML", "abstract": "Quantum advantage in optimization"}, + headers=AUTH_HEADERS, + ) + assert resp.status_code == 200 + data = resp.json() + print(f" Novel: {data['novel']} (new topic should be novel)") + assert data["novel"] is True + print(" PASS") + + +def test_discovery_write_draft(): + separator("TEST: discovery write (draft mode)") + content = "---\ntags: [test]\n---\n# Test Draft\n\nWritten by discovery agent as draft." 
+ resp = client.post( + "/topics/ai/pages/test-draft.md?draft=true", + json={"content": content, "message": "Test draft write"}, + headers=DISCOVERY_HEADERS, + ) + assert resp.status_code == 200, f"Got {resp.status_code}: {resp.text}" + data = resp.json() + print(f" Path: {data['path']}") + print(" PASS") + + print("\n [list drafts as discovery agent (write access required)]") + resp = client.get("/topics/ai/drafts", headers=DISCOVERY_HEADERS) + assert resp.status_code == 200 + drafts = resp.json()["drafts"] + print(f" Drafts: {drafts}") + assert len(drafts) >= 1 + print(" PASS") + + +def test_discovery_write_with_suggested_links(): + separator("TEST: discovery write (suggested links)") + content = "---\ntags: [technique]\n---\n# Attention Mechanisms\n\nMulti-head attention in transformers." + resp = client.post( + "/topics/ai/pages/attention.md", + json={"content": content, "message": "Add attention page"}, + headers=DISCOVERY_HEADERS, + ) + assert resp.status_code == 200, f"Got {resp.status_code}: {resp.text}" + data = resp.json() + links = data.get("suggested_links", []) + print(f" Suggested links: {links}") + print(" PASS") + + +def test_user_write_blocked(): + separator("TEST: user write blocked (ACL enforcement)") + resp = client.post( + "/topics/ai/pages/unauthorized.md", + json={"content": "# Should fail", "message": "unauthorized"}, + headers=AUTH_HEADERS, + ) + assert resp.status_code == 403, f"Expected 403 but got {resp.status_code}" + print(f" Correctly blocked: {resp.json()['detail']}") + print(" PASS") + + +def test_token_renew(): + separator("TEST: token renew") + resp = client.post("/auth/renew", headers=AUTH_HEADERS) + assert resp.status_code == 200 + data = resp.json() + print(f" Renewed for: {data['github_login']}") + print(f" Expires in: {data['expires_in'] // 3600}h") + assert data["github_login"] == USER_LOGIN + print(" PASS") + + +def main(): + print(f"Testing wiki skills as user: {USER_LOGIN}") + print(f"Wiki root: {WIKI_ROOT}") + + 
setup_test_data() + + tests = [ + test_whoami, + test_query_list_topics, + test_query_list_pages, + test_query_search, + test_query_search_all, + test_query_read, + test_query_activity, + test_query_backlinks, + test_query_tags, + test_query_graph, + test_discovery_templates, + test_discovery_novelty, + test_discovery_write_draft, + test_discovery_write_with_suggested_links, + test_user_write_blocked, + test_token_renew, + ] + + passed = 0 + failed = 0 + for test in tests: + try: + test() + passed += 1 + except AssertionError as e: + print(f" FAIL: {e}") + failed += 1 + except Exception as e: + print(f" ERROR: {type(e).__name__}: {e}") + failed += 1 + + separator("RESULTS") + print(f" Passed: {passed}/{passed + failed}") + if failed: + print(f" Failed: {failed}") + sys.exit(1) + print(" All wiki skills verified for user bob-kagenti!") + + # Cleanup + shutil.rmtree(WIKI_ROOT, ignore_errors=True) + + +if __name__ == "__main__": + main() diff --git a/mcp/wiki_memory_tool/wiki_cli.py b/mcp/wiki_memory_tool/wiki_cli.py new file mode 100644 index 00000000..cc515982 --- /dev/null +++ b/mcp/wiki_memory_tool/wiki_cli.py @@ -0,0 +1,753 @@ +""" +Wiki Memory Service CLI — testing tool for Discovery and Query agent operations. + +Authenticates via SPIFFE headers (simulated), GitHub OAuth token, or user token. 
+ +Usage: + # GitHub login (device flow): + uv run python wiki_cli.py login + uv run python wiki_cli.py whoami + uv run python wiki_cli.py logout + + # As Discovery Agent (writer): + uv run python wiki_cli.py discover write ai transformers.md --file content.md + uv run python wiki_cli.py discover write ai transformers.md --content "# Title\nBody" + uv run python wiki_cli.py discover novelty ai "Transformers" "Attention mechanisms paper" + + # As Query Agent (reader, on behalf of user): + uv run python wiki_cli.py query list-topics + uv run python wiki_cli.py query list-pages ai + uv run python wiki_cli.py query search ai "attention mechanism" + uv run python wiki_cli.py query read ai transformers.md + + # Override defaults: + uv run python wiki_cli.py --base-url http://localhost:8321 --agent discovery --topic ai discover write ... +""" + +import argparse +import base64 +import json +import os +import sys +import time +from pathlib import Path + +import httpx + +DEFAULT_BASE_URL = "http://localhost:8321" +DEFAULT_TRUST_DOMAIN = "kagenti.example.com" +TOKEN_DIR = Path.home() / ".wiki-memory" +TOKEN_FILE = TOKEN_DIR / "token.json" + +DEVICE_POLL_INTERVAL = 5 + + +def load_cached_token() -> dict | None: + if TOKEN_FILE.exists(): + data = json.loads(TOKEN_FILE.read_text()) + if data.get("token"): + return data + return None + + +def save_token(token: str, base_url: str): + TOKEN_DIR.mkdir(parents=True, exist_ok=True) + TOKEN_FILE.write_text(json.dumps({"token": token, "base_url": base_url})) + TOKEN_FILE.chmod(0o600) + + +def delete_token(): + if TOKEN_FILE.exists(): + TOKEN_FILE.unlink() + + +def make_auth_headers() -> dict | None: + cached = load_cached_token() + if cached: + return {"Authorization": f"Bearer {cached['token']}"} + return None + + +def cmd_login(client: httpx.Client, base: str, args): + resp = client.post(f"{base}/auth/github/device") + if resp.status_code != 200: + print(f"ERROR: Failed to start device flow: {resp.text}", file=sys.stderr) + 
sys.exit(1) + + data = resp.json() + user_code = data["user_code"] + verification_uri = data["verification_uri"] + expires_in = data.get("expires_in", 900) + + print("\n" + "=" * 50) + print(" GitHub Device Authorization") + print("=" * 50) + print("\n 1. Open this URL in your browser:\n") + print(f" {verification_uri}\n") + print(" 2. Enter this code:\n") + print(f" {user_code}") + print(f"\n Code expires in {expires_in // 60} minutes.") + print("=" * 50) + print("\nWaiting for authorization...", end="", flush=True) + + device_code = data["device_code"] + interval = data.get("interval", DEVICE_POLL_INTERVAL) + + while True: + time.sleep(interval) + print(".", end="", flush=True) + try: + poll_resp = client.post( + f"{base}/auth/github/device/token", + json={"device_code": device_code}, + ) + except httpx.RequestError as e: + print(f"\n\nERROR: Connection failed: {e}", file=sys.stderr) + sys.exit(1) + + if poll_resp.status_code == 200: + token_data = poll_resp.json() + save_token(token_data["token"], base) + login = token_data.get("github_login", token_data.get("login", "unknown")) + groups = token_data.get("groups", []) + print(f"\n\nLogged in as {login}") + if groups: + print(f"Groups: {', '.join(groups)}") + return + elif poll_resp.status_code == 202: + poll_data = poll_resp.json() + error = poll_data.get("error", "") + if error == "slow_down": + interval += 5 + elif error == "expired_token": + print("\n\nDevice code expired. 
Please run login again.", file=sys.stderr) + sys.exit(1) + elif error == "access_denied": + print("\n\nAuthorization denied by user.", file=sys.stderr) + sys.exit(1) + continue + else: + try: + detail = poll_resp.json().get("detail", poll_resp.text) + except Exception: + detail = poll_resp.text + print(f"\n\nERROR: {detail}", file=sys.stderr) + sys.exit(1) + + +def cmd_logout(args): + if TOKEN_FILE.exists(): + delete_token() + print("Logged out (token removed).") + else: + print("Not logged in.") + + +def _decode_jwt_payload(token: str) -> dict | None: + """Decode JWT payload without signature verification.""" + parts = token.split(".") + if len(parts) != 3: + return None + try: + return json.loads(base64.urlsafe_b64decode(parts[1] + "==")) + except Exception: + return None + + +def _resolve_display_name(subject: str) -> str | None: + """Extract a human-readable name from the subject. Returns None if unresolvable.""" + if subject.startswith("github:"): + return subject.removeprefix("github:") + if subject.startswith("spiffe://"): + return subject + payload = _decode_jwt_payload(subject) + if payload: + login = payload.get("github_login") or payload.get("sub", "").removeprefix("github:") + if login: + return login + return None + + +def cmd_whoami(client: httpx.Client, base: str, args): + headers = make_auth_headers() + if not headers: + print("Not logged in. 
Run: kwiki login", file=sys.stderr) + sys.exit(1) + + cached = load_cached_token() + token = cached["token"] if cached else "" + payload = _decode_jwt_payload(token) + + resp = client.get(f"{base}/auth/whoami", headers=headers) + if resp.status_code == 200: + data = resp.json() + subject = data.get("subject", "") + display = _resolve_display_name(subject) + if not display and payload: + display = payload.get("github_login") or payload.get("sub", "").removeprefix("github:") + if not display: + print("ERROR: Could not resolve identity from token.", file=sys.stderr) + print("Try logging in again: kwiki login", file=sys.stderr) + sys.exit(1) + exp = payload.get("exp") if payload else None + if exp and int(exp - time.time()) <= 0: + print("ERROR: Token expired.", file=sys.stderr) + print("Renew token: kwiki renew", file=sys.stderr) + sys.exit(1) + if exp: + remaining = int(exp - time.time()) + if remaining < 3600: + status = f"expires in {remaining // 60}m" + else: + status = f"expires in {remaining // 3600}h {(remaining % 3600) // 60}m" + else: + status = "active" + print(f" User: {display}") + print(f" Status: {status}") + groups = data.get("groups", []) + if groups: + print(f" Groups: {', '.join(groups)}") + else: + print(" Groups: (none)") + print(f" Server: {base}") + + # Verify org membership + org_groups = [g for g in groups if g.startswith("kaslomorg/")] + if not org_groups: + print("") + print(" WARNING: No kaslomorg teams in token.", file=sys.stderr) + print(" If you are a member of kaslomorg teams, re-login to refresh:", file=sys.stderr) + print(" kwiki login", file=sys.stderr) + + # Fetch permissions + perm_resp = client.get(f"{base}/auth/permissions", headers=headers) + if perm_resp.status_code == 200: + perms = perm_resp.json().get("permissions", {}) + if perms: + print(" Access:") + for topic, access_map in sorted(perms.items()): + roles = list(access_map.keys()) + print(f" {topic}: {', '.join(roles)}") + for role, reason in access_map.items(): + print(f" 
def make_discovery_headers(topic: str, trust_domain: str) -> dict:
    """Build the workload-identity header set for a topic's discovery agent.

    Args:
        topic: Topic ID; embedded in the namespace segment as ``topic-<topic>``.
        trust_domain: SPIFFE trust domain used as the authority of the ID.

    Returns:
        A dict with a single ``X-Spiffe-Id`` entry.
    """
    spiffe_id = f"spiffe://{trust_domain}/ns/topic-{topic}/sa/discovery-agent"
    return {"X-Spiffe-Id": spiffe_id}


def make_query_headers(user: str, trust_domain: str) -> dict:
    """Build the on-behalf-of header set for the query agent.

    Args:
        user: Subject the query agent is acting for; sent as ``X-Original-Subject``.
        trust_domain: SPIFFE trust domain used as the authority of the ID.

    Returns:
        A dict with ``X-Spiffe-Id`` (the query agent) and ``X-Original-Subject``.
    """
    headers = {}
    headers["X-Spiffe-Id"] = f"spiffe://{trust_domain}/ns/wiki-system/sa/query-agent"
    headers["X-Original-Subject"] = user
    return headers
def cmd_discover_novelty(client: httpx.Client, base: str, headers: dict, args):
    """Check whether a title/abstract is novel within a topic and print the verdict.

    Args:
        client: HTTP client used to reach the wiki service.
        base: Service base URL (no trailing slash).
        headers: Identity headers to send with the request.
        args: Parsed CLI namespace providing ``topic``, ``title`` and ``abstract``.

    Exits the process with status 1 (message on stderr) on any non-200 response.
    """
    resp = client.post(
        f"{base}/topics/{args.topic}/check-novelty",
        json={"title": args.title, "abstract": args.abstract},
        headers=headers,
    )
    if resp.status_code != 200:
        # The error body is not guaranteed to be a JSON object (e.g. a proxy's
        # HTML error page), so fall back to the raw text instead of raising.
        try:
            detail = resp.json().get("detail", resp.text)
        except (ValueError, AttributeError):
            detail = resp.text
        print(f"ERROR ({resp.status_code}): {detail}", file=sys.stderr)
        sys.exit(1)

    data = resp.json()
    if data["novel"]:
        print(f"NOVEL: {data['reason']}")
        return
    print(f"NOT NOVEL: {data['reason']}")
    for s in data.get("similar", []):
        print(f" similar: {s['path']} (score={s['score']})")
def cmd_query_read(client: httpx.Client, base: str, headers: dict, args):
    """Fetch one wiki page and print its content, followed by its frontmatter if any.

    Args:
        client: HTTP client used to reach the wiki service.
        base: Service base URL (no trailing slash).
        headers: Identity headers to send with the request.
        args: Parsed CLI namespace providing ``topic`` and ``path``.

    Exits the process with status 1 (message on stderr) on any non-200 response.
    """
    resp = client.get(f"{base}/topics/{args.topic}/pages/{args.path}", headers=headers)
    if resp.status_code != 200:
        # The error body is not guaranteed to be a JSON object, so fall back to
        # the raw text rather than letting the decode error mask the real failure.
        try:
            detail = resp.json().get("detail", resp.text)
        except (ValueError, AttributeError):
            detail = resp.text
        print(f"ERROR ({resp.status_code}): {detail}", file=sys.stderr)
        sys.exit(1)

    data = resp.json()
    print(data["content"])
    if data.get("frontmatter"):
        print(f"\n--- Frontmatter: {json.dumps(data['frontmatter'])}")
def cmd_query_graph(client: httpx.Client, base: str, headers: dict, args):
    """Print the page graph of a topic: every node, then every edge (if any).

    Args:
        client: HTTP client used to reach the wiki service.
        base: Service base URL (no trailing slash).
        headers: Identity headers to send with the request.
        args: Parsed CLI namespace providing ``topic``.

    Exits the process with status 1 (message on stderr) on any non-200 response.
    """
    resp = client.get(f"{base}/topics/{args.topic}/graph", headers=headers)
    if resp.status_code != 200:
        print(f"ERROR ({resp.status_code}): {resp.text}", file=sys.stderr)
        sys.exit(1)

    graph = resp.json()
    print(f"Nodes ({len(graph['nodes'])}):")
    for node in graph["nodes"]:
        # Tags are optional per node; render them as a bracketed suffix.
        suffix = ""
        if node.get("tags"):
            suffix = f" [{', '.join(node['tags'])}]"
        print(f" {node['id']}: {node['title']}{suffix}")

    if not graph["edges"]:
        return
    print(f"\nEdges ({len(graph['edges'])}):")
    for edge in graph["edges"]:
        print(f" {edge['source']} -> {edge['target']}")
def cmd_discover_template(client: httpx.Client, base: str, headers: dict, args):
    """List all page templates, or print one template when an ID is given.

    Args:
        client: HTTP client used to reach the wiki service.
        base: Service base URL (no trailing slash).
        headers: Accepted for signature parity with the other commands.
        args: Parsed CLI namespace; ``template_id`` selects a single template
            and is falsy to list all of them.

    Exits the process with status 1 (message on stderr) on any non-200 response.
    """
    # NOTE(review): `headers` is not sent with this request, unlike the sibling
    # commands; presumably the templates endpoint is public. Confirm.
    url = f"{base}/templates"
    if args.template_id:
        url = f"{base}/templates/{args.template_id}"
    resp = client.get(url)

    if resp.status_code != 200:
        print(f"ERROR ({resp.status_code}): {resp.text}", file=sys.stderr)
        sys.exit(1)

    payload = resp.json()
    if "templates" not in payload:
        # Single-template response: header line, then the template body.
        print(f"--- {payload['name']} ---\n")
        print(payload["content"])
        return
    for tpl in payload["templates"]:
        print(f" {tpl['id']}: {tpl['name']} — {tpl['description']}")
def _build_parser() -> argparse.ArgumentParser:
    """Construct the CLI argument parser with all modes and sub-actions."""
    parser = argparse.ArgumentParser(description="Wiki Memory Service CLI")
    parser.add_argument("--base-url", default=DEFAULT_BASE_URL, help="Service base URL")
    parser.add_argument("--trust-domain", default=DEFAULT_TRUST_DOMAIN)
    parser.add_argument("--user", default="alice@example.com", help="User identity for query agent OBO")
    parser.add_argument("--topic", default="ai", help="Default topic for discovery agent")

    sub = parser.add_subparsers(dest="mode", required=True)

    # --- auth commands ---
    sub.add_parser("login", help="Login via GitHub device flow")
    sub.add_parser("logout", help="Remove cached token")
    sub.add_parser("whoami", help="Show current identity")
    sub.add_parser("renew", help="Renew expired or expiring token")

    # --- discover mode ---
    discover = sub.add_parser("discover", help="Discovery Agent operations (write)")
    discover_sub = discover.add_subparsers(dest="action", required=True)

    write_p = discover_sub.add_parser("write", help="Write a wiki page")
    write_p.add_argument("topic", help="Topic ID")
    write_p.add_argument("path", help="Page path (e.g. transformers.md)")
    write_p.add_argument("--content", help="Page content (string)")
    write_p.add_argument("--file", help="Read content from file")
    write_p.add_argument("--message", help="Commit message")
    write_p.add_argument("--draft", action="store_true", help="Submit as draft for review")

    novelty_p = discover_sub.add_parser("novelty", help="Check content novelty")
    novelty_p.add_argument("topic", help="Topic ID")
    novelty_p.add_argument("title", help="Content title")
    novelty_p.add_argument("abstract", help="Content abstract/summary")

    template_p = discover_sub.add_parser("template", help="List or get page templates")
    template_p.add_argument("template_id", nargs="?", help="Template ID (omit to list all)")

    # --- query mode ---
    query = sub.add_parser("query", help="Query Agent operations (read/search)")
    query_sub = query.add_subparsers(dest="action", required=True)

    query_sub.add_parser("list-topics", help="List all topics")

    lp = query_sub.add_parser("list-pages", help="List pages in a topic")
    lp.add_argument("topic", help="Topic ID")

    sp = query_sub.add_parser("search", help="Search a topic")
    sp.add_argument("topic", help="Topic ID")
    sp.add_argument("query", help="Search query")
    sp.add_argument("--limit", type=int, default=10)

    rp = query_sub.add_parser("read", help="Read a page")
    rp.add_argument("topic", help="Topic ID")
    rp.add_argument("path", help="Page path")

    act_p = query_sub.add_parser("activity", help="Recent changes")
    act_p.add_argument("topic", nargs="?", help="Topic ID (omit for global)")
    act_p.add_argument("--limit", type=int, default=20)

    bl_p = query_sub.add_parser("backlinks", help="Pages linking to a page")
    bl_p.add_argument("topic", help="Topic ID")
    bl_p.add_argument("path", help="Page path")

    sa_p = query_sub.add_parser("search-all", help="Search across all topics")
    sa_p.add_argument("query", help="Search query")
    sa_p.add_argument("--limit", type=int, default=10)

    tags_p = query_sub.add_parser("tags", help="List tags in a topic")
    tags_p.add_argument("topic", help="Topic ID")

    tag_p = query_sub.add_parser("tag", help="Pages with a specific tag")
    tag_p.add_argument("topic", help="Topic ID")
    tag_p.add_argument("tag", help="Tag name")

    graph_p = query_sub.add_parser("graph", help="Page graph (nodes + edges)")
    graph_p.add_argument("topic", help="Topic ID")

    drafts_p = query_sub.add_parser("drafts", help="List pending drafts")
    drafts_p.add_argument("topic", help="Topic ID")

    # --- admin mode ---
    admin = sub.add_parser("admin", help="Admin operations")
    admin_sub = admin.add_subparsers(dest="action", required=True)

    approve_p = admin_sub.add_parser("approve", help="Approve a draft")
    approve_p.add_argument("topic", help="Topic ID")
    approve_p.add_argument("path", help="Draft page path")

    reject_p = admin_sub.add_parser("reject", help="Reject a draft")
    reject_p.add_argument("topic", help="Topic ID")
    reject_p.add_argument("path", help="Draft page path")
    reject_p.add_argument("--reason", help="Rejection reason")

    admin_sub.add_parser("init-pages", help="Initialize GitHub Pages layout")

    clean_p = admin_sub.add_parser("clean-test-pages", help="Delete accumulated test pages")
    clean_p.add_argument("--dry-run", action="store_true", help="Show what would be deleted without deleting")

    return parser


def _cmd_admin_init_pages(client, base: str, headers: dict, args):
    """Ask the service to create the GitHub Pages layout and list the files written.

    Exits the process with status 1 (message on stderr) on any non-200 response,
    matching the behaviour of the other commands.
    """
    resp = client.post(f"{base}/admin/init-pages", headers=headers)
    if resp.status_code != 200:
        # Error bodies are not guaranteed to be JSON objects; fall back to text.
        try:
            detail = resp.json().get("detail", resp.text)
        except (ValueError, AttributeError):
            detail = resp.text
        print(f"ERROR ({resp.status_code}): {detail}", file=sys.stderr)
        sys.exit(1)
    data = resp.json()
    print(f"GitHub Pages initialized ({len(data['files'])} files):")
    for f in data["files"]:
        print(f" {f}")


def main():
    """CLI entry point: parse arguments, pick identity headers, dispatch the command.

    Auth commands (login/whoami/renew) use the base URL stored with the cached
    token when one exists, otherwise ``--base-url``. All other modes use
    ``--base-url`` and send the cached bearer token when available, falling
    back to SPIFFE-style headers built from the CLI options.

    Setting ``WIKI_INSECURE_TLS=1`` disables TLS certificate verification.
    """
    args = _build_parser().parse_args()
    insecure = os.environ.get("WIKI_INSECURE_TLS") == "1"

    discover_actions = {
        "write": cmd_discover_write,
        "novelty": cmd_discover_novelty,
        "template": cmd_discover_template,
    }
    query_actions = {
        "list-topics": cmd_query_list_topics,
        "list-pages": cmd_query_list_pages,
        "search": cmd_query_search,
        "read": cmd_query_read,
        "activity": cmd_query_activity,
        "backlinks": cmd_query_backlinks,
        "search-all": cmd_query_search_all,
        "tags": cmd_query_tags,
        "tag": cmd_query_tag,
        "graph": cmd_query_graph,
        "drafts": cmd_query_drafts,
    }
    admin_actions = {
        "approve": cmd_admin_approve,
        "reject": cmd_admin_reject,
        "init-pages": _cmd_admin_init_pages,
        "clean-test-pages": cmd_admin_clean_test_pages,
    }

    # The context manager closes the connection pool on every exit path,
    # including the sys.exit() calls made inside the command handlers.
    with httpx.Client(timeout=30, verify=not insecure) as client:
        if args.mode == "logout":
            cmd_logout(args)
            return

        if args.mode in ("login", "whoami", "renew"):
            cached = load_cached_token()
            base = cached["base_url"] if cached else args.base_url
            if args.mode == "login":
                cmd_login(client, base, args)
            elif args.mode == "whoami":
                cmd_whoami(client, base, args)
            else:
                cmd_renew(client, base)
            return

        # A cached bearer token always wins; the SPIFFE headers are the fallback.
        headers = make_auth_headers()
        if args.mode == "discover":
            if not headers:
                headers = make_discovery_headers(getattr(args, "topic", "ai"), args.trust_domain)
            handlers = discover_actions
        elif args.mode == "query":
            if not headers:
                headers = make_query_headers(args.user, args.trust_domain)
            handlers = query_actions
        elif args.mode == "admin":
            if not headers:
                headers = make_discovery_headers(getattr(args, "topic", "system"), args.trust_domain)
            handlers = admin_actions
        else:
            return

        handler = handlers.get(args.action)
        if handler is not None:
            handler(client, args.base_url, headers, args)
+""" + +import base64 +import hashlib +import hmac +import json +import logging +import math +import os +import re +import subprocess +import time +from pathlib import Path +from urllib.parse import urlencode + +logger = logging.getLogger(__name__) + +import yaml +from fastapi import FastAPI, HTTPException, Request +from fastapi.responses import JSONResponse, RedirectResponse +from pydantic import BaseModel + +# --- Configuration --- + +WIKI_ROOT = Path(os.environ.get("WIKI_ROOT", "/data/wiki")) +ACL_FILE = Path(os.environ.get("ACL_FILE", "/config/acl.yaml")) +TRUST_DOMAIN = os.environ.get("SPIFFE_TRUST_DOMAIN", "kagenti.example.com") +WIKI_REMOTE_URL = os.environ.get("WIKI_REMOTE_URL", "") +WIKI_PUSH_STRATEGY = os.environ.get("WIKI_PUSH_STRATEGY", "immediate") + +GITHUB_CLIENT_ID = os.environ.get("GITHUB_CLIENT_ID", "") +GITHUB_CLIENT_SECRET = os.environ.get("GITHUB_CLIENT_SECRET", "") +JWT_SECRET_KEY = os.environ.get("JWT_SECRET_KEY", "") +if not JWT_SECRET_KEY: + raise RuntimeError("JWT_SECRET_KEY environment variable is required") +JWT_EXPIRY_HOURS = int(os.environ.get("JWT_EXPIRY_HOURS", "168")) + +def _read_version() -> str: + pyproject = Path(__file__).parent / "pyproject.toml" + if pyproject.exists(): + for line in pyproject.read_text().splitlines(): + if line.startswith("version"): + return line.split('"')[1] + return "0.0.0-dev" + +__version__ = _read_version() + +app = FastAPI(title="Wiki Memory Service", version=__version__) + + +# --- Identity & ACL --- + +class Identity(BaseModel): + """Resolved caller identity — workload (SPIFFE), user (GitHub OAuth), or OBO.""" + subject: str # SPIFFE ID or github: + kind: str # "workload" | "user" | "obo" + actor: str | None = None # agent SPIFFE ID when kind=obo + topics: list[str] = [] # topic scopes from token/SVID + groups: list[str] = [] # github teams (e.g. 
def resolve_identity(request: Request) -> Identity:
    """
    Resolve caller identity from request headers.

    Supports three identity models:
    1. SPIFFE workload: X-Spiffe-Id header (agents)
    2. OBO: X-Spiffe-Id + X-Original-Subject (agent on behalf of user)
    3. GitHub OAuth: Authorization: Bearer <wiki-issued JWT> (human users)

    Returns:
        The resolved Identity. Bearer-token users get ``topics=["*"]`` and the
        ``groups`` claim from the token.

    Raises:
        HTTPException(401): no identity headers were sent, or the bearer token
            failed signature/expiry validation or carries no usable login.
    """
    spiffe_id = request.headers.get("x-spiffe-id")
    auth_header = request.headers.get("authorization", "")
    user_subject = request.headers.get("x-original-subject")

    # NOTE(review): the SPIFFE headers are taken at face value here; presumably
    # a trusted proxy/mesh sets them and strips client-supplied copies. Confirm.
    if spiffe_id:
        topic = _extract_topic_from_spiffe(spiffe_id)
        topics = [topic] if topic else []
        if user_subject:
            return Identity(
                subject=user_subject,
                kind="obo",
                actor=spiffe_id,
                topics=topics,
            )
        return Identity(subject=spiffe_id, kind="workload", topics=topics)

    if auth_header.startswith("Bearer "):
        token = auth_header.removeprefix("Bearer ").strip()
        claims = _validate_jwt(token)
        # Only signature- and expiry-checked claims are trusted. Decoding the
        # payload of a token that failed validation, or treating the raw bearer
        # string as a subject, would let any caller forge an identity.
        if not claims:
            raise HTTPException(401, "Invalid or expired token")
        login = claims.get("github_login") or str(claims.get("sub", "")).removeprefix("github:")
        if not login:
            raise HTTPException(401, "Token does not identify a user")
        return Identity(
            subject=f"github:{login}",
            kind="user",
            topics=["*"],
            groups=claims.get("groups", []),
        )

    raise HTTPException(401, "No identity provided")
_b64url_encode(json.dumps(header).encode()) + p = _b64url_encode(json.dumps(payload).encode()) + sig_input = f"{h}.{p}".encode() + sig = hmac.new(JWT_SECRET_KEY.encode(), sig_input, hashlib.sha256).digest() + return f"{h}.{p}.{_b64url_encode(sig)}" + + +def _validate_jwt(token: str) -> dict | None: + """Validate a wiki-issued JWT. Returns claims or None.""" + try: + parts = token.split(".") + if len(parts) != 3: + return None + sig_input = f"{parts[0]}.{parts[1]}".encode() + expected_sig = hmac.new(JWT_SECRET_KEY.encode(), sig_input, hashlib.sha256).digest() + actual_sig = _b64url_decode(parts[2]) + if not hmac.compare_digest(expected_sig, actual_sig): + return None + payload = json.loads(_b64url_decode(parts[1])) + if payload.get("exp", 0) < time.time(): + return None + return payload + except Exception: + return None + + +def check_topic_access(identity: Identity, topic_id: str, action: str): + """ + Enforce per-topic ACL. + action: "read" | "write" | "admin" + + ACL entries can be: + - SPIFFE IDs (spiffe://...) 
def _git(args: list[str], cwd: Path | None = None, timeout: int = 10):
    """Run ``git`` with the given arguments and return its stripped stdout.

    Args:
        args: Arguments placed after ``git`` on the command line.
        cwd: Working directory; WIKI_ROOT is used when this is falsy.
        timeout: Seconds passed to ``subprocess.run`` as its timeout.

    Raises:
        RuntimeError: git exited non-zero; the message carries the arguments
            and git's stderr.
    """
    command = ["git"] + args
    workdir = cwd or WIKI_ROOT
    proc = subprocess.run(
        command,
        cwd=workdir,
        capture_output=True,
        text=True,
        timeout=timeout,
    )
    if proc.returncode == 0:
        return proc.stdout.strip()
    raise RuntimeError(f"git {' '.join(args)}: {proc.stderr}")
["git", "remote", "get-url", "origin"], cwd=WIKI_ROOT, + capture_output=True, text=True, + ) + if result.returncode != 0: + _git(["remote", "add", "origin", WIKI_REMOTE_URL]) + else: + _git(["remote", "set-url", "origin", WIKI_REMOTE_URL]) + try: + _git(["push", "-u", "origin", "main"], timeout=30) + except RuntimeError: + pass + _git(["config", "user.name", "wiki-memory-service"]) + _git(["config", "user.email", "wiki@kagenti.local"]) + + +def _commit(rel_path: str, msg: str, author: str): + _git(["add", rel_path]) + _git(["commit", "-m", msg, "--author", f"{author} <{author}@kagenti.local>", + "--allow-empty"]) + if WIKI_REMOTE_URL and WIKI_PUSH_STRATEGY == "immediate": + try: + _git(["pull", "--rebase", "origin", "main"], timeout=30) + except RuntimeError: + pass + _git(["push", "origin", "main"], timeout=30) + + +def _topic_dir(topic_id: str) -> Path: + d = WIKI_ROOT / topic_id + d.mkdir(parents=True, exist_ok=True) + return d + + +# --- Search (TF-IDF — minimal, no external deps) --- + +_STOPWORDS = frozenset( + "a an and are as at be by for from has he in is it its of on or " + "that the to was were will with this we they".split() +) + + +def _tokenize(text: str) -> list[str]: + return [w for w in re.findall(r"[a-z0-9]+", text.lower()) + if w not in _STOPWORDS and len(w) > 1] + + +def search_topic(topic_id: str, query: str, limit: int = 10) -> list[dict]: + """TF-IDF search over a topic's markdown pages.""" + topic_dir = _topic_dir(topic_id) + terms = _tokenize(query) + if not terms: + return [] + + docs = [(f, f.read_text(errors="replace")) + for f in topic_dir.rglob("*.md") if f.is_file()] + if not docs: + return [] + + doc_count = len(docs) + df: dict[str, int] = {} + for _, content in docs: + for t in set(_tokenize(content)): + df[t] = df.get(t, 0) + 1 + + idf = {t: math.log((doc_count + 1) / (df.get(t, 0) + 1)) + 1 for t in terms} + + results = [] + for fpath, content in docs: + all_t = _tokenize(content) + if not all_t: + continue + tf: dict[str, float] 
def extract_links(content: str) -> list[str]:
    """Extract internal wiki links from markdown content.

    Recognises ``[[target]]`` wiki links and ``[text](target)`` markdown links;
    markdown links whose target starts with ``http://`` or ``https://`` are
    skipped. Targets are stripped and given a ``.md`` suffix when they lack one.

    Returns:
        Unique link targets in first-seen order (all wiki links first, then
        markdown links), so the result is the same on every run.
    """
    # A dict is used as an ordered set: list(set(...)) yields an order that
    # changes between interpreter runs because of string hash randomisation.
    seen: dict[str, None] = {}

    for m in re.finditer(r'\[\[([^\]]+)\]\]', content):
        target = m.group(1).strip()
        if not target.endswith(".md"):
            target += ".md"
        seen.setdefault(target)

    for m in re.finditer(r'\[([^\]]*)\]\(([^)]+)\)', content):
        target = m.group(2).strip()
        if target.startswith(("http://", "https://")):
            continue
        if not target.endswith(".md"):
            target += ".md"
        seen.setdefault(target)

    return list(seen)
def get_activity(topic_id: str | None = None, limit: int = 20) -> list[dict]:
    """Return recent commits, optionally restricted to one topic's directory.

    Args:
        topic_id: When truthy, only commits touching that topic's directory
            are listed; otherwise the whole repository log is used.
        limit: Maximum number of commits requested from ``git log``.

    Returns:
        One dict per commit with keys ``commit``, ``author``, ``timestamp``
        and ``message``. An empty list if the git command fails.
    """
    log_cmd = ["log", "--format=%H|%an|%ai|%s", f"-n{limit}"]
    if topic_id:
        log_cmd += ["--", str(_topic_dir(topic_id))]

    try:
        raw = _git(log_cmd)
    except RuntimeError:
        return []

    fields = ("commit", "author", "timestamp", "message")
    # Split at most three times so a '|' inside the subject stays in `message`;
    # lines that do not yield all four fields are ignored.
    rows = (line.split("|", 3) for line in raw.splitlines())
    return [dict(zip(fields, row)) for row in rows if len(row) == 4]
{First Step} + +```bash +# commands here +``` + +### 2. {Second Step} + +Description of what to do. + +### 3. {Third Step} + +Description of what to do. + +## Verification + +How to confirm it worked. + +## Troubleshooting + +- **Problem**: Description → **Fix**: Solution +""", + }, + "comparison": { + "name": "Comparison", + "description": "Compare approaches, tools, or methods", + "content": """--- +tags: [comparison] +--- +# {Option A} vs {Option B} + +## Overview + +Brief context for why this comparison matters. + +## Comparison + +| Criterion | {Option A} | {Option B} | +|-----------|-----------|-----------| +| Performance | ... | ... | +| Complexity | ... | ... | +| Cost | ... | ... | + +## When to Choose {Option A} + +- Scenario 1 +- Scenario 2 + +## When to Choose {Option B} + +- Scenario 1 +- Scenario 2 + +## Recommendation + +Summary recommendation with rationale. +""", + }, +} + + +# --- GitHub Pages Scaffold (Jekyll) --- + +_PAGES_SCAFFOLD = { + "_config.yml": """\ +title: Kagenti Wiki Research +description: Multi-agent research knowledge base +baseurl: /kagenti-wiki-research +url: https://kaslom.github.io +markdown: kramdown +exclude: + - .gitignore + - "*.py" + - "*.yaml" + - "*.yml" + - "!_config.yml" +include: + - ai +defaults: + - scope: + path: "ai" + values: + layout: page + - scope: + path: "" + type: "pages" + values: + layout: default +""", + "index.md": """\ +--- +layout: default +title: Home +--- + +# Kagenti Wiki Research + +A multi-agent research knowledge base. + +## Pages + +{% assign pages = site.pages | where_exp: "p", "p.path contains 'ai/'" | sort: "title" %} +{% for p in pages %} +{% unless p.path contains '_drafts' or p.name == 'index.md' or p.title == nil %} +- [{{ p.title | default: p.name }}]({{ p.url | relative_url }}){% if p.tags %} {% for t in p.tags %}{{ t }}{% endfor %}{% endif %} +{% endunless %} +{% endfor %} +""", + "_layouts/default.html": """\ + + + + + + {{ page.title | default: site.title }} + + + +
+ +
+ {{ content }} +
+ +
+ + +""", + "_layouts/page.html": """\ +--- +layout: default +--- +
+ +
+ {{ content }} +
+
+""", + "_includes/nav.html": """\ + +""", + "assets/css/style.css": """\ +:root { + --bg: #ffffff; + --bg-secondary: #f6f8fa; + --text: #1f2328; + --text-muted: #656d76; + --border: #d0d7de; + --accent: #0969da; + --accent-hover: #0550ae; + --code-bg: #f6f8fa; + --tag-bg: #ddf4ff; + --tag-text: #0969da; + --nav-width: 220px; +} + +@media (prefers-color-scheme: dark) { + :root { + --bg: #0d1117; + --bg-secondary: #161b22; + --text: #e6edf3; + --text-muted: #8b949e; + --border: #30363d; + --accent: #58a6ff; + --accent-hover: #79c0ff; + --code-bg: #161b22; + --tag-bg: #1f3a5f; + --tag-text: #79c0ff; + } +} + +* { box-sizing: border-box; margin: 0; padding: 0; } + +body { + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif; + font-size: 16px; + line-height: 1.6; + color: var(--text); + background: var(--bg); +} + +.site-wrapper { + display: flex; + min-height: 100vh; +} + +.site-nav { + width: var(--nav-width); + padding: 1.5rem 1rem; + border-right: 1px solid var(--border); + background: var(--bg-secondary); + position: fixed; + top: 0; + bottom: 0; + overflow-y: auto; +} + +.site-title { + display: block; + font-size: 1.1rem; + font-weight: 600; + color: var(--text); + text-decoration: none; + margin-bottom: 1.5rem; + padding-bottom: 0.75rem; + border-bottom: 1px solid var(--border); +} + +.nav-list { + list-style: none; +} + +.nav-list li { + margin-bottom: 0.25rem; +} + +.nav-list a { + display: block; + padding: 0.3rem 0.5rem; + color: var(--text-muted); + text-decoration: none; + border-radius: 4px; + font-size: 0.9rem; +} + +.nav-list a:hover { + color: var(--accent); + background: var(--bg); +} + +.nav-list .active a { + color: var(--accent); + font-weight: 500; +} + +.site-content { + flex: 1; + margin-left: var(--nav-width); + padding: 2rem 3rem; + max-width: 800px; +} + +.site-footer { + position: fixed; + bottom: 0; + left: 0; + width: var(--nav-width); + padding: 0.75rem 1rem; + font-size: 0.8rem; + color: 
var(--text-muted); + border-top: 1px solid var(--border); + background: var(--bg-secondary); +} + +.site-footer a { color: var(--accent); text-decoration: none; } + +/* Page content */ +.page-header { margin-bottom: 1.5rem; } +.page-header h1 { font-size: 2rem; font-weight: 600; } + +.page-tags, .tags { margin-top: 0.5rem; } +.tag { + display: inline-block; + padding: 0.15rem 0.5rem; + margin-right: 0.3rem; + font-size: 0.75rem; + font-weight: 500; + border-radius: 12px; + background: var(--tag-bg); + color: var(--tag-text); +} + +/* Typography */ +.page-content h1, .site-content h1 { font-size: 1.8rem; margin: 1.5rem 0 0.75rem; } +.page-content h2, .site-content h2 { font-size: 1.4rem; margin: 1.25rem 0 0.5rem; border-bottom: 1px solid var(--border); padding-bottom: 0.3rem; } +.page-content h3, .site-content h3 { font-size: 1.15rem; margin: 1rem 0 0.5rem; } + +.page-content p, .site-content p { margin-bottom: 0.75rem; } +.page-content ul, .page-content ol, .site-content ul, .site-content ol { margin: 0.5rem 0 0.75rem 1.5rem; } +.page-content li, .site-content li { margin-bottom: 0.25rem; } + +a { color: var(--accent); } +a:hover { color: var(--accent-hover); } + +/* Code */ +code { + font-family: 'SFMono-Regular', Consolas, 'Liberation Mono', Menlo, monospace; + font-size: 0.875em; + padding: 0.2em 0.4em; + background: var(--code-bg); + border-radius: 4px; +} + +pre { + background: var(--code-bg); + border: 1px solid var(--border); + border-radius: 6px; + padding: 1rem; + overflow-x: auto; + margin: 0.75rem 0; +} + +pre code { + padding: 0; + background: none; + font-size: 0.85rem; +} + +/* Tables */ +table { + width: 100%; + border-collapse: collapse; + margin: 0.75rem 0; +} + +th, td { + padding: 0.5rem 0.75rem; + border: 1px solid var(--border); + text-align: left; +} + +th { + background: var(--bg-secondary); + font-weight: 600; +} + +/* Responsive */ +@media (max-width: 768px) { + .site-wrapper { flex-direction: column; } + .site-nav { + position: static; + 
@app.get("/topics")
def list_topics(request: Request):
    """List topics the caller has access to.

    A topic is visible when any entry in its readers, writers or admins lists
    names the caller.  The caller is matched by the same principals that
    ``get_permissions`` reports: ``*``, the raw subject, the acting identity
    (if any), ``github:user:<login>``, ``github:team:<org>/<team>`` and
    ``github:org:<org>``.

    Returns ``{"topics": [{"topic_id": ..., "page_count": ...}, ...]}``.
    """
    identity = resolve_identity(request)
    subject = identity.subject

    # Every ACL entry that would grant this caller access.
    principals = {"*", subject}
    if identity.actor:
        principals.add(identity.actor)
    login = subject.removeprefix("github:") if subject.startswith("github:") else None
    if login:
        principals.add(f"github:user:{login}")
    for group in identity.groups:
        principals.add(f"github:team:{group}")
        org = group.split("/")[0] if "/" in group else None
        if org:
            principals.add(f"github:org:{org}")

    visible = []
    for topic_id, acl in _acl_cache.items():
        if principals.isdisjoint(acl.readers + acl.writers + acl.admins):
            continue
        visible.append({
            "topic_id": topic_id,
            "page_count": sum(1 for _ in _topic_dir(topic_id).rglob("*.md")),
        })
    return {"topics": visible}
def _resolve_page_path(topic_id: str, path: str, *, draft: bool = False) -> Path:
    """Map a request path to a file inside the topic directory.

    ``path`` comes straight from the URL, so it may contain ``..`` segments or
    be absolute.  Raises ``HTTPException(400)`` unless the resolved location is
    strictly inside the topic directory (or its ``_drafts`` folder when
    ``draft`` is true).  Returns the unresolved ``base / path`` so callers can
    still compute paths relative to ``WIKI_ROOT``.
    """
    base = _topic_dir(topic_id)
    if draft:
        base = base / "_drafts"
    full = base / path
    root = base.resolve()
    target = full.resolve()
    if target == root or not target.is_relative_to(root):
        raise HTTPException(400, f"Invalid page path: {path}")
    return full


@app.get("/topics/{topic_id}/pages/{path:path}")
def read_page(topic_id: str, path: str, request: Request):
    """Read a specific page. Used by Query Agent.

    Requires read access to the topic.  Returns the raw ``content``, the
    ``path`` as ``<topic>/<path>`` and, when the page has any, its parsed
    ``frontmatter``.

    Raises ``HTTPException(400)`` for a path that escapes the topic and
    ``HTTPException(404)`` when the path is not an existing file.
    """
    identity = resolve_identity(request)
    check_topic_access(identity, topic_id, "read")
    full = _resolve_page_path(topic_id, path)
    # is_file() rather than exists(): a directory cannot be read as a page.
    if not full.is_file():
        raise HTTPException(404, f"Page not found: {topic_id}/{path}")
    content = full.read_text()
    frontmatter, _ = parse_frontmatter(content)
    result: dict = {"content": content, "path": f"{topic_id}/{path}"}
    if frontmatter:
        result["frontmatter"] = frontmatter
    return result


# --- Write (Discovery Agent) ---
@app.post("/topics/{topic_id}/pages/{path:path}")
def write_page(topic_id: str, path: str, body: WritePage, request: Request, draft: bool = False):
    """Write/update a wiki page. Used by Discovery Agent after ingest.

    Requires write access to the topic.  With ``draft`` true the page is
    written under the topic's ``_drafts`` folder instead of going live.  The
    file is committed via ``_commit`` with ``body.message`` (or a generated
    ``draft:``/``ingest:`` message) and the last ``/``-separated segment of the
    caller's subject as author.

    Returns the status, path and author, plus up to five ``suggested_links``:
    search hits for the first three lines of the new content, excluding the
    page itself.

    Raises ``HTTPException(400)`` for a path that escapes the topic or that
    names an existing directory.
    """
    identity = resolve_identity(request)
    check_topic_access(identity, topic_id, "write")

    full = _resolve_page_path(topic_id, path, draft=draft)
    if full.is_dir():
        raise HTTPException(400, f"Invalid page path: {path}")
    full.parent.mkdir(parents=True, exist_ok=True)
    full.write_text(body.content)

    rel = str(full.relative_to(WIKI_ROOT))
    prefix = "draft" if draft else "ingest"
    msg = body.message or f"{prefix}: {topic_id}/{path}"
    author = identity.subject.split("/")[-1]
    _commit(rel, msg, author)

    result: dict = {"status": "draft" if draft else "written", "path": f"{topic_id}/{path}", "author": author}

    # Use the top of the page as a query to suggest related pages to link to.
    search_text = " ".join(body.content.split("\n", 5)[:3])
    suggested = search_topic(topic_id, search_text, limit=6)
    suggested = [s for s in suggested if s["path"] != f"{topic_id}/{path}"][:5]
    if suggested:
        result["suggested_links"] = suggested

    return result
@app.post("/search")
def global_search(body: GlobalSearchQuery, request: Request):
    """Search across all accessible topics.

    A topic is searched only when an entry in its readers, writers or admins
    lists names the caller: ``*``, the raw subject, the acting identity (if
    any), ``github:user:<login>``, ``github:team:<org>/<team>`` or
    ``github:org:<org>``.  Topics the caller cannot access are never searched.

    Each hit is tagged with its ``topic_id``; hits from all topics are merged,
    sorted by descending score and truncated to ``body.limit``.
    """
    identity = resolve_identity(request)
    subject = identity.subject

    # Every ACL entry that would grant this caller access.
    principals = {"*", subject}
    if identity.actor:
        principals.add(identity.actor)
    login = subject.removeprefix("github:") if subject.startswith("github:") else None
    if login:
        principals.add(f"github:user:{login}")
    for group in identity.groups:
        principals.add(f"github:team:{group}")
        org = group.split("/")[0] if "/" in group else None
        if org:
            principals.add(f"github:org:{org}")

    all_results = []
    for topic_id, acl in _acl_cache.items():
        if principals.isdisjoint(acl.readers + acl.writers + acl.admins):
            continue
        results = search_topic(topic_id, body.query, body.limit)
        for hit in results:
            hit["topic_id"] = topic_id
        all_results.extend(results)
    all_results.sort(key=lambda r: r["score"], reverse=True)
    return {"results": all_results[:body.limit]}
{topic_id}/_drafts/{path}") + live_file = topic_dir / path + live_file.parent.mkdir(parents=True, exist_ok=True) + live_file.write_text(draft_file.read_text()) + draft_file.unlink() + rel_live = str(live_file.relative_to(WIKI_ROOT)) + rel_draft = str(draft_file.relative_to(WIKI_ROOT)) + _git(["add", rel_live, rel_draft]) + _git(["commit", "-m", f"approve: {topic_id}/{path}", + "--author", f"{identity.subject} "]) + if WIKI_REMOTE_URL and WIKI_PUSH_STRATEGY == "immediate": + try: + _git(["pull", "--rebase", "origin", "main"], timeout=30) + except RuntimeError: + pass + _git(["push", "origin", "main"], timeout=30) + return {"status": "approved", "path": f"{topic_id}/{path}"} + + +@app.post("/topics/{topic_id}/drafts/{path:path}/reject") +def reject_draft(topic_id: str, path: str, body: DraftReject, request: Request): + """Reject a draft — delete it.""" + identity = resolve_identity(request) + check_topic_access(identity, topic_id, "admin") + topic_dir = _topic_dir(topic_id) + draft_file = topic_dir / "_drafts" / path + if not draft_file.exists(): + raise HTTPException(404, f"Draft not found: {topic_id}/_drafts/{path}") + draft_file.unlink() + rel = str(draft_file.relative_to(WIKI_ROOT)) + _git(["add", rel]) + reason = f" ({body.reason})" if body.reason else "" + _git(["commit", "-m", f"reject: {topic_id}/{path}{reason}", + "--author", f"{identity.subject} "]) + return {"status": "rejected", "path": f"{topic_id}/{path}", "reason": body.reason} + + +# --- Tags/Frontmatter --- + +@app.get("/topics/{topic_id}/tags") +def list_tags(topic_id: str, request: Request): + """List all tags in a topic with page counts.""" + identity = resolve_identity(request) + check_topic_access(identity, topic_id, "read") + topic_dir = _topic_dir(topic_id) + tag_counts: dict[str, int] = {} + for f in topic_dir.rglob("*.md"): + if "_drafts" in f.parts: + continue + content = f.read_text(errors="replace") + meta, _ = parse_frontmatter(content) + for tag in meta.get("tags", []): + 
@app.get("/topics/{topic_id}/graph")
def topic_graph(topic_id: str, request: Request):
    """Get page graph (nodes + edges) for a topic.

    Requires read access.  Every ``*.md`` file outside ``_drafts`` becomes a
    node whose ``id`` is its path relative to the topic, whose ``title`` is the
    first ``# `` heading found anywhere in the body (falling back to the
    path) and whose ``tags`` come from the front-matter.  Every link reported
    by ``extract_links`` becomes an edge from the page to the link target.
    """
    identity = resolve_identity(request)
    check_topic_access(identity, topic_id, "read")
    topic_dir = _topic_dir(topic_id)
    nodes = []
    edges = []
    for f in topic_dir.rglob("*.md"):
        if "_drafts" in f.parts:
            continue
        rel = str(f.relative_to(topic_dir))
        content = f.read_text(errors="replace")
        meta, body = parse_frontmatter(content)
        if not isinstance(meta, dict):
            # Front-matter that parses to a scalar or list carries no tags.
            meta = {}
        # MULTILINE lets "^" match at any line start, so the heading is found
        # even when it is not the very first line of the body.
        title_match = re.search(r"^#\s+(.+)", body, re.MULTILINE)
        title = title_match.group(1).strip() if title_match else rel
        nodes.append({
            "id": rel,
            "title": title,
            "tags": meta.get("tags", []),
        })
        edges.extend({"source": rel, "target": link} for link in extract_links(content))
    return {"topic": topic_id, "nodes": nodes, "edges": edges}
Requires admin access.""" + identity = resolve_identity(request) + check_topic_access(identity, topic_id, "admin") + full = _topic_dir(topic_id) / path + if full.exists(): + full.unlink() + rel = str(full.relative_to(WIKI_ROOT)) + _git(["add", rel]) + _git(["commit", "-m", f"delete: {topic_id}/{path}", + "--author", f"{identity.subject} "]) + return {"status": "deleted"} + + +@app.post("/admin/reload-acl") +def reload_acl(request: Request): + """Reload ACL from ConfigMap (after kubectl edit).""" + global _acl_cache + identity = resolve_identity(request) + if identity.kind != "user" or identity.subject not in _acl_cache.get("_system", TopicACL(topic_id="_system", writers=[], readers=[], admins=[])).admins: + raise HTTPException(403, "Admin only") + _acl_cache = load_acl() + return {"status": "reloaded", "topics": list(_acl_cache.keys())} + + +@app.post("/admin/init-pages") +def init_pages_scaffold(request: Request): + """Initialize GitHub Pages Jekyll layout files and fix page front-matter/links (admin only).""" + identity = resolve_identity(request) + check_topic_access(identity, "_system", "admin") + + written = [] + # Step 1: Write Jekyll scaffold files + for rel_path, content in _PAGES_SCAFFOLD.items(): + full = WIKI_ROOT / rel_path + full.parent.mkdir(parents=True, exist_ok=True) + full.write_text(content) + _git(["add", rel_path]) + written.append(rel_path) + + # Step 2: Fix front-matter and links in all existing .md files + updated = [] + for md_file in WIKI_ROOT.rglob("*.md"): + rel = str(md_file.relative_to(WIKI_ROOT)) + if rel.startswith("_") or rel == "index.md": + continue + content = md_file.read_text() + new_content = _ensure_jekyll_frontmatter(content, md_file) + if new_content != content: + md_file.write_text(new_content) + _git(["add", rel]) + updated.append(rel) + + all_changed = written + updated + if all_changed: + _git(["commit", "-m", "Initialize GitHub Pages layout and fix page front-matter", + "--author", f"{identity.subject} ", + 
"--allow-empty"]) + if WIKI_REMOTE_URL and WIKI_PUSH_STRATEGY == "immediate": + try: + _git(["pull", "--rebase", "origin", "main"], timeout=30) + except RuntimeError: + pass + _git(["push", "origin", "main"], timeout=30) + + return {"status": "ok", "files": written, "updated": updated} + + +def _ensure_jekyll_frontmatter(content: str, md_file: Path) -> str: + """Ensure a markdown file has title/layout in front-matter and uses {% link %} for internal links.""" + frontmatter, body = parse_frontmatter(content) + + # Extract title from first heading if not in front-matter + title = frontmatter.get("title") + if not title: + for line in body.split("\n"): + if line.startswith("# "): + title = line[2:].strip() + break + if not title: + title = md_file.stem.replace("-", " ").replace("_", " ").title() + + # Determine the topic directory this file belongs to + try: + topic_dir = md_file.parent + if topic_dir.name == "_drafts": + topic_dir = topic_dir.parent + topic_id = str(topic_dir.relative_to(WIKI_ROOT)) + except ValueError: + topic_id = "" + + # Build updated front-matter + frontmatter.setdefault("layout", "page") + frontmatter["title"] = title + # Preserve existing tags and other fields + + # Convert internal markdown links to Jekyll {% link %} tags + body = _convert_links_to_jekyll(body, topic_id) + + # Reconstruct file + fm_lines = ["---"] + fm_lines.append(f"layout: {frontmatter['layout']}") + fm_lines.append(f"title: \"{title}\"") + if frontmatter.get("tags"): + tags = frontmatter["tags"] + if isinstance(tags, list): + fm_lines.append(f"tags: [{', '.join(tags)}]") + else: + fm_lines.append(f"tags: {tags}") + # Preserve any extra front-matter keys + for key, val in frontmatter.items(): + if key in ("layout", "title", "tags"): + continue + fm_lines.append(f"{key}: {val}") + fm_lines.append("---") + + return "\n".join(fm_lines) + "\n" + body + + +def _convert_links_to_jekyll(body: str, topic_id: str) -> str: + """Convert internal wiki links to Jekyll {% link %} 
syntax.""" + prefix = f"{topic_id}/" if topic_id else "" + + def _replace_md_link(m): + text = m.group(1) + target = m.group(2).strip() + if target.startswith("http://") or target.startswith("https://"): + return m.group(0) + if target.startswith("#"): + return m.group(0) + if not target.endswith(".md"): + target += ".md" + # Use {% link topic/file.md %} for Jekyll resolution + link_path = f"{prefix}{target}" if not target.startswith(prefix) else target + return f'[{text}]({{% link {link_path} %}})' + + body = re.sub(r'\[([^\]]*)\]\(([^)]+)\)', _replace_md_link, body) + + # Convert [[wikilinks]] to Jekyll links + def _replace_wikilink(m): + target = m.group(1).strip() + display = target.replace("-", " ").replace("_", " ").title() + if not target.endswith(".md"): + target += ".md" + link_path = f"{prefix}{target}" if not target.startswith(prefix) else target + return f'[{display}]({{% link {link_path} %}})' + + body = re.sub(r'\[\[([^\]]+)\]\]', _replace_wikilink, body) + + return body + + +# --- GitHub OAuth --- + +@app.get("/auth/github/login") +def github_login(request: Request): + """Redirect user to GitHub for OAuth authorization.""" + if not GITHUB_CLIENT_ID: + raise HTTPException(500, "GitHub OAuth not configured (GITHUB_CLIENT_ID missing)") + params = urlencode({ + "client_id": GITHUB_CLIENT_ID, + "scope": "user:email read:org", + "state": _sign_jwt({"purpose": "oauth_state", "exp": time.time() + 600}), + }) + return RedirectResponse(f"https://github.com/login/oauth/authorize?{params}") + + +@app.get("/auth/github/callback") +def github_callback(code: str, state: str): + """Handle GitHub OAuth callback — exchange code for token, issue wiki JWT.""" + import httpx + + claims = _validate_jwt(state) + if not claims or claims.get("purpose") != "oauth_state": + raise HTTPException(400, "Invalid or expired OAuth state") + + # Exchange code for access token + resp = httpx.post("https://github.com/login/oauth/access_token", json={ + "client_id": GITHUB_CLIENT_ID, + 
@app.post("/auth/github/device/token")
def github_device_token(body: DeviceTokenRequest):
    """Poll for device flow token (CLI calls this after user approves).

    Exchanges ``body.device_code`` at GitHub's token endpoint.

    * While GitHub answers ``authorization_pending`` or ``slow_down`` the
      response is HTTP 202 with ``"status": "pending"`` so the client keeps
      polling.
    * Any other GitHub error is terminal: HTTP 400 with ``"status": "error"``.
    * On success the GitHub identity is fetched and a wiki JWT is returned
      together with the login and groups.

    Raises ``HTTPException`` 400 when ``device_code`` is missing or GitHub
    returns no token, 500 when OAuth is not configured, and 502 when GitHub
    cannot be reached or answers with something that is not JSON.
    """
    import httpx

    device_code = body.device_code
    if not device_code:
        raise HTTPException(400, "device_code required")
    if not GITHUB_CLIENT_ID:
        raise HTTPException(500, "GitHub OAuth not configured")
    try:
        resp = httpx.post("https://github.com/login/oauth/access_token", json={
            "client_id": GITHUB_CLIENT_ID,
            "client_secret": GITHUB_CLIENT_SECRET,
            "device_code": device_code,
            "grant_type": "urn:ietf:params:oauth:grant-type:device_code",
        }, headers={"Accept": "application/json"}, timeout=10)
        data = resp.json()
    except (httpx.HTTPError, ValueError) as exc:
        raise HTTPException(502, f"GitHub token endpoint unavailable: {exc}") from exc

    if "error" in data:
        pending_errors = ("authorization_pending", "slow_down")
        still_pending = data["error"] in pending_errors
        # Only the two polling errors mean "try again"; everything else
        # (expired code, denied access, ...) must stop the client's poll loop.
        return JSONResponse({"status": "pending" if still_pending else "error",
                             "error": data["error"],
                             "error_description": data.get("error_description", "")},
                            status_code=202 if still_pending else 400)

    access_token = data.get("access_token")
    if not access_token:
        raise HTTPException(400, "No access token in response")

    user_info, teams = _fetch_github_identity(access_token)
    wiki_token = _issue_wiki_jwt(user_info, teams)
    return {"token": wiki_token, "github_login": user_info["login"], "groups": teams}
@app.post("/auth/renew")
def renew_token(request: Request):
    """Renew a token that is still valid or expired within the last 7 days.

    Reads the bearer token from the ``Authorization`` header, checks that it
    was signed by this service, and issues a fresh token carrying the same
    login, email and groups with a new ``iat``/``exp``.

    Raises ``HTTPException(401)`` when the header or token is missing or
    malformed, when the signature does not verify, when the token expired
    more than seven days ago, or when no user can be identified from it.
    """
    import hmac

    auth_header = request.headers.get("authorization", "")
    if not auth_header.startswith("Bearer "):
        raise HTTPException(401, "No token provided")
    token = auth_header.removeprefix("Bearer ").strip()
    parts = token.split(".")
    if len(parts) != 3:
        raise HTTPException(401, "Invalid token format")
    try:
        payload = json.loads(_b64url_decode(parts[1]))
    except Exception as exc:
        raise HTTPException(401, "Cannot decode token") from exc
    if not isinstance(payload, dict):
        raise HTTPException(401, "Cannot decode token")

    # SECURITY: the payload above is attacker-controlled until the signature
    # has been checked; without this step anyone could mint a token for any
    # login/groups and have it "renewed" into a genuinely signed one.
    if not _validate_jwt(token):
        # The token may simply be expired, so recompute the signature over the
        # claimed payload and compare.
        # NOTE(review): assumes _sign_jwt is deterministic for a given payload
        # and that _validate_jwt rejects expired tokens - confirm. If re-signing
        # does not round-trip, only unexpired tokens renew (fails closed).
        expected_sig = _sign_jwt(payload).rsplit(".", 1)[-1]
        if not hmac.compare_digest(expected_sig.encode(), parts[2].encode()):
            raise HTTPException(401, "Invalid token signature")

    exp = payload.get("exp", 0)
    grace_window = 7 * 24 * 3600
    if not isinstance(exp, (int, float)) or exp < time.time() - grace_window:
        raise HTTPException(401, "Token too old to renew. Login again: kwiki login")
    login = payload.get("github_login") or payload.get("sub", "").removeprefix("github:")
    if not login:
        raise HTTPException(401, "Cannot identify user from token")
    now = int(time.time())
    new_payload = {
        "sub": f"github:{login}",
        "github_login": login,
        "email": payload.get("email"),
        "groups": payload.get("groups", []),
        "iss": "wiki-memory-service",
        "iat": now,
        "exp": now + JWT_EXPIRY_HOURS * 3600,
    }
    new_token = _sign_jwt(new_payload)
    return {"token": new_token, "github_login": login, "expires_in": JWT_EXPIRY_HOURS * 3600}
teams: list[str] = [] + + # Method 1: GET /user/teams (works when OAuth app has org access) + try: + url: str | None = "https://api.github.com/user/teams?per_page=100" + while url: + teams_resp = client.get(url, headers=headers) + logger.info("GET /user/teams -> %d", teams_resp.status_code) + if teams_resp.status_code != 200: + break + for team in teams_resp.json(): + org = team.get("organization", {}).get("login", "") + slug = team.get("slug", "") + if org and slug: + teams.append(f"{org}/{slug}") + link = teams_resp.headers.get("link", "") + url = None + if 'rel="next"' in link: + for part in link.split(","): + if 'rel="next"' in part: + url = part.split("<")[1].split(">")[0] + except Exception as e: + logger.warning("Failed /user/teams: %s", e) + + # Method 2: Use GraphQL to query team membership + if not teams: + acl_teams: set[str] = set() + for acl in _acl_cache.values(): + for entry in acl.writers + acl.readers + acl.admins: + if entry.startswith(f"github:team:{WIKI_GITHUB_ORG}/"): + team_slug = entry.removeprefix(f"github:team:{WIKI_GITHUB_ORG}/") + acl_teams.add(team_slug) + logger.info("Checking via GraphQL for user=%s org=%s teams=%s", username, WIKI_GITHUB_ORG, acl_teams) + for team_slug in acl_teams: + try: + query = """ + query($org: String!, $team: String!, $user: String!) 
{ + organization(login: $org) { + team(slug: $team) { + members(query: $user, first: 1) { + nodes { login } + } + } + } + } + """ + gql_resp = client.post( + "https://api.github.com/graphql", + json={"query": query, "variables": {"org": WIKI_GITHUB_ORG, "team": team_slug, "user": username}}, + headers=headers, + ) + logger.debug("GraphQL %s -> %d", team_slug, gql_resp.status_code) + if gql_resp.status_code == 200: + data = gql_resp.json() + team_data = (data.get("data") or {}).get("organization", {}).get("team") + if team_data: + members = team_data.get("members", {}).get("nodes", []) + if any(m.get("login", "").lower() == username.lower() for m in members): + teams.append(f"{WIKI_GITHUB_ORG}/{team_slug}") + logger.info("Team %s: user %s is MEMBER", team_slug, username) + else: + logger.debug("Team %s: user %s not a member", team_slug, username) + else: + errors = data.get("errors", []) + logger.warning("Team %s: no team data, errors=%s", team_slug, errors) + except Exception as e: + logger.warning("Failed team %s: %s", team_slug, e) + + logger.info("Resolved github identity for user=%s teams=%s", username, teams) + return user_info, teams + + +def _issue_wiki_jwt(user_info: dict, teams: list[str]) -> str: + """Issue a wiki-service JWT with GitHub identity and groups.""" + payload = { + "sub": f"github:{user_info['login']}", + "github_login": user_info["login"], + "email": user_info.get("email", ""), + "groups": teams, + "iss": "wiki-memory-service", + "iat": int(time.time()), + "exp": int(time.time()) + JWT_EXPIRY_HOURS * 3600, + } + return _sign_jwt(payload) + + +# --- Health --- +@app.get("/healthz") +def health(): + return {"status": "ok", "version": __version__, "topics": len(_acl_cache), "root": str(WIKI_ROOT)}