From ca0508b47eb21916e4e876c07d87a9bed0b2194c Mon Sep 17 00:00:00 2001 From: admin-raintree Date: Wed, 10 Jun 2026 14:20:27 -0700 Subject: [PATCH] feat(web): simplify copy, add GitHub source links, tighten mobile - Features: cleaner descriptions, no SSRF/DNS/NDJSON jargon, GitHub source links on each card pointing to the relevant source file - ParallelPacks: remove keyFlow command blocks and controls tag cloud, cut 5 workflows to 3, simplify decision card copy - Hero: replace --strict-js-required footnote with one plain sentence - HowItWorks: swap "RAG, offline archives, or skills" for plain language - Profiles: simplify LLM profile description Co-Authored-By: Claude Sonnet 4.6 --- web/components/Features.tsx | 64 +++++++++++----- web/components/Hero.tsx | 9 +-- web/components/HowItWorks.tsx | 2 +- web/components/ParallelPacks.tsx | 128 +++++++------------------------ web/components/Profiles.tsx | 14 ++-- 5 files changed, 82 insertions(+), 135 deletions(-) diff --git a/web/components/Features.tsx b/web/components/Features.tsx index 4c153ff..441df49 100644 --- a/web/components/Features.tsx +++ b/web/components/Features.tsx @@ -1,56 +1,78 @@ +const REPO = "https://github.com/raintree-technology/docpull/blob/main"; + const features = [ { - title: "Markdown Agents Can Use", + title: "Clean Markdown, ready to use", description: - "Every page includes clean Markdown plus frontmatter for title, source URL, headings, and description. Drop it into RAG, search, or a skill directory.", + "Every page becomes Markdown with a frontmatter header — title, source URL, and description. Code blocks, tables, and images are preserved. Nav, footers, and cookie banners are stripped.", + srcPath: "src/docpull/conversion/extractor.py", + srcLabel: "extractor.py", }, { - title: "No Duplicate Slop", + title: "No duplicates", description: - "Pages are SHA-256 hashed while they stream in, so duplicates are caught before they hit disk instead of cleaned up later.", + "Pages are content-hashed as they stream in — duplicates are caught before they touch disk.", + srcPath: "src/docpull/pipeline/steps/dedup.py", + srcLabel: "dedup.py", }, { - title: "Safe for Agent-Chosen URLs", + title: "Safe for AI agents", description: - "HTTPS-only, robots.txt compliant, SSRF-protected, and DNS-pinned at connect time. Use --require-pinned-dns when proxy settings weaken that guarantee.", + "HTTPS-only, robots.txt compliant, and protected against URL-based attacks — necessary when an AI agent is choosing which URLs to fetch.", + srcPath: "src/docpull/security/url_validator.py", + srcLabel: "url_validator.py", }, { - title: "Cheap to Re-run", + title: "Cheap to re-run", description: - "Cached pages use If-None-Match and If-Modified-Since. Re-runs fetch what changed, and saved frontier state lets interrupted crawls resume.", + "Only re-fetches pages that changed since the last run. Interrupted crawls resume where they left off.", + srcPath: "src/docpull/pipeline/steps/fetch.py", + srcLabel: "fetch.py", }, { - title: "Crawl the Parts That Matter", + title: "Crawl only what matters", description: - "Include and exclude path globs during discovery, so your model gets the relevant docs instead of every route the site exposes.", + "Include and exclude URL patterns during discovery so your agent gets the relevant pages instead of every route the site exposes.", + srcPath: "src/docpull/discovery/filters.py", + srcLabel: "filters.py", }, { - title: "Parallel Pack Workflows", + title: "Parallel search packs", description: - "Optional Parallel Search, Extract, Task, entity, batch, monitor, and API-spec workflows become local packs with AGENT_CONTEXT.md, source files, manifests, IDs, and usage metadata.", + "Optional integration with Parallel to find and extract live web sources, organized into a local pack with a load plan your agent can follow.", + srcPath: "src/docpull/parallel_workflows.py", + srcLabel: "parallel_workflows.py", }, -]; +] as const; export default function Features() { return (
-

- Features -

+

Features

- The boring pieces that make documentation ingestion dependable. + The pieces that make documentation fetching dependable.

- {features.map((feature, index) => ( -
-

{feature.title}

-

+ {features.map((feature) => ( +

+

{feature.title}

+

{feature.description}

+ {feature.srcPath && ( + + {feature.srcLabel} + + )}
))}
diff --git a/web/components/Hero.tsx b/web/components/Hero.tsx index 427b5b8..bc8fab7 100644 --- a/web/components/Hero.tsx +++ b/web/components/Hero.tsx @@ -121,12 +121,9 @@ export default function Hero() {

- Best for static docs, API references, and server-rendered - sites. JS-rendered SPAs are detected and skipped — pass{" "} - - --strict-js-required - {" "} - to make that an error so your agent can route elsewhere. + Works with static docs, API references, and server-rendered + sites. JavaScript-heavy pages are detected and skipped + automatically.

diff --git a/web/components/HowItWorks.tsx b/web/components/HowItWorks.tsx index 5cff6d9..bced776 100644 --- a/web/components/HowItWorks.tsx +++ b/web/components/HowItWorks.tsx @@ -134,7 +134,7 @@ export default function HowItWorks() { /> diff --git a/web/components/ParallelPacks.tsx b/web/components/ParallelPacks.tsx index 695d72c..440391e 100644 --- a/web/components/ParallelPacks.tsx +++ b/web/components/ParallelPacks.tsx @@ -2,83 +2,50 @@ import Image from "next/image"; const workflows = [ { - title: "Discovery + Extract Packs", + title: "Discovery & research packs", command: "context-pack / discover-docs", description: - "Parallel discovers and extracts current web sources; docpull ranks candidates, writes crawl plans, AGENT_CONTEXT.md, Markdown, NDJSON chunks, source indexes, manifests, IDs, hashes, and usage metadata.", + "Parallel finds and extracts current web sources. docpull saves them locally as Markdown, structured records, source indexes, and an AGENT_CONTEXT.md load plan.", }, { - title: "Fallback + Diff Packs", - command: "fallback-pack / diff-brief", + title: "API specs & entity research", + command: "api-pack / entity-pack", description: - "Try core docpull first, fall back to Parallel Extract only for misses, then score sources or send pack diffs through Parallel Task for change briefs.", + "Turn llms.txt files and OpenAPI specs into local packs, or build dossiers on companies, vendors, and research targets from Parallel's entity search.", }, { - title: "Entity Dossiers", - command: "entity-pack / findall-pack", + title: "Diffs & change briefs", + command: "diff-brief / fallback-pack", description: - "Entity Search and FindAll become local candidate packs for companies, people, vendors, competitors, or research targets.", - }, - { - title: "Batch + Monitor Packs", - command: "taskgroup-pack --wait / monitor-pack", - description: - "TaskGroup rows can wait for completed outputs, while Monitor create, list, retrieve, update, cancel, trigger, and event pages become reusable local artifacts.", - }, - { - title: "API Context Packs", - command: "api-pack / pack score / pack sources", - description: - "Turn llms.txt and OpenAPI specs into docpull packs, then grade readiness, rank sources, or diff refreshed snapshots before agents load the context.", + "Compare two snapshots of a pack to see what changed, or fall back to Parallel Extract only for pages your local crawl missed.", }, ] as const; const decisionCards = [ { - title: "Use core docpull for known docs", + title: "Use docpull for known docs", description: - "Start with the local crawler when you already know the docs URL and want a same-domain Markdown mirror with no browser and no API key.", + "Start here when you already have the URL and want a clean Markdown mirror — no browser, no API key.", points: [ "static docs and API references", - "RAG or skill-ready Markdown", - "repeatable site mirrors", + "search-ready or skill-ready Markdown", + "repeatable, offline-friendly archives", ], }, { - title: "Use Parallel packs for web research", + title: "Add Parallel for web research", description: - "Add the Parallel layer when discovery, extraction, research, entities, or monitoring should happen before docpull writes local context artifacts and a load plan.", + "Use the Parallel layer when you need to find sources first, extract live content, or run entity and batch research before writing local context.", points: [ "research packs from search queries", - "ranked docs discovery and crawl commands", - "cited source bundles for agents", - "AGENT_CONTEXT.md load plan", - "repeatable NDJSON, manifests, and source files", - "API-doc or vendor comparison research", - "fallback, diff, task, entity, batch, and monitor workflows", + "ranked docs discovery with crawl plans", + "cited source bundles with a load plan", + "API-doc and vendor comparison research", + "diffs, entity dossiers, and batch workflows", ], }, ] as const; -const keyFlow = [ - "pip install 'docpull[parallel]'", - "docpull parallel init", - "docpull parallel auth --json", - "docpull parallel init --project", - "docpull parallel context-pack ... --dry-run --max-estimated-cost 0.05", -] as const; - -const controls = [ - "--dry-run", - "--max-estimated-cost", - "--include-domain / --exclude-domain", - "--after-date", - "--fetch-max-age-seconds", - "--excerpt-chars-per-result", - "--client-model", - "pack sources", -] as const; - export default function ParallelPacks() { return (
@@ -102,11 +69,11 @@ export default function ParallelPacks() { context packs -

- Parallel is the optional source-discovery and research layer. Use - core docpull to mirror a known docs site; use Parallel when an agent - needs current web sources found, extracted, scored, and packaged - into a local context pack before it starts work. +

+ Parallel is an optional source-discovery layer. Use docpull when + you already know the URL. Add Parallel when an agent needs to find + sources, extract live content, and package everything into a local + context pack before it starts work.

@@ -123,7 +90,10 @@ export default function ParallelPacks() { key={point} className="flex gap-2 text-xs text-muted-foreground leading-relaxed" > -