From ca0508b47eb21916e4e876c07d87a9bed0b2194c Mon Sep 17 00:00:00 2001
From: admin-raintree <admin@raintree.technology>
Date: Wed, 10 Jun 2026 14:20:27 -0700
Subject: [PATCH] feat(web): simplify copy, add GitHub source links, tighten
 mobile layout

- Features: cleaner descriptions, no SSRF/DNS/NDJSON jargon, GitHub
  source links on each card pointing to the relevant source file
- ParallelPacks: remove keyFlow command blocks and controls tag cloud,
  cut 5 workflows to 3, simplify decision card copy
- Hero: replace --strict-js-required footnote with one plain sentence
- HowItWorks: swap "RAG, offline archives, or skills" for plain language
- Profiles: reword all four profile descriptions in plain language
  (including the LLM profile), key cards by name instead of array index

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 web/components/Features.tsx      |  64 +++++++++++-----
 web/components/Hero.tsx          |   9 +--
 web/components/HowItWorks.tsx    |   2 +-
 web/components/ParallelPacks.tsx | 128 +++++++------------------------
 web/components/Profiles.tsx      |  14 ++--
 5 files changed, 82 insertions(+), 135 deletions(-)

diff --git a/web/components/Features.tsx b/web/components/Features.tsx
index 4c153ff..441df49 100644
--- a/web/components/Features.tsx
+++ b/web/components/Features.tsx
@@ -1,56 +1,78 @@
+const REPO = "https://github.com/raintree-technology/docpull/blob/main";
+
 const features = [
   {
-    title: "Markdown Agents Can Use",
+    title: "Clean Markdown, ready to use",
     description:
-      "Every page includes clean Markdown plus frontmatter for title, source URL, headings, and description. Drop it into RAG, search, or a skill directory.",
+      "Every page becomes Markdown with a frontmatter header — title, source URL, and description. Code blocks, tables, and images are preserved. Nav, footers, and cookie banners are stripped.",
+    srcPath: "src/docpull/conversion/extractor.py",
+    srcLabel: "extractor.py",
   },
   {
-    title: "No Duplicate Slop",
+    title: "No duplicates",
     description:
-      "Pages are SHA-256 hashed while they stream in, so duplicates are caught before they hit disk instead of cleaned up later.",
+      "Pages are content-hashed as they stream in — duplicates are caught before they touch disk.",
+    srcPath: "src/docpull/pipeline/steps/dedup.py",
+    srcLabel: "dedup.py",
   },
   {
-    title: "Safe for Agent-Chosen URLs",
+    title: "Safe for AI agents",
     description:
-      "HTTPS-only, robots.txt compliant, SSRF-protected, and DNS-pinned at connect time. Use --require-pinned-dns when proxy settings weaken that guarantee.",
+      "HTTPS-only, robots.txt compliant, and protected against URL-based attacks — necessary when an AI agent is choosing which URLs to fetch.",
+    srcPath: "src/docpull/security/url_validator.py",
+    srcLabel: "url_validator.py",
   },
   {
-    title: "Cheap to Re-run",
+    title: "Cheap to re-run",
     description:
-      "Cached pages use If-None-Match and If-Modified-Since. Re-runs fetch what changed, and saved frontier state lets interrupted crawls resume.",
+      "Only re-fetches pages that changed since the last run. Interrupted crawls resume where they left off.",
+    srcPath: "src/docpull/pipeline/steps/fetch.py",
+    srcLabel: "fetch.py",
   },
   {
-    title: "Crawl the Parts That Matter",
+    title: "Crawl only what matters",
     description:
-      "Include and exclude path globs during discovery, so your model gets the relevant docs instead of every route the site exposes.",
+      "Include and exclude URL patterns during discovery so your agent gets the relevant pages instead of every route the site exposes.",
+    srcPath: "src/docpull/discovery/filters.py",
+    srcLabel: "filters.py",
   },
   {
-    title: "Parallel Pack Workflows",
+    title: "Parallel search packs",
     description:
-      "Optional Parallel Search, Extract, Task, entity, batch, monitor, and API-spec workflows become local packs with AGENT_CONTEXT.md, source files, manifests, IDs, and usage metadata.",
+      "Optional integration with Parallel to find and extract live web sources, organized into a local pack with a load plan your agent can follow.",
+    srcPath: "src/docpull/parallel_workflows.py",
+    srcLabel: "parallel_workflows.py",
   },
-];
+] as const;
 
 export default function Features() {
   return (
     <section id="features" className="pt-16 sm:pt-32 pb-24 border-t">
       <div className="mx-auto max-w-5xl px-6">
         <div className="mb-12 text-center sm:text-left">
-          <h2 className="text-2xl font-medium mb-3">
-            <span>Features</span>
-          </h2>
+          <h2 className="text-2xl font-medium mb-3">Features</h2>
           <p className="text-muted-foreground">
-            The boring pieces that make documentation ingestion dependable.
+            The pieces that make documentation fetching dependable.
           </p>
         </div>
 
         <div className="grid grid-cols-1 sm:grid-cols-2 lg:grid-cols-3 gap-4 sm:gap-6">
-          {features.map((feature, index) => (
-            <div key={index} className="p-4 rounded-xl glass">
-              <h3 className="font-medium text-sm mb-1">{feature.title}</h3>
-              <p className="text-sm text-muted-foreground leading-relaxed">
+          {features.map((feature) => (
+            <div key={feature.title} className="p-4 rounded-xl glass flex flex-col gap-2">
+              <h3 className="font-medium text-sm">{feature.title}</h3>
+              <p className="text-sm text-muted-foreground leading-relaxed flex-1">
                 {feature.description}
               </p>
+              {feature.srcPath && (
+                <a
+                  href={`${REPO}/${feature.srcPath}`}
+                  target="_blank"
+                  rel="noopener noreferrer"
+                  className="text-[11px] font-mono text-muted-foreground/50 hover:text-muted-foreground transition-colors w-fit"
+                >
+                  {feature.srcLabel}
+                </a>
+              )}
             </div>
           ))}
         </div>
diff --git a/web/components/Hero.tsx b/web/components/Hero.tsx
index 427b5b8..bc8fab7 100644
--- a/web/components/Hero.tsx
+++ b/web/components/Hero.tsx
@@ -121,12 +121,9 @@ export default function Hero() {
             </div>
 
             <p className="mt-4 text-xs text-muted-foreground max-w-md leading-relaxed">
-              Best for static docs, API references, and server-rendered
-              sites. JS-rendered SPAs are detected and skipped — pass{" "}
-              <code className="font-mono text-[11px] bg-background/60 px-1 rounded">
-                --strict-js-required
-              </code>{" "}
-              to make that an error so your agent can route elsewhere.
+              Works with static docs, API references, and server-rendered
+              sites. JavaScript-heavy pages are detected and skipped
+              automatically.
             </p>
           </div>
 
diff --git a/web/components/HowItWorks.tsx b/web/components/HowItWorks.tsx
index 5cff6d9..bced776 100644
--- a/web/components/HowItWorks.tsx
+++ b/web/components/HowItWorks.tsx
@@ -134,7 +134,7 @@ export default function HowItWorks() {
             />
             <StepText
               title="Use"
-              desc="Use the Markdown in search, RAG, offline archives, or skills."
+              desc="Load the Markdown into your agent, search index, or skill directory."
               active={activeIdx === 2}
             />
           </div>
diff --git a/web/components/ParallelPacks.tsx b/web/components/ParallelPacks.tsx
index 695d72c..440391e 100644
--- a/web/components/ParallelPacks.tsx
+++ b/web/components/ParallelPacks.tsx
@@ -2,83 +2,50 @@ import Image from "next/image";
 
 const workflows = [
   {
-    title: "Discovery + Extract Packs",
+    title: "Discovery & research packs",
     command: "context-pack / discover-docs",
     description:
-      "Parallel discovers and extracts current web sources; docpull ranks candidates, writes crawl plans, AGENT_CONTEXT.md, Markdown, NDJSON chunks, source indexes, manifests, IDs, hashes, and usage metadata.",
+      "Parallel finds and extracts current web sources. docpull saves them locally as Markdown, structured records, source indexes, and an AGENT_CONTEXT.md load plan.",
   },
   {
-    title: "Fallback + Diff Packs",
-    command: "fallback-pack / diff-brief",
+    title: "API specs & entity research",
+    command: "api-pack / entity-pack",
     description:
-      "Try core docpull first, fall back to Parallel Extract only for misses, then score sources or send pack diffs through Parallel Task for change briefs.",
+      "Turn llms.txt files and OpenAPI specs into local packs, or build dossiers on companies, vendors, and research targets from Parallel's entity search.",
   },
   {
-    title: "Entity Dossiers",
-    command: "entity-pack / findall-pack",
+    title: "Diffs & change briefs",
+    command: "diff-brief / fallback-pack",
     description:
-      "Entity Search and FindAll become local candidate packs for companies, people, vendors, competitors, or research targets.",
-  },
-  {
-    title: "Batch + Monitor Packs",
-    command: "taskgroup-pack --wait / monitor-pack",
-    description:
-      "TaskGroup rows can wait for completed outputs, while Monitor create, list, retrieve, update, cancel, trigger, and event pages become reusable local artifacts.",
-  },
-  {
-    title: "API Context Packs",
-    command: "api-pack / pack score / pack sources",
-    description:
-      "Turn llms.txt and OpenAPI specs into docpull packs, then grade readiness, rank sources, or diff refreshed snapshots before agents load the context.",
+      "Compare two snapshots of a pack to see what changed, or fall back to Parallel Extract only for pages your local crawl missed.",
   },
 ] as const;
 
 const decisionCards = [
   {
-    title: "Use core docpull for known docs",
+    title: "Use docpull for known docs",
     description:
-      "Start with the local crawler when you already know the docs URL and want a same-domain Markdown mirror with no browser and no API key.",
+      "Start here when you already have the URL and want a clean Markdown mirror — no browser, no API key.",
     points: [
       "static docs and API references",
-      "RAG or skill-ready Markdown",
-      "repeatable site mirrors",
+      "search-ready or skill-ready Markdown",
+      "repeatable, offline-friendly archives",
     ],
   },
   {
-    title: "Use Parallel packs for web research",
+    title: "Add Parallel for web research",
     description:
-      "Add the Parallel layer when discovery, extraction, research, entities, or monitoring should happen before docpull writes local context artifacts and a load plan.",
+      "Use the Parallel layer when you need to find sources first, extract live content, or run entity and batch research before writing local context.",
     points: [
       "research packs from search queries",
-      "ranked docs discovery and crawl commands",
-      "cited source bundles for agents",
-      "AGENT_CONTEXT.md load plan",
-      "repeatable NDJSON, manifests, and source files",
-      "API-doc or vendor comparison research",
-      "fallback, diff, task, entity, batch, and monitor workflows",
+      "ranked docs discovery with crawl plans",
+      "cited source bundles with a load plan",
+      "API-doc and vendor comparison research",
+      "diffs, entity dossiers, and batch workflows",
     ],
   },
 ] as const;
 
-const keyFlow = [
-  "pip install 'docpull[parallel]'",
-  "docpull parallel init",
-  "docpull parallel auth --json",
-  "docpull parallel init --project",
-  "docpull parallel context-pack ... --dry-run --max-estimated-cost 0.05",
-] as const;
-
-const controls = [
-  "--dry-run",
-  "--max-estimated-cost",
-  "--include-domain / --exclude-domain",
-  "--after-date",
-  "--fetch-max-age-seconds",
-  "--excerpt-chars-per-result",
-  "--client-model",
-  "pack sources",
-] as const;
-
 export default function ParallelPacks() {
   return (
     <section id="parallel" className="py-16 sm:py-24 border-t">
@@ -102,11 +69,11 @@ export default function ParallelPacks() {
               context packs
             </span>
           </h2>
-          <p className="text-sm sm:text-base text-muted-foreground max-w-3xl">
-            Parallel is the optional source-discovery and research layer. Use
-            core docpull to mirror a known docs site; use Parallel when an agent
-            needs current web sources found, extracted, scored, and packaged
-            into a local context pack before it starts work.
+          <p className="text-sm sm:text-base text-muted-foreground max-w-2xl">
+            Parallel is an optional source-discovery layer. Use docpull when
+            you already know the URL. Add Parallel when an agent needs to find
+            sources, extract live content, and package everything into a local
+            context pack before it starts work.
           </p>
         </div>
 
@@ -123,7 +90,10 @@ export default function ParallelPacks() {
                     key={point}
                     className="flex gap-2 text-xs text-muted-foreground leading-relaxed"
                   >
-                    <span aria-hidden="true" className="mt-1.5 h-1 w-1 shrink-0 rounded-full bg-foreground/50" />
+                    <span
+                      aria-hidden="true"
+                      className="mt-1.5 h-1 w-1 shrink-0 rounded-full bg-foreground/50"
+                    />
                     <span>{point}</span>
                   </li>
                 ))}
@@ -132,49 +102,7 @@ export default function ParallelPacks() {
           ))}
         </div>
 
-        <div className="grid grid-cols-1 lg:grid-cols-[1.1fr_0.9fr] gap-4 sm:gap-6 mb-4 sm:mb-6">
-          <div className="p-4 sm:p-5 rounded-xl glass">
-            <h3 className="font-medium text-sm mb-3">API key flow</h3>
-            <div className="space-y-2">
-              {keyFlow.map((command) => (
-                <code
-                  key={command}
-                  className="block px-3 py-2 bg-background/60 rounded-md text-xs font-mono text-muted-foreground overflow-x-auto"
-                >
-                  {command}
-                </code>
-              ))}
-            </div>
-            <p className="mt-3 text-xs text-muted-foreground leading-relaxed">
-              Keys live in the environment, user config, or project .env.local.
-              docpull does not echo{" "}
-              <code className="font-mono text-[11px]">PARALLEL_API_KEY</code>,
-              but pack artifacts can include source content, task inputs,
-              outputs, and metadata.
-            </p>
-          </div>
-
-          <div className="p-4 sm:p-5 rounded-xl glass">
-            <h3 className="font-medium text-sm mb-3">Cost and source controls</h3>
-            <div className="flex flex-wrap gap-2">
-              {controls.map((control) => (
-                <code
-                  key={control}
-                  className="px-2.5 py-1.5 bg-background/60 rounded-md text-[11px] font-mono text-muted-foreground"
-                >
-                  {control}
-                </code>
-              ))}
-            </div>
-            <p className="mt-3 text-xs text-muted-foreground leading-relaxed">
-              Dry runs estimate spend before live calls, domain filters pin the
-              source policy, and AGENT_CONTEXT.md gives agents a deterministic
-              load order before they inspect deeper metadata.
-            </p>
-          </div>
-        </div>
-
-        <div className="grid grid-cols-1 sm:grid-cols-2 gap-3 sm:gap-4">
+        <div className="grid grid-cols-1 sm:grid-cols-3 gap-3 sm:gap-4">
           {workflows.map((workflow) => (
             <div key={workflow.title} className="p-4 rounded-xl glass">
               <div className="flex flex-wrap items-baseline justify-between gap-2 mb-2">
diff --git a/web/components/Profiles.tsx b/web/components/Profiles.tsx
index 2c2408a..1ab8a77 100644
--- a/web/components/Profiles.tsx
+++ b/web/components/Profiles.tsx
@@ -1,23 +1,23 @@
 const profiles = [
   {
     name: "RAG",
-    description: "Clean Markdown with metadata and deduping for retrieval.",
+    description: "Clean Markdown with metadata and deduplication for search and retrieval.",
     example: "docpull URL --profile rag",
   },
   {
     name: "Mirror",
-    description: "A fuller local archive with cache, resume, and stable paths.",
+    description: "A full local archive with caching, resume on interrupt, and stable file paths.",
     example: "docpull URL --profile mirror",
   },
   {
     name: "Quick",
-    description: "A 50-page sample when you need to inspect output first.",
+    description: "A 50-page sample when you want to inspect output before committing to a full crawl.",
     example: "docpull URL --profile quick",
   },
   {
     name: "LLM",
     description:
-      "Token-aware NDJSON chunks that skip JS-only pages unless strict mode is enabled.",
+      "Chunked, streaming records sized for language model context windows. JavaScript-only pages are skipped unless strict mode is on.",
     example: "docpull URL --profile llm --stream | jq .",
   },
 ];
@@ -28,7 +28,7 @@ export default function Profiles() {
       <div className="mx-auto max-w-5xl px-6">
         <div className="mb-8 sm:mb-12 text-center sm:text-left">
           <h2 className="text-xl sm:text-2xl font-medium mb-2 sm:mb-3">
-            <span>Profiles</span>
+            Profiles
           </h2>
           <p className="text-sm sm:text-base text-muted-foreground">
             Choose the output shape before you crawl.
@@ -36,8 +36,8 @@ export default function Profiles() {
         </div>
 
         <div className="grid grid-cols-1 sm:grid-cols-2 gap-3 sm:gap-4">
-          {profiles.map((profile, index) => (
-            <div key={index} className="p-4 rounded-xl glass">
+          {profiles.map((profile) => (
+            <div key={profile.name} className="p-4 rounded-xl glass">
               <h3 className="font-medium mb-2">{profile.name}</h3>
               <p className="text-sm text-muted-foreground mb-3">
                 {profile.description}