41 changes: 41 additions & 0 deletions human-fallback/.gitignore
@@ -0,0 +1,41 @@
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.

# dependencies
/node_modules
/.pnp
.pnp.*
.yarn/*
!.yarn/patches
!.yarn/plugins
!.yarn/releases
!.yarn/versions

# testing
/coverage

# next.js
/.next/
/out/

# production
/build

# misc
.DS_Store
*.pem

# debug
npm-debug.log*
yarn-debug.log*
yarn-error.log*
.pnpm-debug.log*

# env files (can opt-in for committing if needed)
.env*

# vercel
.vercel

# typescript
*.tsbuildinfo
next-env.d.ts
74 changes: 74 additions & 0 deletions human-fallback/README.md
@@ -0,0 +1,74 @@
# Human Fallback — Scrape Any Page with a Human Safety Net

Scrape any webpage using [Hyperbrowser](https://hyperbrowser.ai). When automation is blocked by CAPTCHAs, login walls, or other anti-bot measures, the app automatically falls back to [Human Pages](https://humanpages.ai) to hire a real person to complete the task.

## Features

- **Automated-first**: Hyperbrowser stealth browser handles most pages instantly
- **Smart detection**: Detects CAPTCHAs, Cloudflare challenges, login walls, and empty responses
- **Human fallback**: When automation fails, hires a real human via the Human Pages API
- **Live progress**: Real-time step-by-step status updates streamed to the UI
- **Workflow visualization**: See exactly which path (bot or human) resolved your request

## How It Works

1. You enter a URL and click **Scrape**
2. The app launches a stealth Hyperbrowser session and navigates to the page
3. If the page loads successfully, the extracted text is returned immediately
4. If the page is blocked (CAPTCHA, login wall, etc.), the app:
- Searches Human Pages for available humans with web task skills
- Creates a job offer with a description of what to scrape
- Polls for completion and returns the result when the human finishes
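The block-detection step above can be sketched as a small heuristic. Note that the marker strings and length threshold below are illustrative assumptions, not the exact checks the app's `lib/scrape` performs:

```typescript
// Hypothetical sketch of block detection. The real checks live in
// lib/scrape; the marker list and threshold here are assumptions.
const BLOCK_MARKERS = [
  'captcha',
  'cf-challenge',          // Cloudflare interstitial
  'verify you are human',
  'sign in to continue',   // login wall
];

function looksBlocked(pageText: string): boolean {
  // Treat a near-empty response as a block too.
  if (pageText.trim().length < 50) return true;
  const lower = pageText.toLowerCase();
  return BLOCK_MARKERS.some((marker) => lower.includes(marker));
}
```

When `looksBlocked` returns `true`, the app moves to the Human Pages fallback path instead of returning the (useless) page text.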

## Getting Started

1. **Get Your API Keys**
- [Hyperbrowser](https://hyperbrowser.ai) — sign up and get an API key
- [Human Pages](https://humanpages.ai) — register an agent and get an agent key

2. **Setup**
```bash
# Install dependencies
npm install

# Create .env.local with your keys
cat > .env.local << EOF
HYPERBROWSER_API_KEY=your_hyperbrowser_key_here
HUMANPAGES_API_KEY=your_humanpages_key_here
EOF

# Start the dev server
npm run dev
```

3. **Open** `http://localhost:3000` and enter a URL to scrape.

## Environment Variables

| Variable | Required | Description |
|---|---|---|
| `HYPERBROWSER_API_KEY` | Yes | Your Hyperbrowser API key |
| `HUMANPAGES_API_KEY` | No | Your Human Pages agent key (enables human fallback) |

The app works without `HUMANPAGES_API_KEY` — it will just skip the human fallback step and report that the scrape failed.

## Human Pages API

This example uses the Human Pages REST API:

| Endpoint | Method | Description |
|---|---|---|
| `/api/humans/search?skill=web+task&available=true` | GET | Find available humans |
| `/api/jobs` | POST | Create a job offer |
| `/api/jobs/{jobId}` | GET | Check job status |
| `/api/jobs/{jobId}/messages` | GET | Read job messages |

All requests use the `X-Agent-Key` header for authentication.
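A minimal sketch of an authenticated call to the search endpoint, assuming `https://humanpages.ai` as the API host (adjust if the real base URL differs):

```typescript
// Minimal sketch of calling the Human Pages search endpoint.
// The base URL is an assumption; adjust to the real API host.
const HP_BASE = 'https://humanpages.ai';

function searchUrl(skill: string): string {
  const params = new URLSearchParams({ skill, available: 'true' });
  return `${HP_BASE}/api/humans/search?${params}`;
}

async function findHumans(agentKey: string) {
  const res = await fetch(searchUrl('web task'), {
    // Every Human Pages request authenticates with this header.
    headers: { 'X-Agent-Key': agentKey },
  });
  if (!res.ok) throw new Error(`Search failed: ${res.status}`);
  return res.json();
}
```

`URLSearchParams` encodes the space in `web task` as `+`, producing exactly the `skill=web+task&available=true` query shown in the table above.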

## Tech Stack

- **Next.js 14** (App Router)
- **Tailwind CSS** for styling
- **@hyperbrowser/sdk** + **puppeteer-core** for browser automation
- **Human Pages REST API** for human fallback
- **Server-Sent Events** for real-time progress streaming
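The scrape route streams progress as Server-Sent Events, each frame shaped like `data: {json}\n\n`. A sketch of how a client might consume that stream (the event `type` values mirror the ones the route in this example emits; the exact UI wiring is up to you):

```typescript
// Sketch of consuming the /api/scrape SSE stream from the browser.
type ScrapeEvent = { type: string } & Record<string, unknown>;

// Each SSE frame arrives as "data: {json}"; parse a single frame line.
function parseSseLine(line: string): ScrapeEvent | null {
  if (!line.startsWith('data: ')) return null;
  return JSON.parse(line.slice('data: '.length));
}

async function scrape(url: string, onEvent: (e: ScrapeEvent) => void) {
  const res = await fetch('/api/scrape', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ url }),
  });
  const reader = res.body!.getReader();
  const decoder = new TextDecoder();
  let buffer = '';
  for (;;) {
    const { done, value } = await reader.read();
    if (done) break;
    buffer += decoder.decode(value, { stream: true });
    const frames = buffer.split('\n\n');
    buffer = frames.pop() ?? ''; // keep any partial frame for the next chunk
    for (const frame of frames) {
      const event = parseSseLine(frame.trim());
      if (event) onEvent(event);
    }
  }
}
```

Buffering on `\n\n` matters: a network chunk can end mid-frame, so the trailing partial frame is carried over rather than parsed.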
250 changes: 250 additions & 0 deletions human-fallback/app/api/scrape/route.ts
@@ -0,0 +1,250 @@
import { NextRequest, NextResponse } from 'next/server';
import { scrapePage } from '../../../lib/scrape';
import { searchHumans, createJob, getJobStatus } from '../../../lib/humanpages';

export async function POST(request: NextRequest) {
try {
const body = await request.json();
const { url } = body;

const hbKey = process.env.HYPERBROWSER_API_KEY;
const hpKey = process.env.HUMANPAGES_API_KEY;

if (!hbKey) {
return NextResponse.json(
{ error: 'HYPERBROWSER_API_KEY environment variable is not set' },
{ status: 500 }
);
}

if (!url) {
return NextResponse.json(
{ error: 'Missing required field: url' },
{ status: 400 }
);
}

// Validate URL
try {
new URL(url);
} catch {
return NextResponse.json(
{ error: 'Invalid URL provided' },
{ status: 400 }
);
}

// Stream progress updates to the client
const encoder = new TextEncoder();

const stream = new ReadableStream({
async start(controller) {
const send = (type: string, data: Record<string, any>) => {
controller.enqueue(
encoder.encode(`data: ${JSON.stringify({ type, ...data })}\n\n`)
);
};

const progress = (message: string) => send('progress', { message });

try {
// ── Step 1: Try Hyperbrowser ──────────────────────────────
send('step', { step: 'scrape', status: 'active' });
progress('Starting automated scrape with Hyperbrowser...');

const result = await scrapePage({
url,
apiKey: hbKey,
onProgress: progress,
});

if (result.success) {
send('step', { step: 'scrape', status: 'success' });
progress('Scrape completed successfully!');
send('result', {
source: 'hyperbrowser',
title: result.title,
content: result.content.slice(0, 5000), // Cap for display
url: result.url,
});
controller.close();
return;
}

// ── Step 2: Scrape failed — try Human Pages ──────────────
send('step', { step: 'scrape', status: 'failed' });
progress(
`Automated scrape failed: ${result.failureReason}. Falling back to Human Pages...`
);

if (!hpKey) {
send('step', { step: 'human', status: 'failed' });
progress(
'HUMANPAGES_API_KEY not set — cannot fall back to human. Set it in .env.local to enable fallback.'
);
send('result', {
source: 'none',
error: 'Scrape failed and Human Pages fallback is not configured.',
failureReason: result.failureReason,
});
controller.close();
return;
}

send('step', { step: 'human', status: 'active' });

// Search for an available human
progress('Searching for available humans with web task skills...');
let humans;
try {
humans = await searchHumans(hpKey);
} catch (err) {
const msg = err instanceof Error ? err.message : 'Unknown error';
progress(`Human search failed: ${msg}`);
send('step', { step: 'human', status: 'failed' });
send('result', {
source: 'none',
error: `Scrape failed and human search also failed: ${msg}`,
failureReason: result.failureReason,
});
controller.close();
return;
}

if (!humans || humans.length === 0) {
progress('No humans available right now. Try again later.');
send('step', { step: 'human', status: 'failed' });
send('result', {
source: 'none',
error: 'Scrape failed and no humans are currently available.',
failureReason: result.failureReason,
});
controller.close();
return;
}

const human = humans[0];
progress(
`Found human: ${human.name || human.id} (rating: ${human.rating ?? 'N/A'})`
);

// Create a job for the human
progress('Creating job offer...');
let job;
try {
job = await createJob(hpKey, {
humanId: human.id,
title: `Scrape page: ${url}`,
description: [
`Please visit this URL and extract the full visible text content:`,
``,
`${url}`,
``,
`The automated scraper was blocked (reason: ${result.failureReason}).`,
`Please copy and paste all visible text from the page.`,
].join('\n'),
priceUsdc: 0.25,
deadlineHours: 1,
});
} catch (err) {
const msg = err instanceof Error ? err.message : 'Unknown error';
progress(`Job creation failed: ${msg}`);
send('step', { step: 'human', status: 'failed' });
send('result', {
source: 'none',
error: `Scrape failed and job creation also failed: ${msg}`,
failureReason: result.failureReason,
});
controller.close();
return;
}

progress(`Job created: ${job.id}. Waiting for human to accept...`);

// Poll for job completion (up to 60 seconds for demo purposes)
const maxPolls = 12;
const pollInterval = 5000;
let completed = false;

for (let i = 0; i < maxPolls; i++) {
await new Promise(resolve => setTimeout(resolve, pollInterval));

try {
const status = await getJobStatus(hpKey, job.id);
progress(`Job status: ${status.status}`);

if (status.status === 'completed' && status.result) {
completed = true;
send('step', { step: 'human', status: 'success' });
progress('Human completed the task!');
send('result', {
source: 'humanpages',
title: `Scraped by human: ${url}`,
content: status.result.slice(0, 5000),
url,
jobId: job.id,
humanId: human.id,
});
controller.close();
return;
}

if (status.status === 'cancelled') {
progress('Job was cancelled by the human.');
break;
}
} catch {
progress('Error checking job status, will retry...');
}
}

if (!completed) {
send('step', { step: 'human', status: 'pending' });
progress(
'Job is still in progress. Check back later using the job ID.'
);
send('result', {
source: 'humanpages-pending',
jobId: job.id,
humanId: human.id,
message:
'A human has been hired but has not yet completed the task. The job will continue in the background.',
url,
});
}

controller.close();
} catch (error) {
const errorMessage =
error instanceof Error ? error.message : 'Unknown error';
send('error', { error: errorMessage });
controller.close();
}
},
});

return new Response(stream, {
headers: {
'Content-Type': 'text/event-stream',
'Cache-Control': 'no-cache',
Connection: 'keep-alive',
},
});
} catch (error) {
console.error('Error in scrape API:', error);
return NextResponse.json(
{ error: 'Internal server error' },
{ status: 500 }
);
}
}

export async function GET() {
return NextResponse.json({
message: 'Human Fallback Scrape Endpoint',
method: 'POST',
body: { url: 'https://example.com' },
description:
'Tries Hyperbrowser first. If blocked, falls back to Human Pages to hire a real human.',
});
}