From 5f9b93c35bc35e287d6b1b24a73a13d7c958efee Mon Sep 17 00:00:00 2001 From: human-pages-ai Date: Tue, 21 Apr 2026 17:00:20 +0700 Subject: [PATCH] Add human-fallback example: Hyperbrowser + Human Pages When Hyperbrowser hits a CAPTCHA, login wall, or Cloudflare challenge, this example automatically delegates the task to a real human via Human Pages and returns their result. Co-Authored-By: Claude Opus 4.6 --- human-fallback/.gitignore | 41 +++ human-fallback/README.md | 74 ++++ human-fallback/app/api/scrape/route.ts | 250 +++++++++++++ human-fallback/app/components/LogConsole.tsx | 50 +++ .../app/components/StepIndicator.tsx | 73 ++++ human-fallback/app/globals.css | 108 ++++++ human-fallback/app/layout.tsx | 29 ++ human-fallback/app/page.tsx | 333 ++++++++++++++++++ human-fallback/lib/humanpages.ts | 110 ++++++ human-fallback/lib/scrape.ts | 150 ++++++++ human-fallback/next.config.js | 57 +++ human-fallback/package.json | 30 ++ human-fallback/postcss.config.mjs | 9 + human-fallback/tailwind.config.js | 42 +++ human-fallback/tsconfig.json | 27 ++ 15 files changed, 1383 insertions(+) create mode 100644 human-fallback/.gitignore create mode 100644 human-fallback/README.md create mode 100644 human-fallback/app/api/scrape/route.ts create mode 100644 human-fallback/app/components/LogConsole.tsx create mode 100644 human-fallback/app/components/StepIndicator.tsx create mode 100644 human-fallback/app/globals.css create mode 100644 human-fallback/app/layout.tsx create mode 100644 human-fallback/app/page.tsx create mode 100644 human-fallback/lib/humanpages.ts create mode 100644 human-fallback/lib/scrape.ts create mode 100644 human-fallback/next.config.js create mode 100644 human-fallback/package.json create mode 100644 human-fallback/postcss.config.mjs create mode 100644 human-fallback/tailwind.config.js create mode 100644 human-fallback/tsconfig.json diff --git a/human-fallback/.gitignore b/human-fallback/.gitignore new file mode 100644 index 0000000..5ef6a52 --- /dev/null 
+++ b/human-fallback/.gitignore @@ -0,0 +1,41 @@ +# See https://help.github.com/articles/ignoring-files/ for more about ignoring files. + +# dependencies +/node_modules +/.pnp +.pnp.* +.yarn/* +!.yarn/patches +!.yarn/plugins +!.yarn/releases +!.yarn/versions + +# testing +/coverage + +# next.js +/.next/ +/out/ + +# production +/build + +# misc +.DS_Store +*.pem + +# debug +npm-debug.log* +yarn-debug.log* +yarn-error.log* +.pnpm-debug.log* + +# env files (can opt-in for committing if needed) +.env* + +# vercel +.vercel + +# typescript +*.tsbuildinfo +next-env.d.ts diff --git a/human-fallback/README.md b/human-fallback/README.md new file mode 100644 index 0000000..7ba6ddf --- /dev/null +++ b/human-fallback/README.md @@ -0,0 +1,74 @@ +# Human Fallback — Scrape Any Page with a Human Safety Net + +Scrape any webpage using [Hyperbrowser](https://hyperbrowser.ai). When automation is blocked by CAPTCHAs, login walls, or other anti-bot measures, the app automatically falls back to [Human Pages](https://humanpages.ai) to hire a real person to complete the task. + +## Features + +- **Automated-first**: Hyperbrowser stealth browser handles most pages instantly +- **Smart detection**: Detects CAPTCHAs, Cloudflare challenges, login walls, and empty responses +- **Human fallback**: When automation fails, hires a real human via the Human Pages API +- **Live progress**: Real-time step-by-step status updates streamed to the UI +- **Workflow visualization**: See exactly which path (bot or human) resolved your request + +## How It Works + +1. You enter a URL and click **Scrape** +2. The app launches a stealth Hyperbrowser session and navigates to the page +3. If the page loads successfully, the extracted text is returned immediately +4. 
If the page is blocked (CAPTCHA, login wall, etc.), the app: + - Searches Human Pages for available humans with web task skills + - Creates a job offer with a description of what to scrape + - Polls for completion and returns the result when the human finishes + +## Getting Started + +1. **Get Your API Keys** + - [Hyperbrowser](https://hyperbrowser.ai) — sign up and get an API key + - [Human Pages](https://humanpages.ai) — register an agent and get an agent key + +2. **Setup** + ```bash + # Install dependencies + npm install + + # Create .env.local with your keys + cat > .env.local << EOF + HYPERBROWSER_API_KEY=your_hyperbrowser_key_here + HUMANPAGES_API_KEY=your_humanpages_key_here + EOF + + # Start the dev server + npm run dev + ``` + +3. **Open** `http://localhost:3000` and enter a URL to scrape. + +## Environment Variables + +| Variable | Required | Description | +|---|---|---| +| `HYPERBROWSER_API_KEY` | Yes | Your Hyperbrowser API key | +| `HUMANPAGES_API_KEY` | No | Your Human Pages agent key (enables human fallback) | + +The app works without `HUMANPAGES_API_KEY` — it will just skip the human fallback step and report that the scrape failed. + +## Human Pages API + +This example uses the Human Pages REST API: + +| Endpoint | Method | Description | +|---|---|---| +| `/api/humans/search?skill=web+task&available=true` | GET | Find available humans | +| `/api/jobs` | POST | Create a job offer | +| `/api/jobs/{jobId}` | GET | Check job status | +| `/api/jobs/{jobId}/messages` | GET | Read job messages | + +All requests use the `X-Agent-Key` header for authentication. 
+
+## Tech Stack
+
+- **Next.js 14** (App Router)
+- **Tailwind CSS** for styling
+- **@hyperbrowser/sdk** + **puppeteer-core** for browser automation
+- **Human Pages REST API** for human fallback
+- **Server-Sent Events** for real-time progress streaming
diff --git a/human-fallback/app/api/scrape/route.ts b/human-fallback/app/api/scrape/route.ts
new file mode 100644
index 0000000..c4a0d36
--- /dev/null
+++ b/human-fallback/app/api/scrape/route.ts
@@ -0,0 +1,250 @@
+import { NextRequest, NextResponse } from 'next/server';
+import { scrapePage } from '../../../lib/scrape';
+import { searchHumans, createJob, getJobStatus } from '../../../lib/humanpages';
+
+/**
+ * POST /api/scrape: scrape `url` with Hyperbrowser and, when that fails, hire a
+ * human through Human Pages to do it instead.
+ *
+ * Configuration and validation problems are answered with a JSON error (500 or
+ * 400). Otherwise the response is a text/event-stream of `data: {...}` events
+ * with `type` set to `step`, `progress`, `result` or `error`; each path through
+ * the stream ends by sending a `result` or `error` event and closing it.
+ */
+export async function POST(request: NextRequest) {
+  try {
+    const body = await request.json();
+    const { url } = body;
+
+    const hbKey = process.env.HYPERBROWSER_API_KEY;
+    const hpKey = process.env.HUMANPAGES_API_KEY;
+
+    if (!hbKey) {
+      return NextResponse.json(
+        { error: 'HYPERBROWSER_API_KEY environment variable is not set' },
+        { status: 500 }
+      );
+    }
+
+    if (!url) {
+      return NextResponse.json(
+        { error: 'Missing required field: url' },
+        { status: 400 }
+      );
+    }
+
+    // Validate URL. Only web pages can be scraped, so other schemes (file:,
+    // javascript:, ...) are rejected before they reach the remote browser.
+    try {
+      if (!/^https?:$/.test(new URL(url).protocol)) throw new Error('Unsupported protocol');
+    } catch {
+      return NextResponse.json(
+        { error: 'Invalid URL provided' },
+        { status: 400 }
+      );
+    }
+
+    // Stream progress updates to the client
+    const encoder = new TextEncoder();
+
+    const stream = new ReadableStream({
+      async start(controller) {
+        const send = (type: string, data: Record<string, unknown>) => {
+          controller.enqueue(
+            encoder.encode(`data: ${JSON.stringify({ type, ...data })}\n\n`)
+          );
+        };
+
+        const progress = (message: string) => send('progress', { message });
+
+        try {
+          // ── Step 1: Try Hyperbrowser ──────────────────────────────
+          send('step', { step: 'scrape', status: 'active' });
+          progress('Starting automated scrape with Hyperbrowser...');
+
+          const result = await scrapePage({
+            url,
+            apiKey: hbKey,
+            onProgress: progress,
+          });
+
+          if (result.success) {
+            send('step', { step: 'scrape', status: 'success' });
+            progress('Scrape completed successfully!');
+            send('result', {
+              source: 'hyperbrowser',
+              title: result.title,
+              content: result.content.slice(0, 5000), // Cap for display
+              url: result.url,
+            });
+            controller.close();
+            return;
+          }
+
+          // ── Step 2: Scrape failed — try Human Pages ──────────────
+          send('step', { step: 'scrape', status: 'failed' });
+          progress(
+            `Automated scrape failed: ${result.failureReason}. Falling back to Human Pages...`
+          );
+
+          // Ends the stream with a failed human step and the given error message.
+          const failHuman = (error: string) => {
+            send('step', { step: 'human', status: 'failed' });
+            send('result', {
+              source: 'none',
+              error,
+              failureReason: result.failureReason,
+            });
+            controller.close();
+          };
+
+          if (!hpKey) {
+            progress(
+              'HUMANPAGES_API_KEY not set — cannot fall back to human. Set it in .env.local to enable fallback.'
+            );
+            failHuman('Scrape failed and Human Pages fallback is not configured.');
+            return;
+          }
+
+          send('step', { step: 'human', status: 'active' });
+
+          // Search for an available human
+          progress('Searching for available humans with web task skills...');
+          let humans;
+          try {
+            humans = await searchHumans(hpKey);
+          } catch (err) {
+            const msg = err instanceof Error ? err.message : 'Unknown error';
+            progress(`Human search failed: ${msg}`);
+            failHuman(`Scrape failed and human search also failed: ${msg}`);
+            return;
+          }
+
+          if (!humans || humans.length === 0) {
+            progress('No humans available right now. Try again later.');
+            failHuman('Scrape failed and no humans are currently available.');
+            return;
+          }
+
+          const human = humans[0];
+          progress(
+            `Found human: ${human.name || human.id} (rating: ${human.rating ?? 'N/A'})`
+          );
+
+          // Create a job for the human
+          progress('Creating job offer...');
+          let job;
+          try {
+            job = await createJob(hpKey, {
+              humanId: human.id,
+              title: `Scrape page: ${url}`,
+              description: [
+                `Please visit this URL and extract the full visible text content:`,
+                ``,
+                `${url}`,
+                ``,
+                `The automated scraper was blocked (reason: ${result.failureReason}).`,
+                `Please copy and paste all visible text from the page.`,
+              ].join('\n'),
+              priceUsdc: 0.25,
+              deadlineHours: 1,
+            });
+          } catch (err) {
+            const msg = err instanceof Error ? err.message : 'Unknown error';
+            progress(`Job creation failed: ${msg}`);
+            failHuman(`Scrape failed and job creation also failed: ${msg}`);
+            return;
+          }
+
+          progress(`Job created: ${job.id}. Waiting for human to accept...`);
+
+          // Poll for job completion (up to 60 seconds for demo purposes)
+          const maxPolls = 12;
+          const pollInterval = 5000;
+
+          for (let i = 0; i < maxPolls; i++) {
+            await new Promise(resolve => setTimeout(resolve, pollInterval));
+
+            try {
+              const status = await getJobStatus(hpKey, job.id);
+              progress(`Job status: ${status.status}`);
+
+              if (status.status === 'completed' && status.result) {
+                send('step', { step: 'human', status: 'success' });
+                progress('Human completed the task!');
+                send('result', {
+                  source: 'humanpages',
+                  title: `Scraped by human: ${url}`,
+                  content: status.result.slice(0, 5000),
+                  url,
+                  jobId: job.id,
+                  humanId: human.id,
+                });
+                controller.close();
+                return;
+              }
+
+              // A cancelled job will never complete, so report it as a failure
+              // instead of falling through to the "still in progress" result.
+              if (status.status === 'cancelled') {
+                progress('Job was cancelled by the human.');
+                failHuman('Scrape failed and the human cancelled the job.');
+                return;
+              }
+            } catch {
+              progress('Error checking job status, will retry...');
+            }
+          }
+
+          // The polling window elapsed without a final state: report the job as
+          // pending so the client can follow up with the job ID.
+          send('step', { step: 'human', status: 'pending' });
+          progress('Job is still in progress. Check back later using the job ID.');
+          send('result', {
+            source: 'humanpages-pending',
+            jobId: job.id,
+            humanId: human.id,
+            message:
+              'A human has been hired but has not yet completed the task. The job will continue in the background.',
+            url,
+          });
+
+          controller.close();
+        } catch (error) {
+          const errorMessage =
+            error instanceof Error ? error.message : 'Unknown error';
+          send('error', { error: errorMessage });
+          controller.close();
+        }
+      },
+    });
+
+    return new Response(stream, {
+      headers: {
+        'Content-Type': 'text/event-stream',
+        'Cache-Control': 'no-cache',
+        Connection: 'keep-alive',
+      },
+    });
+  } catch (error) {
+    console.error('Error in scrape API:', error);
+    return NextResponse.json(
+      { error: 'Internal server error' },
+      { status: 500 }
+    );
+  }
+}
+
+/**
+ * GET /api/scrape: returns a short JSON description of how to call the POST
+ * endpoint.
+ */
+export async function GET() {
+  return NextResponse.json({
+    message: 'Human Fallback Scrape Endpoint',
+    method: 'POST',
+    body: { url: 'https://example.com' },
+    description:
+      'Tries Hyperbrowser first. If blocked, falls back to Human Pages to hire a real human.',
+  });
+}
diff --git a/human-fallback/app/components/LogConsole.tsx b/human-fallback/app/components/LogConsole.tsx
new file mode 100644
index 0000000..d8b97bf
--- /dev/null
+++ b/human-fallback/app/components/LogConsole.tsx
@@ -0,0 +1,50 @@
+'use client';
+
+import React, { useEffect, useRef } from 'react';
+
+interface LogConsoleProps {
+  messages: string[];
+  isActive: boolean;
+}
+
+export default function LogConsole({ messages, isActive }: LogConsoleProps) {
+  const scrollRef = useRef(null);
+
+  useEffect(() => {
+    if (scrollRef.current) {
+      scrollRef.current.scrollTop = scrollRef.current.scrollHeight;
+    }
+  }, [messages]);
+
+  return (
+
+
+
0 ? 'bg-gray-500' : 'bg-gray-700'}`} /> +

Activity Log

+
+
+ {messages.length === 0 ? ( +
Waiting for input...
+ ) : ( + messages.map((msg, i) => ( +
+ + [{new Date().toLocaleTimeString()}] + + {msg} +
+ )) + )} + {isActive && ( +
+
+ Processing... +
+ )} +
+
+ ); +} diff --git a/human-fallback/app/components/StepIndicator.tsx b/human-fallback/app/components/StepIndicator.tsx new file mode 100644 index 0000000..a744753 --- /dev/null +++ b/human-fallback/app/components/StepIndicator.tsx @@ -0,0 +1,73 @@ +'use client'; + +import React from 'react'; +import { Bot, User, CheckCircle, XCircle, Clock, Loader2 } from 'lucide-react'; + +export type StepStatus = 'idle' | 'active' | 'success' | 'failed' | 'pending'; + +interface Step { + id: string; + label: string; + description: string; + icon: 'bot' | 'human'; + status: StepStatus; +} + +interface StepIndicatorProps { + steps: Step[]; +} + +const statusStyles: Record = { + idle: 'border-gray-700 bg-gray-900 text-gray-500', + active: 'border-accent bg-accent/10 text-accent pulse-glow', + success: 'border-green-500 bg-green-500/10 text-green-400', + failed: 'border-red-500 bg-red-500/10 text-red-400', + pending: 'border-yellow-500 bg-yellow-500/10 text-yellow-400', +}; + +const StatusIcon = ({ status }: { status: StepStatus }) => { + switch (status) { + case 'active': + return ; + case 'success': + return ; + case 'failed': + return ; + case 'pending': + return ; + default: + return null; + } +}; + +export default function StepIndicator({ steps }: StepIndicatorProps) { + return ( +
+ {steps.map((step, index) => ( + +
+
+
+ {step.icon === 'bot' ? ( + + ) : ( + + )} + {step.label} +
+ +
+

{step.description}

+
+ {index < steps.length - 1 && ( +
+ + + +
+ )} +
+ ))} +
+ ); +} diff --git a/human-fallback/app/globals.css b/human-fallback/app/globals.css new file mode 100644 index 0000000..073a35c --- /dev/null +++ b/human-fallback/app/globals.css @@ -0,0 +1,108 @@ +@tailwind base; +@tailwind components; +@tailwind utilities; + +:root { + --background: #000000; + --foreground: #ffffff; + --card: rgba(0, 0, 0, 0.8); + --card-foreground: #e5e7eb; + --accent: #F0FF26; + --terminal: #000000; +} + +* { + box-sizing: border-box; + padding: 0; + margin: 0; +} + +html, +body { + max-width: 100vw; + overflow-x: hidden; + font-family: system-ui, -apple-system, sans-serif; + font-weight: 500; + letter-spacing: -0.04em; + background-color: var(--background); + color: var(--foreground); +} + +.glass-card { + backdrop-filter: blur(12px); + background: rgba(0, 0, 0, 0.8); + border: 1px solid rgba(107, 114, 128, 0.3); + transition: all 200ms ease; +} + +.glass-card:hover { + background: rgba(0, 0, 0, 0.9); + border-color: rgba(107, 114, 128, 0.5); +} + +.scrollbar-hide { + -ms-overflow-style: none; + scrollbar-width: none; +} + +.scrollbar-hide::-webkit-scrollbar { + display: none; +} + +.terminal-text { + color: #F0FF26; + font-family: ui-monospace, 'SF Mono', monospace; +} + +/* Custom animations */ +@keyframes fadeIn { + from { + opacity: 0; + transform: translateY(10px); + } + to { + opacity: 1; + transform: translateY(0); + } +} + +.animate-in { + animation: fadeIn 0.5s ease-out; +} + +/* Focus styles */ +button:focus-visible, +input:focus-visible, +a:focus-visible { + outline: 2px solid #F0FF26; + outline-offset: 2px; +} + +/* Terminal styling */ +.terminal-bg { + background: #000000; + border: 1px solid rgba(107, 114, 128, 0.3); +} + +/* Loading spinner */ +.loading-spinner { + border: 2px solid rgba(240, 255, 38, 0.3); + border-top: 2px solid #F0FF26; + border-radius: 50%; + animation: spin 1s linear infinite; +} + +@keyframes spin { + 0% { transform: rotate(0deg); } + 100% { transform: rotate(360deg); } +} + +/* Pulse animation for active 
steps */ +@keyframes pulse-glow { + 0%, 100% { box-shadow: 0 0 0 0 rgba(240, 255, 38, 0.4); } + 50% { box-shadow: 0 0 0 8px rgba(240, 255, 38, 0); } +} + +.pulse-glow { + animation: pulse-glow 2s ease-in-out infinite; +} diff --git a/human-fallback/app/layout.tsx b/human-fallback/app/layout.tsx new file mode 100644 index 0000000..b241dde --- /dev/null +++ b/human-fallback/app/layout.tsx @@ -0,0 +1,29 @@ +import type { Metadata } from "next"; +import "./globals.css"; + +export const metadata: Metadata = { + title: "Human Fallback - Hyperbrowser + Human Pages", + description: "Scrape any page with Hyperbrowser. When automation fails, hire a real human via Human Pages.", + keywords: "web scraping, human fallback, CAPTCHA bypass, Hyperbrowser, Human Pages", + authors: [{ name: "Hyperbrowser" }], + icons: { + icon: [ + { url: '/favicon.ico', sizes: '16x16 32x32', type: 'image/x-icon' }, + ], + shortcut: '/favicon.ico', + }, +}; + +export default function RootLayout({ + children, +}: { + children: React.ReactNode; +}) { + return ( + + + {children} + + + ); +} diff --git a/human-fallback/app/page.tsx b/human-fallback/app/page.tsx new file mode 100644 index 0000000..45a7025 --- /dev/null +++ b/human-fallback/app/page.tsx @@ -0,0 +1,333 @@ +'use client'; + +import React, { useState } from 'react'; +import { Play, Zap, Bot, User, ExternalLink } from 'lucide-react'; +import StepIndicator, { StepStatus } from './components/StepIndicator'; +import LogConsole from './components/LogConsole'; + +interface ScrapeResult { + source: string; + title?: string; + content?: string; + url?: string; + jobId?: string; + humanId?: string; + message?: string; + error?: string; + failureReason?: string; +} + +export default function Home() { + const [url, setUrl] = useState(''); + const [messages, setMessages] = useState([]); + const [isLoading, setIsLoading] = useState(false); + const [result, setResult] = useState(null); + const [scrapeStatus, setScrapeStatus] = useState('idle'); + const 
[humanStatus, setHumanStatus] = useState('idle'); + + const addMessage = (message: string) => { + setMessages(prev => [...prev, message]); + }; + + const handleScrape = async () => { + if (!url) { + addMessage('Please enter a URL'); + return; + } + + // Reset state + setIsLoading(true); + setResult(null); + setMessages([]); + setScrapeStatus('idle'); + setHumanStatus('idle'); + + try { + const response = await fetch('/api/scrape', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ url }), + }); + + if (!response.ok) { + throw new Error(`Request failed: ${response.status}`); + } + + const reader = response.body?.getReader(); + const decoder = new TextDecoder(); + + if (!reader) { + throw new Error('No response body'); + } + + while (true) { + const { done, value } = await reader.read(); + if (done) break; + + const chunk = decoder.decode(value, { stream: true }); + const lines = chunk.split('\n'); + + for (const line of lines) { + if (!line.startsWith('data: ')) continue; + + try { + const data = JSON.parse(line.slice(6)); + + if (data.type === 'progress') { + addMessage(data.message); + } else if (data.type === 'step') { + if (data.step === 'scrape') { + setScrapeStatus(data.status); + } else if (data.step === 'human') { + setHumanStatus(data.status); + } + } else if (data.type === 'result') { + setResult(data); + } else if (data.type === 'error') { + addMessage(`Error: ${data.error}`); + } + } catch { + // Ignore JSON parse errors on partial chunks + } + } + } + } catch (error) { + addMessage( + `Error: ${error instanceof Error ? 
error.message : 'Unknown error'}` + ); + } finally { + setIsLoading(false); + } + }; + + const steps = [ + { + id: 'scrape', + label: 'Hyperbrowser', + description: 'Automated scrape with stealth browser', + icon: 'bot' as const, + status: scrapeStatus, + }, + { + id: 'human', + label: 'Human Pages', + description: 'Hire a real human if automation fails', + icon: 'human' as const, + status: humanStatus, + }, + ]; + + return ( +
+ {/* Header */} +
+
+
+
+
+ +
+

Human Fallback

+
+
+ Hyperbrowser + Human Pages +
+
+
+
+ +
+ {/* Input Card */} +
+
+

+ Scrape any page — with a human safety net +

+

+ Enter a URL below. Hyperbrowser will attempt an automated scrape. + If the page is protected by a CAPTCHA, login wall, or other block, + the app automatically falls back to{' '} + + Human Pages + {' '} + to hire a real person. +

+
+ +
+
+ + setUrl(e.target.value)} + onKeyDown={e => { + if (e.key === 'Enter' && !isLoading) handleScrape(); + }} + className="w-full px-4 py-3 bg-gray-800 border border-gray-600 rounded-lg focus:ring-2 focus:ring-accent focus:border-transparent text-lg" + placeholder="https://example.com" + /> +
+ + +
+
+ + {/* Workflow Steps */} +
+ +
+ + {/* Activity Log */} +
+ +
+ + {/* Result */} + {result && ( +
+ {result.source === 'hyperbrowser' && ( + <> +
+
+ +
+
+

+ Scraped by Hyperbrowser +

+

{result.title}

+
+
+
+
+                    {result.content}
+                  
+
+ + )} + + {result.source === 'humanpages' && ( + <> +
+
+ +
+
+

+ Completed by Human +

+

+ Job {result.jobId} · Human {result.humanId} +

+
+
+
+
+                    {result.content}
+                  
+
+ + )} + + {result.source === 'humanpages-pending' && ( + <> +
+
+ +
+
+

+ Human Hired — In Progress +

+

+ Job {result.jobId} +

+
+
+
+

{result.message}

+ + + View job on Human Pages + +
+ + )} + + {result.source === 'none' && ( + <> +
+
+ +
+
+

+ Scrape Failed +

+

+ {result.failureReason} +

+
+
+
+

{result.error}

+
+ + )} +
+ )} +
+ + {/* Footer */} +
+
+
+ Powered by{' '} + + Hyperbrowser + + {' + '} + + Human Pages + +
+
+
+
+  );
+}
diff --git a/human-fallback/lib/humanpages.ts b/human-fallback/lib/humanpages.ts
new file mode 100644
index 0000000..fd980bb
--- /dev/null
+++ b/human-fallback/lib/humanpages.ts
@@ -0,0 +1,110 @@
+const BASE_URL = 'https://humanpages.ai';
+
+interface HumanSearchResult {
+  id: string;
+  name: string;
+  skills: string[];
+  rating: number;
+  available: boolean;
+}
+
+interface JobCreateResponse {
+  id: string;
+  status: string;
+  humanId: string;
+  title: string;
+}
+
+interface JobStatusResponse {
+  id: string;
+  status: 'pending' | 'accepted' | 'in_progress' | 'completed' | 'cancelled';
+  result?: string;
+  humanId: string;
+  title: string;
+}
+
+interface JobMessage {
+  id: string;
+  content: string;
+  sender: 'agent' | 'human';
+  createdAt: string;
+}
+
+// Headers sent with every request: JSON content type plus the agent key.
+function headers(apiKey: string) {
+  return { 'Content-Type': 'application/json', 'X-Agent-Key': apiKey };
+}
+
+/**
+ * Search for available humans with web task skills.
+ * Accepts a bare array or a `{ humans: [...] }` body; throws on a non-OK response.
+ */
+export async function searchHumans(apiKey: string): Promise<HumanSearchResult[]> {
+  const res = await fetch(`${BASE_URL}/api/humans/search?skill=web+task&available=true`, {
+    headers: headers(apiKey),
+  });
+  if (!res.ok) {
+    throw new Error(`Human Pages search failed: ${res.status} ${res.statusText}`);
+  }
+  const data = await res.json();
+  return Array.isArray(data) ? data : data?.humans ?? [];
+}
+
+/**
+ * Create a job offer for a human to complete a web task.
+ * Throws on a non-OK response.
+ */
+export async function createJob(
+  apiKey: string,
+  params: {
+    humanId: string;
+    title: string;
+    description: string;
+    priceUsdc: number;
+    deadlineHours: number;
+  }
+): Promise<JobCreateResponse> {
+  const res = await fetch(`${BASE_URL}/api/jobs`, {
+    method: 'POST',
+    headers: headers(apiKey),
+    body: JSON.stringify(params),
+  });
+  if (!res.ok) {
+    throw new Error(`Human Pages job creation failed: ${res.status} ${res.statusText}`);
+  }
+  return res.json();
+}
+
+/**
+ * Check the status of an existing job (throws on a non-OK response).
+ */
+export async function getJobStatus(
+  apiKey: string,
+  jobId: string
+): Promise<JobStatusResponse> {
+  const res = await fetch(`${BASE_URL}/api/jobs/${encodeURIComponent(jobId)}`, {
+    headers: headers(apiKey),
+  });
+  if (!res.ok) {
+    throw new Error(`Human Pages job status failed: ${res.status} ${res.statusText}`);
+  }
+  return res.json();
+}
+
+/**
+ * Get messages for a job (communication between agent and human).
+ * Accepts a bare array or a `{ messages: [...] }` body; throws on a non-OK response.
+ */
+export async function getJobMessages(
+  apiKey: string,
+  jobId: string
+): Promise<JobMessage[]> {
+  const res = await fetch(`${BASE_URL}/api/jobs/${encodeURIComponent(jobId)}/messages`, {
+    headers: headers(apiKey),
+  });
+  if (!res.ok) {
+    throw new Error(`Human Pages messages failed: ${res.status} ${res.statusText}`);
+  }
+  const data = await res.json();
+  return Array.isArray(data) ? data : data?.messages ?? [];
+}
diff --git a/human-fallback/lib/scrape.ts b/human-fallback/lib/scrape.ts
new file mode 100644
index 0000000..25b76c3
--- /dev/null
+++ b/human-fallback/lib/scrape.ts
@@ -0,0 +1,150 @@
+import { Hyperbrowser } from '@hyperbrowser/sdk';
+
+export interface ScrapeOptions {
+  url: string;
+  apiKey: string;
+  onProgress?: (message: string) => void;
+}
+
+export interface ScrapeResult {
+  success: boolean;
+  content: string;
+  title: string;
+  url: string;
+  failureReason?: string;
+}
+
+/**
+ * Indicators that a page is blocked by a CAPTCHA, login wall,
+ * or other anti-bot measure.
+ */
+const BLOCK_INDICATORS = [
+  'captcha',
+  'cf-challenge',
+  'challenge-running',
+  'please verify you are a human',
+  'access denied',
+  'please complete the security check',
+  'just a moment',
+  'checking your browser',
+  'sign in to continue',
+  'log in to continue',
+  'please log in',
+  'login required',
+  'enable javascript and cookies',
+];
+
+/**
+ * Returns the reason a page looks blocked, or null when it looks usable: the
+ * first BLOCK_INDICATORS entry found (case-insensitively) in the HTML or title,
+ * or a "minimal content" reason when fewer than 100 characters remain after
+ * stripping tags.
+ */
+function detectBlock(html: string, title: string): string | null {
+  const combined = (html + ' ' + title).toLowerCase();
+  const hit = BLOCK_INDICATORS.find(indicator => combined.includes(indicator));
+  if (hit) return hit;
+  // Very short page content is often a block page
+  const textContent = html.replace(/<[^>]*>/g, '').trim();
+  if (textContent.length < 100) {
+    return 'page returned minimal content (likely blocked)';
+  }
+  return null;
+}
+
+/**
+ * Scrapes `url` in a stealth Hyperbrowser session driven through puppeteer-core.
+ * Failures (navigation errors, detected blocks, SDK errors) are reported as
+ * `success: false` with a `failureReason` rather than thrown; the browser and
+ * the remote session are released on a best-effort basis before returning.
+ */
+export async function scrapePage(options: ScrapeOptions): Promise<ScrapeResult> {
+  const { url, apiKey, onProgress } = options;
+  let hb: Hyperbrowser | null = null;
+  let session: any = null;
+  let browser: any = null;
+
+  try {
+    onProgress?.('Launching Hyperbrowser session...');
+
+    hb = new Hyperbrowser({ apiKey });
+    session = await hb.sessions.create({
+      useStealth: true,
+      useProxy: false,
+    });
+
+    onProgress?.('Connecting to browser...');
+
+    const { connect } = await import('puppeteer-core');
+    browser = await connect({
+      browserWSEndpoint: session.wsEndpoint,
+      defaultViewport: null,
+    });
+
+    const [page] = await browser.pages();
+
+    onProgress?.('Navigating to target URL...');
+
+    let retries = 2;
+    while (retries > 0) {
+      try {
+        await page.goto(url, { waitUntil: 'networkidle0', timeout: 15000 });
+        break;
+      } catch (navError) {
+        retries--;
+        if (retries === 0) throw navError;
+        onProgress?.(`Navigation failed, retrying... (${retries} attempts left)`);
+        await new Promise(resolve => setTimeout(resolve, 1000));
+      }
+    }
+
+    onProgress?.('Waiting for page to fully load...');
+    await new Promise(resolve => setTimeout(resolve, 2000));
+
+    const title = await page.title();
+    const html = await page.content();
+
+    // Check for anti-bot blocks
+    const blockReason = detectBlock(html, title);
+    if (blockReason) {
+      onProgress?.(`Block detected: ${blockReason}`);
+      return { success: false, content: html, title, url, failureReason: blockReason };
+    }
+
+    // Extract readable text content
+    const textContent = await page.evaluate(() => {
+      const body = document.body;
+      if (!body) return '';
+      // Remove scripts and styles
+      const clone = body.cloneNode(true) as HTMLElement;
+      clone.querySelectorAll('script, style, noscript').forEach(el => el.remove());
+      return clone.innerText || clone.textContent || '';
+    });
+
+    onProgress?.('Page scraped successfully!');
+
+    return {
+      success: true,
+      content: textContent.trim(),
+      title,
+      url,
+    };
+  } catch (error) {
+    const message = error instanceof Error ? error.message : 'Unknown error';
+    onProgress?.(`Scrape failed: ${message}`);
+    return {
+      success: false,
+      content: '',
+      title: '',
+      url,
+      failureReason: message,
+    };
+  } finally {
+    // Release the browser and the remote session independently so a failure
+    // closing one cannot leak the other. Cleanup errors are non-fatal.
+    await browser?.close().catch(() => {});
+    // NOTE(review): per the SDK docs a session is ended via sessions.stop(id);
+    // the object returned by sessions.create() exposes no destroy() — confirm.
+    if (hb && session?.id) await hb.sessions.stop(session.id).catch(() => {});
+  }
+}
diff --git a/human-fallback/next.config.js b/human-fallback/next.config.js
new file mode 100644
index 0000000..1860d28
--- /dev/null
+++ b/human-fallback/next.config.js
@@ -0,0 +1,57 @@
+/** @type {import('next').NextConfig} */
+const nextConfig = {
+  experimental: {
+    serverComponentsExternalPackages: ['@hyperbrowser/sdk', 'puppeteer-core']
+  },
+  webpack: (config, { isServer }) => {
+    if (!isServer) {
+      // Don't bundle server-only modules for client
+      config.resolve.fallback = {
+        ...config.resolve.fallback,
+        fs: false,
+        net: false,
+        tls: false,
+        dns: false,
+        'node:assert': false,
+        'node:child_process': false,
+        'node:fs/promises': false,
+        'node:path': false,
+        'node:url': false,
+        'node:util': false,
+        'node:stream': false,
+        'node:buffer': false,
+        'node:crypto': false,
+        'node:os': false,
+        'node:events': false,
+        'node:querystring': false,
+        'node:http': false,
+        'node:https': false,
+        'node:zlib': false,
+        'child_process': false,
+        'assert': false,
+        'path': false,
+        'url': false,
+        'util': false,
+        'stream': false,
+        'buffer': false,
+        'crypto': false,
+        'os': false,
+        'events': false,
+        'querystring': false,
+        'http': false,
+        'https': false,
+        'zlib': false,
+      };
+
+      // Exclude server-only packages from client bundle
+      config.externals = config.externals || [];
+      config.externals.push(
+        '@hyperbrowser/sdk',
+        'puppeteer-core'
+      );
+    }
+    return config;
+  },
+};
+
+module.exports = nextConfig;
diff --git a/human-fallback/package.json b/human-fallback/package.json
new file mode 100644
index 0000000..c5fa6da
--- /dev/null
+++ b/human-fallback/package.json
@@ -0,0 +1,30 @@
+{
+  "name": "human-fallback",
+  "version": 
"0.1.0", + "private": true, + "scripts": { + "dev": "next dev", + "build": "next build", + "start": "next start", + "lint": "next lint" + }, + "dependencies": { + "@hyperbrowser/sdk": "^0.51.0", + "lucide-react": "^0.446.0", + "next": "14.2.13", + "puppeteer-core": "^24.11.2", + "react": "^18", + "react-dom": "^18" + }, + "devDependencies": { + "@types/node": "^20", + "@types/react": "^18", + "@types/react-dom": "^18", + "autoprefixer": "^10.4.20", + "eslint": "^8", + "eslint-config-next": "14.2.13", + "postcss": "^8", + "tailwindcss": "^3.4.1", + "typescript": "^5" + } +} diff --git a/human-fallback/postcss.config.mjs b/human-fallback/postcss.config.mjs new file mode 100644 index 0000000..d0c615b --- /dev/null +++ b/human-fallback/postcss.config.mjs @@ -0,0 +1,9 @@ +/** @type {import('postcss-load-config').Config} */ +const config = { + plugins: { + tailwindcss: {}, + autoprefixer: {}, + }, +} + +export default config diff --git a/human-fallback/tailwind.config.js b/human-fallback/tailwind.config.js new file mode 100644 index 0000000..5ef1ac7 --- /dev/null +++ b/human-fallback/tailwind.config.js @@ -0,0 +1,42 @@ +/** @type {import('tailwindcss').Config} */ +module.exports = { + content: [ + './pages/**/*.{js,ts,jsx,tsx,mdx}', + './components/**/*.{js,ts,jsx,tsx,mdx}', + './app/**/*.{js,ts,jsx,tsx,mdx}', + ], + theme: { + extend: { + colors: { + accent: '#F0FF26', + terminal: '#000000', + }, + fontFamily: { + sans: ['system-ui', '-apple-system', 'sans-serif'], + mono: ['ui-monospace', 'SF Mono', 'Monaco', 'monospace'], + }, + fontWeight: { + normal: '500', + medium: '500', + semibold: '600', + bold: '700', + }, + letterSpacing: { + tight4: '-0.04em', + }, + animation: { + 'fade-in': 'fadeIn 0.5s ease-out', + }, + keyframes: { + fadeIn: { + '0%': { opacity: '0', transform: 'translateY(10px)' }, + '100%': { opacity: '1', transform: 'translateY(0)' }, + }, + }, + backdropBlur: { + md: '12px', + }, + }, + }, + plugins: [], +} \ No newline at end of file diff --git 
a/human-fallback/tsconfig.json b/human-fallback/tsconfig.json new file mode 100644 index 0000000..d8b9323 --- /dev/null +++ b/human-fallback/tsconfig.json @@ -0,0 +1,27 @@ +{ + "compilerOptions": { + "target": "ES2017", + "lib": ["dom", "dom.iterable", "esnext"], + "allowJs": true, + "skipLibCheck": true, + "strict": true, + "noEmit": true, + "esModuleInterop": true, + "module": "esnext", + "moduleResolution": "bundler", + "resolveJsonModule": true, + "isolatedModules": true, + "jsx": "preserve", + "incremental": true, + "plugins": [ + { + "name": "next" + } + ], + "paths": { + "@/*": ["./*"] + } + }, + "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], + "exclude": ["node_modules"] +}