Skip to content

Commit 94af729

Browse files
committed
feat: add SEO and AI optimization
- Add llms.txt for AI crawler discoverability
- Add robots.txt with AI crawler permissions
- Update VitePress config with:
  - Open Graph meta tags
  - Twitter card meta tags
  - JSON-LD structured data
  - Sitemap generation
  - SEO keywords and description
- Add branded OG image for social sharing
1 parent ecf79a7 commit 94af729

29 files changed

Lines changed: 475 additions & 144 deletions

.vitepress/config.ts

Lines changed: 90 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,67 @@ import { defineConfig } from 'vitepress';
22

33
export default defineConfig({
44
title: 'Crawlee Cloud',
5-
description: 'Open-source self-hosted platform for Crawlee scrapers',
5+
description:
6+
'Self-hosted, open-source platform for running Crawlee and Apify Actors on your own infrastructure. Deploy web scrapers with Playwright, Puppeteer, or Cheerio.',
67
cleanUrls: true,
8+
lang: 'en-US',
9+
lastUpdated: true,
10+
11+
// Sitemap generation
12+
sitemap: {
13+
hostname: 'https://crawlee.cloud',
14+
lastmodDateOnly: false,
15+
},
16+
717
head: [
18+
// Primary Meta Tags
19+
['meta', { name: 'viewport', content: 'width=device-width, initial-scale=1.0' }],
20+
[
21+
'meta',
22+
{
23+
name: 'keywords',
24+
content:
25+
'crawlee, apify, web scraping, self-hosted, open source, playwright, puppeteer, cheerio, web crawler, data extraction, scraping platform, actor platform',
26+
},
27+
],
28+
['meta', { name: 'author', content: 'Crawlee Cloud' }],
29+
['meta', { name: 'robots', content: 'index, follow' }],
30+
['link', { rel: 'canonical', href: 'https://crawlee.cloud' }],
31+
32+
// Open Graph / Facebook
33+
['meta', { property: 'og:type', content: 'website' }],
34+
['meta', { property: 'og:url', content: 'https://crawlee.cloud' }],
35+
['meta', { property: 'og:title', content: 'Crawlee Cloud - Your Scrapers, Your Cloud' }],
36+
[
37+
'meta',
38+
{
39+
property: 'og:description',
40+
content:
41+
'Self-hosted, open-source platform for running Crawlee and Apify Actors on your own infrastructure.',
42+
},
43+
],
44+
['meta', { property: 'og:image', content: 'https://crawlee.cloud/og-image.png' }],
45+
['meta', { property: 'og:site_name', content: 'Crawlee Cloud' }],
46+
['meta', { property: 'og:locale', content: 'en_US' }],
47+
48+
// Twitter / X
49+
['meta', { name: 'twitter:card', content: 'summary_large_image' }],
50+
['meta', { name: 'twitter:url', content: 'https://crawlee.cloud' }],
51+
['meta', { name: 'twitter:title', content: 'Crawlee Cloud - Your Scrapers, Your Cloud' }],
52+
[
53+
'meta',
54+
{
55+
name: 'twitter:description',
56+
content:
57+
'Self-hosted, open-source platform for running Crawlee and Apify Actors on your own infrastructure.',
58+
},
59+
],
60+
['meta', { name: 'twitter:image', content: 'https://crawlee.cloud/og-image.png' }],
61+
62+
// Favicon (SVG logo)
63+
['link', { rel: 'icon', type: 'image/svg+xml', href: '/logo.svg' }],
64+
65+
// Google Analytics
866
[
967
'script',
1068
{ async: '', src: 'https://www.googletagmanager.com/gtag/js?id=G-FYMRJ2GG39' },
@@ -17,6 +75,37 @@ export default defineConfig({
1775
gtag('js', new Date());
1876
gtag('config', 'G-FYMRJ2GG39');`,
1977
],
78+
79+
// JSON-LD Structured Data
80+
[
81+
'script',
82+
{ type: 'application/ld+json' },
83+
JSON.stringify({
84+
'@context': 'https://schema.org',
85+
'@type': 'SoftwareApplication',
86+
name: 'Crawlee Cloud',
87+
applicationCategory: 'DeveloperApplication',
88+
operatingSystem: 'Linux, macOS, Windows',
89+
description:
90+
'Self-hosted, open-source platform for running Crawlee and Apify Actors on your own infrastructure.',
91+
url: 'https://crawlee.cloud',
92+
offers: {
93+
'@type': 'Offer',
94+
price: '0',
95+
priceCurrency: 'USD',
96+
},
97+
author: {
98+
'@type': 'Organization',
99+
name: 'Crawlee Cloud',
100+
url: 'https://crawlee.cloud',
101+
},
102+
license: 'https://opensource.org/licenses/MIT',
103+
codeRepository: 'https://github.com/crawlee-cloud/crawlee-cloud',
104+
programmingLanguage: ['TypeScript', 'JavaScript'],
105+
keywords:
106+
'web scraping, crawlee, apify, self-hosted, open source, playwright, puppeteer, cheerio',
107+
}),
108+
],
20109
],
21110
srcDir: 'src',
22111
outDir: './dist',

dist/404.html

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,21 +2,42 @@
22
<html lang="en-US" dir="ltr">
33
<head>
44
<meta charset="utf-8">
5-
<meta name="viewport" content="width=device-width,initial-scale=1">
5+
66
<title>404 | Crawlee Cloud</title>
77
<meta name="description" content="Not Found">
88
<meta name="generator" content="VitePress v1.6.4">
9-
<link rel="preload stylesheet" href="/assets/style.BWDTHy11.css" as="style">
9+
<link rel="preload stylesheet" href="/assets/style.B1qQlgUi.css" as="style">
1010
<link rel="preload stylesheet" href="/vp-icons.css" as="style">
1111

12-
<script type="module" src="/assets/app.BOliJLSL.js"></script>
12+
<script type="module" src="/assets/app.D2DgKinZ.js"></script>
1313
<link rel="preload" href="/assets/inter-roman-latin.Di8DUHzh.woff2" as="font" type="font/woff2" crossorigin="">
14+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
15+
<meta name="keywords" content="crawlee, apify, web scraping, self-hosted, open source, playwright, puppeteer, cheerio, web crawler, data extraction, scraping platform, actor platform">
16+
<meta name="author" content="Crawlee Cloud">
17+
<meta name="robots" content="index, follow">
18+
<link rel="canonical" href="https://crawlee.cloud">
19+
<meta property="og:type" content="website">
20+
<meta property="og:url" content="https://crawlee.cloud">
21+
<meta property="og:title" content="Crawlee Cloud - Your Scrapers, Your Cloud">
22+
<meta property="og:description" content="Self-hosted, open-source platform for running Crawlee and Apify Actors on your own infrastructure.">
23+
<meta property="og:image" content="https://crawlee.cloud/og-image.png">
24+
<meta property="og:site_name" content="Crawlee Cloud">
25+
<meta property="og:locale" content="en_US">
26+
<meta name="twitter:card" content="summary_large_image">
27+
<meta name="twitter:url" content="https://crawlee.cloud">
28+
<meta name="twitter:title" content="Crawlee Cloud - Your Scrapers, Your Cloud">
29+
<meta name="twitter:description" content="Self-hosted, open-source platform for running Crawlee and Apify Actors on your own infrastructure.">
30+
<meta name="twitter:image" content="https://crawlee.cloud/og-image.png">
31+
<link rel="icon" type="image/svg+xml" href="/logo.svg">
32+
<script async src="https://www.googletagmanager.com/gtag/js?id=G-FYMRJ2GG39"></script>
33+
<script>window.dataLayer=window.dataLayer||[];function gtag(){dataLayer.push(arguments)}gtag("js",new Date),gtag("config","G-FYMRJ2GG39");</script>
34+
<script type="application/ld+json">{"@context":"https://schema.org","@type":"SoftwareApplication","name":"Crawlee Cloud","applicationCategory":"DeveloperApplication","operatingSystem":"Linux, macOS, Windows","description":"Self-hosted, open-source platform for running Crawlee and Apify Actors on your own infrastructure.","url":"https://crawlee.cloud","offers":{"@type":"Offer","price":"0","priceCurrency":"USD"},"author":{"@type":"Organization","name":"Crawlee Cloud","url":"https://crawlee.cloud"},"license":"https://opensource.org/licenses/MIT","codeRepository":"https://github.com/crawlee-cloud/crawlee-cloud","programmingLanguage":["TypeScript","JavaScript"],"keywords":"web scraping, crawlee, apify, self-hosted, open source, playwright, puppeteer, cheerio"}</script>
1435
<script id="check-dark-mode">(()=>{const e=localStorage.getItem("vitepress-theme-appearance")||"auto",a=window.matchMedia("(prefers-color-scheme: dark)").matches;(!e||e==="auto"?a:e==="dark")&&document.documentElement.classList.add("dark")})();</script>
1536
<script id="check-mac-os">document.documentElement.classList.toggle("mac",/Mac|iPhone|iPod|iPad/i.test(navigator.platform));</script>
1637
</head>
1738
<body>
1839
<div id="app"></div>
19-
<script>window.__VP_HASH_MAP__=JSON.parse("{\"docs_api.md\":\"D3YRveVj\",\"docs_apify-sdk-environment.md\":\"B5PGvbGR\",\"docs_cli.md\":\"uL6wdgO-\",\"docs_dashboard.md\":\"BisIJXWe\",\"docs_deployment.md\":\"qTpXi688\",\"docs_index.md\":\"w1F42R42\",\"docs_runner.md\":\"BGocZfZH\",\"index.md\":\"CPBbALSI\"}");window.__VP_SITE_DATA__=JSON.parse("{\"lang\":\"en-US\",\"dir\":\"ltr\",\"title\":\"Crawlee Cloud\",\"description\":\"Open-source self-hosted platform for Crawlee scrapers\",\"base\":\"/\",\"head\":[],\"router\":{\"prefetchLinks\":true},\"appearance\":true,\"themeConfig\":{\"siteTitle\":false,\"logo\":\"/logo.svg\",\"nav\":[{\"text\":\"Home\",\"link\":\"/\"},{\"text\":\"Docs\",\"link\":\"/docs/\"},{\"text\":\"GitHub\",\"link\":\"https://github.com/crawlee-cloud/crawlee-cloud\"}],\"sidebar\":[{\"text\":\"Introduction\",\"items\":[{\"text\":\"Overview\",\"link\":\"/docs/\"},{\"text\":\"Apify SDK Environment\",\"link\":\"/docs/apify-sdk-environment\"}]},{\"text\":\"Components\",\"items\":[{\"text\":\"CLI\",\"link\":\"/docs/cli\"},{\"text\":\"Dashboard\",\"link\":\"/docs/dashboard\"},{\"text\":\"Runner\",\"link\":\"/docs/runner\"},{\"text\":\"Deployment\",\"link\":\"/docs/deployment\"},{\"text\":\"API Reference\",\"link\":\"/docs/api\"}]}],\"socialLinks\":[{\"icon\":\"github\",\"link\":\"https://github.com/crawlee-cloud/crawlee-cloud\"}],\"footer\":{\"message\":\"Released under the MIT License.\",\"copyright\":\"Copyright © 2025 Crawlee Cloud\"},\"search\":{\"provider\":\"local\"}},\"locales\":{},\"scrollOffset\":134,\"cleanUrls\":true}");</script>
40+
<script>window.__VP_HASH_MAP__=JSON.parse("{\"docs_api.md\":\"BIXT62I7\",\"docs_apify-sdk-environment.md\":\"DvYz4nYC\",\"docs_cli.md\":\"Centaw9h\",\"docs_dashboard.md\":\"DHYgmMGv\",\"docs_deployment.md\":\"Cg_aVGTo\",\"docs_index.md\":\"C4xmGzKy\",\"docs_runner.md\":\"BgQxNnu7\",\"index.md\":\"NJbyKGSV\"}");window.__VP_SITE_DATA__=JSON.parse("{\"lang\":\"en-US\",\"dir\":\"ltr\",\"title\":\"Crawlee Cloud\",\"description\":\"Self-hosted, open-source platform for running Crawlee and Apify Actors on your own infrastructure. Deploy web scrapers with Playwright, Puppeteer, or Cheerio.\",\"base\":\"/\",\"head\":[],\"router\":{\"prefetchLinks\":true},\"appearance\":true,\"themeConfig\":{\"siteTitle\":false,\"logo\":{\"light\":\"/logo-light.svg\",\"dark\":\"/logo-dark.svg\"},\"nav\":[{\"text\":\"Home\",\"link\":\"/\"},{\"text\":\"Docs\",\"link\":\"/docs/\"},{\"text\":\"GitHub\",\"link\":\"https://github.com/crawlee-cloud/crawlee-cloud\"}],\"sidebar\":[{\"text\":\"Introduction\",\"items\":[{\"text\":\"Overview\",\"link\":\"/docs/\"},{\"text\":\"Apify SDK Environment\",\"link\":\"/docs/apify-sdk-environment\"}]},{\"text\":\"Components\",\"items\":[{\"text\":\"CLI\",\"link\":\"/docs/cli\"},{\"text\":\"Dashboard\",\"link\":\"/docs/dashboard\"},{\"text\":\"Runner\",\"link\":\"/docs/runner\"},{\"text\":\"Deployment\",\"link\":\"/docs/deployment\"},{\"text\":\"API Reference\",\"link\":\"/docs/api\"}]}],\"socialLinks\":[{\"icon\":\"github\",\"link\":\"https://github.com/crawlee-cloud/crawlee-cloud\"}],\"footer\":{\"message\":\"Released under the MIT License.\",\"copyright\":\"Copyright © 2025 Crawlee Cloud\"},\"search\":{\"provider\":\"local\"}},\"locales\":{},\"scrollOffset\":134,\"cleanUrls\":true}");</script>
2041

2142
</body>
2243
</html>

dist/assets/chunks/@localSearchIndexroot.DXC8KXSo.js

Lines changed: 0 additions & 1 deletion
This file was deleted.

0 commit comments

Comments
 (0)