-
Notifications
You must be signed in to change notification settings - Fork 46
Expand file tree
/
Copy pathllmstxt.ts
More file actions
366 lines (311 loc) · 12.4 KB
/
llmstxt.ts
File metadata and controls
366 lines (311 loc) · 12.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
import { GatsbyNode } from 'gatsby';
import * as path from 'path';
import * as fs from 'fs';
/**
 * This script creates a file called llms.txt that contains a list of all the pages in the site.
 * It is heavily inspired by the gatsby-plugin-sitemap plugin, stripped down to only what we need.
 */
/**
 * Fixed Markdown header placed at the very top of the generated llms.txt,
 * before any category sections. Each array entry is one output line; the
 * value ends with a trailing newline.
 */
const LLMS_TXT_PREAMBLE =
  [
    '# Ably Documentation',
    '> Ably is a realtime experience infrastructure platform that provides pub/sub messaging, chat, realtime data synchronization, and more.',
    '- **Global Edge Network**: Ultra-low latency realtime messaging delivered through a globally distributed edge network',
    '- **Enterprise Scale**: Built to handle millions of concurrent connections with guaranteed message delivery',
    '- **Multiple Products**: Pub/Sub, AI Transport, Chat, LiveSync, LiveObjects and Spaces',
    '- **Developer-Friendly SDKs**: SDKs available for JavaScript, Node.js, Java, Python, Go, Objective-C, Swift, Csharp, PHP, Flutter, Ruby, React, React Native, and Kotlin',
  ].join('\n') + '\n';

/** Tag prepended to reporter messages so they can be traced back to this build hook. */
const REPORTER_PREFIX = 'onPostBuild:';
/** Fields selected from the parent of an MDX node (queried via `... on File`). */
interface MdxParentFile {
  relativeDirectory: string;
  name: string;
}

/** Frontmatter fields selected for each MDX node; both may be absent. */
interface MdxFrontmatter {
  title?: string;
  meta_description?: string;
}

/** One entry of `allMdx.nodes` as selected by the build query. */
interface MdxNode {
  parent: MdxParentFile;
  frontmatter: MdxFrontmatter;
}

/** Shape of the data returned by the GraphQL query issued in `onPostBuild`. */
interface DocumentQueryResult {
  site: {
    siteMetadata: {
      siteUrl: string;
    };
  };
  allMdx: {
    nodes: MdxNode[];
  };
}
/**
 * Drops one trailing `/` from a path. The root path `/` and paths that do not
 * end in a slash are returned unchanged.
 */
const withoutTrailingSlash = (path: string) => {
  if (path === `/` || !path.endsWith(`/`)) {
    return path;
  }
  return path.slice(0, -1);
};
/**
 * Builds an absolute URL string for a site-relative path.
 *
 * The optional `pathPrefix` is prepended to `url` (after a trailing slash is
 * removed from `url`), and the result is resolved against `siteUrl`.
 */
const prefixPath = ({ url, siteUrl, pathPrefix = `` }: { url: string; siteUrl: string; pathPrefix?: string }) => {
  const relativeLocation = `${pathPrefix}${withoutTrailingSlash(url)}`;
  const resolved = new URL(relativeLocation, siteUrl);
  return resolved.toString();
};
/**
 * Returns `text` with each Markdown special character prefixed by a backslash.
 * Characters escaped: \ ` * _ { } [ ] ( ) # + !
 */
const escapeMarkdown = (text: string) => {
  const escaped = text.replace(/([\\`*_{}[\]()#+!])/g, (specialChar) => `\\${specialChar}`);
  return escaped;
};
// A single page entry as it is stored inside the category tree
interface CategorizedPage {
  slug: string;
  meta: { title: string; meta_description: string };
}

// A titled group of pages (used for subcategories)
interface CategorizedPageGroup {
  title: string;
  pages: CategorizedPage[];
}

// Category tree used to organize pages: category -> optional direct pages + named subcategories
interface CategoryStructure {
  [category: string]: {
    title: string;
    pages?: CategorizedPage[];
    subcategories: {
      [subcategory: string]: CategorizedPageGroup;
    };
  };
}
/**
 * Determines which llms.txt category (and optional subcategory) a page belongs to.
 *
 * The decision is based on the first one to three `/`-separated segments of `slug`.
 * Rules are evaluated from most specific to least specific:
 *   1. `api/*`                          -> API reference buckets (REST / Realtime / Control API / general)
 *   2. `<product>/api`                  -> that product's "API Reference" (Chat, Spaces, LiveObjects, LiveSync)
 *   3. `platform/account/control-api`   -> Platform / Control API
 *   4. `platform/<known platform dir>`  -> the mapping of the second segment
 *   5. `guides/ai-transport`            -> AI Transport / Guides
 *   6. first segment found in the map   -> its mapping
 *   7. anything else                    -> General / Documentation
 *
 * @param slug - Page path relative to the docs root, e.g. `chat/rooms/messages`.
 * @returns The category name and, when applicable, the subcategory name.
 */
const categorizePage = (slug: string): { category: string; subcategory?: string } => {
  const parts = slug.split('/');
  // An empty slug has an empty first segment; 'general' is not in the map, so it falls to the default.
  const firstPart = parts[0] || 'general';
  const secondPart = parts[1];

  // Define category mappings
  const categoryMap: Record<string, { category: string; subcategory?: string }> = {
    // Platform
    platform: { category: 'Platform' },
    account: { category: 'Platform', subcategory: 'Account Management' },
    architecture: { category: 'Platform', subcategory: 'Architecture' },
    deprecate: { category: 'Platform', subcategory: 'Deprecations' },
    errors: { category: 'Platform', subcategory: 'Errors' },
    integrations: { category: 'Platform', subcategory: 'Integrations' },
    pricing: { category: 'Platform', subcategory: 'Pricing' },
    auth: { category: 'Platform', subcategory: 'Authentication' },
    guides: { category: 'Platform' },
    sdks: { category: 'Platform', subcategory: 'SDKs' },
    'control-api': { category: 'Platform', subcategory: 'Control API' },
    // Pub/Sub - Core realtime messaging features
    api: { category: 'Pub/Sub', subcategory: 'API Reference' },
    basics: { category: 'Pub/Sub' },
    channels: { category: 'Pub/Sub', subcategory: 'Channels' },
    connect: { category: 'Pub/Sub', subcategory: 'Connections' },
    'getting-started': { category: 'Pub/Sub', subcategory: 'Getting Started' },
    messages: { category: 'Pub/Sub', subcategory: 'Messages' },
    'metadata-stats': { category: 'Pub/Sub', subcategory: 'Metadata & Statistics' },
    'presence-occupancy': { category: 'Pub/Sub', subcategory: 'Presence & Occupancy' },
    protocols: { category: 'Pub/Sub', subcategory: 'Protocols' },
    'pub-sub': { category: 'Pub/Sub' },
    push: { category: 'Pub/Sub', subcategory: 'Push Notifications' },
    'storage-history': { category: 'Pub/Sub', subcategory: 'Storage & History' },
    // Chat
    chat: { category: 'Chat' },
    // Spaces
    spaces: { category: 'Spaces' },
    // LiveObjects
    liveobjects: { category: 'LiveObjects' },
    // LiveSync
    livesync: { category: 'LiveSync' },
    // AI Transport
    'ai-transport': { category: 'AI Transport' },
    // General - FAQs
    faq: { category: 'General', subcategory: 'FAQs' },
  };

  // Own-property lookup so that segments such as "constructor" or "toString"
  // never resolve to members inherited from Object.prototype.
  const lookup = (key: string | undefined): { category: string; subcategory?: string } | undefined =>
    key !== undefined && Object.prototype.hasOwnProperty.call(categoryMap, key) ? categoryMap[key] : undefined;

  const firstEntry = lookup(firstPart);
  const secondEntry = lookup(secondPart);

  // Special handling for API references - distinguish between REST SDK, Realtime SDK, and Control API
  if (firstPart === 'api') {
    switch (secondPart) {
      case 'control-api':
        return { category: 'Platform', subcategory: 'Control API' };
      case 'rest-sdk':
      case 'rest-api':
      case 'sse':
        return { category: 'Pub/Sub', subcategory: 'REST SDK API Reference' };
      case 'realtime-sdk':
        return { category: 'Pub/Sub', subcategory: 'Realtime SDK API Reference' };
      default:
        // Bare /api and any other api/* path share the general API Reference bucket
        return { category: 'Pub/Sub', subcategory: 'API Reference' };
    }
  }

  // Special handling for product/api pattern (product-specific API references)
  if (
    secondPart === 'api' &&
    firstEntry &&
    ['Chat', 'Spaces', 'LiveObjects', 'LiveSync'].includes(firstEntry.category)
  ) {
    return { category: firstEntry.category, subcategory: 'API Reference' };
  }

  // Special handling for platform/account/control-api.
  // This must run before the generic platform-subdirectory rule below, which would
  // otherwise classify every platform/account/* page as "Account Management".
  if (firstPart === 'platform' && secondPart === 'account' && parts[2] === 'control-api') {
    return { category: 'Platform', subcategory: 'Control API' };
  }

  // Special handling for platform subdirectories
  if (firstPart === 'platform' && secondEntry?.category === 'Platform') {
    return secondEntry;
  }

  // Special handling for guides/ai-transport - route to AI Transport category with Guides subcategory
  if (firstPart === 'guides' && secondPart === 'ai-transport') {
    return { category: 'AI Transport', subcategory: 'Guides' };
  }

  if (firstEntry) {
    return firstEntry;
  }

  // Default categorization for uncategorized pages
  return { category: 'General', subcategory: 'Documentation' };
};
/**
 * Gatsby `onPostBuild` hook that writes `public/llms.txt`.
 *
 * Steps:
 *   1. Query the site URL and all MDX nodes (parent file location + frontmatter).
 *   2. Keep nodes whose parent directory starts with `docs` and that have both a
 *      `title` and a `meta_description`; derive a slug for each.
 *   3. Group the pages by category / subcategory using `categorizePage`.
 *   4. Emit the preamble followed by one `##` section per category, `###` per
 *      subcategory, and one Markdown list item per page linking to its `.md` URL.
 *   5. Write the result to `<cwd>/public/llms.txt`.
 *
 * Failures are reported through `reporter.panicOnBuild` / `reporter.panic`; query and
 * site-URL failures additionally throw.
 */
export const onPostBuild: GatsbyNode['onPostBuild'] = async ({ graphql, reporter, basePath }) => {
  const query = `
    query {
      site {
        siteMetadata {
          siteUrl
        }
      }
      allMdx {
        nodes {
          parent {
            ... on File {
              relativeDirectory
              name
            }
          }
          frontmatter {
            title
            meta_description
          }
        }
      }
    }
  `;

  const { data: queryRecords, errors } = await graphql<DocumentQueryResult>(query);

  if (errors) {
    reporter.panicOnBuild(`Error while running GraphQL query: ${JSON.stringify(errors, null, 2)}`);
    throw errors;
  }

  if (!queryRecords) {
    reporter.panicOnBuild(`No documents found.`);
    throw new Error('No documents found.');
  }

  const siteUrl = queryRecords.site.siteMetadata.siteUrl;

  if (!siteUrl) {
    reporter.panicOnBuild(`${REPORTER_PREFIX} Site URL not found.`);
    throw new Error('Site URL not found.');
  }

  // Process MDX pages (allMdx)
  const pages: Array<{ slug: string; meta: { title: string; meta_description: string } }> = [];

  for (const node of queryRecords.allMdx.nodes) {
    const { relativeDirectory, name } = node.parent;
    const title = node.frontmatter?.title;
    const metaDescription = node.frontmatter?.meta_description;

    // Only include pages from docs directory that have the required frontmatter
    if (!relativeDirectory.startsWith('docs') || !title || !metaDescription) {
      continue;
    }

    // Create slug from parent file info - remove 'docs/' prefix since it's already in relativeDirectory.
    // NOTE(review): an index file located directly in `docs` produces the slug "docs" here
    // (the regex requires a trailing slash) - confirm whether such a page exists and what URL it should get.
    const slug = (relativeDirectory + (name === 'index' ? '' : `/${name}`)).replace(/^docs\//, '');

    pages.push({
      slug,
      meta: {
        title,
        meta_description: metaDescription,
      },
    });
  }

  reporter.info(`${REPORTER_PREFIX} Found ${pages.length} pages to place into llms.txt`);

  // Organize pages into categories
  const categoryStructure: CategoryStructure = {};

  for (const page of pages) {
    const { category, subcategory } = categorizePage(page.slug);

    // Initialize category if it doesn't exist
    if (!categoryStructure[category]) {
      categoryStructure[category] = {
        title: category,
        subcategories: {},
      };
    }

    const categoryEntry = categoryStructure[category];

    if (!subcategory) {
      // No subcategory: the page is listed directly under the category heading
      if (!categoryEntry.pages) {
        categoryEntry.pages = [];
      }
      categoryEntry.pages.push(page);
      continue;
    }

    // Initialize subcategory if it doesn't exist
    if (!categoryEntry.subcategories[subcategory]) {
      categoryEntry.subcategories[subcategory] = {
        title: subcategory,
        pages: [],
      };
    }

    // Add page to subcategory (only base page without language variants)
    categoryEntry.subcategories[subcategory].pages.push(page);
  }

  // Generate serialized output with categorization.
  // This array holds output LINES (preamble, headings, blank lines, list items), not just pages.
  const serializedPages = [LLMS_TXT_PREAMBLE];

  // Define the order of categories
  const categoryOrder = ['Platform', 'Pub/Sub', 'Chat', 'Spaces', 'LiveObjects', 'LiveSync', 'AI Transport', 'General'];

  // Sort categories by defined order; categories missing from the list go last, alphabetically
  const sortedCategories = Object.keys(categoryStructure).sort((a, b) => {
    const indexA = categoryOrder.indexOf(a);
    const indexB = categoryOrder.indexOf(b);

    if (indexA === -1 && indexB === -1) {
      return a.localeCompare(b);
    }
    if (indexA === -1) {
      return 1;
    }
    if (indexB === -1) {
      return -1;
    }
    return indexA - indexB;
  });

  // Helper function to serialize pages
  // Note: We only generate the base .md URL since the markdown endpoint returns
  // the same content regardless of language parameter - all language code snippets
  // are included in the single markdown file.
  const serializePages = (
    pagesToSerialize: Array<{ slug: string; meta: { title: string; meta_description: string } }>,
  ) => {
    for (const page of pagesToSerialize) {
      const { slug, meta } = page;
      const { title, meta_description } = meta;

      try {
        const baseUrl = prefixPath({ url: `/docs/${slug}.md`, siteUrl, pathPrefix: basePath });
        const safeTitle = escapeMarkdown(title);

        // Generate base page entry only (no language-specific variants needed)
        // The markdown file contains all language code snippets
        const baseLink = `[${safeTitle}](${baseUrl})`;
        const baseLine = `- ${[baseLink, meta_description].join(': ')}`;
        serializedPages.push(baseLine);
      } catch (err) {
        reporter.panic(`${REPORTER_PREFIX} Error serializing pages`, err as Error);
      }
    }
  };

  for (const categoryKey of sortedCategories) {
    const category = categoryStructure[categoryKey];
    serializedPages.push(`## ${category.title}`);
    serializedPages.push('');

    // Add pages directly under the category (no subcategory)
    if (category.pages && category.pages.length > 0) {
      serializePages(category.pages);
      serializedPages.push(''); // Add blank line after category pages
    }

    // Sort subcategories alphabetically
    const sortedSubcategories = Object.keys(category.subcategories).sort();

    for (const subcategoryKey of sortedSubcategories) {
      const subcategory = category.subcategories[subcategoryKey];
      serializedPages.push(`### ${subcategory.title}`);
      serializedPages.push('');
      serializePages(subcategory.pages);
      serializedPages.push(''); // Add blank line after each subcategory
    }
  }

  const llmsTxtPath = path.join(process.cwd(), 'public', 'llms.txt');

  try {
    fs.writeFileSync(llmsTxtPath, serializedPages.join('\n'));
    // Report the number of pages, not the number of output lines (which also
    // includes the preamble, headings and blank separators).
    reporter.info(`${REPORTER_PREFIX} Successfully wrote llms.txt with ${pages.length} pages`);
  } catch (err) {
    reporter.panic(`${REPORTER_PREFIX} Error writing llms.txt file`, err as Error);
  }
};