-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcreate-search-index.ts
More file actions
127 lines (110 loc) · 3.32 KB
/
create-search-index.ts
File metadata and controls
127 lines (110 loc) · 3.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import type { Page } from "content-collections"
import slug from "slug"
import { getPageSlug } from "~/utils/get-page-slug"
/**
 * Reduces one Markdown/MDX paragraph to plain text for the search index.
 *
 * Strips list markers, inline-code backticks, `*` / `_` emphasis markers,
 * link targets (the link text is kept) and inline `{ ... }` MDX attributes,
 * then collapses blank-line runs and horizontal whitespace.
 *
 * @param raw - Paragraph source text.
 * @returns The cleaned, trimmed text; may be an empty string.
 */
function cleanParagraph(raw: string) {
  return (
    raw
      // strip unordered bullets before emphasis handling; otherwise the "*" of
      // two consecutive bullets pairs up and is consumed as one italic span
      .replace(/^\s*[-*+]\s+/gm, "")
      // strip inline code, bold, italics
      .replace(/`([^`]+)`/g, "$1")
      .replace(/\*\*([^*]+)\*\*/g, "$1")
      .replace(/\*([^*]+)\*/g, "$1")
      // underscore emphasis only counts when the markers are not glued to a
      // letter, digit or underscore, so identifiers such as snake_case_name
      // keep their underscores ($1 restores the character before the marker)
      .replace(/(^|[^\p{L}\p{N}_])__([^_]+)__(?![\p{L}\p{N}_])/gu, "$1$2")
      .replace(/(^|[^\p{L}\p{N}_])_([^_]+)_(?![\p{L}\p{N}_])/gu, "$1$2")
      // strip markdown links [text](url)
      .replace(/\[([^\]]+)\]\([^)]+\)/g, "$1")
      // strip mdx attributes { ... } inline
      .replace(/\{[^}]*\}/g, "")
      // list bullets / ordered list markers at line start (the bullet pass is
      // repeated for markers that only reach line start after the stripping above)
      .replace(/^\s*[-*+]\s+/gm, "")
      .replace(/^\s*\d+\.\s+/gm, "")
      // collapse whitespace
      .replace(/\n{2,}/g, "\n")
      .replace(/[ \t]+/g, " ")
      .trim()
  )
}
/**
 * Removes every triple-backtick fenced block from the text.
 *
 * The match is non-greedy, so each opening fence is paired with the nearest
 * following closing fence. Text outside the fences is returned untouched.
 *
 * @param src - Markdown/MDX source.
 * @returns The source with all closed fenced blocks deleted.
 */
function stripCodeFences(src: string) {
  const fencedBlock = /```[\s\S]*?```/g
  const withoutFences = src.replace(fencedBlock, "")
  return withoutFences
}
/**
 * Breaks text into paragraphs on blank (or whitespace-only) lines and cleans each one.
 *
 * @param src - Markdown/MDX text, typically the body of one section.
 * @returns The cleaned paragraphs in document order; paragraphs that are
 *   empty after cleaning are dropped.
 */
function splitIntoParagraphs(src: string) {
  const cleanedParagraphs: string[] = []
  for (const chunk of src.split(/\n\s*\n/g)) {
    const cleaned = cleanParagraph(chunk)
    if (cleaned.length > 0) {
      cleanedParagraphs.push(cleaned)
    }
  }
  return cleanedParagraphs
}
/**
 * Converts one heading regex match into a plain descriptor.
 *
 * Expects capture group 1 to hold the run of `#` characters and group 2 the
 * heading text.
 *
 * @param match - A match whose groups follow the layout above.
 * @returns `level` (number of `#`), `text` (group 2), `index` (match offset,
 *   0 when the match carries none) and `length` (length of the whole match).
 */
const extractHeadingData = (match: RegExpMatchArray) => {
  const wholeLine = match[0]
  const hashRun = match[1]
  const headingText = match[2]
  return {
    level: hashRun.length,
    text: headingText,
    index: match.index ?? 0,
    length: wholeLine.length,
  }
}
/**
 * Splits an MDX document into sections delimited by its `#`–`######` headings.
 *
 * Fenced code blocks are removed first, so `#` lines inside them are not
 * treated as headings. Content before the first heading (or the whole
 * document when there are no headings) becomes a section whose heading and
 * anchor are both `"_intro"`, provided it yields at least one paragraph.
 * Every heading produces a section, even when it has no paragraphs.
 *
 * @param rawMdx - Raw MDX source of a page.
 * @returns Sections in document order, each with the cleaned heading text,
 *   an anchor unique within this document, and its cleaned paragraphs.
 */
function extractHeadingSections(rawMdx: string) {
  const src = stripCodeFences(rawMdx)
  const headings = Array.from(src.matchAll(/^(#{1,6})\s+(.+?)\s*$/gm)).map((m) =>
    extractHeadingData(m),
  )
  const sections: { heading: string; anchor: string; paragraphs: string[] }[] = []

  // No headings at all: the whole document is the intro section.
  if (headings.length === 0) {
    const paragraphs = splitIntoParagraphs(src)
    if (paragraphs.length) {
      sections.push({ heading: "_intro", anchor: "_intro", paragraphs })
    }
    return sections
  }

  // Anything ahead of the first heading is kept as an intro section.
  const leadIn = src.slice(0, headings[0].index).trim()
  if (leadIn) {
    const leadInParagraphs = splitIntoParagraphs(leadIn)
    if (leadInParagraphs.length) {
      sections.push({ heading: "_intro", anchor: "_intro", paragraphs: leadInParagraphs })
    }
  }

  // Drops inline code / bold / italic markers, link targets and `{ ... }` attributes.
  const toPlainHeading = (text: string) => {
    const withoutCode = text.replace(/`([^`]+)`/g, "$1")
    const withoutBold = withoutCode.replace(/\*\*([^*]+)\*\*/g, "$1")
    const withoutItalic = withoutBold.replace(/\*([^*]+)\*/g, "$1")
    const withoutLinks = withoutItalic.replace(/\[([^\]]+)\]\([^)]+\)/g, "$1")
    return withoutLinks.replace(/\{[^}]*\}/g, "").trim()
  }

  const takenAnchors = new Set<string>()

  for (let i = 0; i < headings.length; i++) {
    const current = headings[i]
    // The section body runs from the end of this heading line up to the next
    // heading; an undefined end lets slice() run to the end of the document.
    const bodyStart = current.index + current.length
    const bodyEnd = headings[i + 1]?.index
    const body = src.slice(bodyStart, bodyEnd).trim()

    const title = toPlainHeading(current.text)
    const base = slug(title) || "_section"

    // Repeated anchors get "-2", "-3", ... appended until unused.
    let anchor = base
    for (let suffix = 2; takenAnchors.has(anchor); suffix++) {
      anchor = `${base}-${suffix}`
    }
    takenAnchors.add(anchor)

    sections.push({
      heading: title,
      anchor,
      paragraphs: splitIntoParagraphs(body),
    })
  }

  return sections
}
/**
 * Builds the flat list of search entries for a set of pages.
 *
 * Pages whose `slug` is `"_index"` are skipped. Every other page contributes
 * one entry per section returned by `extractHeadingSections`.
 *
 * @param pages - Content pages to index.
 * @returns Entries with `id` (`<page url>#<section anchor>`), `title` (the
 *   page title), `subtitle` (the section heading, or the page title for the
 *   `"_intro"` section) and `paragraphs` (the subtitle followed by the
 *   section's paragraphs).
 */
export function createSearchIndex(pages: Page[]) {
  const entries = []
  for (const page of pages) {
    if (page.slug === "_index") {
      continue
    }

    const pageSlug = getPageSlug(page)
    // Make sure the URL part of the id always starts with a slash.
    const pageUrl = pageSlug.startsWith("/") ? pageSlug : `/${pageSlug}`

    for (const section of extractHeadingSections(page.rawMdx)) {
      // The intro section has no heading of its own, so it borrows the page title.
      const subtitle = section.heading === "_intro" ? page.title : section.heading
      entries.push({
        id: `${pageUrl}#${section.anchor}`,
        title: page.title,
        subtitle,
        paragraphs: [subtitle, ...section.paragraphs],
      })
    }
  }
  return entries
}