diff --git a/docusaurus.config.js b/docusaurus.config.js
index 3aa4a391ab..01610adfc3 100644
--- a/docusaurus.config.js
+++ b/docusaurus.config.js
@@ -8,6 +8,8 @@ const { config } = require('./apify-docs-theme');
 const { collectSlugs } = require('./tools/utils/collectSlugs');
 const { externalLinkProcessor, isInternal } = require('./tools/utils/externalLink');
 const { removeLlmButtons } = require('./tools/utils/removeLlmButtons');
+const { rehypeExpandTabs } = require('./tools/utils/rehypeExpandTabs');
+const { rehypeFixCodeLanguage } = require('./tools/utils/rehypeFixCodeLanguage');
 
 /**
  * Helper to extract text from a node recursively.
@@ -354,6 +356,8 @@ module.exports = {
                                 categoryName: 'Platform documentation',
                             },
                         ],
+                        // Expand <Tabs> and fix Prism code language tags before HTML→markdown conversion
+                        beforeDefaultRehypePlugins: [rehypeExpandTabs, rehypeFixCodeLanguage],
                         // Add custom remark processing to remove LLM button text
                         remarkPlugins: [removeLlmButtons],
                     },
diff --git a/tools/utils/rehypeExpandTabs.js b/tools/utils/rehypeExpandTabs.js
new file mode 100644
index 0000000000..15451deb2d
--- /dev/null
+++ b/tools/utils/rehypeExpandTabs.js
@@ -0,0 +1,88 @@
+'use strict';
+
+const { visit } = require('unist-util-visit');
+const { selectAll } = require('hast-util-select');
+const { toString } = require('hast-util-to-string');
+
+/**
+ * Tells whether a hast node is the wrapper element of a rendered tabs group.
+ *
+ * @param {object} node - hast node to inspect.
+ * @returns {boolean} `true` for a `div` whose `className` array includes `tabs-container`.
+ */
+function isTabsContainer(node) {
+    return (
+        node.tagName === 'div' &&
+        Array.isArray(node.properties?.className) &&
+        node.properties.className.includes('tabs-container')
+    );
+}
+
+/**
+ * Builds the flat node list that takes the place of a tabs container: for each tab panel,
+ * a bold paragraph with the label found at the same position (if any), then the panel's children.
+ *
+ * @param {object} tabsContainer - hast element accepted by `isTabsContainer`.
+ * @returns {object[]} Replacement nodes; empty when nothing could be extracted.
+ */
+function buildReplacement(tabsContainer) {
+    const labels = selectAll('ul[role="tablist"] li[role="tab"]', tabsContainer).map((li) => toString(li).trim());
+    const panels = selectAll('div[role="tabpanel"]', tabsContainer);
+
+    const result = [];
+    for (let i = 0; i < panels.length; i++) {
+        const panel = panels[i];
+        if (panel.properties) delete panel.properties.hidden;
+
+        if (labels[i]) {
+            result.push({
+                type: 'element',
+                tagName: 'p',
+                properties: {},
+                children: [
+                    {
+                        type: 'element',
+                        tagName: 'strong',
+                        properties: {},
+                        children: [{ type: 'text', value: labels[i] }],
+                    },
+                ],
+            });
+        }
+
+        result.push(...(panel.children ?? []));
+    }
+    return result;
+}
+
+/**
+ * Expands Docusaurus `<Tabs>` into sequential labeled sections before HTML→markdown conversion.
+ *
+ * Without this, the pipeline drops hidden tab panels and loses label-to-content
+ * associations, making multi-language examples unreadable for LLMs.
+ *
+ * Nested tabs are expanded innermost-first, so an outer container only ever pairs its own
+ * labels with its own panels.
+ *
+ * @returns {(tree: object) => void} Transformer that rewrites the hast tree in place.
+ */
+function rehypeExpandTabs() {
+    return (tree) => {
+        const containers = [];
+
+        visit(tree, 'element', (node, index, parent) => {
+            // A container without a parent (the tree root itself) cannot be spliced out.
+            if (parent && isTabsContainer(node)) containers.push({ parent, index, node });
+        });
+
+        // Walk the pre-order list backwards: nested containers are rewritten before the
+        // container around them and later siblings before earlier ones, so each recorded
+        // index is still valid when it is used.
+        for (const { parent, index, node } of containers.reverse()) {
+            const nodes = buildReplacement(node);
+            if (nodes.length > 0) parent.children.splice(index, 1, ...nodes);
+        }
+    };
+}
+
+module.exports = { rehypeExpandTabs };
diff --git a/tools/utils/rehypeFixCodeLanguage.js b/tools/utils/rehypeFixCodeLanguage.js
new file mode 100644
index 0000000000..4cb65fd187
--- /dev/null
+++ b/tools/utils/rehypeFixCodeLanguage.js
@@ -0,0 +1,33 @@
+'use strict';
+
+const { visit } = require('unist-util-visit');
+
+/**
+ * Copies the `language-*` class from Prism's `
<pre>` to its child `<code>` element.
+ *
+ * Docusaurus renders code blocks with the language class on `<pre>` but `hast-util-to-mdast` only checks `<code>` for the class,
+ * so without this all code blocks lose their language tag in generated markdown.
+ */
+function rehypeFixCodeLanguage() {
+    const isLanguageClass = (name) => typeof name === 'string' && name.startsWith('language-');
+    // hast keeps `className` as an array; accept a plain string or a missing value instead of throwing.
+    const classList = ({ properties }) => {
+        const value = properties?.className;
+        if (Array.isArray(value)) return value;
+        return typeof value === 'string' ? value.split(/\s+/).filter(Boolean) : [];
+    };
+    return (tree) => {
+        visit(tree, 'element', (node) => {
+            if (node.tagName !== 'pre') return;
+            const langClass = classList(node).find(isLanguageClass);
+            const code = (node.children ?? []).find((c) => c.type === 'element' && c.tagName === 'code');
+            if (!langClass || !code) return;
+            const codeClasses = classList(code);
+            if (codeClasses.some(isLanguageClass)) return; // the `<code>` element's own language wins
+            code.properties = { ...code.properties, className: [langClass, ...codeClasses] };
+        });
+    };
+}
+
+module.exports = { rehypeFixCodeLanguage };