From 49abbc9769dfb49e0d5f0fce5fe6ecb1e5912885 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Richard=20Biro=C5=A1?=
Date: Wed, 13 May 2026 13:05:00 +0200
Subject: [PATCH 1/2] fix: preserve tab labels and code language tags in agent-facing .md files
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds two rehype plugins to the signalwire llms-txt pipeline that run before
HTML→markdown conversion:

- rehypeExpandTabs: expands Docusaurus <Tabs> into sequential labeled sections
  so all tab variants (e.g. JS and Python) are visible with their labels,
  instead of being dropped or merged into unlabeled blocks
- rehypeFixCodeLanguage: copies the language-* class from Prism's <pre>
  to its child <code> so rehype-remark can detect the language and emit
  correct fenced code blocks (e.g. ```javascript instead of bare ```)

Fixes #2459, #1884

Co-Authored-By: Claude Sonnet 4.6 
---
 docusaurus.config.js                 |  4 ++
 tools/utils/rehypeExpandTabs.js      | 72 ++++++++++++++++++++++++++++
 tools/utils/rehypeFixCodeLanguage.js | 37 ++++++++++++++
 3 files changed, 113 insertions(+)
 create mode 100644 tools/utils/rehypeExpandTabs.js
 create mode 100644 tools/utils/rehypeFixCodeLanguage.js

diff --git a/docusaurus.config.js b/docusaurus.config.js
index 3aa4a391ab..01610adfc3 100644
--- a/docusaurus.config.js
+++ b/docusaurus.config.js
@@ -8,6 +8,8 @@ const { config } = require('./apify-docs-theme');
 const { collectSlugs } = require('./tools/utils/collectSlugs');
 const { externalLinkProcessor, isInternal } = require('./tools/utils/externalLink');
 const { removeLlmButtons } = require('./tools/utils/removeLlmButtons');
+const { rehypeExpandTabs } = require('./tools/utils/rehypeExpandTabs');
+const { rehypeFixCodeLanguage } = require('./tools/utils/rehypeFixCodeLanguage');
 
 /**
  * Helper to extract text from a node recursively.
@@ -354,6 +356,8 @@ module.exports = {
                             categoryName: 'Platform documentation',
                         },
                     ],
+                    // Expand <Tabs> and fix Prism code language tags before HTML→markdown conversion
+                    beforeDefaultRehypePlugins: [rehypeExpandTabs, rehypeFixCodeLanguage],
                     // Add custom remark processing to remove LLM button text
                     remarkPlugins: [removeLlmButtons],
                 },
diff --git a/tools/utils/rehypeExpandTabs.js b/tools/utils/rehypeExpandTabs.js
new file mode 100644
index 0000000000..6fba0c77bd
--- /dev/null
+++ b/tools/utils/rehypeExpandTabs.js
@@ -0,0 +1,72 @@
+'use strict';
+
+const { visit, SKIP } = require('unist-util-visit');
+const { selectAll } = require('hast-util-select');
+const { toString } = require('hast-util-to-string');
+
+function isTabsContainer(node) {
+    return (
+        node.tagName === 'div' &&
+        Array.isArray(node.properties?.className) &&
+        node.properties.className.includes('tabs-container')
+    );
+}
+
+function buildReplacement(tabsContainer) {
+    const labels = selectAll('ul[role="tablist"] li[role="tab"]', tabsContainer).map((li) =>
+        toString(li).trim(),
+    );
+    const panels = selectAll('div[role="tabpanel"]', tabsContainer);
+
+    const result = [];
+    for (let i = 0; i < panels.length; i++) {
+        const panel = panels[i];
+        if (panel.properties) delete panel.properties.hidden;
+
+        if (labels[i]) {
+            result.push({
+                type: 'element',
+                tagName: 'p',
+                properties: {},
+                children: [
+                    {
+                        type: 'element',
+                        tagName: 'strong',
+                        properties: {},
+                        children: [{ type: 'text', value: labels[i] }],
+                    },
+                ],
+            });
+        }
+
+        result.push(...(panel.children ?? []));
+    }
+    return result;
+}
+
+/**
+ * Expands Docusaurus <Tabs> into sequential labeled sections before HTML→markdown conversion.
+ *
+ * Without this, the pipeline drops hidden tab panels and loses label-to-content
+ * associations, making multi-language examples unreadable for LLMs.
+ */
+function rehypeExpandTabs() {
+    return (tree) => {
+        const replacements = [];
+
+        visit(tree, 'element', (node, index, parent) => {
+            if (!isTabsContainer(node)) return;
+            const nodes = buildReplacement(node);
+            if (nodes.length === 0) return;
+            replacements.push({ parent, index, nodes });
+            return SKIP;
+        });
+
+        // Reverse to preserve indices when splicing
+        for (const { parent, index, nodes } of replacements.reverse()) {
+            parent.children.splice(index, 1, ...nodes);
+        }
+    };
+}
+
+module.exports = { rehypeExpandTabs };
diff --git a/tools/utils/rehypeFixCodeLanguage.js b/tools/utils/rehypeFixCodeLanguage.js
new file mode 100644
index 0000000000..992c8c755c
--- /dev/null
+++ b/tools/utils/rehypeFixCodeLanguage.js
@@ -0,0 +1,37 @@
+'use strict';
+
+const { visit } = require('unist-util-visit');
+
+/**
+ * Copies the `language-*` class from Prism's `<pre>` to its child `<code>` element.
+ *
+ * Docusaurus renders code blocks with the language class on `<pre>` but `hast-util-to-mdast` only checks `<code>` for the class,
+ * so without this all code blocks lose their language tag in generated markdown.
+ */
+function rehypeFixCodeLanguage() {
+    return (tree) => {
+        visit(tree, 'element', (node) => {
+            if (node.tagName !== 'pre') return;
+
+            const preClasses = node.properties?.className ?? [];
+            const langClass = preClasses.find(
+                (c) => typeof c === 'string' && c.startsWith('language-'),
+            );
+            if (!langClass) return;
+
+            const code = (node.children ?? []).find(
+                (c) => c.type === 'element' && c.tagName === 'code',
+            );
+            if (!code) return;
+
+            if (!code.properties) code.properties = {};
+            const codeClasses = code.properties.className ?? [];
+            if (!codeClasses.some((c) => typeof c === 'string' && c.startsWith('language-'))) {
+                code.properties.className = [langClass, ...codeClasses];
+            }
+        });
+    };
+}
+
+module.exports = { rehypeFixCodeLanguage };

From f5931877b74febea4dd6bc0f12a4e3d219b98b98 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Richard=20Biro=C5=A1?= 
Date: Wed, 13 May 2026 13:27:12 +0200
Subject: [PATCH 2/2] chore: fix oxfmt formatting in rehype plugins

Co-Authored-By: Claude Sonnet 4.6 
---
 tools/utils/rehypeExpandTabs.js      | 4 +---
 tools/utils/rehypeFixCodeLanguage.js | 8 ++------
 2 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/tools/utils/rehypeExpandTabs.js b/tools/utils/rehypeExpandTabs.js
index 6fba0c77bd..15451deb2d 100644
--- a/tools/utils/rehypeExpandTabs.js
+++ b/tools/utils/rehypeExpandTabs.js
@@ -13,9 +13,7 @@ function isTabsContainer(node) {
 }
 
 function buildReplacement(tabsContainer) {
-    const labels = selectAll('ul[role="tablist"] li[role="tab"]', tabsContainer).map((li) =>
-        toString(li).trim(),
-    );
+    const labels = selectAll('ul[role="tablist"] li[role="tab"]', tabsContainer).map((li) => toString(li).trim());
     const panels = selectAll('div[role="tabpanel"]', tabsContainer);
 
     const result = [];
diff --git a/tools/utils/rehypeFixCodeLanguage.js b/tools/utils/rehypeFixCodeLanguage.js
index 992c8c755c..4cb65fd187 100644
--- a/tools/utils/rehypeFixCodeLanguage.js
+++ b/tools/utils/rehypeFixCodeLanguage.js
@@ -15,14 +15,10 @@ function rehypeFixCodeLanguage() {
             if (node.tagName !== 'pre') return;
 
             const preClasses = node.properties?.className ?? [];
-            const langClass = preClasses.find(
-                (c) => typeof c === 'string' && c.startsWith('language-'),
-            );
+            const langClass = preClasses.find((c) => typeof c === 'string' && c.startsWith('language-'));
             if (!langClass) return;
 
-            const code = (node.children ?? []).find(
-                (c) => c.type === 'element' && c.tagName === 'code',
-            );
+            const code = (node.children ?? []).find((c) => c.type === 'element' && c.tagName === 'code');
             if (!code) return;
 
             if (!code.properties) code.properties = {};