diff --git a/src/api/json/catalog.json b/src/api/json/catalog.json index a15fc46e017..a16ef77ef4b 100644 --- a/src/api/json/catalog.json +++ b/src/api/json/catalog.json @@ -2336,6 +2336,12 @@ "fileMatch": ["docfx.json"], "url": "https://www.schemastore.org/docfx.json" }, + { + "name": "Docs MCP Manifest", + "description": "Configuration file for Docs MCP", + "fileMatch": [".docs-mcp.json"], + "url": "https://raw.githubusercontent.com/speakeasy-api/docs-mcp/refs/heads/main/schemas/docs-mcp.schema.json" + }, { "name": "Dofigen", "description": "Dofigen configuration file. Documentation: https://github.com/lenra-io/dofigen", diff --git a/src/schema-validation.jsonc b/src/schema-validation.jsonc index 9111cd1d2a1..dee15b72a95 100644 --- a/src/schema-validation.jsonc +++ b/src/schema-validation.jsonc @@ -344,6 +344,7 @@ "web-manifest.json", "web-manifest-app-info.json", "web-manifest-share-target.json", + "docs-mcp-manifest.json", // Below this line are standalone schemas without a URL in the catalog, "lsdlschema-0.7.json", "lsdlschema-1.0.json", diff --git a/src/schemas/json/docs-mcp-manifest.json b/src/schemas/json/docs-mcp-manifest.json new file mode 100644 index 00000000000..6c7d14db54b --- /dev/null +++ b/src/schemas/json/docs-mcp-manifest.json @@ -0,0 +1,121 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://json.schemastore.org/docs-mcp-manifest.json", + "additionalProperties": false, + "description": "Docs MCP configuration file (.docs-mcp.json) that controls how documentation is chunked, tagged, and indexed for search.", + "properties": { + "metadata": { + "additionalProperties": { + "type": "string" + }, + "description": "Key-value pairs attached to every chunk produced from this directory tree. Each key becomes a filterable taxonomy dimension exposed as an enum parameter on the search tool.", + "examples": [ + { + "language": "typescript", + "scope": "sdk-specific" + } + ], + "propertyNames": { + "type": "string" + }, + "type": "object" + }, + "overrides": { + "description": "Per-file-pattern overrides for chunking strategy and metadata. Evaluated top-to-bottom; last match wins.", + "items": { + "additionalProperties": false, + "description": "Overrides the default chunking strategy and/or metadata for files matching a glob pattern. Within the overrides array, later matches take precedence.", + "properties": { + "metadata": { + "additionalProperties": { + "type": "string" + }, + "description": "Metadata key-value pairs merged with root metadata for matching files (override keys win). Each key becomes a filterable taxonomy dimension in the search API.", + "examples": [ + { + "scope": "global-guide" + } + ], + "propertyNames": { + "type": "string" + }, + "type": "object" + }, + "pattern": { + "description": "A glob pattern matched against file paths relative to the directory containing the manifest.", + "examples": ["guides/advanced/*.md"], + "minLength": 1, + "type": "string" + }, + "strategy": { + "additionalProperties": false, + "description": "Chunking strategy override for files matching this pattern. Replaces the root strategy entirely.", + "properties": { + "chunk_by": { + "description": "The heading level at which to split markdown into chunks. 'h1' splits at top-level headings, 'h2'/'h3' at progressively finer granularity, and 'file' treats the entire file as one chunk.", + "enum": ["h1", "h2", "h3", "file"], + "examples": ["h2"], + "type": "string" + }, + "max_chunk_size": { + "description": "Maximum chunk size in characters. Chunks exceeding this limit are split at the next available boundary to prevent oversized results.", + "examples": [8000], + "minimum": 1, + "maximum": 9007199254740991, + "type": "integer" + }, + "min_chunk_size": { + "description": "Minimum chunk size in characters. Trailing chunks smaller than this are merged into the preceding chunk to avoid fragments.", + "examples": [200], + "minimum": 1, + "maximum": 9007199254740991, + "type": "integer" + } + }, + "required": ["chunk_by"], + "type": "object" + } + }, + "required": ["pattern"], + "type": "object" + }, + "type": "array" + }, + "strategy": { + "additionalProperties": false, + "description": "Default chunking strategy applied to all files in this directory tree unless overridden by a more specific rule.", + "properties": { + "chunk_by": { + "description": "The heading level at which to split markdown into chunks. 'h1' splits at top-level headings, 'h2'/'h3' at progressively finer granularity, and 'file' treats the entire file as one chunk.", + "enum": ["h1", "h2", "h3", "file"], + "examples": ["h2"], + "type": "string" + }, + "max_chunk_size": { + "description": "Maximum chunk size in characters. Chunks exceeding this limit are split at the next available boundary to prevent oversized results.", + "examples": [8000], + "minimum": 1, + "maximum": 9007199254740991, + "type": "integer" + }, + "min_chunk_size": { + "description": "Minimum chunk size in characters. Trailing chunks smaller than this are merged into the preceding chunk to avoid fragments.", + "examples": [200], + "minimum": 1, + "maximum": 9007199254740991, + "type": "integer" + } + }, + "required": ["chunk_by"], + "type": "object" + }, + "version": { + "const": "1", + "description": "Schema version. Must be '1'.", + "examples": ["1"], + "type": "string" + } + }, + "required": ["version"], + "type": "object" +} diff --git a/src/schemas/json/partial-tox.json b/src/schemas/json/partial-tox.json index 77526f1f046..363d5a28a32 100644 --- a/src/schemas/json/partial-tox.json +++ b/src/schemas/json/partial-tox.json @@ -9,22 +9,5 @@ "$ref": "https://json.schemastore.org/tox.json" } ], - "properties": { - "legacy_tox_ini": { - "type": "string", - "title": "Tox configuration in ini format", - "description": "This is equivalent to tox.ini format, with the difference that the text is stored instead inside the pyproject.toml file under the tool.tox table and legacy_tox_ini key.", - "markdownDescription": "This is equivalent to `tox.ini` format, with the difference that the text is stored instead inside the `pyproject.toml` file under the `tool.tox` table and `legacy_tox_ini` key.", - "x-intellij-html-description": "

This is equivalent to tox.ini format, with the difference that the text is stored instead inside the pyproject.toml file under the tool.tox table and legacy_tox_ini key.

", - "x-intellij-language-injection": "ini", - "x-taplo": { - "links": { - "key": "https://tox.wiki/en/stable/config.html#pyproject-toml-ini" - } - }, - "examples": [ - "[tool.tox]\nlegacy_tox_ini = \"\"\"\n[tox]\nenvlist = py38\n[testenv]\ncommands = pytest\n\"\"\"" - ] - } - } + "additionalProperties": true } diff --git a/src/test/docs-mcp-manifest/.docs-mcp.json b/src/test/docs-mcp-manifest/.docs-mcp.json new file mode 100644 index 00000000000..09c43bff5b4 --- /dev/null +++ b/src/test/docs-mcp-manifest/.docs-mcp.json @@ -0,0 +1,23 @@ +{ + "metadata": { + "language": "typescript", + "scope": "sdk-specific" + }, + "overrides": [ + { + "metadata": { + "scope": "global-guide" + }, + "pattern": "guides/advanced/*.md", + "strategy": { + "chunk_by": "h3" + } + } + ], + "strategy": { + "chunk_by": "h2", + "max_chunk_size": 8000, + "min_chunk_size": 200 + }, + "version": "1" +}