diff --git a/docs/modelcontextprotocol-io/package-types.mdx b/docs/modelcontextprotocol-io/package-types.mdx index 4ce97b2f9..528a0e8d5 100644 --- a/docs/modelcontextprotocol-io/package-types.mdx +++ b/docs/modelcontextprotocol-io/package-types.mdx @@ -162,6 +162,86 @@ The MCP Registry verifies ownership of Docker/OCI images by checking for an `io. LABEL io.modelcontextprotocol.server.name="io.github.username/kubernetes-manager-mcp" ``` +## ScrapeGraphAI Packages + +For ScrapeGraphAI packages, the MCP Registry currently supports packages distributed via: +- The official PyPI registry (`https://pypi.org`) +- Smithery (`https://smithery.ai`), which distributes packages via npm + +ScrapeGraphAI packages use `"registryType": "scrapegraphai"` in `server.json`. For example: + +**Example - ScrapeGraphAI package from PyPI:** + +```json server.json highlight={9} +{ + "$schema": "https://static.modelcontextprotocol.io/schemas/2025-12-11/server.schema.json", + "name": "io.github.username/web-scraper-mcp", + "title": "Web Scraper", + "description": "Scrape and extract data from websites using ScrapeGraphAI", + "version": "1.0.0", + "packages": [ + { + "registryType": "scrapegraphai", + "registryBaseUrl": "https://pypi.org", + "identifier": "web-scraper-mcp", + "version": "1.0.0", + "transport": { + "type": "stdio" + } + } + ] +} +``` + +**Example - ScrapeGraphAI package from Smithery:** + +```json server.json highlight={9} +{ + "$schema": "https://static.modelcontextprotocol.io/schemas/2025-12-11/server.schema.json", + "name": "io.scrapegraphai/scrapegraph-mcp", + "title": "ScrapeGraph MCP", + "description": "Scrape and extract data from websites using ScrapeGraphAI", + "version": "1.0.0", + "packages": [ + { + "registryType": "scrapegraphai", + "registryBaseUrl": "https://smithery.ai", + "identifier": "@ScrapeGraphAI/scrapegraph-mcp", + "version": "1.0.0", + "transport": { + "type": "stdio" + } + } + ] +} +``` + +### Ownership Verification + +**For PyPI packages:** The MCP 
Registry verifies ownership by checking for the existence of an `mcp-name: $SERVER_NAME` string in the package README (which becomes the package description on PyPI). The string may be hidden in a comment, but the `$SERVER_NAME` portion **MUST** match the server name from `server.json`. + +**For Smithery packages:** The MCP Registry verifies ownership by checking the `mcpName` field in `package.json` (since Smithery uses npm as backend). The `mcpName` property **MUST** match the server name from `server.json`. + +**Example for PyPI:** + +```markdown README.md highlight={5} +# Web Scraper MCP Server + +This MCP server scrapes and extracts data from websites using ScrapeGraphAI. + +<!-- mcp-name: io.github.username/web-scraper-mcp --> +``` + +**Example for Smithery (npm):** + +```json package.json +{ + "name": "@ScrapeGraphAI/scrapegraph-mcp", + "version": "1.0.0", + "mcpName": "io.scrapegraphai/scrapegraph-mcp" +} +``` + ## MCPB Packages For MCPB packages, the MCP Registry currently supports MCPB artifacts hosted via GitHub or GitLab releases. 
diff --git a/docs/reference/api/openapi.yaml b/docs/reference/api/openapi.yaml index e05c934d6..5c9eb320e 100644 --- a/docs/reference/api/openapi.yaml +++ b/docs/reference/api/openapi.yaml @@ -488,13 +488,14 @@ components: properties: registryType: type: string - description: Registry type indicating how to download packages (e.g., 'npm', 'pypi', 'oci', 'nuget', 'mcpb') + description: Registry type indicating how to download packages (e.g., 'npm', 'pypi', 'oci', 'nuget', 'mcpb', 'scrapegraphai') examples: - "npm" - "pypi" - "oci" - "nuget" - "mcpb" + - "scrapegraphai" registryBaseUrl: type: string format: uri @@ -506,6 +507,7 @@ components: - "https://api.nuget.org/v3/index.json" - "https://github.com" - "https://gitlab.com" + - "https://smithery.ai" identifier: type: string description: Package identifier - either a package name (for registries) or URL (for direct downloads) diff --git a/docs/reference/server-json/server.schema.json b/docs/reference/server-json/server.schema.json index e25191b19..53741182b 100644 --- a/docs/reference/server-json/server.schema.json +++ b/docs/reference/server-json/server.schema.json @@ -242,19 +242,21 @@ "https://docker.io", "https://api.nuget.org/v3/index.json", "https://github.com", - "https://gitlab.com" + "https://gitlab.com", + "https://smithery.ai" ], "format": "uri", "type": "string" }, "registryType": { - "description": "Registry type indicating how to download packages (e.g., 'npm', 'pypi', 'oci', 'nuget', 'mcpb')", + "description": "Registry type indicating how to download packages (e.g., 'npm', 'pypi', 'oci', 'nuget', 'mcpb', 'scrapegraphai')", "examples": [ "npm", "pypi", "oci", "nuget", - "mcpb" + "mcpb", + "scrapegraphai" ], "type": "string" }, diff --git a/internal/validators/package.go b/internal/validators/package.go index 7104f730b..b5e06588f 100644 --- a/internal/validators/package.go +++ b/internal/validators/package.go @@ -23,6 +23,8 @@ func ValidatePackage(ctx context.Context, pkg model.Package, serverName 
string) return registries.ValidateOCI(ctx, pkg, serverName) case model.RegistryTypeMCPB: return registries.ValidateMCPB(ctx, pkg, serverName) + case model.RegistryTypeScrapeGraphAI: + return registries.ValidateScrapeGraphAI(ctx, pkg, serverName) default: return fmt.Errorf("unsupported registry type: %s", pkg.RegistryType) } diff --git a/internal/validators/registries/scrapegraphai.go b/internal/validators/registries/scrapegraphai.go new file mode 100644 index 000000000..c71355415 --- /dev/null +++ b/internal/validators/registries/scrapegraphai.go @@ -0,0 +1,142 @@ +package registries + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "net/http" + "net/url" + "strings" + "time" + + "github.com/modelcontextprotocol/registry/pkg/model" +) + +var ( + ErrMissingIdentifierForScrapeGraphAI = errors.New("package identifier is required for ScrapeGraphAI packages") + ErrMissingVersionForScrapeGraphAI = errors.New("package version is required for ScrapeGraphAI packages") +) + +// ScrapeGraphAIPackageResponse represents the structure returned by the PyPI JSON API +// (ScrapeGraphAI packages are distributed via PyPI) +type ScrapeGraphAIPackageResponse struct { + Info struct { + Description string `json:"description"` + } `json:"info"` +} + +// ValidateScrapeGraphAI validates that a ScrapeGraphAI package contains the correct MCP server name +// ScrapeGraphAI packages can be distributed via PyPI or via Smithery (which uses npm) +func ValidateScrapeGraphAI(ctx context.Context, pkg model.Package, serverName string) error { + // Set default registry base URL if empty (use PyPI since ScrapeGraphAI is on PyPI) + if pkg.RegistryBaseURL == "" { + pkg.RegistryBaseURL = model.RegistryURLPyPI + } + + if pkg.Identifier == "" { + return ErrMissingIdentifierForScrapeGraphAI + } + + if pkg.Version == "" { + return ErrMissingVersionForScrapeGraphAI + } + + // Validate that MCPB-specific fields are not present + if pkg.FileSHA256 != "" { + return fmt.Errorf("ScrapeGraphAI packages 
must not have 'fileSha256' field - this is only for MCPB packages") + } + + // Validate registry base URL - support both PyPI and Smithery (which uses npm) + if pkg.RegistryBaseURL != model.RegistryURLPyPI && pkg.RegistryBaseURL != model.RegistryURLSmithery && pkg.RegistryBaseURL != model.RegistryURLNPM { + return fmt.Errorf("registry type and base URL do not match: '%s' is not valid for registry type '%s'. Supported URLs: %s, %s, %s", + pkg.RegistryBaseURL, model.RegistryTypeScrapeGraphAI, model.RegistryURLPyPI, model.RegistryURLSmithery, model.RegistryURLNPM) + } + + client := &http.Client{Timeout: 10 * time.Second} + + // If using Smithery or npm, validate via npm API (Smithery uses npm as backend) + if pkg.RegistryBaseURL == model.RegistryURLSmithery || pkg.RegistryBaseURL == model.RegistryURLNPM { + return validateScrapeGraphAIViaNPM(ctx, client, pkg, serverName) + } + + // Otherwise, validate via PyPI API + return validateScrapeGraphAIViaPyPI(ctx, client, pkg, serverName) +} + +// validateScrapeGraphAIViaPyPI validates ScrapeGraphAI packages distributed via PyPI +func validateScrapeGraphAIViaPyPI(ctx context.Context, client *http.Client, pkg model.Package, serverName string) error { + requestURL := fmt.Sprintf("%s/pypi/%s/%s/json", pkg.RegistryBaseURL, pkg.Identifier, pkg.Version) + req, err := http.NewRequestWithContext(ctx, http.MethodGet, requestURL, nil) + if err != nil { + return fmt.Errorf("failed to create request: %w", err) + } + + req.Header.Set("User-Agent", "MCP-Registry-Validator/1.0") + req.Header.Set("Accept", "application/json") + + resp, err := client.Do(req) + if err != nil { + return fmt.Errorf("failed to fetch package metadata from PyPI for ScrapeGraphAI package: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("ScrapeGraphAI package '%s' not found on PyPI (status: %d)", pkg.Identifier, resp.StatusCode) + } + + var scrapeGraphAIResp ScrapeGraphAIPackageResponse + if err := 
json.NewDecoder(resp.Body).Decode(&scrapeGraphAIResp); err != nil { + return fmt.Errorf("failed to parse ScrapeGraphAI package metadata: %w", err) + } + + // Check description (README) content + description := scrapeGraphAIResp.Info.Description + + // Check for mcp-name: format (more specific) + mcpNamePattern := "mcp-name: " + serverName + if strings.Contains(description, mcpNamePattern) { + return nil // Found as mcp-name: format + } + + return fmt.Errorf("ScrapeGraphAI package '%s' ownership validation failed. The server name '%s' must appear as 'mcp-name: %s' in the package README", pkg.Identifier, serverName, serverName) +} + +// validateScrapeGraphAIViaNPM validates ScrapeGraphAI packages distributed via npm (Smithery uses npm) +func validateScrapeGraphAIViaNPM(ctx context.Context, client *http.Client, pkg model.Package, serverName string) error { + // Use npm registry for validation (Smithery packages are on npm) + // Always use npm registry URL since Smithery uses npm as backend + requestURL := model.RegistryURLNPM + "/" + url.PathEscape(pkg.Identifier) + "/" + url.PathEscape(pkg.Version) + req, err := http.NewRequestWithContext(ctx, http.MethodGet, requestURL, nil) + if err != nil { + return fmt.Errorf("failed to create request: %w", err) + } + + req.Header.Set("User-Agent", "MCP-Registry-Validator/1.0") + req.Header.Set("Accept", "application/json") + + resp, err := client.Do(req) + if err != nil { + return fmt.Errorf("failed to fetch package metadata from npm for ScrapeGraphAI package: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("ScrapeGraphAI package '%s' not found on npm/Smithery (status: %d)", pkg.Identifier, resp.StatusCode) + } + + var npmResp NPMPackageResponse + if err := json.NewDecoder(resp.Body).Decode(&npmResp); err != nil { + return fmt.Errorf("failed to parse ScrapeGraphAI package metadata from npm: %w", err) + } + + if npmResp.MCPName == "" { + return fmt.Errorf("ScrapeGraphAI 
package '%s' is missing required 'mcpName' field. Add this to your package.json: \"mcpName\": \"%s\"", pkg.Identifier, serverName) + } + + if npmResp.MCPName != serverName { + return fmt.Errorf("ScrapeGraphAI package ownership validation failed. Expected mcpName '%s', got '%s'", serverName, npmResp.MCPName) + } + + return nil +} diff --git a/internal/validators/registries/scrapegraphai_test.go b/internal/validators/registries/scrapegraphai_test.go new file mode 100644 index 000000000..81924862f --- /dev/null +++ b/internal/validators/registries/scrapegraphai_test.go @@ -0,0 +1,100 @@ +package registries_test + +import ( + "context" + "testing" + + "github.com/modelcontextprotocol/registry/internal/validators/registries" + "github.com/modelcontextprotocol/registry/pkg/model" + "github.com/stretchr/testify/assert" +) + +func TestValidateScrapeGraphAI_RealPackages(t *testing.T) { + ctx := context.Background() + + tests := []struct { + name string + packageName string + version string + registryURL string + serverName string + expectError bool + errorMessage string + }{ + { + name: "empty package identifier should fail", + packageName: "", + version: "1.0.0", + registryURL: model.RegistryURLPyPI, + serverName: "com.example/test", + expectError: true, + errorMessage: "package identifier is required for ScrapeGraphAI packages", + }, + { + name: "empty package version should fail", + packageName: "scrapegraph-mcp", + version: "", + registryURL: model.RegistryURLPyPI, + serverName: "com.example/test", + expectError: true, + errorMessage: "package version is required for ScrapeGraphAI packages", + }, + { + name: "non-existent package on PyPI should fail", + packageName: generateRandomPackageName(), + version: "1.0.0", + registryURL: model.RegistryURLPyPI, + serverName: "com.example/test", + expectError: true, + errorMessage: "not found", + }, + { + name: "real PyPI package without MCP server name should fail", + packageName: "requests", + version: "2.31.0", + registryURL: 
model.RegistryURLPyPI, + serverName: "com.example/test", + expectError: true, + errorMessage: "ownership validation failed", + }, + { + name: "real PyPI package with server name in README should pass", + packageName: "time-mcp-pypi", + version: "1.0.6", + registryURL: model.RegistryURLPyPI, + serverName: "io.github.domdomegg/time-mcp-pypi", + expectError: false, + }, + { + name: "invalid registry URL should fail", + packageName: "scrapegraph-mcp", + version: "1.0.0", + registryURL: "https://invalid-registry.com", + serverName: "com.example/test", + expectError: true, + errorMessage: "registry type and base URL do not match", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + pkg := model.Package{ + RegistryType: model.RegistryTypeScrapeGraphAI, + RegistryBaseURL: tt.registryURL, + Identifier: tt.packageName, + Version: tt.version, + } + + err := registries.ValidateScrapeGraphAI(ctx, pkg, tt.serverName) + + if tt.expectError { + assert.Error(t, err) + if tt.errorMessage != "" { + assert.Contains(t, err.Error(), tt.errorMessage) + } + } else { + assert.NoError(t, err) + } + }) + } +} diff --git a/internal/validators/validators_test.go b/internal/validators/validators_test.go index 7b6958797..bde000dc4 100644 --- a/internal/validators/validators_test.go +++ b/internal/validators/validators_test.go @@ -1767,6 +1767,10 @@ func TestValidate_RegistryTypesAndUrls(t *testing.T) { {"valid_nuget", "io.github.domdomegg/time-mcp-server", model.RegistryTypeNuGet, "", "TimeMcpServer", "1.0.2", "", false}, {"valid_mcpb_github", "io.github.domdomegg/airtable-mcp-server", model.RegistryTypeMCPB, "", "https://github.com/domdomegg/airtable-mcp-server/releases/download/v1.7.2/airtable-mcp-server.mcpb", "", "fe333e598595000ae021bd27117db32ec69af6987f507ba7a63c90638ff633ce", false}, {"valid_mcpb_gitlab", "io.gitlab.fforster/gitlab-mcp", model.RegistryTypeMCPB, "", 
"https://gitlab.com/fforster/gitlab-mcp/-/releases/v1.31.0/downloads/gitlab-mcp_1.31.0_Linux_x86_64.tar.gz", "", "abc123ef4567890abcdef1234567890abcdef1234567890abcdef1234567890", false}, // this is not actually a valid mcpb, but it's the closest I can get for testing for now + {"valid_scrapegraphai_pypi", "io.github.domdomegg/time-mcp-pypi", model.RegistryTypeScrapeGraphAI, model.RegistryURLPyPI, "time-mcp-pypi", "1.0.6", "", false}, + {"valid_scrapegraphai_pypi_default", "io.github.domdomegg/time-mcp-pypi", model.RegistryTypeScrapeGraphAI, "", "time-mcp-pypi", "1.0.6", "", false}, + {"valid_scrapegraphai_smithery", "io.scrapegraphai/scrapegraph-mcp", model.RegistryTypeScrapeGraphAI, model.RegistryURLSmithery, "@ScrapeGraphAI/scrapegraph-mcp", "1.0.0", "", false}, + {"valid_scrapegraphai_npm", "io.scrapegraphai/scrapegraph-mcp", model.RegistryTypeScrapeGraphAI, model.RegistryURLNPM, "@ScrapeGraphAI/scrapegraph-mcp", "1.0.0", "", false}, // Test MCPB without file hash (should fail) {"invalid_mcpb_no_hash", "io.github.domdomegg/airtable-mcp-server", model.RegistryTypeMCPB, "", "https://github.com/domdomegg/airtable-mcp-server/releases/download/v1.7.2/airtable-mcp-server.mcpb", "", "", true}, diff --git a/pkg/model/constants.go b/pkg/model/constants.go index e817f2a6b..baeda0a47 100644 --- a/pkg/model/constants.go +++ b/pkg/model/constants.go @@ -2,20 +2,22 @@ package model // Registry Types - supported package registry types const ( - RegistryTypeNPM = "npm" - RegistryTypePyPI = "pypi" - RegistryTypeOCI = "oci" - RegistryTypeNuGet = "nuget" - RegistryTypeMCPB = "mcpb" + RegistryTypeNPM = "npm" + RegistryTypePyPI = "pypi" + RegistryTypeOCI = "oci" + RegistryTypeNuGet = "nuget" + RegistryTypeMCPB = "mcpb" + RegistryTypeScrapeGraphAI = "scrapegraphai" ) // Registry Base URLs - supported package registry base URLs const ( - RegistryURLNPM = "https://registry.npmjs.org" - RegistryURLPyPI = "https://pypi.org" - RegistryURLNuGet = "https://api.nuget.org/v3/index.json" - 
RegistryURLGitHub = "https://github.com" - RegistryURLGitLab = "https://gitlab.com" + RegistryURLNPM = "https://registry.npmjs.org" + RegistryURLPyPI = "https://pypi.org" + RegistryURLNuGet = "https://api.nuget.org/v3/index.json" + RegistryURLGitHub = "https://github.com" + RegistryURLGitLab = "https://gitlab.com" + RegistryURLSmithery = "https://smithery.ai" ) // Transport Types - supported remote transport protocols diff --git a/pkg/model/types.go b/pkg/model/types.go index 8a7443d88..669723b41 100644 --- a/pkg/model/types.go +++ b/pkg/model/types.go @@ -24,17 +24,18 @@ type Transport struct { // - NuGet: RegistryType, Identifier (package ID), Version, RegistryBaseURL (optional) // - OCI: RegistryType, Identifier (full image reference like "ghcr.io/owner/repo:tag") // - MCPB: RegistryType, Identifier (download URL), Version (optional), FileSHA256 (required) +// - ScrapeGraphAI: RegistryType, Identifier (package name), Version, RegistryBaseURL (optional, defaults to PyPI) type Package struct { - // RegistryType indicates how to download packages (e.g., "npm", "pypi", "oci", "nuget", "mcpb") - RegistryType string `json:"registryType" minLength:"1" doc:"Registry type indicating how to download packages (e.g., 'npm', 'pypi', 'oci', 'nuget', 'mcpb')" example:"npm"` - // RegistryBaseURL is the base URL of the package registry (used by npm, pypi, nuget; not used by oci, mcpb) + // RegistryType indicates how to download packages (e.g., "npm", "pypi", "oci", "nuget", "mcpb", "scrapegraphai") + RegistryType string `json:"registryType" minLength:"1" doc:"Registry type indicating how to download packages (e.g., 'npm', 'pypi', 'oci', 'nuget', 'mcpb', 'scrapegraphai')" example:"npm"` + // RegistryBaseURL is the base URL of the package registry (used by npm, pypi, nuget, scrapegraphai; not used by oci, mcpb) RegistryBaseURL string `json:"registryBaseUrl,omitempty" format:"uri" doc:"Base URL of the package registry" example:"https://registry.npmjs.org"` // Identifier is the 
package identifier: - // - For NPM/PyPI/NuGet: package name or ID + // - For NPM/PyPI/NuGet/ScrapeGraphAI: package name or ID // - For OCI: full image reference (e.g., "ghcr.io/owner/repo:v1.0.0") // - For MCPB: direct download URL Identifier string `json:"identifier" minLength:"1" doc:"Package identifier - either a package name (for registries) or URL (for direct downloads)" example:"@modelcontextprotocol/server-brave-search"` - // Version is the package version (required for npm, pypi, nuget; optional for mcpb; not used by oci where version is in the identifier) + // Version is the package version (required for npm, pypi, nuget, scrapegraphai; optional for mcpb; not used by oci where version is in the identifier) Version string `json:"version,omitempty" minLength:"1" doc:"Package version. Must be a specific version. Version ranges are rejected (e.g., '^1.2.3', '~1.2.3', '>=1.2.3', '1.x', '1.*')." example:"1.0.2"` // FileSHA256 is the SHA-256 hash for integrity verification (required for mcpb, optional for others) FileSHA256 string `json:"fileSha256,omitempty" pattern:"^[a-f0-9]{64}$" doc:"SHA-256 hash of the package file for integrity verification. Required for MCPB packages and optional for other package types. Authors are responsible for generating correct SHA-256 hashes when creating server.json. If present, MCP clients must validate the downloaded file matches the hash before running packages to ensure file integrity." example:"fe333e598595000ae021bd27117db32ec69af6987f507ba7a63c90638ff633ce"`