From 181473a3e0a7e6acc1ef47f310e8677945ce1365 Mon Sep 17 00:00:00 2001 From: Vishnu Jain Date: Fri, 5 Jun 2026 20:45:00 +0530 Subject: [PATCH 1/2] feat(mcp): trim wide-table payload in get_entity_details (column descriptions, schema/model sql) --- .../openmetadata/mcp/tools/GetEntityTool.java | 100 +++++++++- .../mcp/tools/RootCauseAnalysisTool.java | 5 +- .../main/resources/json/data/mcp/tools.json | 2 +- .../mcp/tools/GetEntityToolTest.java | 176 ++++++++++++++++++ 4 files changed, 272 insertions(+), 11 deletions(-) create mode 100644 openmetadata-mcp/src/test/java/org/openmetadata/mcp/tools/GetEntityToolTest.java diff --git a/openmetadata-mcp/src/main/java/org/openmetadata/mcp/tools/GetEntityTool.java b/openmetadata-mcp/src/main/java/org/openmetadata/mcp/tools/GetEntityTool.java index a3bfe058c783..c18a28007808 100644 --- a/openmetadata-mcp/src/main/java/org/openmetadata/mcp/tools/GetEntityTool.java +++ b/openmetadata-mcp/src/main/java/org/openmetadata/mcp/tools/GetEntityTool.java @@ -2,6 +2,7 @@ import static org.openmetadata.schema.type.MetadataOperation.VIEW_ALL; +import com.google.common.annotations.VisibleForTesting; import java.io.IOException; import java.util.HashMap; import java.util.List; @@ -26,6 +27,7 @@ public class GetEntityTool implements McpTool { "updatedAt", "updatedBy", "changeDescription", + "incrementalChangeDescription", "followers", "votes", "totalVotes", @@ -44,6 +46,17 @@ public class GetEntityTool implements McpTool { "columnDescriptionStatus", "descriptionStatus"); + private static final String DESCRIPTION_KEY = "description"; + private static final String COLUMNS_KEY = "columns"; + private static final String CHILDREN_KEY = "children"; + private static final String SCHEMA_DEFINITION_KEY = "schemaDefinition"; + private static final String DATA_MODEL_KEY = "dataModel"; + private static final String SQL_KEY = "sql"; + private static final String RAW_SQL_KEY = "rawSql"; + private static final String COLUMN_DESCRIPTIONS_TRUNCATED_KEY = 
"columnDescriptionsTruncated"; + private static final String SCHEMA_DEFINITION_TRUNCATED_KEY = "schemaDefinitionTruncated"; + private static final String SQL_TRUNCATED_KEY = "sqlTruncated"; + @Override public Map execute( Authorizer authorizer, CatalogSecurityContext securityContext, Map params) @@ -64,19 +77,90 @@ public Map execute( } /** - * Removes verbose fields from entity response to optimize LLM context. Keeps essential fields - * while removing metadata that adds little value for LLM understanding. + * Removes verbose fields and trims the wide-table multipliers (per-column descriptions, raw + * schema/model SQL) so the detail response stays usable on entities with hundreds of columns. + * The entity-level description is deliberately left untouched — this is the one tool whose + * callers need the full text after search results truncated it. The map tree comes from a fresh + * Jackson conversion ({@code JsonUtils.getMap}), so in-place edits never touch the cached entity + * POJO. */ - private static Map cleanEntityResponse(Map entityData) { - if (entityData == null) { - return new HashMap<>(); + @VisibleForTesting + static Map cleanEntityResponse(Map entityData) { + Map cleaned = new HashMap<>(); + if (entityData != null) { + cleaned = new HashMap<>(entityData); + EXCLUDE_FIELDS.forEach(cleaned::remove); + McpResponseTrim.VECTOR_NOISE_FIELDS.forEach(cleaned::remove); + trimSchemaDefinition(cleaned); + trimDataModelSql(cleaned); + if (trimColumnDescriptions(cleaned.get(COLUMNS_KEY))) { + cleaned.put(COLUMN_DESCRIPTIONS_TRUNCATED_KEY, Boolean.TRUE); + } } - Map cleaned = new HashMap<>(entityData); - EXCLUDE_FIELDS.forEach(cleaned::remove); - McpResponseTrim.VECTOR_NOISE_FIELDS.forEach(cleaned::remove); return cleaned; } + /** + * Truncates over-length column descriptions, recursing through {@code children} for nested + * struct/map columns. 
Returns whether any description was cut so the caller can surface a single + * top-level marker instead of per-column flag noise. + */ + private static boolean trimColumnDescriptions(Object columnsValue) { + boolean truncated = false; + if (columnsValue instanceof List columns) { + for (Object column : columns) { + if (column instanceof Map) { + truncated |= trimColumn(castMap(column)); + } + } + } + return truncated; + } + + private static boolean trimColumn(Map column) { + boolean truncated = false; + if (column.get(DESCRIPTION_KEY) instanceof String description + && description.length() > McpResponseTrim.TEXT_MAX_LENGTH) { + column.put( + DESCRIPTION_KEY, McpResponseTrim.truncate(description, McpResponseTrim.TEXT_MAX_LENGTH)); + truncated = true; + } + return truncated | trimColumnDescriptions(column.get(CHILDREN_KEY)); + } + + private static void trimSchemaDefinition(Map entity) { + if (entity.get(SCHEMA_DEFINITION_KEY) instanceof String ddl + && ddl.length() > McpResponseTrim.SQL_MAX_LENGTH) { + entity.put( + SCHEMA_DEFINITION_KEY, McpResponseTrim.truncate(ddl, McpResponseTrim.SQL_MAX_LENGTH)); + entity.put(SCHEMA_DEFINITION_TRUNCATED_KEY, Boolean.TRUE); + } + } + + private static void trimDataModelSql(Map entity) { + if (entity.get(DATA_MODEL_KEY) instanceof Map) { + Map dataModel = castMap(entity.get(DATA_MODEL_KEY)); + boolean truncated = trimSqlField(dataModel, SQL_KEY) | trimSqlField(dataModel, RAW_SQL_KEY); + if (truncated) { + dataModel.put(SQL_TRUNCATED_KEY, Boolean.TRUE); + } + } + } + + private static boolean trimSqlField(Map dataModel, String key) { + boolean truncated = false; + if (dataModel.get(key) instanceof String sql && sql.length() > McpResponseTrim.SQL_MAX_LENGTH) { + dataModel.put(key, McpResponseTrim.truncate(sql, McpResponseTrim.SQL_MAX_LENGTH)); + truncated = true; + } + return truncated; + } + + @SuppressWarnings("unchecked") + private static Map castMap(Object value) { + return (Map) value; + } + @Override public Map execute( 
Authorizer authorizer, diff --git a/openmetadata-mcp/src/main/java/org/openmetadata/mcp/tools/RootCauseAnalysisTool.java b/openmetadata-mcp/src/main/java/org/openmetadata/mcp/tools/RootCauseAnalysisTool.java index 16b1ada9011e..6742234bb15d 100644 --- a/openmetadata-mcp/src/main/java/org/openmetadata/mcp/tools/RootCauseAnalysisTool.java +++ b/openmetadata-mcp/src/main/java/org/openmetadata/mcp/tools/RootCauseAnalysisTool.java @@ -78,11 +78,12 @@ public Map execute( return analyze(request); } catch (IOException e) { LOG.error("IOException during root cause analysis for entity: {}", fqn, e); - throw new RuntimeException("Failed to perform root cause analysis: " + e.getMessage(), e); + throw new RuntimeException( + "Failed to perform root cause analysis: " + McpResponseTrim.safeMessage(e), e); } catch (Exception e) { LOG.error("Unexpected error during root cause analysis for entity: {}", fqn, e); throw new RuntimeException( - "Unexpected error during root cause analysis: " + e.getMessage(), e); + "Unexpected error during root cause analysis: " + McpResponseTrim.safeMessage(e), e); } } diff --git a/openmetadata-mcp/src/main/resources/json/data/mcp/tools.json b/openmetadata-mcp/src/main/resources/json/data/mcp/tools.json index d0f94f8172ef..c1ab7058084a 100644 --- a/openmetadata-mcp/src/main/resources/json/data/mcp/tools.json +++ b/openmetadata-mcp/src/main/resources/json/data/mcp/tools.json @@ -182,7 +182,7 @@ }, { "name": "get_entity_details", - "description": "Get detailed information about a specific entity by its fully qualified name, including its custom properties (returned under the 'extension' field). IMPORTANT: Use the 'fullyQualifiedName' and 'entityType' values directly from search_metadata or semantic_search results — do not construct the FQN manually. 
Response is optimized for LLM context by excluding verbose metadata fields.", + "description": "Get detailed information about a specific entity by its fully qualified name, including its custom properties (returned under the 'extension' field). IMPORTANT: Use the 'fullyQualifiedName' and 'entityType' values directly from search_metadata or semantic_search results — do not construct the FQN manually. Response is optimized for LLM context: verbose metadata fields are excluded, and per-column descriptions and raw schema/model SQL are truncated at 500 characters (marked with 'columnDescriptionsTruncated', 'schemaDefinitionTruncated', or 'sqlTruncated' inside 'dataModel' when cut). The entity-level description is always returned in full.", "parameters": { "description": "Use 'fullyQualifiedName' and 'entityType' from search results directly.", "type": "object", diff --git a/openmetadata-mcp/src/test/java/org/openmetadata/mcp/tools/GetEntityToolTest.java b/openmetadata-mcp/src/test/java/org/openmetadata/mcp/tools/GetEntityToolTest.java new file mode 100644 index 000000000000..f127ef45cd8a --- /dev/null +++ b/openmetadata-mcp/src/test/java/org/openmetadata/mcp/tools/GetEntityToolTest.java @@ -0,0 +1,176 @@ +/* + * Copyright 2025 Collate + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.openmetadata.mcp.tools; + +import static org.assertj.core.api.Assertions.assertThat; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import org.junit.jupiter.api.Test; + +/** + * Pins {@link GetEntityTool#cleanEntityResponse}. The entity-level description must always be + * returned in full (this is the detail tool — the one place full text is reachable after search + * truncates), while per-column descriptions, schema DDL and dbt SQL — the wide-table multipliers — + * are truncated. The {@code extension} field (custom properties, #28594 contract) must survive at + * both table and column level. + */ +class GetEntityToolTest { + + private static Map column(String name, String description) { + Map column = new HashMap<>(); + column.put("name", name); + column.put("dataType", "VARCHAR"); + if (description != null) { + column.put("description", description); + } + return column; + } + + @Test + void entityDescriptionIsNeverTruncated() { + Map entity = new HashMap<>(); + String longDescription = "d".repeat(5_000); + entity.put("description", longDescription); + + Map cleaned = GetEntityTool.cleanEntityResponse(entity); + + assertThat(cleaned.get("description")).isEqualTo(longDescription); + assertThat(cleaned).doesNotContainKey("columnDescriptionsTruncated"); + } + + @Test + void longColumnDescriptionIsTruncatedWithTopLevelFlag() { + Map entity = new HashMap<>(); + List> columns = new ArrayList<>(); + columns.add(column("a", "x".repeat(900))); + columns.add(column("b", "short")); + entity.put("columns", columns); + + Map cleaned = GetEntityTool.cleanEntityResponse(entity); + + assertThat((String) columns.get(0).get("description")).hasSize(503).endsWith("..."); + assertThat(columns.get(1).get("description")).isEqualTo("short"); + assertThat(cleaned.get("columnDescriptionsTruncated")).isEqualTo(Boolean.TRUE); + } + + @Test + void shortColumnDescriptionsProduceNoFlag() { + Map entity = new 
HashMap<>(); + entity.put("columns", List.of(column("a", "short"), column("b", null))); + + Map cleaned = GetEntityTool.cleanEntityResponse(entity); + + assertThat(cleaned).doesNotContainKey("columnDescriptionsTruncated"); + } + + @Test + void nestedChildColumnDescriptionsAreTruncatedRecursively() { + Map child = column("inner", "y".repeat(700)); + Map parent = column("outer", "short"); + parent.put("children", List.of(child)); + Map entity = new HashMap<>(); + entity.put("columns", List.of(parent)); + + Map cleaned = GetEntityTool.cleanEntityResponse(entity); + + assertThat((String) child.get("description")).hasSize(503).endsWith("..."); + assertThat(cleaned.get("columnDescriptionsTruncated")).isEqualTo(Boolean.TRUE); + } + + @Test + void extensionSurvivesAtTableAndColumnLevel() { + Map column = column("a", "short"); + column.put("extension", Map.of("colProp", "v")); + Map entity = new HashMap<>(); + entity.put("extension", Map.of("tableProp", "v")); + entity.put("columns", List.of(column)); + + Map cleaned = GetEntityTool.cleanEntityResponse(entity); + + assertThat(cleaned.get("extension")).isEqualTo(Map.of("tableProp", "v")); + assertThat(column.get("extension")).isEqualTo(Map.of("colProp", "v")); + } + + @Test + void noiseAndVectorFieldsAreRemoved() { + Map entity = new HashMap<>(); + entity.put("incrementalChangeDescription", Map.of("fieldsAdded", List.of())); + entity.put("changeDescription", Map.of()); + entity.put("embedding", List.of(0.1, 0.2)); + entity.put("textToEmbed", "blob"); + entity.put("name", "orders"); + + Map cleaned = GetEntityTool.cleanEntityResponse(entity); + + assertThat(cleaned) + .doesNotContainKeys( + "incrementalChangeDescription", "changeDescription", "embedding", "textToEmbed") + .containsKey("name"); + } + + @Test + void schemaDefinitionIsTruncatedWithFlag() { + Map entity = new HashMap<>(); + entity.put("schemaDefinition", "CREATE TABLE orders (".repeat(60)); + + Map cleaned = GetEntityTool.cleanEntityResponse(entity); + + 
assertThat((String) cleaned.get("schemaDefinition")).hasSize(503).endsWith("..."); + assertThat(cleaned.get("schemaDefinitionTruncated")).isEqualTo(Boolean.TRUE); + } + + @Test + void dataModelSqlAndRawSqlAreTruncatedWithFlag() { + Map dataModel = new HashMap<>(); + dataModel.put("sql", "SELECT 1 FROM t ".repeat(60)); + dataModel.put("rawSql", "SELECT 2 FROM t ".repeat(60)); + Map entity = new HashMap<>(); + entity.put("dataModel", dataModel); + + Map cleaned = GetEntityTool.cleanEntityResponse(entity); + + Map cleanedModel = castMap(cleaned.get("dataModel")); + assertThat((String) cleanedModel.get("sql")).hasSize(503).endsWith("..."); + assertThat((String) cleanedModel.get("rawSql")).hasSize(503).endsWith("..."); + assertThat(cleanedModel.get("sqlTruncated")).isEqualTo(Boolean.TRUE); + } + + @Test + void shortSchemaAndModelSqlAreUntouched() { + Map dataModel = new HashMap<>(); + dataModel.put("sql", "SELECT 1"); + Map entity = new HashMap<>(); + entity.put("schemaDefinition", "CREATE TABLE t (id INT)"); + entity.put("dataModel", dataModel); + + Map cleaned = GetEntityTool.cleanEntityResponse(entity); + + assertThat(cleaned.get("schemaDefinition")).isEqualTo("CREATE TABLE t (id INT)"); + assertThat(cleaned).doesNotContainKey("schemaDefinitionTruncated"); + assertThat(castMap(cleaned.get("dataModel"))).doesNotContainKey("sqlTruncated"); + } + + @Test + void nullEntityYieldsEmptyResponse() { + assertThat(GetEntityTool.cleanEntityResponse(null)).isEmpty(); + } + + @SuppressWarnings("unchecked") + private static Map castMap(Object value) { + return (Map) value; + } +} From 86645a55a3658d13fbbac67ab283c73efb9830a9 Mon Sep 17 00:00:00 2001 From: Vishnu Jain Date: Fri, 5 Jun 2026 20:47:48 +0530 Subject: [PATCH 2/2] docs(mcp): annotate intentional non-short-circuit operators in GetEntityTool --- .../src/main/java/org/openmetadata/mcp/tools/GetEntityTool.java | 2 ++ 1 file changed, 2 insertions(+) diff --git 
a/openmetadata-mcp/src/main/java/org/openmetadata/mcp/tools/GetEntityTool.java b/openmetadata-mcp/src/main/java/org/openmetadata/mcp/tools/GetEntityTool.java index c18a28007808..35e12c7e435f 100644 --- a/openmetadata-mcp/src/main/java/org/openmetadata/mcp/tools/GetEntityTool.java +++ b/openmetadata-mcp/src/main/java/org/openmetadata/mcp/tools/GetEntityTool.java @@ -125,6 +125,7 @@ private static boolean trimColumn(Map column) { DESCRIPTION_KEY, McpResponseTrim.truncate(description, McpResponseTrim.TEXT_MAX_LENGTH)); truncated = true; } + // Non-short-circuit | : children must be trimmed even when this column's description was cut. return truncated | trimColumnDescriptions(column.get(CHILDREN_KEY)); } @@ -140,6 +141,7 @@ private static void trimSchemaDefinition(Map entity) { private static void trimDataModelSql(Map entity) { if (entity.get(DATA_MODEL_KEY) instanceof Map) { Map dataModel = castMap(entity.get(DATA_MODEL_KEY)); + // Non-short-circuit | : both sql and rawSql must be trimmed regardless of the other. boolean truncated = trimSqlField(dataModel, SQL_KEY) | trimSqlField(dataModel, RAW_SQL_KEY); if (truncated) { dataModel.put(SQL_TRUNCATED_KEY, Boolean.TRUE);