Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import static org.openmetadata.schema.type.MetadataOperation.VIEW_ALL;

import com.google.common.annotations.VisibleForTesting;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
Expand All @@ -26,6 +27,7 @@ public class GetEntityTool implements McpTool {
"updatedAt",
"updatedBy",
"changeDescription",
"incrementalChangeDescription",
"followers",
"votes",
"totalVotes",
Expand All @@ -44,6 +46,17 @@ public class GetEntityTool implements McpTool {
"columnDescriptionStatus",
"descriptionStatus");

private static final String DESCRIPTION_KEY = "description";
private static final String COLUMNS_KEY = "columns";
private static final String CHILDREN_KEY = "children";
private static final String SCHEMA_DEFINITION_KEY = "schemaDefinition";
private static final String DATA_MODEL_KEY = "dataModel";
private static final String SQL_KEY = "sql";
private static final String RAW_SQL_KEY = "rawSql";
private static final String COLUMN_DESCRIPTIONS_TRUNCATED_KEY = "columnDescriptionsTruncated";
private static final String SCHEMA_DEFINITION_TRUNCATED_KEY = "schemaDefinitionTruncated";
private static final String SQL_TRUNCATED_KEY = "sqlTruncated";

@Override
public Map<String, Object> execute(
Authorizer authorizer, CatalogSecurityContext securityContext, Map<String, Object> params)
Expand All @@ -64,19 +77,92 @@ public Map<String, Object> execute(
}

/**
* Removes verbose fields from entity response to optimize LLM context. Keeps essential fields
* while removing metadata that adds little value for LLM understanding.
* Removes verbose fields and trims the wide-table multipliers (per-column descriptions, raw
* schema/model SQL) so the detail response stays usable on entities with hundreds of columns.
* The entity-level description is deliberately left untouched — this is the one tool whose
* callers need the full text after search results truncated it. The map tree comes from a fresh
* Jackson conversion ({@code JsonUtils.getMap}), so in-place edits never touch the cached entity
* POJO.
*/
private static Map<String, Object> cleanEntityResponse(Map<String, Object> entityData) {
if (entityData == null) {
return new HashMap<>();
@VisibleForTesting
static Map<String, Object> cleanEntityResponse(Map<String, Object> entityData) {
Map<String, Object> cleaned = new HashMap<>();
if (entityData != null) {
cleaned = new HashMap<>(entityData);
EXCLUDE_FIELDS.forEach(cleaned::remove);
McpResponseTrim.VECTOR_NOISE_FIELDS.forEach(cleaned::remove);
trimSchemaDefinition(cleaned);
trimDataModelSql(cleaned);
if (trimColumnDescriptions(cleaned.get(COLUMNS_KEY))) {
cleaned.put(COLUMN_DESCRIPTIONS_TRUNCATED_KEY, Boolean.TRUE);
}
}
Map<String, Object> cleaned = new HashMap<>(entityData);
EXCLUDE_FIELDS.forEach(cleaned::remove);
McpResponseTrim.VECTOR_NOISE_FIELDS.forEach(cleaned::remove);
return cleaned;
}

/**
 * Walks a {@code columns}-style value and shortens every over-length column description it
 * finds, descending into nested {@code children} via {@link #trimColumn}. Anything that is not a
 * {@link List} is ignored, as are list elements that are not maps.
 *
 * @param columnsValue raw value stored under a columns/children key; may be {@code null} or of
 *     any type
 * @return {@code true} if at least one description anywhere in the tree was cut, letting the
 *     caller emit one top-level marker rather than a flag per column
 */
private static boolean trimColumnDescriptions(Object columnsValue) {
  if (!(columnsValue instanceof List<?> entries)) {
    return false;
  }
  boolean anyCut = false;
  for (Object entry : entries) {
    // Every map entry is visited; the flag only records whether any of them was shortened.
    if (entry instanceof Map && trimColumn(castMap(entry))) {
      anyCut = true;
    }
  }
  return anyCut;
}

/**
 * Shortens one column's description in place when it is a string longer than
 * {@code McpResponseTrim.TEXT_MAX_LENGTH}, then applies the same treatment to the column's
 * {@code children}.
 *
 * @param column mutable column map; its description entry may be overwritten
 * @return {@code true} if this column's description or any descendant's description was cut
 */
private static boolean trimColumn(Map<String, Object> column) {
  boolean ownCut = false;
  Object rawDescription = column.get(DESCRIPTION_KEY);
  if (rawDescription instanceof String text
      && text.length() > McpResponseTrim.TEXT_MAX_LENGTH) {
    column.put(DESCRIPTION_KEY, McpResponseTrim.truncate(text, McpResponseTrim.TEXT_MAX_LENGTH));
    ownCut = true;
  }
  // Children are processed unconditionally, before the results are combined, so a cut on this
  // column never prevents nested descriptions from being trimmed.
  boolean nestedCut = trimColumnDescriptions(column.get(CHILDREN_KEY));
  return ownCut || nestedCut;
}

/**
 * Shortens the entity's schema definition in place when it is a string longer than
 * {@code McpResponseTrim.SQL_MAX_LENGTH}, and records the cut under the
 * schema-definition-truncated key. Entities without a string schema definition, or with one
 * within the limit, are left unchanged.
 *
 * @param entity mutable entity map to edit
 */
private static void trimSchemaDefinition(Map<String, Object> entity) {
  if (!(entity.get(SCHEMA_DEFINITION_KEY) instanceof String definition)) {
    return;
  }
  if (definition.length() <= McpResponseTrim.SQL_MAX_LENGTH) {
    return;
  }
  entity.put(
      SCHEMA_DEFINITION_KEY,
      McpResponseTrim.truncate(definition, McpResponseTrim.SQL_MAX_LENGTH));
  entity.put(SCHEMA_DEFINITION_TRUNCATED_KEY, Boolean.TRUE);
}

/**
 * Shortens the {@code sql} and {@code rawSql} entries of the entity's data model in place and,
 * if either was cut, sets the sql-truncated marker inside the data model map itself. Does
 * nothing when the entity has no map-valued data model.
 *
 * @param entity mutable entity map whose nested data model may be edited
 */
private static void trimDataModelSql(Map<String, Object> entity) {
  Object rawModel = entity.get(DATA_MODEL_KEY);
  if (!(rawModel instanceof Map)) {
    return;
  }
  Map<String, Object> model = castMap(rawModel);
  // Both fields are trimmed before the results are combined, so neither call can be skipped.
  boolean sqlCut = trimSqlField(model, SQL_KEY);
  boolean rawSqlCut = trimSqlField(model, RAW_SQL_KEY);
  if (sqlCut || rawSqlCut) {
    model.put(SQL_TRUNCATED_KEY, Boolean.TRUE);
  }
}

/**
 * Shortens a single SQL entry of the data model in place when it is a string longer than
 * {@code McpResponseTrim.SQL_MAX_LENGTH}.
 *
 * @param dataModel mutable data model map
 * @param key name of the entry to inspect
 * @return {@code true} if the entry was replaced with a truncated value, {@code false} if it
 *     was absent, not a string, or already within the limit
 */
private static boolean trimSqlField(Map<String, Object> dataModel, String key) {
  Object current = dataModel.get(key);
  if (current instanceof String statement
      && statement.length() > McpResponseTrim.SQL_MAX_LENGTH) {
    dataModel.put(key, McpResponseTrim.truncate(statement, McpResponseTrim.SQL_MAX_LENGTH));
    return true;
  }
  return false;
}

/**
 * Views an arbitrary value as a {@code Map<String, Object>}. The cast is unchecked: call sites
 * in this class test {@code instanceof Map} first, but key and value types are not verified.
 *
 * @param value object to reinterpret
 * @return the same instance, typed as a string-keyed map
 */
@SuppressWarnings("unchecked")
private static Map<String, Object> castMap(Object value) {
  Map<String, Object> typed = (Map<String, Object>) value;
  return typed;
}

@Override
public Map<String, Object> execute(
Authorizer authorizer,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,12 @@ public Map<String, Object> execute(
return analyze(request);
} catch (IOException e) {
LOG.error("IOException during root cause analysis for entity: {}", fqn, e);
throw new RuntimeException("Failed to perform root cause analysis: " + e.getMessage(), e);
throw new RuntimeException(
"Failed to perform root cause analysis: " + McpResponseTrim.safeMessage(e), e);
} catch (Exception e) {
LOG.error("Unexpected error during root cause analysis for entity: {}", fqn, e);
throw new RuntimeException(
"Unexpected error during root cause analysis: " + e.getMessage(), e);
"Unexpected error during root cause analysis: " + McpResponseTrim.safeMessage(e), e);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@
},
{
"name": "get_entity_details",
"description": "Get detailed information about a specific entity by its fully qualified name, including its custom properties (returned under the 'extension' field). IMPORTANT: Use the 'fullyQualifiedName' and 'entityType' values directly from search_metadata or semantic_search results — do not construct the FQN manually. Response is optimized for LLM context by excluding verbose metadata fields.",
"description": "Get detailed information about a specific entity by its fully qualified name, including its custom properties (returned under the 'extension' field). IMPORTANT: Use the 'fullyQualifiedName' and 'entityType' values directly from search_metadata or semantic_search results — do not construct the FQN manually. Response is optimized for LLM context: verbose metadata fields are excluded, and per-column descriptions and raw schema/model SQL are truncated at 500 characters (marked with 'columnDescriptionsTruncated' / 'schemaDefinitionTruncated', or 'sqlTruncated' inside 'dataModel', when cut). The entity-level description is always returned in full.",
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💡 Quality: tools.json omits dataModel 'sqlTruncated' flag from description

The updated get_entity_details description advertises that truncation is signalled via columnDescriptionsTruncated and schemaDefinitionTruncated, but the code also truncates dataModel.sql/dataModel.rawSql and emits a sqlTruncated flag nested inside dataModel. Since the tool description is what the LLM reads to interpret the response, it won't know that dbt/data-model SQL may be cut or that a sqlTruncated marker exists. Consider mentioning the dataModel.sqlTruncated flag in the tool description so the consuming model can reason about truncated model SQL the same way it does for schema DDL.

Mention the dataModel SQL truncation flag in the tool description:

...truncated at 500 characters (marked with 'columnDescriptionsTruncated' / 'schemaDefinitionTruncated', or 'sqlTruncated' inside 'dataModel', when cut). The entity-level description is always returned in full.
  • Apply fix

Check the box to apply the fix or reply for a change | Was this helpful? React with 👍 / 👎

"parameters": {
"description": "Use 'fullyQualifiedName' and 'entityType' from search results directly.",
"type": "object",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
/*
* Copyright 2025 Collate
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.openmetadata.mcp.tools;

import static org.assertj.core.api.Assertions.assertThat;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.junit.jupiter.api.Test;

/**
 * Pins {@link GetEntityTool#cleanEntityResponse}. The entity-level description must always be
 * returned in full (this is the detail tool — the one place full text is reachable after search
 * truncates), while per-column descriptions, schema DDL and dbt SQL — the wide-table multipliers —
 * are truncated. The {@code extension} field (custom properties, #28594 contract) must survive at
 * both table and column level.
 *
 * <p>Assertions are made against the map returned by {@code cleanEntityResponse}, not against the
 * input fixtures, so the tests describe the response contract rather than the fact that nested
 * maps happen to be edited in place.
 */
class GetEntityToolTest {

  /** Suffix every truncated value is expected to end with. */
  private static final String ELLIPSIS = "...";

  /**
   * Expected length of a truncated value: the 500-character limit advertised in the tool
   * description plus the three-character suffix.
   */
  private static final int TRUNCATED_LENGTH = 500 + ELLIPSIS.length();

  /** Builds a mutable VARCHAR column map; the description entry is omitted when {@code null}. */
  private static Map<String, Object> column(String name, String description) {
    Map<String, Object> column = new HashMap<>();
    column.put("name", name);
    column.put("dataType", "VARCHAR");
    if (description != null) {
      column.put("description", description);
    }
    return column;
  }

  /** Asserts that {@code value} is a string of the truncated length ending in the suffix. */
  private static void assertTruncated(Object value) {
    assertThat(value).isInstanceOf(String.class);
    assertThat((String) value).hasSize(TRUNCATED_LENGTH).endsWith(ELLIPSIS);
  }

  @Test
  void entityDescriptionIsNeverTruncated() {
    Map<String, Object> entity = new HashMap<>();
    String longDescription = "d".repeat(5_000);
    entity.put("description", longDescription);

    Map<String, Object> cleaned = GetEntityTool.cleanEntityResponse(entity);

    assertThat(cleaned.get("description")).isEqualTo(longDescription);
    assertThat(cleaned).doesNotContainKey("columnDescriptionsTruncated");
  }

  @Test
  void longColumnDescriptionIsTruncatedWithTopLevelFlag() {
    Map<String, Object> entity = new HashMap<>();
    List<Map<String, Object>> columns = new ArrayList<>();
    columns.add(column("a", "x".repeat(900)));
    columns.add(column("b", "short"));
    entity.put("columns", columns);

    Map<String, Object> cleaned = GetEntityTool.cleanEntityResponse(entity);

    List<Map<String, Object>> cleanedColumns = castList(cleaned.get("columns"));
    assertTruncated(cleanedColumns.get(0).get("description"));
    assertThat(cleanedColumns.get(1).get("description")).isEqualTo("short");
    assertThat(cleaned.get("columnDescriptionsTruncated")).isEqualTo(Boolean.TRUE);
  }

  @Test
  void shortColumnDescriptionsProduceNoFlag() {
    Map<String, Object> entity = new HashMap<>();
    entity.put("columns", List.of(column("a", "short"), column("b", null)));

    Map<String, Object> cleaned = GetEntityTool.cleanEntityResponse(entity);

    assertThat(cleaned).doesNotContainKey("columnDescriptionsTruncated");
  }

  @Test
  void nestedChildColumnDescriptionsAreTruncatedRecursively() {
    Map<String, Object> child = column("inner", "y".repeat(700));
    Map<String, Object> parent = column("outer", "short");
    parent.put("children", List.of(child));
    Map<String, Object> entity = new HashMap<>();
    entity.put("columns", List.of(parent));

    Map<String, Object> cleaned = GetEntityTool.cleanEntityResponse(entity);

    Map<String, Object> cleanedParent = castList(cleaned.get("columns")).get(0);
    Map<String, Object> cleanedChild = castList(cleanedParent.get("children")).get(0);
    assertThat(cleanedParent.get("description")).isEqualTo("short");
    assertTruncated(cleanedChild.get("description"));
    assertThat(cleaned.get("columnDescriptionsTruncated")).isEqualTo(Boolean.TRUE);
  }

  /**
   * A cut on the parent must not stop the walk: children still have to be trimmed. This is the
   * only fixture where parent and child are both over-length, so it is the one that would fail
   * if the children were skipped once the parent had been truncated.
   */
  @Test
  void parentAndChildDescriptionsAreBothTruncated() {
    Map<String, Object> child = column("inner", "c".repeat(800));
    Map<String, Object> parent = column("outer", "p".repeat(800));
    parent.put("children", List.of(child));
    Map<String, Object> entity = new HashMap<>();
    entity.put("columns", List.of(parent));

    Map<String, Object> cleaned = GetEntityTool.cleanEntityResponse(entity);

    Map<String, Object> cleanedParent = castList(cleaned.get("columns")).get(0);
    Map<String, Object> cleanedChild = castList(cleanedParent.get("children")).get(0);
    assertTruncated(cleanedParent.get("description"));
    assertTruncated(cleanedChild.get("description"));
    assertThat(cleaned.get("columnDescriptionsTruncated")).isEqualTo(Boolean.TRUE);
  }

  @Test
  void extensionSurvivesAtTableAndColumnLevel() {
    Map<String, Object> column = column("a", "short");
    column.put("extension", Map.of("colProp", "v"));
    Map<String, Object> entity = new HashMap<>();
    entity.put("extension", Map.of("tableProp", "v"));
    entity.put("columns", List.of(column));

    Map<String, Object> cleaned = GetEntityTool.cleanEntityResponse(entity);

    Map<String, Object> cleanedColumn = castList(cleaned.get("columns")).get(0);
    assertThat(cleaned.get("extension")).isEqualTo(Map.of("tableProp", "v"));
    assertThat(cleanedColumn.get("extension")).isEqualTo(Map.of("colProp", "v"));
  }

  @Test
  void noiseAndVectorFieldsAreRemoved() {
    Map<String, Object> entity = new HashMap<>();
    entity.put("incrementalChangeDescription", Map.of("fieldsAdded", List.of()));
    entity.put("changeDescription", Map.of());
    entity.put("embedding", List.of(0.1, 0.2));
    entity.put("textToEmbed", "blob");
    entity.put("name", "orders");

    Map<String, Object> cleaned = GetEntityTool.cleanEntityResponse(entity);

    assertThat(cleaned)
        .doesNotContainKeys(
            "incrementalChangeDescription", "changeDescription", "embedding", "textToEmbed")
        .containsKey("name");
  }

  @Test
  void schemaDefinitionIsTruncatedWithFlag() {
    Map<String, Object> entity = new HashMap<>();
    entity.put("schemaDefinition", "CREATE TABLE orders (".repeat(60));

    Map<String, Object> cleaned = GetEntityTool.cleanEntityResponse(entity);

    assertTruncated(cleaned.get("schemaDefinition"));
    assertThat(cleaned.get("schemaDefinitionTruncated")).isEqualTo(Boolean.TRUE);
  }

  @Test
  void dataModelSqlAndRawSqlAreTruncatedWithFlag() {
    Map<String, Object> dataModel = new HashMap<>();
    dataModel.put("sql", "SELECT 1 FROM t ".repeat(60));
    dataModel.put("rawSql", "SELECT 2 FROM t ".repeat(60));
    Map<String, Object> entity = new HashMap<>();
    entity.put("dataModel", dataModel);

    Map<String, Object> cleaned = GetEntityTool.cleanEntityResponse(entity);

    Map<String, Object> cleanedModel = castMap(cleaned.get("dataModel"));
    assertTruncated(cleanedModel.get("sql"));
    assertTruncated(cleanedModel.get("rawSql"));
    assertThat(cleanedModel.get("sqlTruncated")).isEqualTo(Boolean.TRUE);
  }

  @Test
  void shortSchemaAndModelSqlAreUntouched() {
    Map<String, Object> dataModel = new HashMap<>();
    dataModel.put("sql", "SELECT 1");
    Map<String, Object> entity = new HashMap<>();
    entity.put("schemaDefinition", "CREATE TABLE t (id INT)");
    entity.put("dataModel", dataModel);

    Map<String, Object> cleaned = GetEntityTool.cleanEntityResponse(entity);

    assertThat(cleaned.get("schemaDefinition")).isEqualTo("CREATE TABLE t (id INT)");
    assertThat(cleaned).doesNotContainKey("schemaDefinitionTruncated");
    assertThat(castMap(cleaned.get("dataModel"))).doesNotContainKey("sqlTruncated");
  }

  @Test
  void nullEntityYieldsEmptyResponse() {
    assertThat(GetEntityTool.cleanEntityResponse(null)).isEmpty();
  }

  /** Unchecked view of a response value as a string-keyed map. */
  @SuppressWarnings("unchecked")
  private static Map<String, Object> castMap(Object value) {
    return (Map<String, Object>) value;
  }

  /** Unchecked view of a response value as a list of column maps. */
  @SuppressWarnings("unchecked")
  private static List<Map<String, Object>> castList(Object value) {
    return (List<Map<String, Object>>) value;
  }
}
Loading