From 26fc055a366bff66c38bdaf54ccc30534e6c0e62 Mon Sep 17 00:00:00 2001 From: vinzee Date: Sat, 16 May 2026 07:39:23 -0700 Subject: [PATCH] Optimize SQL map filters to use text indexes SQL and facet filters such as ResourceAttributes['key'] IN ('value') were passed through unchanged, so ClickHouse used map subscript/IN and could not use text-index direct read on materialized key=value tokens. Changes: - Add rewriteSqlWithKvItems() for Map['key'] IN (...) and Map['key'] = 'value' patterns - Add Metadata.getKvItemsLookup() (shared with Lucene kvItems path) - Export KV_ITEMS_STRATEGIES and normalizeChExpression from queryParser When a materialized KV column (e.g. ResourceAttributeTokens) has a text(tokenizer=array) skip index, SQL filters are rewritten to has()/hasAny() with concat(key, separator, value), matching the existing Lucene quoted-term optimization. --- .changeset/petite-clouds-decide.md | 5 + .../src/__tests__/clickhouse.test.ts | 26 + .../src/__tests__/metadata.test.ts | 111 +++ .../src/__tests__/queryParser.test.ts | 8 + .../src/__tests__/renderChartConfig.test.ts | 649 +++++++++++++++++- .../common-utils/src/__tests__/utils.test.ts | 24 + packages/common-utils/src/clickhouse/index.ts | 14 +- packages/common-utils/src/core/metadata.ts | 62 ++ .../src/core/renderChartConfig.ts | 349 +++++++++- packages/common-utils/src/core/utils.ts | 15 + packages/common-utils/src/queryParser.ts | 59 +- 11 files changed, 1262 insertions(+), 60 deletions(-) create mode 100644 .changeset/petite-clouds-decide.md diff --git a/.changeset/petite-clouds-decide.md b/.changeset/petite-clouds-decide.md new file mode 100644 index 0000000000..e7bdea226f --- /dev/null +++ b/.changeset/petite-clouds-decide.md @@ -0,0 +1,5 @@ +--- +'@hyperdx/common-utils': patch +--- + +Rewrite SQL map filters to has()/hasAny() for KV text index direct read \ No newline at end of file diff --git a/packages/common-utils/src/__tests__/clickhouse.test.ts b/packages/common-utils/src/__tests__/clickhouse.test.ts index 
336f9f9a9e..9d86b43bbe 100644 --- a/packages/common-utils/src/__tests__/clickhouse.test.ts +++ b/packages/common-utils/src/__tests__/clickhouse.test.ts @@ -203,6 +203,32 @@ describe('chSqlToAliasMap - alias unit test', () => { expect(res).toEqual(aliasMap); }); + it('parses SELECT aliases when WHERE has hasAny(array(...)) from KV rewrite', () => { + const chSqlInput: ChSql = { + sql: "SELECT Timestamp as ts, Body as body FROM {HYPERDX_PARAM_1:Identifier}.{HYPERDX_PARAM_2:Identifier} WHERE (has(`ResourceAttributeTokens`, concat('facility', '=', 'local0')) OR hasAny(`ResourceAttributeTokens`, array('cloud.availability_zone=zone-a', 'cloud.availability_zone=zone-b'))) ORDER BY Timestamp DESC LIMIT {HYPERDX_PARAM_3:Int32}", + params: { + HYPERDX_PARAM_1: 'otel', + HYPERDX_PARAM_2: 'otel_logs', + HYPERDX_PARAM_3: 200, + }, + }; + expect(chSqlToAliasMap(chSqlInput)).toEqual({ + ts: 'Timestamp', + body: 'Body', + }); + }); + + it('parses SELECT aliases when WHERE has legacy hasAny bracket arrays', () => { + const chSqlInput: ChSql = { + sql: "SELECT Timestamp as ts FROM {HYPERDX_PARAM_1:Identifier}.{HYPERDX_PARAM_2:Identifier} WHERE hasAny(`ResourceAttributeTokens`, ['facility=local0', 'facility=local1']) ORDER BY Timestamp DESC", + params: { + HYPERDX_PARAM_1: 'otel', + HYPERDX_PARAM_2: 'otel_logs', + }, + }; + expect(chSqlToAliasMap(chSqlInput)).toEqual({ ts: 'Timestamp' }); + }); + it('Alias, with JSON expressions', () => { const chSqlInput: ChSql = { sql: "SELECT Timestamp as ts,ResourceAttributes.service.name as service,toStartOfDay(LogAttributes.start.`time`) as start_time,Body,TimestampTime,ServiceName,TimestampTime FROM {HYPERDX_PARAM_1544803905:Identifier}.{HYPERDX_PARAM_129845054:Identifier} WHERE (TimestampTime >= fromUnixTimestamp64Milli({HYPERDX_PARAM_1456399765:Int64}) AND TimestampTime <= fromUnixTimestamp64Milli({HYPERDX_PARAM_1719057412:Int64})) AND (`ResourceAttributes`.`service`.`name` = 'serviceName') ORDER BY TimestampTime DESC LIMIT 
{HYPERDX_PARAM_49586:Int32} OFFSET {HYPERDX_PARAM_48:Int32}", diff --git a/packages/common-utils/src/__tests__/metadata.test.ts b/packages/common-utils/src/__tests__/metadata.test.ts index f39b1d10e6..adce7fa6b5 100644 --- a/packages/common-utils/src/__tests__/metadata.test.ts +++ b/packages/common-utils/src/__tests__/metadata.test.ts @@ -1,3 +1,4 @@ +import { ColumnMeta } from '../clickhouse'; import { ClickhouseClient } from '../clickhouse/node'; import { Metadata, MetadataCache, parseKeyPath } from '../core/metadata'; import * as renderChartConfigModule from '../core/renderChartConfig'; @@ -416,6 +417,116 @@ describe('Metadata', () => { }); }); + describe('getKvItemsLookup', () => { + const otelKvExpression = + "arrayMap(arr -> concat(arr.1, '=', arr.2), CAST(ResourceAttributes, 'Array(Tuple(String, String))'))"; + + const otelColumns: ColumnMeta[] = [ + { + name: 'ResourceAttributes', + type: 'Map(LowCardinality(String), String)', + default_type: '', + default_expression: '', + codec_expression: '', + comment: '', + ttl_expression: '', + }, + { + name: 'ResourceAttributeTokens', + type: 'Array(String)', + default_type: 'MATERIALIZED', + default_expression: otelKvExpression, + codec_expression: '', + comment: '', + ttl_expression: '', + }, + ]; + + const otelTokensTextIndex = { + name: 'idx_res_attr_tokens_text', + type: 'text', + typeFull: 'text(tokenizer = array)', + expression: 'ResourceAttributeTokens', + granularity: 1, + }; + + beforeEach(() => { + mockCache.getOrFetch.mockImplementation((_key, queryFn) => queryFn()); + jest.spyOn(metadata, 'getColumns').mockResolvedValue(otelColumns); + jest + .spyOn(metadata, 'getSkipIndices') + .mockResolvedValue([otelTokensTextIndex]); + }); + + afterEach(() => { + jest.restoreAllMocks(); + }); + + it('registers ResourceAttributes when KV column and array text index exist', async () => { + const lookup = await metadata.getKvItemsLookup({ + databaseName: 'otel', + tableName: 'otel_logs', + connectionId: 
'test_connection', + }); + + expect(lookup.get('ResourceAttributes')).toEqual({ + kvItemsColumn: 'ResourceAttributeTokens', + separator: '=', + }); + }); + + it('returns empty lookup when text index tokenizer is not array', async () => { + jest.spyOn(metadata, 'getSkipIndices').mockResolvedValue([ + { + ...otelTokensTextIndex, + typeFull: "text(tokenizer = 'splitByNonAlpha')", + }, + ]); + + const lookup = await metadata.getKvItemsLookup({ + databaseName: 'otel', + tableName: 'otel_logs', + connectionId: 'test_connection', + }); + + expect(lookup.size).toBe(0); + }); + + it('returns empty lookup when text index expression does not match KV column', async () => { + jest.spyOn(metadata, 'getSkipIndices').mockResolvedValue([ + { + ...otelTokensTextIndex, + expression: 'mapKeys(ResourceAttributes)', + }, + ]); + + const lookup = await metadata.getKvItemsLookup({ + databaseName: 'otel', + tableName: 'otel_logs', + connectionId: 'test_connection', + }); + + expect(lookup.size).toBe(0); + }); + + it('returns empty lookup when default_expression is not a KV items pattern', async () => { + jest.spyOn(metadata, 'getColumns').mockResolvedValue([ + { + ...otelColumns[1], + default_expression: 'toString(ResourceAttributes)', + }, + ]); + + const lookup = await metadata.getKvItemsLookup({ + databaseName: 'otel', + tableName: 'otel_logs', + connectionId: 'test_connection', + }); + + expect(lookup.size).toBe(0); + }); + }); + describe('getKeyValues', () => { const mockChartConfig: BuilderChartConfigWithDateRange = { from: { diff --git a/packages/common-utils/src/__tests__/queryParser.test.ts b/packages/common-utils/src/__tests__/queryParser.test.ts index 96ca01b9a3..022d9ca9f9 100644 --- a/packages/common-utils/src/__tests__/queryParser.test.ts +++ b/packages/common-utils/src/__tests__/queryParser.test.ts @@ -1745,6 +1745,14 @@ describe('parseKvItemsCastExpression', () => { ).toEqual({ mapColumn: 'ResourceAttributes', separator: '=' }); }); + it('parses CAST KV items expression 
with bare lambda param (ClickHouse form)', () => { + expect( + parseKvItemsCastExpression( + "arrayMap(arr -> concat(arr.1, '=', arr.2), CAST(ResourceAttributes, 'Array(Tuple(String, String))'))", + ), + ).toEqual({ mapColumn: 'ResourceAttributes', separator: '=' }); + }); + it('parses CAST form without spaces in type', () => { expect( parseKvItemsCastExpression( diff --git a/packages/common-utils/src/__tests__/renderChartConfig.test.ts b/packages/common-utils/src/__tests__/renderChartConfig.test.ts index 3895d66ef3..0771ed77d6 100644 --- a/packages/common-utils/src/__tests__/renderChartConfig.test.ts +++ b/packages/common-utils/src/__tests__/renderChartConfig.test.ts @@ -1,17 +1,28 @@ -import { chSql, ColumnMeta, parameterizedQueryToSql } from '@/clickhouse'; +import { + chSql, + chSqlToAliasMap, + ColumnMeta, + parameterizedQueryToSql, +} from '@/clickhouse'; import { Metadata } from '@/core/metadata'; +import { filtersToQuery } from '@/filters'; +import type { KvItemsLookup } from '@/queryParser'; import { ChartConfigWithOptDateRange, DisplayType, MetricsDataType, QuerySettings, + SourceKind, + TSource, } from '@/types'; import { ChartConfigWithOptDateRangeEx, renderChartConfig, + rewriteSqlWithKvItems, timeFilterExpr, } from '../core/renderChartConfig'; +import { buildSearchChartConfig } from '../core/searchChartConfig'; describe('renderChartConfig', () => { let mockMetadata: jest.Mocked; @@ -2721,3 +2732,639 @@ describe('renderChartConfig', () => { }); }); }); + +describe('rewriteSqlWithKvItems', () => { + const resourceAttributesLookup: KvItemsLookup = new Map([ + [ + 'ResourceAttributes', + { kvItemsColumn: 'ResourceAttributeTokens', separator: '=' }, + ], + ['LogAttributes', { kvItemsColumn: 'LogAttributeItems', separator: '=' }], + ]); + + it('rewrites single-value IN to has()', () => { + expect( + rewriteSqlWithKvItems( + "ResourceAttributes['host.ip'] IN ('192.168.1.1')", + resourceAttributesLookup, + ), + ).toBe( + "has(`ResourceAttributeTokens`, 
concat('host.ip', '=', '192.168.1.1'))", + ); + }); + + it('rewrites multi-value IN to hasAny()', () => { + expect( + rewriteSqlWithKvItems( + "ResourceAttributes['facility'] IN ('local0', 'local1')", + resourceAttributesLookup, + ), + ).toBe( + "hasAny(`ResourceAttributeTokens`, array('facility=local0', 'facility=local1'))", + ); + }); + + it('uses array() for hasAny so node-sql-parser can parse the expression', () => { + const result = rewriteSqlWithKvItems( + "ResourceAttributes['facility'] IN ('local0', 'local1')", + resourceAttributesLookup, + ); + expect(result).toContain('hasAny(`ResourceAttributeTokens`, array('); + expect(result).not.toMatch(/hasAny\([^)]+\[/); + }); + + it('rewrites equality to has()', () => { + expect( + rewriteSqlWithKvItems( + "ResourceAttributes['facility'] = 'local0'", + resourceAttributesLookup, + ), + ).toBe("has(`ResourceAttributeTokens`, concat('facility', '=', 'local0'))"); + }); + + it('does not rewrite NOT IN', () => { + const condition = "ResourceAttributes['facility'] NOT IN ('local0')"; + expect(rewriteSqlWithKvItems(condition, resourceAttributesLookup)).toBe( + condition, + ); + }); + + it('does not rewrite !=', () => { + const condition = "ResourceAttributes['facility'] != 'local0'"; + expect(rewriteSqlWithKvItems(condition, resourceAttributesLookup)).toBe( + condition, + ); + }); + + it('leaves unknown map columns unchanged', () => { + const condition = "ScopeAttributes['env'] IN ('prod')"; + expect(rewriteSqlWithKvItems(condition, resourceAttributesLookup)).toBe( + condition, + ); + }); + + it('returns condition unchanged when lookup is empty', () => { + const condition = "ResourceAttributes['host.ip'] IN ('192.168.1.1')"; + expect(rewriteSqlWithKvItems(condition, new Map())).toBe(condition); + }); + + it('rewrites only mapped columns in compound conditions', () => { + const result = rewriteSqlWithKvItems( + "ResourceAttributes['k'] IN ('v') AND ServiceName IN ('api')", + resourceAttributesLookup, + ); + 
expect(result).toContain( + "has(`ResourceAttributeTokens`, concat('k', '=', 'v'))", + ); + expect(result).toContain("ServiceName IN ('api')"); + }); + + it('rewrites LogAttributes when present in lookup', () => { + expect( + rewriteSqlWithKvItems( + "LogAttributes['error.message'] IN ('timeout')", + resourceAttributesLookup, + ), + ).toBe("has(`LogAttributeItems`, concat('error.message', '=', 'timeout'))"); + }); + + it('rewrites materialized column names when materializedFields is provided', () => { + const materializedFields = new Map([ + ["ResourceAttributes['facility']", 'facility'], + ]); + expect( + rewriteSqlWithKvItems( + "facility IN ('local0')", + resourceAttributesLookup, + materializedFields, + ), + ).toBe("has(`ResourceAttributeTokens`, concat('facility', '=', 'local0'))"); + }); + + it('preserves commas inside quoted IN values', () => { + expect( + rewriteSqlWithKvItems( + "ResourceAttributes['k'] IN ('a,b', 'c')", + resourceAttributesLookup, + ), + ).toBe("hasAny(`ResourceAttributeTokens`, array('k=a,b', 'k=c'))"); + }); + + it('parses SQL-escaped apostrophes in equality values', () => { + const result = rewriteSqlWithKvItems( + "ResourceAttributes['name'] = 'O''Brien'", + resourceAttributesLookup, + ); + expect(result).toContain('has(`ResourceAttributeTokens`'); + expect(result).not.toContain("= 'O')"); + expect(result).toMatch(/concat\('name', '=', '.+Brien'\)/); + }); + + it('does not rewrite IN when list is not all string literals', () => { + const condition = "ResourceAttributes['k'] IN (1, 2)"; + expect(rewriteSqlWithKvItems(condition, resourceAttributesLookup)).toBe( + condition, + ); + }); + + it('handles closing paren inside a quoted IN value', () => { + expect( + rewriteSqlWithKvItems( + "ResourceAttributes['k'] IN ('a)b')", + resourceAttributesLookup, + ), + ).toBe("has(`ResourceAttributeTokens`, concat('k', '=', 'a)b'))"); + }); + + it('does not rewrite IN with a subquery', () => { + const condition = + "ResourceAttributes['k'] IN 
(SELECT v FROM t WHERE id = 1)"; + expect(rewriteSqlWithKvItems(condition, resourceAttributesLookup)).toBe( + condition, + ); + }); + + it('does not rewrite when a table alias prefixes the map column', () => { + const condition = "t.ResourceAttributes['k'] IN ('v')"; + expect(rewriteSqlWithKvItems(condition, resourceAttributesLookup)).toBe( + condition, + ); + }); + + it('does not rewrite equality when a table alias prefixes the map column', () => { + const condition = "t.ResourceAttributes['k'] = 'v'"; + expect(rewriteSqlWithKvItems(condition, resourceAttributesLookup)).toBe( + condition, + ); + }); + + it('does not rewrite materialized column IN when a table alias is present', () => { + const materializedFields = new Map([ + ["ResourceAttributes['facility']", 'facility'], + ]); + const condition = "t.facility IN ('local0')"; + expect( + rewriteSqlWithKvItems( + condition, + resourceAttributesLookup, + materializedFields, + ), + ).toBe(condition); + }); + + it('rewrites dotted materialized column names (k8s.namespace)', () => { + const materializedFields = new Map([ + ["ResourceAttributes['k8s.namespace']", 'k8s.namespace'], + ]); + expect( + rewriteSqlWithKvItems( + "k8s.namespace IN ('default', 'production')", + resourceAttributesLookup, + materializedFields, + ), + ).toBe( + "hasAny(`ResourceAttributeTokens`, array('k8s.namespace=default', 'k8s.namespace=production'))", + ); + }); +}); + +describe('renderChartConfig SQL filter KV items rewrite', () => { + let mockMetadata: jest.Mocked; + const start = new Date('2025-01-01'); + const end = new Date('2025-01-02'); + + beforeAll(() => { + jest.spyOn(console, 'warn').mockImplementation(() => {}); + jest.spyOn(console, 'error').mockImplementation(() => {}); + }); + + afterAll(() => { + jest.restoreAllMocks(); + }); + + beforeEach(() => { + mockMetadata = { + getColumns: jest.fn().mockResolvedValue([]), + getMaterializedColumnsLookupTable: jest.fn().mockResolvedValue(new Map()), + getColumn: 
jest.fn().mockResolvedValue(undefined), + getTableMetadata: jest + .fn() + .mockResolvedValue({ primary_key: 'timestamp' }), + getSkipIndices: jest.fn().mockResolvedValue([]), + getSetting: jest.fn().mockResolvedValue(undefined), + getKvItemsLookup: jest.fn().mockResolvedValue( + new Map([ + [ + 'ResourceAttributes', + { + kvItemsColumn: 'ResourceAttributeTokens', + separator: '=', + }, + ], + ]), + ), + } as unknown as jest.Mocked; + }); + + it('rewrites sql filters in $__filters via getKvItemsLookup', async () => { + const result = await renderChartConfig( + { + configType: 'sql', + sqlTemplate: 'SELECT * FROM otel_logs WHERE $__filters', + connection: 'conn-1', + dateRange: [start, end], + source: 'source-1', + from: { databaseName: 'otel', tableName: 'otel_logs' }, + filters: [ + { + type: 'sql', + condition: "ResourceAttributes['host.ip'] IN ('192.168.1.1')", + }, + ], + }, + mockMetadata, + undefined, + ); + + expect(mockMetadata.getKvItemsLookup).toHaveBeenCalledWith({ + databaseName: 'otel', + tableName: 'otel_logs', + connectionId: 'conn-1', + }); + expect(result.sql).toContain( + "has(`ResourceAttributeTokens`, concat('host.ip', '=', '192.168.1.1'))", + ); + expect(result.sql).not.toContain("ResourceAttributes['host.ip'] IN"); + }); + + it('does not apply KV SQL rewrite to lucene filters (language guard)', async () => { + mockMetadata.getColumn = jest + .fn() + .mockImplementation(async ({ column }) => { + if (column === 'ServiceName') { + return { name: 'ServiceName', type: 'String' }; + } + return undefined; + }); + + const result = await renderChartConfig( + { + configType: 'sql', + sqlTemplate: 'SELECT * FROM logs WHERE $__filters', + connection: 'conn-1', + dateRange: [start, end], + source: 'source-1', + from: { databaseName: 'default', tableName: 'logs' }, + implicitColumnExpression: 'Body', + filters: [{ type: 'lucene', condition: 'ServiceName:api' }], + }, + mockMetadata, + undefined, + ); + + expect(result.sql).toContain("ServiceName ILIKE 
'%api%'"); + expect(result.sql).not.toMatch(/has\(`ResourceAttributeTokens`/); + }); + + it('rewrites chart where when whereLanguage is sql', async () => { + const condition = "ResourceAttributes['host.ip'] IN ('192.168.1.1')"; + + const result = await renderChartConfig( + { + displayType: DisplayType.Table, + connection: 'conn-1', + dateRange: [start, end], + from: { databaseName: 'otel', tableName: 'otel_logs' }, + timestampValueExpression: 'TimestampTime', + where: condition, + whereLanguage: 'sql', + select: [{ valueExpression: 'count()', alias: 'count' }], + }, + mockMetadata, + undefined, + ); + + expect(result.sql).toContain( + "has(`ResourceAttributeTokens`, concat('host.ip', '=', '192.168.1.1'))", + ); + expect(result.sql).not.toContain(condition); + }); + + it('falls through with original condition when getKvItemsLookup throws', async () => { + mockMetadata.getKvItemsLookup = jest + .fn() + .mockRejectedValue(new Error('boom')); + const condition = "ResourceAttributes['host.ip'] IN ('192.168.1.1')"; + + const result = await renderChartConfig( + { + configType: 'sql', + sqlTemplate: 'SELECT * FROM otel_logs WHERE $__filters', + connection: 'conn-1', + dateRange: [start, end], + source: 'source-1', + from: { databaseName: 'otel', tableName: 'otel_logs' }, + filters: [{ type: 'sql', condition }], + }, + mockMetadata, + undefined, + ); + + expect(console.warn).toHaveBeenCalledWith( + 'Error fetching KV items lookup for SQL rewrite:', + expect.any(Error), + ); + expect(result.sql).toContain(condition); + expect(result.sql).not.toContain('ResourceAttributeTokens'); + }); +}); + +describe('facet SQL filter KV items rewrite (search page hypothesis)', () => { + const start = new Date('2025-01-01'); + const end = new Date('2025-01-02'); + + const kvItemsLookup: KvItemsLookup = new Map([ + [ + 'ResourceAttributes', + { kvItemsColumn: 'ResourceAttributeTokens', separator: '=' }, + ], + ]); + + const logSource = { + id: 'log-source-1', + kind: SourceKind.Log, + name: 
'logs', + connection: 'conn-1', + from: { databaseName: 'otel', tableName: 'otel_logs' }, + timestampValueExpression: 'TimestampTime', + defaultTableSelectExpression: 'Timestamp, ServiceName, SeverityText, Body', + implicitColumnExpression: 'Body', + } as unknown as TSource; + + function makeMetadata( + materializedFields: Map = new Map(), + ): jest.Mocked { + return { + getColumns: jest.fn().mockResolvedValue([]), + getMaterializedColumnsLookupTable: jest + .fn() + .mockResolvedValue(materializedFields), + getColumn: jest.fn().mockResolvedValue(undefined), + getTableMetadata: jest + .fn() + .mockResolvedValue({ primary_key: 'TimestampTime' }), + getSkipIndices: jest.fn().mockResolvedValue([]), + getSetting: jest.fn().mockResolvedValue(undefined), + getKvItemsLookup: jest.fn().mockResolvedValue(kvItemsLookup), + } as unknown as jest.Mocked; + } + + /** Mirrors /search URL: filters=[sql ServiceName, sql ResourceAttributes['key'] IN (...)] */ + function facetFiltersFromUrl() { + return [ + { type: 'sql' as const, condition: "ServiceName IN ('api')" }, + { + type: 'sql' as const, + condition: + "ResourceAttributes['cloud.availability_zone'] IN ('zone-a')", + }, + ]; + } + + beforeAll(() => { + jest.spyOn(console, 'warn').mockImplementation(() => {}); + jest.spyOn(console, 'error').mockImplementation(() => {}); + }); + + afterAll(() => { + jest.restoreAllMocks(); + }); + + it('filtersToQuery emits bracket notation for map facet keys', () => { + const filters = filtersToQuery({ + "ResourceAttributes['cloud.availability_zone']": { + included: new Set(['zone-a']), + excluded: new Set(), + }, + }); + + expect(filters).toEqual([ + { + type: 'sql', + condition: + "ResourceAttributes['cloud.availability_zone'] IN ('zone-a')", + }, + ]); + }); + + it('Search chart rewrites facet sql filters when materialized lookup is empty', async () => { + const mockMetadata = makeMetadata(new Map()); + const chartConfig = buildSearchChartConfig(logSource, { + where: '', + whereLanguage: 
'lucene', + filters: facetFiltersFromUrl(), + dateRange: [start, end], + }); + + const result = await renderChartConfig( + chartConfig, + mockMetadata, + undefined, + ); + const sql = parameterizedQueryToSql(result); + + expect(sql).toContain( + "has(`ResourceAttributeTokens`, concat('cloud.availability_zone', '=', 'zone-a'))", + ); + expect(sql).not.toContain( + "ResourceAttributes['cloud.availability_zone'] IN", + ); + expect(sql).toContain("ServiceName IN ('api')"); + }); + + it('Search chart rewrites map facet keys that have no materialized column', async () => { + // Only bracket Map['key'] form exists for this attribute. + const mockMetadata = makeMetadata(new Map()); + const chartConfig = buildSearchChartConfig(logSource, { + where: '', + whereLanguage: 'lucene', + filters: facetFiltersFromUrl(), + dateRange: [start, end], + }); + + const result = await renderChartConfig( + chartConfig, + mockMetadata, + undefined, + ); + const sql = parameterizedQueryToSql(result); + + expect(sql).toContain( + "has(`ResourceAttributeTokens`, concat('cloud.availability_zone', '=', 'zone-a'))", + ); + expect(sql).not.toContain( + "ResourceAttributes['cloud.availability_zone'] IN", + ); + }); + + it('Search chart rewrites materialized k8s.namespace facets via KV index', async () => { + const materializedFields = new Map([ + ["ResourceAttributes['k8s.namespace']", 'k8s.namespace'], + ]); + const mockMetadata = makeMetadata(materializedFields); + const chartConfig = buildSearchChartConfig(logSource, { + where: '', + whereLanguage: 'lucene', + filters: [ + { + type: 'sql', + condition: "ResourceAttributes['k8s.namespace'] IN ('default')", + }, + ], + dateRange: [start, end], + }); + + const result = await renderChartConfig( + chartConfig, + mockMetadata, + undefined, + ); + const sql = parameterizedQueryToSql(result); + + expect(sql).toContain( + "has(`ResourceAttributeTokens`, concat('k8s.namespace', '=', 'default'))", + ); + expect(sql).not.toContain('k8s.namespace IN'); + }); 
+ + it('dot-notation facet keys are not rewritten (regex expects bracket notation)', () => { + const lookup = kvItemsLookup; + const condition = + "ResourceAttributes.cloud.availability_zone IN ('zone-a')"; + + expect(rewriteSqlWithKvItems(condition, lookup)).toBe(condition); + }); + + it('facility materialized column still rewrites to has() before fastifySQL', async () => { + const materializedFields = new Map([ + ["ResourceAttributes['facility']", 'facility'], + ]); + const mockMetadata = makeMetadata(materializedFields); + const chartConfig = buildSearchChartConfig(logSource, { + where: '', + whereLanguage: 'lucene', + filters: [ + { + type: 'sql', + condition: "ResourceAttributes['facility'] IN ('local0', 'local1')", + }, + ], + dateRange: [start, end], + }); + + const result = await renderChartConfig( + chartConfig, + mockMetadata, + undefined, + ); + const sql = parameterizedQueryToSql(result); + + expect(sql).toContain( + "hasAny(`ResourceAttributeTokens`, array('facility=local0', 'facility=local1'))", + ); + expect(sql).not.toContain("facility IN ('local0'"); + }); + + it('does not rewrite sql filters when getKvItemsLookup is empty', async () => { + const mockMetadata = makeMetadata(new Map()); + mockMetadata.getKvItemsLookup = jest.fn().mockResolvedValue(new Map()); + + const chartConfig = buildSearchChartConfig(logSource, { + where: '', + whereLanguage: 'lucene', + filters: facetFiltersFromUrl(), + dateRange: [start, end], + }); + + const result = await renderChartConfig( + chartConfig, + mockMetadata, + undefined, + ); + const sql = parameterizedQueryToSql(result); + + expect(sql).toContain( + "ResourceAttributes['cloud.availability_zone'] IN ('zone-a')", + ); + expect(sql).not.toContain('has(`ResourceAttributeTokens`'); + }); +}); + +describe('alias map with KV filter rewrite', () => { + const start = new Date('2025-01-01'); + const end = new Date('2025-01-02'); + + const kvItemsLookup: KvItemsLookup = new Map([ + [ + 'ResourceAttributes', + { 
kvItemsColumn: 'ResourceAttributeTokens', separator: '=' }, + ], + ]); + + const logSource = { + id: 'log-source-1', + kind: SourceKind.Log, + name: 'logs', + connection: 'conn-1', + from: { databaseName: 'otel', tableName: 'otel_logs' }, + timestampValueExpression: 'TimestampTime', + defaultTableSelectExpression: 'Timestamp, ServiceName, SeverityText, Body', + implicitColumnExpression: 'Body', + } as unknown as TSource; + + beforeAll(() => { + jest.spyOn(console, 'warn').mockImplementation(() => {}); + jest.spyOn(console, 'error').mockImplementation(() => {}); + }); + + afterAll(() => { + jest.restoreAllMocks(); + }); + + it('chSqlToAliasMap tolerates search SQL with has/hasAny filters in WHERE', async () => { + const mockMetadata = { + getColumns: jest.fn().mockResolvedValue([]), + getMaterializedColumnsLookupTable: jest.fn().mockResolvedValue(new Map()), + getColumn: jest.fn().mockResolvedValue(undefined), + getTableMetadata: jest + .fn() + .mockResolvedValue({ primary_key: 'TimestampTime' }), + getSkipIndices: jest.fn().mockResolvedValue([]), + getSetting: jest.fn().mockResolvedValue(undefined), + getKvItemsLookup: jest.fn().mockResolvedValue(kvItemsLookup), + } as unknown as jest.Mocked; + + const chartConfig = buildSearchChartConfig(logSource, { + where: '', + whereLanguage: 'lucene', + filters: [ + { type: 'sql', condition: "ServiceName IN ('api')" }, + { + type: 'sql', + condition: "ResourceAttributes['facility'] IN ('local0', 'local1')", + }, + ], + dateRange: [start, end], + }); + + const query = await renderChartConfig(chartConfig, mockMetadata, undefined); + const sql = parameterizedQueryToSql(query); + + // Default search SELECT has no `AS` aliases; parser must not throw on has/hasAny WHERE. 
+ expect(chSqlToAliasMap(query)).toEqual({}); + expect(sql).toContain( + "hasAny(`ResourceAttributeTokens`, array('facility=local0', 'facility=local1'))", + ); + expect(sql).not.toMatch(/hasAny\([^)]+\[/); + }); +}); diff --git a/packages/common-utils/src/__tests__/utils.test.ts b/packages/common-utils/src/__tests__/utils.test.ts index ff8d0a96a2..880bcb1dab 100644 --- a/packages/common-utils/src/__tests__/utils.test.ts +++ b/packages/common-utils/src/__tests__/utils.test.ts @@ -31,6 +31,7 @@ import { replaceJsonExpressions, splitAndTrimCSV, splitAndTrimWithBracket, + sqlForAliasMapParser, } from '../core/utils'; describe('utils', () => { @@ -1915,6 +1916,29 @@ describe('utils', () => { ); }); + describe('sqlForAliasMapParser', () => { + it('strips WHERE with ClickHouse has/hasAny', () => { + expect( + sqlForAliasMapParser( + "SELECT Timestamp as ts FROM otel.otel_logs WHERE hasAny(`ResourceAttributeTokens`, ['a=b']) ORDER BY ts DESC", + ), + ).toBe('SELECT Timestamp as ts FROM otel.otel_logs'); + }); + + it('strips GROUP BY ORDER BY LIMIT and OFFSET', () => { + expect( + sqlForAliasMapParser( + 'SELECT a FROM t GROUP BY a ORDER BY a LIMIT 10 OFFSET 5', + ), + ).toBe('SELECT a FROM t'); + }); + + it('leaves SELECT without trailing clauses unchanged', () => { + const sql = 'SELECT Timestamp as ts, Body FROM otel.otel_logs'; + expect(sqlForAliasMapParser(sql)).toBe(sql); + }); + }); + describe('parseToNumber', () => { it('returns `undefined` for an empty string', () => { expect(parseToNumber('')).toBe(undefined); diff --git a/packages/common-utils/src/clickhouse/index.ts b/packages/common-utils/src/clickhouse/index.ts index 1a8b21283c..7ed00bbcfc 100644 --- a/packages/common-utils/src/clickhouse/index.ts +++ b/packages/common-utils/src/clickhouse/index.ts @@ -22,6 +22,7 @@ import { hashCode, replaceJsonExpressions, splitAndTrimWithBracket, + sqlForAliasMapParser, } from '@/core/utils'; import { isBuilderChartConfig } from '@/guards'; import { 
ChartConfigWithOptDateRange, QuerySettings } from '@/types'; @@ -832,10 +833,11 @@ export function chSqlToAliasMap( // Remove the SETTINGS clause because `SQLParser` doesn't understand it. const [sqlWithoutSettingsClause] = extractSettingsClauseFromEnd(sql); + const sqlForParser = sqlForAliasMapParser(sqlWithoutSettingsClause); // Replace JSON expressions with replacement tokens so that node-sql-parser can parse the SQL const { sqlWithReplacements, replacements: jsonReplacementsToExpressions } = - replaceJsonExpressions(sqlWithoutSettingsClause); + replaceJsonExpressions(sqlForParser); const parser = new SQLParser.Parser(); // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion -- astify returns union type @@ -876,11 +878,11 @@ export function chSqlToAliasMap( } return aliasMap; } catch (e) { - console.error( - 'Error parsing alias map with JSON removed', - e, - 'for query', - chSql, + // Warn without passing the Error object — Next.js dev overlay treats + // console.error(error) as an uncaught runtime failure. + console.warn( + 'Error parsing alias map with JSON removed:', + e instanceof Error ? e.message : e, ); } diff --git a/packages/common-utils/src/core/metadata.ts b/packages/common-utils/src/core/metadata.ts index 06fe045cad..deb2fe1a47 100644 --- a/packages/common-utils/src/core/metadata.ts +++ b/packages/common-utils/src/core/metadata.ts @@ -13,6 +13,12 @@ import { tableExpr, } from '@/clickhouse'; import { renderChartConfig } from '@/core/renderChartConfig'; +import { + KV_ITEMS_STRATEGIES, + type KvItemsInfo, + type KvItemsLookup, + normalizeChExpression, +} from '@/queryParser'; import type { BuilderChartConfig, BuilderChartConfigWithDateRange, @@ -29,6 +35,7 @@ import { getAlignedDateRange, getDistributedTableArgs, objectHash, + parseTokenizerFromTextIndex, } from './utils'; // If filters initially are taking too long to load, decrease this number. 
@@ -950,6 +957,61 @@ export class Metadata { ); } + async getKvItemsLookup({ + databaseName, + tableName, + connectionId, + }: { + databaseName: string; + tableName: string; + connectionId: string; + }): Promise { + return this.cache.getOrFetch( + `${connectionId}.${databaseName}.${tableName}.kvItemsLookup`, + async () => { + const [columns, skipIndices] = await Promise.all([ + this.getColumns({ databaseName, tableName, connectionId }), + this.getSkipIndices({ databaseName, tableName, connectionId }), + ]); + + const lookup: KvItemsLookup = new Map(); + const candidates = columns.filter( + c => + (c.default_type === 'ALIAS' || c.default_type === 'MATERIALIZED') && + c.default_expression, + ); + + for (const candidate of candidates) { + let parsed: { mapColumn: string; separator: string } | undefined; + for (const strategy of KV_ITEMS_STRATEGIES) { + parsed = strategy(candidate.default_expression); + if (parsed) break; + } + if (!parsed) continue; + + const hasArrayTextIndex = skipIndices.some(idx => { + if (idx.type !== 'text') return false; + const tokenizer = parseTokenizerFromTextIndex(idx); + if (tokenizer?.type !== 'array') return false; + return ( + normalizeChExpression(idx.expression) === + normalizeChExpression(candidate.name) + ); + }); + + if (hasArrayTextIndex) { + lookup.set(parsed.mapColumn, { + kvItemsColumn: candidate.name, + separator: parsed.separator, + }); + } + } + + return lookup; + }, + ); + } + /** * Inspects the ClickHouse connection for OpenTelemetry telemetry tables. * Returns one coherent set of tables from the same database. 
diff --git a/packages/common-utils/src/core/renderChartConfig.ts b/packages/common-utils/src/core/renderChartConfig.ts index 9c8a6f0d4e..55b9c8dd97 100644 --- a/packages/common-utils/src/core/renderChartConfig.ts +++ b/packages/common-utils/src/core/renderChartConfig.ts @@ -17,7 +17,11 @@ import { } from '@/core/utils'; import { isBuilderChartConfig, isRawSqlChartConfig } from '@/guards'; import { replaceMacros } from '@/macros'; -import { CustomSchemaSQLSerializerV2, SearchQueryBuilder } from '@/queryParser'; +import { + CustomSchemaSQLSerializerV2, + type KvItemsLookup, + SearchQueryBuilder, +} from '@/queryParser'; import { QUERY_PARAMS_BY_DISPLAY_TYPE } from '@/rawSqlParams'; import { AggregateFunction, @@ -800,6 +804,330 @@ function renderFrom({ ); } +const MAP_SUBSCRIPT_EXPR_RE = /^(\w+)\['([^']+)'\]$/; + +function escapeRegExp(value: string): string { + return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); +} + +/** SQL string literal body: '' inside quotes becomes a single apostrophe. */ +function unescapeSqlStringLiteral(body: string): string { + return body.replace(/''/g, "'"); +} + +/** + * Parse a single-quoted SQL string literal starting at `start` (must be `'). + * Returns unescaped value and index after the closing quote. + */ +function parseSqlSingleQuotedLiteralAt( + s: string, + start: number, +): { value: string; end: number } | undefined { + if (s[start] !== "'") { + return undefined; + } + + let i = start + 1; + let body = ''; + while (i < s.length) { + if (s[i] === "'") { + if (s[i + 1] === "'") { + body += "'"; + i += 2; + continue; + } + return { value: unescapeSqlStringLiteral(body), end: i + 1 }; + } + body += s[i]; + i++; + } + + return undefined; +} + +/** + * Parse the inside of IN (...): comma-separated single-quoted string literals only. + * Returns undefined if the list is malformed or contains non-literals. 
+ */ +function parseSqlInStringLiteralList(listBody: string): string[] | undefined { + const values: string[] = []; + let i = 0; + const n = listBody.length; + + while (i < n) { + while (i < n && /\s/.test(listBody[i]!)) { + i++; + } + if (i >= n) { + break; + } + + const parsed = parseSqlSingleQuotedLiteralAt(listBody, i); + if (!parsed) { + return undefined; + } + values.push(parsed.value); + i = parsed.end; + + while (i < n && /\s/.test(listBody[i]!)) { + i++; + } + if (i >= n) { + break; + } + if (listBody[i] !== ',') { + return undefined; + } + i++; + } + + return values.length > 0 ? values : undefined; +} + +/** + * Index of the closing `)` for `(` at openParenIndex, ignoring `)` inside quotes. + */ +function findClosingParenIndex( + s: string, + openParenIndex: number, +): number | undefined { + if (s[openParenIndex] !== '(') { + return undefined; + } + + let depth = 0; + let i = openParenIndex; + + while (i < s.length) { + if (s[i] === "'") { + const parsed = parseSqlSingleQuotedLiteralAt(s, i); + if (!parsed) { + return undefined; + } + i = parsed.end; + continue; + } + + if (s[i] === '(') { + depth++; + i++; + continue; + } + + if (s[i] === ')') { + depth--; + if (depth === 0) { + return i; + } + i++; + continue; + } + + i++; + } + + return undefined; +} + +function isNotInMatch(condition: string, inKeywordIndex: number): boolean { + const before = condition.slice( + Math.max(0, inKeywordIndex - 5), + inKeywordIndex, + ); + return /\bNOT\s*$/i.test(before); +} + +type MapKeyRewriteFn = ( + mapCol: string, + key: string, + values: string[], +) => string | undefined; + +function rewriteSqlInClauses( + condition: string, + inPrefixRe: RegExp, + getMapKey: (match: RegExpExecArray) => { mapCol: string; key: string } | null, + rewrite: MapKeyRewriteFn, +): string { + let result = ''; + let lastIndex = 0; + const re = new RegExp(inPrefixRe.source, inPrefixRe.flags); + + for ( + let match = re.exec(condition); + match != null; + match = re.exec(condition) + ) { + 
const inIndex = match.index + match[0].lastIndexOf('IN'); + if (isNotInMatch(condition, inIndex)) { + continue; + } + + const openParenIndex = match.index + match[0].length - 1; + const closeParenIndex = findClosingParenIndex(condition, openParenIndex); + if (closeParenIndex === undefined) { + continue; + } + + const mapKey = getMapKey(match); + if (mapKey == null) { + result += condition.slice(lastIndex, closeParenIndex + 1); + lastIndex = closeParenIndex + 1; + re.lastIndex = lastIndex; + continue; + } + + const listBody = condition.slice(openParenIndex + 1, closeParenIndex); + const values = parseSqlInStringLiteralList(listBody); + const { mapCol, key } = mapKey; + const rewritten = values != null ? rewrite(mapCol, key, values) : undefined; + + result += condition.slice(lastIndex, match.index); + if (rewritten != null) { + result += rewritten; + } else { + result += condition.slice(match.index, closeParenIndex + 1); + } + lastIndex = closeParenIndex + 1; + re.lastIndex = lastIndex; + } + + return result + condition.slice(lastIndex); +} + +function buildMaterializedMapKeyLookup( + materializedFields: Map | undefined, +): Map { + const lookup = new Map(); + if (materializedFields == null) { + return lookup; + } + + for (const [expr, materializedColumn] of materializedFields) { + const match = MAP_SUBSCRIPT_EXPR_RE.exec(expr); + if (match) { + lookup.set(materializedColumn, { mapColumn: match[1], key: match[2] }); + } + } + + return lookup; +} + +function renderKvItemsHasExpr( + kvInfo: { kvItemsColumn: string; separator: string }, + key: string, + values: string[], +): string { + if (values.length === 1) { + return SqlString.format(`has(??, concat(?, ?, ?))`, [ + kvInfo.kvItemsColumn, + key, + kvInfo.separator, + values[0], + ]); + } + + const tokens = values.map(v => `${key}${kvInfo.separator}${v}`); + // Use array(...) instead of [...] so node-sql-parser can parse the WHERE + // when building alias maps (bracket literals fail with "O" in values). 
+ return SqlString.format( + `hasAny(??, array(${tokens.map(() => '?').join(', ')}))`, + [kvInfo.kvItemsColumn, ...tokens], + ); +} + +/** + * Rewrites SQL filter conditions containing Map['key'] IN ('v1', 'v2', ...) + * to use has()/hasAny() on a KV items column when a text(tokenizer=array) + * index exists. This enables direct read from the inverted index. + * + * Patterns handled: + * MapCol['key'] IN ('v1') -> has(KvCol, concat('key', '=', 'v1')) + * MapCol['key'] IN ('v1', 'v2') -> hasAny(KvCol, array('key=v1', 'key=v2')) + * MapCol['key'] = 'v1' -> has(KvCol, concat('key', '=', 'v1')) + * materialized_col IN ('v1', ...) -> same, when col maps to MapCol['key'] + * MapCol['key'] NOT IN ('v1', ...) and MapCol['key'] != 'v1' are NOT + * rewritten (negation semantics differ when the key is absent). + */ +export function rewriteSqlWithKvItems( + condition: string, + kvItemsLookup: KvItemsLookup, + materializedFields?: Map, +): string { + if (kvItemsLookup.size === 0) return condition; + + const MAP_IN_PREFIX_RE = /(?:(\w+)\.)?(\w+)\['([^']+)'\]\s+IN\s+\(/gi; + const MAP_EQ_RE = + /(?:(\w+)\.)?(\w+)\['([^']+)'\]\s*(? { + const kvInfo = kvItemsLookup.get(mapCol); + if (!kvInfo) return undefined; + return renderKvItemsHasExpr(kvInfo, key, values); + }; + + const mapKeyFromSubscriptMatch = ( + match: RegExpExecArray, + ): { mapCol: string; key: string } | null => { + if (match[1]) { + return null; + } + return { mapCol: match[2]!, key: match[3]! }; + }; + + let result = rewriteSqlInClauses( + condition, + MAP_IN_PREFIX_RE, + mapKeyFromSubscriptMatch, + rewriteMapKeyPredicate, + ); + + result = result.replace( + MAP_EQ_RE, + (match, tableAlias, mapCol, key, rawValue) => { + if (tableAlias) { + return match; + } + const value = unescapeSqlStringLiteral(rawValue); + return rewriteMapKeyPredicate(mapCol, key, [value]) ?? 
match; + }, + ); + + const materializedMapKeyLookup = + buildMaterializedMapKeyLookup(materializedFields); + for (const [ + materializedColumn, + { mapColumn, key }, + ] of materializedMapKeyLookup) { + const escapedColumn = escapeRegExp(materializedColumn); + const matInPrefixRe = new RegExp( + `(? (match[1] ? null : { mapCol: mapColumn, key }), + rewriteMapKeyPredicate, + ); + + result = result.replace(matEqRe, (match, tableAlias, _col, rawValue) => { + if (tableAlias) { + return match; + } + const value = unescapeSqlStringLiteral(rawValue); + return rewriteMapKeyPredicate(mapColumn, key, [value]) ?? match; + }); + } + + return result; +} + async function renderWhereExpressionStr({ condition, language, @@ -850,6 +1178,25 @@ async function renderWhereExpressionStr({ // ignore } + // Rewrite Map['key'] IN/= (and materialized map columns) before fastifySQL + // substitutes Map subscripts to column names, which would skip KV index use. + if (language === 'sql' && from.databaseName && from.tableName) { + try { + const kvItemsLookup = await metadata.getKvItemsLookup({ + databaseName: from.databaseName, + tableName: from.tableName, + connectionId, + }); + _condition = rewriteSqlWithKvItems( + _condition, + kvItemsLookup, + materializedFields, + ); + } catch (error) { + console.warn('Error fetching KV items lookup for SQL rewrite:', error); + } + } + const _sqlPrefix = 'SELECT * FROM `t` WHERE '; const rawSQL = `${_sqlPrefix}${_condition}`; // strip 'SELECT * FROM `t` WHERE ' from the sql diff --git a/packages/common-utils/src/core/utils.ts b/packages/common-utils/src/core/utils.ts index f2185904b3..b6c009ec58 100644 --- a/packages/common-utils/src/core/utils.ts +++ b/packages/common-utils/src/core/utils.ts @@ -779,6 +779,21 @@ export function extractSettingsClauseFromEnd( return [remaining, settingsClause] as const; } +/** + * Returns SQL safe for node-sql-parser when extracting SELECT aliases. 
+ * Drops WHERE/GROUP BY/HAVING/ORDER BY/LIMIT/OFFSET — those clauses may contain + * ClickHouse-only functions (has, hasAny, array(...)) that the parser rejects. + */ +export function sqlForAliasMapParser(sql: string): string { + const clauseStart = + /\s+(?:WHERE|GROUP\s+BY|HAVING|ORDER\s+BY|LIMIT|OFFSET)\s+/i; + const match = clauseStart.exec(sql); + if (match?.index != null) { + return sql.slice(0, match.index).trim(); + } + return sql.trim(); +} + export function parseToNumber(input: string): number | undefined { const trimmed = input.trim(); diff --git a/packages/common-utils/src/queryParser.ts b/packages/common-utils/src/queryParser.ts index 0781019705..f13bdb3bff 100644 --- a/packages/common-utils/src/queryParser.ts +++ b/packages/common-utils/src/queryParser.ts @@ -52,7 +52,7 @@ function buildMapContains(mapField: string) { } /** Strip whitespace and backtick-quoting from a ClickHouse expression for comparison */ -function normalizeChExpression(expr: string): string { +export function normalizeChExpression(expr: string): string { return expr.replace(/\s+/g, '').replace(/`/g, ''); } @@ -1048,7 +1048,7 @@ export function parseKvItemsCastExpression( } // To add another known KV items parsing strategy, simply define another function with the same signature and add the strategy to this array -const KV_ITEMS_STRATEGIES = [ +export const KV_ITEMS_STRATEGIES = [ parseKvItemsExpression, parseKvItemsCastExpression, ] as const; @@ -1115,56 +1115,11 @@ export class CustomSchemaSQLSerializerV2 extends SQLSerializer { * a text(tokenizer=array) skip index. */ private async buildKvItemsLookup(): Promise { - const [columns, skipIndices] = await Promise.all([ - this.metadata.getColumns({ - databaseName: this.databaseName, - tableName: this.tableName, - connectionId: this.connectionId, - }), - this.skipIndicesPromise ?? 
Promise.resolve([]), - ]); - - const lookup: KvItemsLookup = new Map(); - - // Find columns that are ALIAS or MATERIALIZED with KV items expressions - const kvItemsCandidates = columns.filter( - c => - (c.default_type === 'ALIAS' || c.default_type === 'MATERIALIZED') && - c.default_expression, - ); - - for (const candidate of kvItemsCandidates) { - const parsed = (() => { - let parsed: { mapColumn: string; separator: string } | undefined; - for (const strategy of KV_ITEMS_STRATEGIES) { - parsed = strategy(candidate.default_expression); - if (parsed) break; - } - return parsed; - })(); - if (!parsed) continue; - - // Check if this column has a text(tokenizer=array) skip index - const hasArrayTextIndex = skipIndices.some(idx => { - if (idx.type !== 'text') return false; - const tokenizer = parseTokenizerFromTextIndex(idx); - if (tokenizer?.type !== 'array') return false; - // Require exact match: has() won't benefit from a transformed index like lower(col) - return ( - normalizeChExpression(idx.expression) === - normalizeChExpression(candidate.name) - ); - }); - - if (hasArrayTextIndex) { - lookup.set(parsed.mapColumn, { - kvItemsColumn: candidate.name, - separator: parsed.separator, - }); - } - } - - return lookup; + return this.metadata.getKvItemsLookup({ + databaseName: this.databaseName, + tableName: this.tableName, + connectionId: this.connectionId, + }); } /**