diff --git a/pom.xml b/pom.xml index 0cd14f0533..e93fc6a98c 100644 --- a/pom.xml +++ b/pom.xml @@ -401,6 +401,10 @@ com.spotify.fmt fmt-maven-plugin 2.23 + + + ^((?!SuperShreddingMetadata).)*\.java$ + process-sources diff --git a/src/main/java/io/stargate/sgv2/jsonapi/config/constants/DocumentConstants.java b/src/main/java/io/stargate/sgv2/jsonapi/config/constants/DocumentConstants.java index 80f5e2d1d5..2f59af5d58 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/config/constants/DocumentConstants.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/config/constants/DocumentConstants.java @@ -36,44 +36,6 @@ interface Fields { String SCORES_FIELD = "scores"; } - /** Names of columns in Document-containing Tables */ - interface Columns { - /** - * Atomic values are added to the array_contains field to support $eq on both atomic value and - * array element - */ - String DATA_CONTAINS_COLUMN_NAME = "array_contains"; - - String QUERY_BOOLEAN_MAP_COLUMN_NAME = "query_bool_values"; - - String QUERY_DOUBLE_MAP_COLUMN_NAME = "query_dbl_values"; - - String QUERY_NULL_MAP_COLUMN_NAME = "query_null_values"; - - /** Text map support _id $ne and _id $nin on both atomic value and array element */ - String QUERY_TEXT_MAP_COLUMN_NAME = "query_text_values"; - - String QUERY_TIMESTAMP_MAP_COLUMN_NAME = "query_timestamp_values"; - - /** Physical table column name that stores the vector field. */ - String VECTOR_SEARCH_INDEX_COLUMN_NAME = "query_vector_value"; - - /** Document field name to which vector data is stored. */ - String VECTOR_EMBEDDING_FIELD = "$vector"; - - /** Document field name that will have text value for which vectorize method in called */ - String VECTOR_EMBEDDING_TEXT_FIELD = "$vectorize"; - - /** Document field name that will have text value for which vectorize method in called */ - String BINARY_VECTOR_TEXT_FIELD = "$binary"; - - /** Field name used in projection clause to get similarity score in response. 
*/ - String VECTOR_FUNCTION_SIMILARITY_FIELD = "$similarity"; - - /** Physical table column name that stores the lexical content. */ - String LEXICAL_INDEX_COLUMN_NAME = "query_lexical_value"; - } - interface KeyTypeId { /** * Type id are used in key stored in database representing the datatype of the id field. These diff --git a/src/main/java/io/stargate/sgv2/jsonapi/exception/ErrorFormatters.java b/src/main/java/io/stargate/sgv2/jsonapi/exception/ErrorFormatters.java index 0e22d73045..0956a326eb 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/exception/ErrorFormatters.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/exception/ErrorFormatters.java @@ -123,7 +123,8 @@ public static String errFmt(ApiDataType apiDataType) { } public static String errFmt(DataType dataType) { - return nullSafe(dataType, d -> d.asCql(true, true)); + // pass false for includeFrozen to avoid using frozen<> when not needed. + return nullSafe(dataType, d -> d.asCql(false, true)); } public static Map errVars(SchemaObject schemaObject) { diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/CollectionIndexUsage.java b/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/CollectionIndexUsage.java index 231f16d84f..c848b3d085 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/CollectionIndexUsage.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/CollectionIndexUsage.java @@ -3,7 +3,7 @@ import com.google.common.base.Preconditions; import io.micrometer.core.instrument.Tag; import io.micrometer.core.instrument.Tags; -import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants; +import io.stargate.sgv2.jsonapi.service.schema.collections.spec.SuperShreddingMetadata; /** * This class is used to track the usage of indexes in a query. 
It is used to generate metrics for @@ -34,23 +34,15 @@ public Tags getTags() { Tag.of("key", String.valueOf(primaryKeyTag)), Tag.of("exist_keys", String.valueOf(existKeysIndexTag)), Tag.of("array_size", String.valueOf(arraySizeIndexTag)), + Tag.of(SuperShreddingMetadata.Names.ARRAY_CONTAINS, String.valueOf(arrayContainsTag)), + Tag.of(SuperShreddingMetadata.Names.QUERY_BOOLEAN_VALUES, String.valueOf(booleanIndexTag)), + Tag.of(SuperShreddingMetadata.Names.QUERY_DOUBLE_VALUES, String.valueOf(numberIndexTag)), + Tag.of(SuperShreddingMetadata.Names.QUERY_NULL_VALUES, String.valueOf(nullIndexTag)), + Tag.of(SuperShreddingMetadata.Names.QUERY_TEXT_VALUES, String.valueOf(textIndexTag)), Tag.of( - DocumentConstants.Columns.DATA_CONTAINS_COLUMN_NAME, String.valueOf(arrayContainsTag)), - Tag.of( - DocumentConstants.Columns.QUERY_BOOLEAN_MAP_COLUMN_NAME, - String.valueOf(booleanIndexTag)), - Tag.of( - DocumentConstants.Columns.QUERY_DOUBLE_MAP_COLUMN_NAME, String.valueOf(numberIndexTag)), - Tag.of(DocumentConstants.Columns.QUERY_NULL_MAP_COLUMN_NAME, String.valueOf(nullIndexTag)), - Tag.of(DocumentConstants.Columns.QUERY_TEXT_MAP_COLUMN_NAME, String.valueOf(textIndexTag)), - Tag.of( - DocumentConstants.Columns.QUERY_TIMESTAMP_MAP_COLUMN_NAME, - String.valueOf(timestampIndexTag)), - Tag.of( - DocumentConstants.Columns.VECTOR_SEARCH_INDEX_COLUMN_NAME, - String.valueOf(vectorIndexTag)), - Tag.of( - DocumentConstants.Columns.LEXICAL_INDEX_COLUMN_NAME, String.valueOf(lexicalIndexTag))); + SuperShreddingMetadata.Names.QUERY_TIMESTAMP_VALUES, String.valueOf(timestampIndexTag)), + Tag.of(SuperShreddingMetadata.Names.QUERY_VECTOR_VALUE, String.valueOf(vectorIndexTag)), + Tag.of(SuperShreddingMetadata.Names.QUERY_LEXICAL_VALUE, String.valueOf(lexicalIndexTag))); } /** diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/override/ExtendedVectorType.java b/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/override/ExtendedVectorType.java index 
5ee0d617aa..92d71eff65 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/override/ExtendedVectorType.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/override/ExtendedVectorType.java @@ -4,8 +4,9 @@ import com.datastax.oss.driver.internal.core.type.DefaultVectorType; /** - * Extended vector type to support vector size This is needed because java drivers - * DataTypes.vectorOf() method has a bug + * Extended vector type to support vector size. + * + *

Basically a clone of {@link DefaultVectorType} but changes the {@link #asCql} override. */ public class ExtendedVectorType extends DefaultVectorType { public ExtendedVectorType(DataType subtype, int vectorSize) { @@ -14,6 +15,10 @@ public ExtendedVectorType(DataType subtype, int vectorSize) { @Override public String asCql(boolean includeFrozen, boolean pretty) { - return "VECTOR<" + getElementType().asCql(includeFrozen, pretty) + "," + getDimensions() + ">"; + // NOTE: this is very similar to the DefaultVectorType.asCql() method, the difference + // is passing along the includeFrozen and pretty parameters. Default sets them to true + // which means frozen is included in places we dont want it. + return String.format( + "vector<%s, %d>", getElementType().asCql(includeFrozen, pretty), getDimensions()); } } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/MetadataDBTask.java b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/MetadataDBTask.java index 2955b8ca93..4e20205b25 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/MetadataDBTask.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/MetadataDBTask.java @@ -16,7 +16,7 @@ import io.stargate.sgv2.jsonapi.service.operation.tasks.Task; import io.stargate.sgv2.jsonapi.service.operation.tasks.TaskRetryPolicy; import io.stargate.sgv2.jsonapi.service.schema.SchemaObject; -import io.stargate.sgv2.jsonapi.service.schema.collections.CollectionTableMatcher; +import io.stargate.sgv2.jsonapi.service.schema.collections.spec.SuperShreddingTablePredicate; import io.stargate.sgv2.jsonapi.util.CqlIdentifierUtil; import java.util.List; import java.util.Map; @@ -31,7 +31,7 @@ public abstract class MetadataDBTask extends DBTas // Re-use the matcher for a collection, anything not a collection is a table protected static final Predicate TABLE_MATCHER = - new CollectionTableMatcher().negate(); + new SuperShreddingTablePredicate().negate(); // this will be set on 
executeStatement // TODO: BETTER CONTROL ON WHEN THIS IS SET AND NOT SET diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/CreateCollectionOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/CreateCollectionOperation.java index 7776d6ba74..6d55e9292e 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/CreateCollectionOperation.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/CreateCollectionOperation.java @@ -41,7 +41,7 @@ import io.stargate.sgv2.jsonapi.service.schema.collections.CollectionLexicalDef; import io.stargate.sgv2.jsonapi.service.schema.collections.CollectionRerankDef; import io.stargate.sgv2.jsonapi.service.schema.collections.CollectionSchemaObject; -import io.stargate.sgv2.jsonapi.service.schema.collections.CollectionTableMatcher; +import io.stargate.sgv2.jsonapi.service.schema.collections.spec.SuperShreddingTablePredicate; import io.stargate.sgv2.jsonapi.service.schema.tables.CQLSAIIndex; import java.time.Duration; import java.util.*; @@ -69,7 +69,8 @@ public record CreateCollectionOperation( private static final Logger LOGGER = LoggerFactory.getLogger(CreateCollectionOperation.class); - private static final CollectionTableMatcher COLLECTION_MATCHER = new CollectionTableMatcher(); + private static final SuperShreddingTablePredicate COLLECTION_MATCHER = + new SuperShreddingTablePredicate(); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/FindCollectionOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/FindCollectionOperation.java index 60ac326ab5..2c74be73cc 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/FindCollectionOperation.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/FindCollectionOperation.java @@ -11,7 +11,6 @@ 
import io.stargate.sgv2.jsonapi.api.model.command.CommandResult; import io.stargate.sgv2.jsonapi.api.model.command.clause.sort.SortExpression; import io.stargate.sgv2.jsonapi.api.request.RequestContext; -import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants; import io.stargate.sgv2.jsonapi.exception.SchemaException; import io.stargate.sgv2.jsonapi.service.cql.builder.Query; import io.stargate.sgv2.jsonapi.service.cql.builder.QueryBuilder; @@ -24,6 +23,7 @@ import io.stargate.sgv2.jsonapi.service.operation.query.DBLogicalExpression; import io.stargate.sgv2.jsonapi.service.projection.DocumentProjector; import io.stargate.sgv2.jsonapi.service.schema.collections.CollectionSchemaObject; +import io.stargate.sgv2.jsonapi.service.schema.collections.spec.SuperShreddingMetadata; import io.stargate.sgv2.jsonapi.service.shredding.collections.DocumentId; import java.util.*; import java.util.function.Supplier; @@ -524,8 +524,7 @@ private List buildSelectQueries(IDCollectionFilter additionalId if (bm25Expr != null) { qb = qb.bm25Sort( - DocumentConstants.Columns.LEXICAL_INDEX_COLUMN_NAME, - bm25Expr.getLexicalQuery()); + SuperShreddingMetadata.Names.QUERY_LEXICAL_VALUE, bm25Expr.getLexicalQuery()); } query = qb.build(); } else { @@ -547,14 +546,14 @@ private Query getVectorSearchQueryByExpression(Expression expres .select() .column(CollectionReadType.DOCUMENT == readType ? 
documentColumns : documentKeyColumns) .similarityFunction( - DocumentConstants.Columns.VECTOR_SEARCH_INDEX_COLUMN_NAME, + SuperShreddingMetadata.Names.QUERY_VECTOR_VALUE, commandContext().schemaObject().similarityFunction()) .from( commandContext.schemaObject().identifier().keyspace(), commandContext.schemaObject().identifier().table()) .where(expression) .limit(limit) - .vsearch(DocumentConstants.Columns.VECTOR_SEARCH_INDEX_COLUMN_NAME, vector()) + .vsearch(SuperShreddingMetadata.Names.QUERY_VECTOR_VALUE, vector()) .build(); } else { return new QueryBuilder() @@ -565,7 +564,7 @@ private Query getVectorSearchQueryByExpression(Expression expres commandContext.schemaObject().identifier().table()) .where(expression) .limit(limit) - .vsearch(DocumentConstants.Columns.VECTOR_SEARCH_INDEX_COLUMN_NAME, vector()) + .vsearch(SuperShreddingMetadata.Names.QUERY_VECTOR_VALUE, vector()) .build(); } } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/FindCollectionsCollectionOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/FindCollectionsCollectionOperation.java index 421faa26af..8fbb607497 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/FindCollectionsCollectionOperation.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/FindCollectionsCollectionOperation.java @@ -17,32 +17,34 @@ import io.stargate.sgv2.jsonapi.service.operation.Operation; import io.stargate.sgv2.jsonapi.service.schema.KeyspaceSchemaObject; import io.stargate.sgv2.jsonapi.service.schema.collections.CollectionSchemaObject; -import io.stargate.sgv2.jsonapi.service.schema.collections.CollectionTableMatcher; +import io.stargate.sgv2.jsonapi.service.schema.collections.spec.SuperShreddingTablePredicate; import java.util.List; import java.util.function.Supplier; /** * Find collection operation. Uses {@link CQLSessionCache} to fetch all valid jsonapi tables for a - * namespace. 
The schema check against the table is done in the {@link CollectionTableMatcher}. + * namespace. The schema check against the table is done in the {@link + * SuperShreddingTablePredicate}. * * @param explain - returns collection options if `true`; returns only collection names if `false` * @param objectMapper {@link ObjectMapper} * @param cqlSessionCache {@link CQLSessionCache} - * @param tableMatcher {@link CollectionTableMatcher} + * @param tableMatcher {@link SuperShreddingTablePredicate} * @param commandContext {@link CommandContext} */ public record FindCollectionsCollectionOperation( boolean explain, ObjectMapper objectMapper, CQLSessionCache cqlSessionCache, - CollectionTableMatcher tableMatcher, + SuperShreddingTablePredicate tableMatcher, CommandContext commandContext) implements Operation { // shared table matcher instance // TODO: if this is static why does the record that have an instance variable passed by the ctor // below ? - private static final CollectionTableMatcher TABLE_MATCHER = new CollectionTableMatcher(); + private static final SuperShreddingTablePredicate TABLE_MATCHER = + new SuperShreddingTablePredicate(); public FindCollectionsCollectionOperation( boolean explain, diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/filters/collection/AllCollectionFilter.java b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/filters/collection/AllCollectionFilter.java index 4f7c63ab2b..e10ace112d 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/filters/collection/AllCollectionFilter.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/filters/collection/AllCollectionFilter.java @@ -2,12 +2,12 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.JsonNodeFactory; -import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants; import io.stargate.sgv2.jsonapi.exception.ServerException; import 
io.stargate.sgv2.jsonapi.service.operation.builder.BuiltCondition; import io.stargate.sgv2.jsonapi.service.operation.builder.BuiltConditionPredicate; import io.stargate.sgv2.jsonapi.service.operation.builder.ConditionLHS; import io.stargate.sgv2.jsonapi.service.operation.builder.JsonTerm; +import io.stargate.sgv2.jsonapi.service.schema.collections.spec.SuperShreddingMetadata; import io.stargate.sgv2.jsonapi.service.shredding.collections.DocValueHasher; import java.util.ArrayList; import java.util.List; @@ -57,7 +57,7 @@ public List getAll() { this.collectionIndexUsage.arrayContainsTag = true; result.add( BuiltCondition.of( - ConditionLHS.column(DocumentConstants.Columns.DATA_CONTAINS_COLUMN_NAME), + ConditionLHS.column(SuperShreddingMetadata.Names.ARRAY_CONTAINS), negation ? BuiltConditionPredicate.NOT_CONTAINS : BuiltConditionPredicate.CONTAINS, new JsonTerm(getHashValue(new DocValueHasher(), getPath(), value)))); } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/filters/collection/InCollectionFilter.java b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/filters/collection/InCollectionFilter.java index 77e230164d..cac60f1a02 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/filters/collection/InCollectionFilter.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/filters/collection/InCollectionFilter.java @@ -4,13 +4,13 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.JsonNodeFactory; -import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants; import io.stargate.sgv2.jsonapi.exception.FilterException; import io.stargate.sgv2.jsonapi.exception.ServerException; import io.stargate.sgv2.jsonapi.service.operation.builder.BuiltCondition; import io.stargate.sgv2.jsonapi.service.operation.builder.BuiltConditionPredicate; import io.stargate.sgv2.jsonapi.service.operation.builder.ConditionLHS; import 
io.stargate.sgv2.jsonapi.service.operation.builder.JsonTerm; +import io.stargate.sgv2.jsonapi.service.schema.collections.spec.SuperShreddingMetadata; import io.stargate.sgv2.jsonapi.service.shredding.collections.DocValueHasher; import io.stargate.sgv2.jsonapi.service.shredding.collections.DocumentId; import java.math.BigDecimal; @@ -100,7 +100,7 @@ public List getAll() { this.collectionIndexUsage.arrayContainsTag = true; inResult.add( BuiltCondition.of( - ConditionLHS.column(DocumentConstants.Columns.DATA_CONTAINS_COLUMN_NAME), + ConditionLHS.column(SuperShreddingMetadata.Names.ARRAY_CONTAINS), BuiltConditionPredicate.CONTAINS, new JsonTerm(getHashValue(new DocValueHasher(), getPath(), value)))); } @@ -131,7 +131,7 @@ public List getAll() { this.collectionIndexUsage.arrayContainsTag = true; ninResults.add( BuiltCondition.of( - ConditionLHS.column(DocumentConstants.Columns.DATA_CONTAINS_COLUMN_NAME), + ConditionLHS.column(SuperShreddingMetadata.Names.ARRAY_CONTAINS), BuiltConditionPredicate.NOT_CONTAINS, new JsonTerm(getHashValue(new DocValueHasher(), getPath(), value)))); } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/filters/collection/MapCollectionFilter.java b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/filters/collection/MapCollectionFilter.java index e3e8b1bb8c..0e859554a2 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/filters/collection/MapCollectionFilter.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/filters/collection/MapCollectionFilter.java @@ -1,11 +1,11 @@ package io.stargate.sgv2.jsonapi.service.operation.filters.collection; -import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants; import io.stargate.sgv2.jsonapi.exception.FilterException; import io.stargate.sgv2.jsonapi.service.operation.builder.BuiltCondition; import io.stargate.sgv2.jsonapi.service.operation.builder.BuiltConditionPredicate; import 
io.stargate.sgv2.jsonapi.service.operation.builder.ConditionLHS; import io.stargate.sgv2.jsonapi.service.operation.builder.JsonTerm; +import io.stargate.sgv2.jsonapi.service.schema.collections.spec.SuperShreddingMetadata; import io.stargate.sgv2.jsonapi.service.shredding.collections.DocValueHasher; import java.util.Map; import java.util.Objects; @@ -90,12 +90,12 @@ public BuiltCondition get() { return switch (operator) { case EQ -> BuiltCondition.of( - ConditionLHS.column(DocumentConstants.Columns.DATA_CONTAINS_COLUMN_NAME), + ConditionLHS.column(SuperShreddingMetadata.Names.ARRAY_CONTAINS), BuiltConditionPredicate.CONTAINS, new JsonTerm(getHashValue(new DocValueHasher(), key, value))); case NE -> BuiltCondition.of( - ConditionLHS.column(DocumentConstants.Columns.DATA_CONTAINS_COLUMN_NAME), + ConditionLHS.column(SuperShreddingMetadata.Names.ARRAY_CONTAINS), BuiltConditionPredicate.NOT_CONTAINS, new JsonTerm(getHashValue(new DocValueHasher(), key, value))); case MAP_EQUALS -> diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/filters/collection/MatchCollectionFilter.java b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/filters/collection/MatchCollectionFilter.java index 710caef278..64a007cf42 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/filters/collection/MatchCollectionFilter.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/filters/collection/MatchCollectionFilter.java @@ -2,11 +2,11 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.JsonNodeFactory; -import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants; import io.stargate.sgv2.jsonapi.service.operation.builder.BuiltCondition; import io.stargate.sgv2.jsonapi.service.operation.builder.BuiltConditionPredicate; import io.stargate.sgv2.jsonapi.service.operation.builder.ConditionLHS; import io.stargate.sgv2.jsonapi.service.operation.builder.JsonTerm; +import 
io.stargate.sgv2.jsonapi.service.schema.collections.spec.SuperShreddingMetadata; import java.util.Objects; import java.util.Optional; @@ -23,7 +23,7 @@ public MatchCollectionFilter(String path, String value) { @Override public BuiltCondition get() { return BuiltCondition.of( - ConditionLHS.column(DocumentConstants.Columns.LEXICAL_INDEX_COLUMN_NAME), + ConditionLHS.column(SuperShreddingMetadata.Names.QUERY_LEXICAL_VALUE), BuiltConditionPredicate.TEXT_SEARCH, new JsonTerm(value)); } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/SchemaObjectFactory.java b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/SchemaObjectFactory.java index a23af83a88..c5784d70ef 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/SchemaObjectFactory.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/SchemaObjectFactory.java @@ -11,7 +11,7 @@ import io.stargate.sgv2.jsonapi.service.cqldriver.CQLSessionCache; import io.stargate.sgv2.jsonapi.service.cqldriver.executor.*; import io.stargate.sgv2.jsonapi.service.schema.collections.CollectionSchemaObject; -import io.stargate.sgv2.jsonapi.service.schema.collections.CollectionTableMatcher; +import io.stargate.sgv2.jsonapi.service.schema.collections.spec.SuperShreddingTablePredicate; import io.stargate.sgv2.jsonapi.service.schema.tables.TableBasedSchemaObject; import io.stargate.sgv2.jsonapi.service.schema.tables.TableSchemaObject; import io.stargate.sgv2.jsonapi.util.CqlIdentifierUtil; @@ -24,8 +24,8 @@ public class SchemaObjectFactory implements SchemaObjectCache.SchemaObjectFactory { private static final Logger LOGGER = LoggerFactory.getLogger(SchemaObjectFactory.class); - private static final CollectionTableMatcher IS_COLLECTION_PREDICATE = - new CollectionTableMatcher(); + private static final SuperShreddingTablePredicate IS_COLLECTION_PREDICATE = + new SuperShreddingTablePredicate(); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); diff --git 
a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/CollectionSchemaObject.java b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/CollectionSchemaObject.java index 7be5777d5b..fc2bdea4af 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/CollectionSchemaObject.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/CollectionSchemaObject.java @@ -16,7 +16,6 @@ import io.stargate.sgv2.jsonapi.api.model.command.impl.VectorizeConfig; import io.stargate.sgv2.jsonapi.api.request.RequestContext; import io.stargate.sgv2.jsonapi.api.request.tenant.Tenant; -import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants; import io.stargate.sgv2.jsonapi.config.constants.TableCommentConstants; import io.stargate.sgv2.jsonapi.config.constants.VectorConstants; import io.stargate.sgv2.jsonapi.exception.DatabaseException; @@ -26,6 +25,7 @@ import io.stargate.sgv2.jsonapi.service.schema.*; import io.stargate.sgv2.jsonapi.service.schema.CollectionSchemaVersion; import io.stargate.sgv2.jsonapi.service.schema.SchemaHolder; +import io.stargate.sgv2.jsonapi.service.schema.collections.spec.SuperShreddingMetadata; import io.stargate.sgv2.jsonapi.service.schema.tables.TableBasedSchemaObject; import java.util.List; import java.util.Map; @@ -134,7 +134,7 @@ public static CollectionSchemaObject getCollectionSettings( // get vector column final Optional vectorColumn = - table.getColumn(DocumentConstants.Columns.VECTOR_SEARCH_INDEX_COLUMN_NAME); + table.getColumn(SuperShreddingMetadata.Names.QUERY_VECTOR_VALUE); boolean vectorEnabled = vectorColumn.isPresent(); final String comment = (String) table.getOptions().get(CqlIdentifier.fromInternal("comment")); @@ -145,7 +145,7 @@ public static CollectionSchemaObject getCollectionSettings( IndexMetadata vectorIndex = null; Map indexMap = table.getIndexes(); for (CqlIdentifier key : indexMap.keySet()) { - if 
(key.asInternal().endsWith(DocumentConstants.Columns.VECTOR_SEARCH_INDEX_COLUMN_NAME)) { + if (key.asInternal().endsWith(SuperShreddingMetadata.Names.QUERY_VECTOR_VALUE)) { vectorIndex = indexMap.get(key); break; } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/CollectionTableMatcher.java b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/CollectionTableMatcher.java deleted file mode 100644 index 3f46ba1dac..0000000000 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/CollectionTableMatcher.java +++ /dev/null @@ -1,183 +0,0 @@ -package io.stargate.sgv2.jsonapi.service.schema.collections; - -import com.datastax.oss.driver.api.core.CqlIdentifier; -import com.datastax.oss.driver.api.core.metadata.schema.ClusteringOrder; -import com.datastax.oss.driver.api.core.metadata.schema.ColumnMetadata; -import com.datastax.oss.driver.api.core.metadata.schema.TableMetadata; -import com.datastax.oss.driver.internal.core.type.PrimitiveType; -import com.datastax.oss.protocol.internal.ProtocolConstants; -import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants; -import java.util.Collection; -import java.util.List; -import java.util.Map; -import java.util.function.Predicate; - -/** Simple class that can check if table is a matching jsonapi table. 
*/ -public class CollectionTableMatcher implements Predicate { - - private final Predicate primaryKeyPredicate; - - private final Predicate columnsPredicate; - - private final Predicate columnsPredicateVector; - - public CollectionTableMatcher() { - primaryKeyPredicate = - new CqlColumnMatcher.Tuple( - CqlIdentifier.fromInternal("key"), - new PrimitiveType(ProtocolConstants.DataType.TINYINT), - new PrimitiveType(ProtocolConstants.DataType.VARCHAR)); - columnsPredicate = - new CqlColumnMatcher.BasicType( - CqlIdentifier.fromInternal("tx_id"), - new PrimitiveType(ProtocolConstants.DataType.TIMEUUID)) - .or( - new CqlColumnMatcher.Tuple( - CqlIdentifier.fromInternal("key"), - new PrimitiveType(ProtocolConstants.DataType.TINYINT), - new PrimitiveType(ProtocolConstants.DataType.VARCHAR))) - .or( - new CqlColumnMatcher.BasicType( - CqlIdentifier.fromInternal("doc_json"), - new PrimitiveType(ProtocolConstants.DataType.VARCHAR))) - .or( - new CqlColumnMatcher.Set( - CqlIdentifier.fromInternal("exist_keys"), - new PrimitiveType(ProtocolConstants.DataType.VARCHAR))) - .or( - new CqlColumnMatcher.Map( - CqlIdentifier.fromInternal("array_size"), - new PrimitiveType(ProtocolConstants.DataType.VARCHAR), - new PrimitiveType(ProtocolConstants.DataType.INT))) - .or( - new CqlColumnMatcher.Set( - CqlIdentifier.fromInternal("array_contains"), - new PrimitiveType(ProtocolConstants.DataType.VARCHAR))) - .or( - new CqlColumnMatcher.Map( - CqlIdentifier.fromInternal("query_bool_values"), - new PrimitiveType(ProtocolConstants.DataType.VARCHAR), - new PrimitiveType(ProtocolConstants.DataType.TINYINT))) - .or( - new CqlColumnMatcher.Map( - CqlIdentifier.fromInternal("query_dbl_values"), - new PrimitiveType(ProtocolConstants.DataType.VARCHAR), - new PrimitiveType(ProtocolConstants.DataType.DECIMAL))) - .or( - new CqlColumnMatcher.Map( - CqlIdentifier.fromInternal( - DocumentConstants.Columns.QUERY_TEXT_MAP_COLUMN_NAME), - new PrimitiveType(ProtocolConstants.DataType.VARCHAR), - new 
PrimitiveType(ProtocolConstants.DataType.VARCHAR))) - .or( - new CqlColumnMatcher.Map( - CqlIdentifier.fromInternal("query_timestamp_values"), - new PrimitiveType(ProtocolConstants.DataType.VARCHAR), - new PrimitiveType(ProtocolConstants.DataType.TIMESTAMP))) - .or( - new CqlColumnMatcher.Set( - CqlIdentifier.fromInternal("query_null_values"), - new PrimitiveType(ProtocolConstants.DataType.VARCHAR))) - .or( - new CqlColumnMatcher.BasicType( - CqlIdentifier.fromInternal(DocumentConstants.Columns.LEXICAL_INDEX_COLUMN_NAME), - new PrimitiveType(ProtocolConstants.DataType.VARCHAR))); - - // TODO: do not duplicate all of the code above below here, just add one extra predicate if we - // need to test for a vector. - columnsPredicateVector = - new CqlColumnMatcher.BasicType( - CqlIdentifier.fromInternal("tx_id"), - new PrimitiveType(ProtocolConstants.DataType.TIMEUUID)) - .or( - new CqlColumnMatcher.Tuple( - CqlIdentifier.fromInternal("key"), - new PrimitiveType(ProtocolConstants.DataType.TINYINT), - new PrimitiveType(ProtocolConstants.DataType.VARCHAR))) - .or( - new CqlColumnMatcher.BasicType( - CqlIdentifier.fromInternal("doc_json"), - new PrimitiveType(ProtocolConstants.DataType.VARCHAR))) - .or( - new CqlColumnMatcher.Set( - CqlIdentifier.fromInternal("exist_keys"), - new PrimitiveType(ProtocolConstants.DataType.VARCHAR))) - .or( - new CqlColumnMatcher.Map( - CqlIdentifier.fromInternal("array_size"), - new PrimitiveType(ProtocolConstants.DataType.VARCHAR), - new PrimitiveType(ProtocolConstants.DataType.INT))) - .or( - new CqlColumnMatcher.Set( - CqlIdentifier.fromInternal("array_contains"), - new PrimitiveType(ProtocolConstants.DataType.VARCHAR))) - .or( - new CqlColumnMatcher.Map( - CqlIdentifier.fromInternal("query_bool_values"), - new PrimitiveType(ProtocolConstants.DataType.VARCHAR), - new PrimitiveType(ProtocolConstants.DataType.TINYINT))) - .or( - new CqlColumnMatcher.Map( - CqlIdentifier.fromInternal("query_dbl_values"), - new 
PrimitiveType(ProtocolConstants.DataType.VARCHAR), - new PrimitiveType(ProtocolConstants.DataType.DECIMAL))) - .or( - new CqlColumnMatcher.Map( - CqlIdentifier.fromInternal( - DocumentConstants.Columns.QUERY_TEXT_MAP_COLUMN_NAME), - new PrimitiveType(ProtocolConstants.DataType.VARCHAR), - new PrimitiveType(ProtocolConstants.DataType.VARCHAR))) - .or( - new CqlColumnMatcher.Map( - CqlIdentifier.fromInternal("query_timestamp_values"), - new PrimitiveType(ProtocolConstants.DataType.VARCHAR), - new PrimitiveType(ProtocolConstants.DataType.TIMESTAMP))) - .or( - new CqlColumnMatcher.Set( - CqlIdentifier.fromInternal("query_null_values"), - new PrimitiveType(ProtocolConstants.DataType.VARCHAR))) - .or( - new CqlColumnMatcher.BasicType( - CqlIdentifier.fromInternal(DocumentConstants.Columns.LEXICAL_INDEX_COLUMN_NAME), - new PrimitiveType(ProtocolConstants.DataType.VARCHAR))) - .or( - new CqlColumnMatcher.Vector( - CqlIdentifier.fromInternal("query_vector_value"), - new PrimitiveType(ProtocolConstants.DataType.FLOAT))); - } - - /** - * Tests if the given table is a valid jsonapi table. - * - * @param cqlTable the table - * @return Returns true only if all the columns in the table correspond to the data-api table - * schema. 
- */ - @Override - public boolean test(TableMetadata cqlTable) { - // null safety - if (null == cqlTable) { - return false; - } - - // partition columns - List partitionColumns = cqlTable.getPartitionKey(); - if (partitionColumns.size() != 1 || !partitionColumns.stream().allMatch(primaryKeyPredicate)) { - return false; - } - - // clustering columns - Map clusteringColumns = cqlTable.getClusteringColumns(); - if (clusteringColumns.size() != 0) { - return false; - } - - Collection columns = cqlTable.getColumns().values(); - if (!(columns.stream().allMatch(columnsPredicate) - || columns.stream().allMatch(columnsPredicateVector))) { - return false; - } - - return true; - } -} diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/CqlColumnMatcher.java b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/CqlColumnMatcher.java deleted file mode 100644 index 13cc00f290..0000000000 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/CqlColumnMatcher.java +++ /dev/null @@ -1,115 +0,0 @@ -package io.stargate.sgv2.jsonapi.service.schema.collections; - -import com.datastax.oss.driver.api.core.CqlIdentifier; -import com.datastax.oss.driver.api.core.metadata.schema.ColumnMetadata; -import com.datastax.oss.driver.api.core.type.*; -import java.util.Arrays; -import java.util.Objects; -import java.util.function.Predicate; - -/** Interface for matching a CQL column name and type. */ -public interface CqlColumnMatcher extends Predicate { - - /** - * @return Column name for the matcher. - */ - CqlIdentifier name(); - - /** - * @return If column type is matching. - */ - boolean typeMatches(ColumnMetadata columnSpec); - - default boolean test(ColumnMetadata columnSpec) { - return Objects.equals(columnSpec.getName(), name()) && typeMatches(columnSpec); - } - - /** - * Implementation that supports basic column types. 
- * - * @param name column name - * @param type basic type - */ - record BasicType(CqlIdentifier name, DataType type) implements CqlColumnMatcher { - - @Override - public boolean typeMatches(ColumnMetadata columnSpec) { - return Objects.equals(columnSpec.getType(), type); - } - } - - /** - * Implementation that supports map column type. Only basic values are supported as key/value. - * - * @param name column name - * @param keyType map key type - * @param valueType map value type - */ - record Map(CqlIdentifier name, DataType keyType, DataType valueType) implements CqlColumnMatcher { - - @Override - public boolean typeMatches(ColumnMetadata columnSpec) { - DataType type = columnSpec.getType(); - if (!(type instanceof MapType map)) { - return false; - } - - return Objects.equals(map.getKeyType(), keyType) - && Objects.equals(map.getValueType(), valueType); - } - } - - /** - * Implementation that supports tuple column type. Only basic values are supported as elements. - * - * @param name column name - * @param elements types of elements in the tuple - */ - record Tuple(CqlIdentifier name, DataType... elements) implements CqlColumnMatcher { - - @Override - public boolean typeMatches(ColumnMetadata columnSpec) { - DataType type = columnSpec.getType(); - if (!(type instanceof TupleType)) { - return false; - } - - TupleType tuple = (TupleType) type; - java.util.List elementTypes = tuple.getComponentTypes(); - return Objects.equals(elementTypes, Arrays.asList(elements)); - } - } - - /** - * Implementation that supports set column type. Only basic values are supported as elements. 
- * - * @param name column name - * @param elementType type of elements in the set - */ - record Set(CqlIdentifier name, DataType elementType) implements CqlColumnMatcher { - - @Override - public boolean typeMatches(ColumnMetadata columnSpec) { - DataType type = columnSpec.getType(); - if (!(type instanceof SetType)) { - return false; - } - - SetType set = (SetType) type; - return Objects.equals(set.getElementType(), elementType); - } - } - - record Vector(CqlIdentifier name, DataType subtype) implements CqlColumnMatcher { - @Override - public boolean typeMatches(ColumnMetadata columnSpec) { - DataType type = columnSpec.getType(); - if (!(type instanceof VectorType)) { - return false; - } - - VectorType vector = (VectorType) type; - return Objects.equals(vector.getElementType(), subtype); - } - } -} diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/spec/SuperShreddingBinding.java b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/spec/SuperShreddingBinding.java new file mode 100644 index 0000000000..69e4f35403 --- /dev/null +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/spec/SuperShreddingBinding.java @@ -0,0 +1,118 @@ +package io.stargate.sgv2.jsonapi.service.schema.collections.spec; + +import static io.stargate.sgv2.jsonapi.util.StringUtil.isNullOrBlank; + +import com.datastax.oss.driver.api.core.CqlIdentifier; + +/** + * Reusable recipe of the configuration used to create a super-shredding table. + * + *

We often need a way to capture this information, such as when creating a collection or + * checking the collection works as we expect. Has a builder so we can easily use it as part of the + * {@link SuperShreddingBuilder} hierarchy. + */ +public record SuperShreddingBinding( + CqlIdentifier keyspace, + CqlIdentifier collection, + boolean hasVector, + int vectorLength, + String similarityFunction, + String sourceModel, + boolean hasLexical, + String indexAnalyzer) { + + public boolean isVectorDefined() { + if (!hasVector) { + return false; + } + // everything should be defined + if (vectorLength > 0 && !isNullOrBlank(similarityFunction) && !isNullOrBlank(sourceModel)) { + return true; + } + // the hasVector flag was set, which can be done when we expect a vector but do not have the + // full spec + // such as when we are building a predicate for ANY collection with a vector, not a specific + // one. + throw new IllegalStateException( + "SuperShreddingDef() - hasVector is set but the vector is not defined, def=%s" + .formatted(this)); + } + + public boolean isLexicalDefined() { + if (!hasLexical) { + return false; + } + if (!isNullOrBlank(indexAnalyzer)) { + return true; + } + // same idea as isVectorDefined() + throw new IllegalStateException( + "SuperShreddingDef() - hasLexical is set but the lexical index is not defined, def=%s" + .formatted(this)); + } + + public boolean hasAnyOptional() { + return hasVector() || hasLexical(); + } + + public static Builder builder() { + return new Builder(); + } + + public static class Builder { + + private CqlIdentifier keyspace; + private CqlIdentifier collection; + private boolean hasVector = false; + private int vectorLength = 0; + private String similarityFunction; + private String sourceModel; + private boolean hasLexical = false; + private String indexAnalyzer = null; + + public Builder withKeyspace(CqlIdentifier keyspace) { + this.keyspace = keyspace; + return this; + } + + public Builder withCollection(CqlIdentifier
collection) { + this.collection = collection; + return this; + } + + public Builder withAnyVector() { + this.hasVector = true; + return this; + } + + public Builder withVector(int vectorLength, String similarityFunction, String sourceModel) { + this.vectorLength = vectorLength; + this.similarityFunction = similarityFunction; + this.sourceModel = sourceModel; + return withAnyVector(); + } + + public Builder withAnyLexical() { + this.hasLexical = true; + return this; + } + + public Builder withLexical(String indexAnalyzer) { + this.indexAnalyzer = indexAnalyzer; + this.hasLexical = true; + return withAnyLexical(); + } + + public SuperShreddingBinding build() { + return new SuperShreddingBinding( + keyspace, + collection, + hasVector, + vectorLength, + similarityFunction, + sourceModel, + hasLexical, + indexAnalyzer); + } + } +} diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/spec/SuperShreddingBuilder.java b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/spec/SuperShreddingBuilder.java new file mode 100644 index 0000000000..600db55458 --- /dev/null +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/spec/SuperShreddingBuilder.java @@ -0,0 +1,201 @@ +package io.stargate.sgv2.jsonapi.service.schema.collections.spec; + +import com.datastax.oss.driver.api.core.CqlIdentifier; +import com.datastax.oss.driver.api.core.metadata.schema.Describable; +import com.datastax.oss.driver.api.core.metadata.schema.TableMetadata; +import com.google.common.annotations.VisibleForTesting; +import io.stargate.sgv2.jsonapi.service.schema.collections.spec.SuperShreddingMetadata.IndexDef; +import io.stargate.sgv2.jsonapi.service.schema.collections.spec.SuperShreddingMetadata.IndexDefs; +import java.util.*; +import java.util.stream.Stream; + +/** + * General pattern for defining the properties of a super-shredding "table" and then building + * objects from that. + * + *

Building these objects is tied up with how we create the statements to build a table, how we + * build a predicate to test for a table, and how we build test data. The goal is to do all of this without repeating the table + * cql too many times or creating fragile tests that depend on cql strings. See the test class + * SuperShreddingBuilderTest + * + *

From the logical representation on this builder, we can create: + * + *

    + *
  • cql for testing (below) via {@link #cql()} + *
  • {@link TableMetadata} and {@link + * com.datastax.oss.driver.api.core.metadata.schema.IndexMetadata} for testing (below), via + * {@link #metadata()} + *
  • {@link com.datastax.oss.driver.api.core.cql.SimpleStatement} for creating a table at run + * time via TODO + *
  • {@link SuperShreddingTablePredicate} for runtime testing if TableMetadata represents a + * super shredding table via {@link #predicate()} + *
+ * + *

The builder creates a list of {@link SuperShreddingComponent} which can be either a Table or + * the Index(es) needed. The different builders use different types for these components. + * + * @param Type of the object that represents the Super Shredding Component, such as string for + * cql + * @param Type of the builder itself, so that we can return a reference to this builder. + */ +public abstract class SuperShreddingBuilder> { + + // The comment for a table is a member of the table "options" and must have a + // CqlIdentifier for a name + protected static final CqlIdentifier TABLE_OPTION_COMMENT_IDENTIFIER = + CqlIdentifier.fromInternal("comment"); + + protected final SuperShreddingBinding.Builder bindingBuilder = SuperShreddingBinding.builder(); + // created in build(), private to force use of binding() accessor to check null + private SuperShreddingBinding binding; + + protected boolean ifNotExists = true; + protected String comment; + + /** Gets a new {@link SuperShreddingCQLBuilder} that can be used to build a cql string. */ + public static SuperShreddingCQLBuilder cql() { + return new SuperShreddingCQLBuilder(); + } + + /** + * Get a new {@link SuperShreddingMetadataBuilder} that can be used to build {@link TableMetadata} + * and {@link com.datastax.oss.driver.api.core.metadata.schema.IndexMetadata} objects. + */ + public static SuperShreddingMetadataBuilder metadata() { + return new SuperShreddingMetadataBuilder(); + } + + /** + * Get a new {@link SuperShreddingPredicateBuilder} that can be used to build a {@link + * SuperShreddingTablePredicate} + */ + public static SuperShreddingPredicateBuilder predicate() { + return new SuperShreddingPredicateBuilder(); + } + + /** Implementors must override this method to return a reference to this builder. */ + protected abstract U self(); + + /** + * Implementations must implement and create all the components needed for the super shredding + * table.
+ */ + protected abstract List> buildInternal(); + + protected SuperShreddingBinding binding() { + Objects.requireNonNull(binding, "binding must be set by build()"); + return binding; + } + + public U withIfNotExists(boolean ifNotExists) { + this.ifNotExists = ifNotExists; + return self(); + } + + public U withKeyspace(CqlIdentifier keyspace) { + bindingBuilder.withKeyspace(keyspace); + return self(); + } + + public U withCollection(CqlIdentifier collection) { + bindingBuilder.withCollection(collection); + return self(); + } + + public U withVector(int vectorLength, String similarityFunction, String sourceModel) { + bindingBuilder.withVector(vectorLength, similarityFunction, sourceModel); + return self(); + } + + public U withLexical(String indexAnalyzer) { + bindingBuilder.withLexical(indexAnalyzer); + return self(); + } + + public U withComment(String comment) { + this.comment = comment; + return self(); + } + + /** + * Builds all the components for the table, and returns only the value of the (first) Table + * component. Use this to quickly get just the (say) "create table" cql. + */ + public T buildTableOnly() { + return build().stream() + .filter(c -> c.type() == SuperShreddingComponentType.TABLE) + .map(SuperShreddingComponent::value) + .findFirst() + .orElse(null); + } + + /** + * Builds all the components for this super shredding table, the table and the indexes as defined + * in the builder. + * + *

NOTE: to implementors, implement {@link #buildInternal()} rather than overriding this method, so the binding is set before the components are built. + * + * @return List of {@link SuperShreddingComponent}s needed for the super shredding table. + */ + public List> build() { + binding = bindingBuilder.build(); + return buildInternal(); + } + + /** The type of component that is being built for the super shredding table */ + public enum SuperShreddingComponentType { + TABLE, + INDEX + } + + /** + * Holds a component of a super shredding table, such as the table or index. These are created by + * the {@link SuperShreddingBuilder} implementations. + * + * @param identifier the name, table name or index name. + * @param type the type of component, either table or index + * @param value the value of the component, such as the table definition or index definition, or + * string + * @param The type of the value of the component, e.g. String or TableMetadata + */ + public record SuperShreddingComponent( + CqlIdentifier identifier, SuperShreddingComponentType type, T value) { + + /** Does its best to get CQL from whatever type of value we have. For testing. */ + @VisibleForTesting + String asCql() { + var cql = + switch (value) { + case Describable d -> d.describe(false).trim(); + case String s -> s.trim(); + default -> + throw new IllegalArgumentException("Unsupported value type: " + value.getClass()); + }; + // there is a small bug in the driver IndexMetadata where it does not append ";" for a + // CUSTOM INDEX, just check so they are all the same. + return cql.endsWith(";") ? cql : cql + ";"; + } + } + + /** + * Gets the index definitions and options for the super shredding table based on {@link + * SuperShreddingBinding} + * + *

This pulls the options from the {@link SuperShreddingBinding} and puts them into maps of the + * values each index definition needs + */ + protected Stream indexDefs(SuperShreddingBinding binding) { + + Stream.Builder builder = Stream.builder(); + + IndexDefs.REQUIRED.forEach(builder); + if (binding.isVectorDefined()) { + builder.add(IndexDefs.QUERY_VECTOR_VALUE); + } + + if (binding.isLexicalDefined()) { + builder.add(IndexDefs.QUERY_LEXICAL_VALUE); + } + return builder.build(); + } +} diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/spec/SuperShreddingCQL.java b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/spec/SuperShreddingCQL.java new file mode 100644 index 0000000000..dd42061bca --- /dev/null +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/spec/SuperShreddingCQL.java @@ -0,0 +1,260 @@ +package io.stargate.sgv2.jsonapi.service.schema.collections.spec; + +import static io.stargate.sgv2.jsonapi.service.schema.collections.spec.SuperShreddingMetadata.listDifference; + +import io.stargate.sgv2.jsonapi.service.schema.collections.spec.SuperShreddingMetadata.IndexDef; +import io.stargate.sgv2.jsonapi.service.schema.collections.spec.SuperShreddingMetadata.IndexDefs; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.function.Function; +import java.util.stream.Collectors; +import org.apache.commons.text.StringSubstitutor; + +/** + * Defines the dynamic CQL built by the {@link SuperShreddingCQLBuilder}. DO NOT MAKE changes to the + * CQL without testing, in many cases it has spaces and capitalization specifically designed to + * match what is created by parts of the driver. + * + *

NOTE: we do not use this in production, where we use the driver schema builder, this is + * for testing. See {@link SuperShreddingBuilder} for the testing process. + * + *

The tempalates use the {@link StringSubstitutor} and in particular use the idea of a default + * if the key is not present. ${VECTOR_COLUMN:-} is an example, if not present an empty + * string is put in place of the include. + */ +public interface SuperShreddingCQL { + + /** Collapses all reg ex white space characters to a single space, so we can compare strings. */ + static String collapseWhitespace(String s) { + return s.replaceAll("\\s+", " ").trim(); + } + + /** CQL templates for a dynamic super shredding table. */ + interface CQL { + // NOTE: frozen<> included on tuple type because the auto gen for TableMetadata will + // result in TupleType adding frozen, because all tuples are implicitly frozen + // this has no real effect. + // NOTE: pls keep the order following the SuperShreddingMetadata + String CREATE_TABLE_TEMPLATE = + """ + CREATE TABLE ${IF_NOT_EXISTS:-} ${KEYSPACE}.${TABLE} ( + "key" frozen>, + "tx_id" timeuuid, + "doc_json" text, + "exist_keys" set, + "array_size" map, + "array_contains" set, + "query_bool_values" map, + "query_dbl_values" map, + "query_text_values" map, + "query_timestamp_values" map, + "query_null_values" set, + ${VECTOR_COLUMN:-} + ${LEXICAL_COLUMN:-} + PRIMARY KEY ("key") + )${COMMENT_CLAUSE:-}; + """; + + String TABLE_VECTOR_COLUMN_TEMPLATE = + """ + "query_vector_value" vector,"""; + + String TABLE_LEXICAL_COLUMN_TEMPLATE = + """ + "query_lexical_value" text,"""; + + String TABLE_COMMENT_CLAUSE_TEMPLATE = + """ + WITH comment = '${COMMENT}'\ + """; + + String INDEX_EXIST_KEYS_TEMPLATE = + """ + CREATE CUSTOM INDEX ${IF_NOT_EXISTS:-} "${TABLE}_exist_keys" + ON "${KEYSPACE}"."${TABLE}" (values("exist_keys")) + USING 'StorageAttachedIndex'; + """; + + String INDEX_ARRAY_SIZE_TEMPLATE = + """ + CREATE CUSTOM INDEX ${IF_NOT_EXISTS:-} "${TABLE}_array_size" + ON "${KEYSPACE}"."${TABLE}" (entries("array_size")) + USING 'StorageAttachedIndex'; + """; + + String INDEX_ARRAY_CONTAINS_TEMPLATE = + """ + CREATE CUSTOM INDEX 
${IF_NOT_EXISTS:-} "${TABLE}_array_contains" + ON "${KEYSPACE}"."${TABLE}" (values("array_contains")) + USING 'StorageAttachedIndex'; + """; + + String INDEX_QUERY_BOOLEAN_VALUES_TEMPLATE = + """ + CREATE CUSTOM INDEX ${IF_NOT_EXISTS:-} "${TABLE}_query_bool_values" + ON "${KEYSPACE}"."${TABLE}" (entries("query_bool_values")) + USING 'StorageAttachedIndex'; + """; + + String INDEX_QUERY_DBL_VALUES_TEMPLATE = + """ + CREATE CUSTOM INDEX ${IF_NOT_EXISTS:-} "${TABLE}_query_dbl_values" + ON "${KEYSPACE}"."${TABLE}" (entries("query_dbl_values")) + USING 'StorageAttachedIndex'; + """; + + String INDEX_QUERY_TEXT_VALUES_TEMPLATE = + """ + CREATE CUSTOM INDEX ${IF_NOT_EXISTS:-} "${TABLE}_query_text_values" + ON "${KEYSPACE}"."${TABLE}" (entries("query_text_values")) + USING 'StorageAttachedIndex'; + """; + + String INDEX_QUERY_TIMESTAMP_VALUES_TEMPLATE = + """ + CREATE CUSTOM INDEX ${IF_NOT_EXISTS:-} "${TABLE}_query_timestamp_values" + ON "${KEYSPACE}"."${TABLE}" (entries("query_timestamp_values")) + USING 'StorageAttachedIndex'; + """; + + String INDEX_QUERY_NULL_VALUES_TEMPLATE = + """ + CREATE CUSTOM INDEX ${IF_NOT_EXISTS:-} "${TABLE}_query_null_values" + ON "${KEYSPACE}"."${TABLE}" (values("query_null_values")) + USING 'StorageAttachedIndex'; + """; + + String INDEX_QUERY_VECTOR_VALUE_TEMPLATE = + """ + CREATE CUSTOM INDEX ${IF_NOT_EXISTS:-} "${TABLE}_query_vector_value" + ON "${KEYSPACE}"."${TABLE}" ("query_vector_value") + USING 'StorageAttachedIndex' + ${VECTOR_WITH_OPTIONS:-}; + """; + + String VECTOR_WITH_OPTIONS_TEMPLATE = + """ + WITH OPTIONS = { 'similarity_function' : '${similarity_function}', 'source_model' : '${source_model}'} + """ + .trim(); + + String INDEX_QUERY_LEXICAL_VALUE_TEMPLATE = + """ + CREATE CUSTOM INDEX ${IF_NOT_EXISTS:-} "${TABLE}_query_lexical_value" + ON "${KEYSPACE}"."${TABLE}" ("query_lexical_value") + USING 'StorageAttachedIndex' + ${LEXICAL_WITH_OPTIONS:-}; + """; + + String LEXICAL_WITH_OPTIONS_TEMPLATE = + """ + WITH OPTIONS = { 
'index_analyzer' : '${index_analyzer}'} + """ + .trim(); + + List ALL_INDEXES = + List.of( + INDEX_EXIST_KEYS_TEMPLATE, + INDEX_ARRAY_SIZE_TEMPLATE, + INDEX_ARRAY_CONTAINS_TEMPLATE, + INDEX_QUERY_BOOLEAN_VALUES_TEMPLATE, + INDEX_QUERY_DBL_VALUES_TEMPLATE, + INDEX_QUERY_TEXT_VALUES_TEMPLATE, + INDEX_QUERY_TIMESTAMP_VALUES_TEMPLATE, + INDEX_QUERY_NULL_VALUES_TEMPLATE, + INDEX_QUERY_VECTOR_VALUE_TEMPLATE, + INDEX_QUERY_LEXICAL_VALUE_TEMPLATE); + + List OPTIONAL_INDEXES = + List.of(INDEX_QUERY_VECTOR_VALUE_TEMPLATE, INDEX_QUERY_LEXICAL_VALUE_TEMPLATE); + List REQUIRED_INDEXES = listDifference(ALL_INDEXES, OPTIONAL_INDEXES); + } + + /** + * Holder for a template that generates a clause, such as `VECTOR_WITH_OPTIONS_TEMPLATE` above. + * + * @param template The template we need to run to get the value for the clause. + * @param toKeyName the key the result of the template should be assigned to when used to format + * the CREATE TABLE statement. + */ + record ClauseTemplate(String template, String toKeyName) { + + public Optional format(Map values) { + if (values == null || values.isEmpty()) { + return Optional.empty(); + } + return Optional.of(new StringSubstitutor(values).replace(template)); + } + } + + /** + * Holder to associate the definition of the index from {@link IndexDefs} with the CQL here to + * create it, and optionally the template to make a sub clause for the index. + */ + record IndexCQLAndDef(String cql, IndexDef indexDef, ClauseTemplate clauseTemplate) { + + public IndexCQLAndDef(String cql, IndexDef indexDef) { + this(cql, indexDef, null); + } + } + + /** + * Associates the CQL defined above with the index from {@link IndexDefs} it is designed to + * create. 
+ */ + interface IndexCQLAndDefs { + + // Required Indexes + IndexCQLAndDef INDEX_EXIST_KEYS = + new IndexCQLAndDef(CQL.INDEX_EXIST_KEYS_TEMPLATE, IndexDefs.EXIST_KEYS); + IndexCQLAndDef INDEX_ARRAY_SIZE = + new IndexCQLAndDef(CQL.INDEX_ARRAY_SIZE_TEMPLATE, IndexDefs.ARRAY_SIZE); + IndexCQLAndDef INDEX_ARRAY_CONTAINS = + new IndexCQLAndDef(CQL.INDEX_ARRAY_CONTAINS_TEMPLATE, IndexDefs.ARRAY_CONTAINS); + IndexCQLAndDef INDEX_QUERY_BOOL_VALUES = + new IndexCQLAndDef(CQL.INDEX_QUERY_BOOLEAN_VALUES_TEMPLATE, IndexDefs.QUERY_BOOLEAN_VALUES); + IndexCQLAndDef INDEX_QUERY_DBL_VALUES = + new IndexCQLAndDef(CQL.INDEX_QUERY_DBL_VALUES_TEMPLATE, IndexDefs.QUERY_DOUBLE_VALUES); + IndexCQLAndDef INDEX_QUERY_TEXT_VALUES = + new IndexCQLAndDef(CQL.INDEX_QUERY_TEXT_VALUES_TEMPLATE, IndexDefs.QUERY_TEXT_VALUES); + IndexCQLAndDef INDEX_QUERY_TIMESTAMP_VALUES = + new IndexCQLAndDef( + CQL.INDEX_QUERY_TIMESTAMP_VALUES_TEMPLATE, IndexDefs.QUERY_TIMESTAMP_VALUES); + IndexCQLAndDef INDEX_QUERY_NULL_VALUES = + new IndexCQLAndDef(CQL.INDEX_QUERY_NULL_VALUES_TEMPLATE, IndexDefs.QUERY_NULL_VALUES); + + // Optional Indexes + IndexCQLAndDef INDEX_QUERY_VECTOR_VALUE = + new IndexCQLAndDef( + CQL.INDEX_QUERY_VECTOR_VALUE_TEMPLATE, + IndexDefs.QUERY_VECTOR_VALUE, + new ClauseTemplate(CQL.VECTOR_WITH_OPTIONS_TEMPLATE, "VECTOR_WITH_OPTIONS")); + + IndexCQLAndDef INDEX_QUERY_LEXICAL_VALUE = + new IndexCQLAndDef( + CQL.INDEX_QUERY_LEXICAL_VALUE_TEMPLATE, + IndexDefs.QUERY_LEXICAL_VALUE, + new ClauseTemplate(CQL.LEXICAL_WITH_OPTIONS_TEMPLATE, "LEXICAL_WITH_OPTIONS")); + + List ALL_INDEXES = + List.of( + INDEX_EXIST_KEYS, + INDEX_ARRAY_SIZE, + INDEX_ARRAY_CONTAINS, + INDEX_QUERY_BOOL_VALUES, + INDEX_QUERY_DBL_VALUES, + INDEX_QUERY_TEXT_VALUES, + INDEX_QUERY_TIMESTAMP_VALUES, + INDEX_QUERY_NULL_VALUES, + INDEX_QUERY_VECTOR_VALUE, + INDEX_QUERY_LEXICAL_VALUE); + List OPTIONAL_INDEXES = + List.of(INDEX_QUERY_VECTOR_VALUE, INDEX_QUERY_LEXICAL_VALUE); + List REQUIRED_INDEXES = listDifference(ALL_INDEXES, 
OPTIONAL_INDEXES); + + Map ALL_INDEXES_BY_INDEX_DEF = + ALL_INDEXES.stream() + .collect(Collectors.toMap(IndexCQLAndDef::indexDef, Function.identity())); + } +} diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/spec/SuperShreddingCQLBuilder.java b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/spec/SuperShreddingCQLBuilder.java new file mode 100644 index 0000000000..335da8b460 --- /dev/null +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/spec/SuperShreddingCQLBuilder.java @@ -0,0 +1,137 @@ +package io.stargate.sgv2.jsonapi.service.schema.collections.spec; + +import static io.stargate.sgv2.jsonapi.service.schema.collections.spec.SuperShreddingCQL.*; +import static io.stargate.sgv2.jsonapi.util.CqlIdentifierUtil.cqlIdentifierToCQL; + +import java.util.*; +import java.util.stream.Stream; +import org.apache.commons.text.StringSubstitutor; + +/** + * A {@link SuperShreddingBuilder} that builds dynamic CQL from the config provided to the builder. + * + *

NOTE: this class is *not* used in production, it is only used by testing. It exists in the + * regular jar because it is easier to keep all the super shredding code in one place. See {@link + * SuperShreddingBuilder} for the testing process. + * + *

Create via {@link SuperShreddingBuilder#cql()} + */ +public class SuperShreddingCQLBuilder + extends SuperShreddingBuilder { + + private boolean collapseWhitespace = true; + + SuperShreddingCQLBuilder() {} + + @Override + protected SuperShreddingCQLBuilder self() { + return this; + } + + public SuperShreddingCQLBuilder withCollapseWhitespace(boolean collapseWhitespace) { + this.collapseWhitespace = collapseWhitespace; + return this; + } + + @Override + public List> buildInternal() { + + List> components = new ArrayList<>(); + components.add( + new SuperShreddingComponent<>( + binding().collection(), SuperShreddingComponentType.TABLE, tableCQL())); + indexCQL().forEach(components::add); + return components; + } + + private String tableCQL() { + + // building out the vars for the CQL templates + Map vars = new HashMap<>(); + if (ifNotExists) { + vars.put("IF_NOT_EXISTS", "IF NOT EXISTS"); + } + vars.put("KEYSPACE", cqlIdentifierToCQL(binding().keyspace())); + vars.put("TABLE", cqlIdentifierToCQL(binding().collection())); + + if (binding().isVectorDefined()) { + vars.put( + "VECTOR_COLUMN", + new StringSubstitutor(Map.of("VECTOR_DIM", binding().vectorLength())) + .replace(CQL.TABLE_VECTOR_COLUMN_TEMPLATE)); + } + + if (binding().isLexicalDefined()) { + vars.put("LEXICAL_COLUMN", CQL.TABLE_LEXICAL_COLUMN_TEMPLATE); + } + + if (comment != null) { + vars.put( + "COMMENT_CLAUSE", + new StringSubstitutor(Map.of("COMMENT", comment)) + .replace(CQL.TABLE_COMMENT_CLAUSE_TEMPLATE)); + } + + var result = new StringSubstitutor(vars).replace(CQL.CREATE_TABLE_TEMPLATE); + return collapseWhitespace ? 
collapseWhitespace(result) : result; + } + + private Stream> indexCQL() { + + // get all the indexes this super shredding table should have + var indexDefs = indexDefs(binding()).toList(); + + // For each of the IndexDef, we need to get the CQL to build it + var cqlAndDefs = + indexDefs.stream() + .map(IndexCQLAndDefs.ALL_INDEXES_BY_INDEX_DEF::get) + .filter(Objects::nonNull) + .toList(); + + // sanity check + if (cqlAndDefs.size() != indexDefs.size()) { + throw new IllegalStateException("cqlAndDefs.size() != indexDefs.size()"); + } + + // Start building up the sub vars we need for all the index cql templates. + Map allIndexVars = new HashMap<>(); + + // For indexes, if the def of the cql index has a clause template (like the config for + // a vector index) we need to get those from the defsAndOptions created from superShreddingDef + // run the clause template, and add the clause to our index vars + for (IndexCQLAndDef cqlAndDef : cqlAndDefs) { + if (cqlAndDef.clauseTemplate() != null) { + // run the template for this clause, and put the result of the template into the + // index vars for all the create index statements. + // e.g. 
look at LEXICAL_WITH_OPTIONS_TEMPLATE + + cqlAndDef + .clauseTemplate() + .format(cqlAndDef.indexDef().indexOptions(binding())) + .ifPresent(clause -> allIndexVars.put(cqlAndDef.clauseTemplate().toKeyName(), clause)); + } + } + + if (ifNotExists) { + allIndexVars.put("IF_NOT_EXISTS", "IF NOT EXISTS"); + } + + // using internal the keyspace and table names because the collection name is + // used as part of the index name, so we dont want quotes on them + // NOTE: INDEXES templates MUST put the quotes on + allIndexVars.put("KEYSPACE", binding().keyspace().asInternal()); + allIndexVars.put("TABLE", binding().collection().asInternal()); + + var substitutor = new StringSubstitutor(allIndexVars); + return cqlAndDefs.stream() + .map( + cqlAndDef -> { + var cql = substitutor.replace(cqlAndDef.cql()); + + return new SuperShreddingComponent<>( + cqlAndDef.indexDef().indexName(binding()), + SuperShreddingComponentType.INDEX, + collapseWhitespace ? collapseWhitespace(cql) : cql); + }); + } +} diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/spec/SuperShreddingMetadata.java b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/spec/SuperShreddingMetadata.java new file mode 100644 index 0000000000..e72294255f --- /dev/null +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/spec/SuperShreddingMetadata.java @@ -0,0 +1,485 @@ +// @formatter:off +package io.stargate.sgv2.jsonapi.service.schema.collections.spec; + +import com.datastax.oss.driver.api.core.CqlIdentifier; +import com.datastax.oss.driver.api.core.cql.SimpleStatement; +import com.datastax.oss.driver.api.core.metadata.schema.ColumnMetadata; +import com.datastax.oss.driver.api.core.metadata.schema.IndexKind; +import com.datastax.oss.driver.api.core.metadata.schema.IndexMetadata; +import com.datastax.oss.driver.api.core.type.DataType; +import com.datastax.oss.driver.api.core.type.DataTypes; +import com.datastax.oss.driver.api.querybuilder.SchemaBuilder; +import
com.datastax.oss.driver.api.querybuilder.schema.CreateTable; +import com.datastax.oss.driver.internal.core.metadata.schema.DefaultColumnMetadata; +import com.datastax.oss.driver.internal.core.metadata.schema.DefaultIndexMetadata; +import com.datastax.oss.driver.internal.querybuilder.schema.DefaultCreateIndex; +import io.stargate.sgv2.jsonapi.config.constants.TableDescConstants; +import io.stargate.sgv2.jsonapi.config.constants.VectorConstants; +import io.stargate.sgv2.jsonapi.service.cqldriver.override.ExtendedCreateIndex; +import io.stargate.sgv2.jsonapi.service.cqldriver.override.ExtendedVectorType; +import io.stargate.sgv2.jsonapi.service.schema.tables.ApiIndexFunction; +import io.stargate.sgv2.jsonapi.service.schema.tables.CQLSAIIndex; +import io.stargate.sgv2.jsonapi.util.ColumnMetadataPredicate; +import java.util.*; +import java.util.stream.Collectors; +import java.util.stream.Stream;import static io.stargate.sgv2.jsonapi.util.StringUtil.isNullOrBlank; + +/** + * Canonical definition of the structure of a super-shredding table, + * that is used in production to make super-shredding tables and test their behavior. + *

+ * NOTE: please keep the columns and indexes in order. We have also excluded + * this class from formatting so we can format for ease of reading. This file makes + * more sense when read top to bottom, as it builds up the ideas. + *

+ *

+ * The objects created by {@link SuperShreddingBuilder}s using this information are then + * tested against CQL from {@link SuperShreddingCQLBuilder}; see the builder and + * SuperShreddingBuilderTest for how we build up the tests. + *

+ */
+public interface SuperShreddingMetadata {
+
+  /**
+   * Returns the items of {@code list1} that are not contained in {@code list2}, keeping the
+   * relative order they have in {@code list1}.
+   *
+   * The result is collected with {@link Collectors#toList()}, which gives no guarantee about
+   * the mutability of the returned list; callers should treat it as read-only and copy it
+   * before changing it.
+   */
+  static List listDifference(List list1, List list2) {
+    return list1.stream().filter(item -> !list2.contains(item)).collect(Collectors.toList());
+  }
+
+  /**
+   * String names of all columns, in the order that we traditionally have them in the collection
+   * table, pls try to keep the order :)
+   * Use the {@link Identifiers} if you want {@link CqlIdentifier}s.
+   */
+  interface Names {
+
+    // Required columns
+    String KEY = "key";
+    String TX_ID = "tx_id";
+    String DOC_JSON = "doc_json";
+    String EXIST_KEYS = "exist_keys";
+    String ARRAY_SIZE = "array_size";
+    String ARRAY_CONTAINS = "array_contains";
+    String QUERY_BOOLEAN_VALUES = "query_bool_values";
+    String QUERY_DOUBLE_VALUES = "query_dbl_values";
+    String QUERY_TEXT_VALUES = "query_text_values"; // old comment > Text map support _id $ne and _id $nin on both atomic value and array element
+    String QUERY_TIMESTAMP_VALUES = "query_timestamp_values";
+    String QUERY_NULL_VALUES = "query_null_values";
+    // Optional columns
+    String QUERY_VECTOR_VALUE = "query_vector_value";
+    String QUERY_LEXICAL_VALUE = "query_lexical_value";
+
+    // Same order as the declarations above, and as ColumnDefs.ALL / Predicates.ALL:
+    // text, timestamp, then null values.
+    List ALL =
+        List.of(
+            KEY,
+            TX_ID,
+            DOC_JSON,
+            EXIST_KEYS,
+            ARRAY_SIZE,
+            ARRAY_CONTAINS,
+            QUERY_BOOLEAN_VALUES,
+            QUERY_DOUBLE_VALUES,
+            QUERY_TEXT_VALUES,
+            QUERY_TIMESTAMP_VALUES,
+            QUERY_NULL_VALUES,
+            QUERY_VECTOR_VALUE,
+            QUERY_LEXICAL_VALUE);
+    List PARTITION_KEY = List.of(KEY);
+    List OPTIONAL = List.of(QUERY_VECTOR_VALUE, QUERY_LEXICAL_VALUE);
+    List REQUIRED = listDifference(ALL, OPTIONAL);
+    List REQUIRED_NON_PK = listDifference(REQUIRED, PARTITION_KEY);
+  }
+
+  /**
+   * {@link CqlIdentifier}s of all columns, in the order that we traditionally have them in
+   * the collection table, pls try to keep the order :)
+   */
+  interface Identifiers {
+
+    // Required columns
+    CqlIdentifier KEY = CqlIdentifier.fromInternal(Names.KEY);
+    CqlIdentifier TX_ID = CqlIdentifier.fromInternal(Names.TX_ID);
+    CqlIdentifier DOC_JSON = CqlIdentifier.fromInternal(Names.DOC_JSON);
+    CqlIdentifier EXIST_KEYS = CqlIdentifier.fromInternal(Names.EXIST_KEYS);
+    CqlIdentifier ARRAY_SIZE = CqlIdentifier.fromInternal(Names.ARRAY_SIZE);
+    CqlIdentifier ARRAY_CONTAINS = CqlIdentifier.fromInternal(Names.ARRAY_CONTAINS);
+    CqlIdentifier QUERY_BOOLEAN_VALUES = CqlIdentifier.fromInternal(Names.QUERY_BOOLEAN_VALUES);
+    CqlIdentifier QUERY_DOUBLE_VALUES = CqlIdentifier.fromInternal(Names.QUERY_DOUBLE_VALUES);
+    CqlIdentifier QUERY_TEXT_VALUES = CqlIdentifier.fromInternal(Names.QUERY_TEXT_VALUES);
+    CqlIdentifier QUERY_TIMESTAMP_VALUES = CqlIdentifier.fromInternal(Names.QUERY_TIMESTAMP_VALUES);
+    CqlIdentifier QUERY_NULL_VALUES = CqlIdentifier.fromInternal(Names.QUERY_NULL_VALUES);
+    // Optional columns
+    CqlIdentifier QUERY_VECTOR_VALUE = CqlIdentifier.fromInternal(Names.QUERY_VECTOR_VALUE);
+    CqlIdentifier QUERY_LEXICAL_VALUE = CqlIdentifier.fromInternal(Names.QUERY_LEXICAL_VALUE);
+
+    // Same order as the declarations above, and as ColumnDefs.ALL / Predicates.ALL:
+    // text, timestamp, then null values.
+    List ALL =
+        List.of(
+            KEY,
+            TX_ID,
+            DOC_JSON,
+            EXIST_KEYS,
+            ARRAY_SIZE,
+            ARRAY_CONTAINS,
+            QUERY_BOOLEAN_VALUES,
+            QUERY_DOUBLE_VALUES,
+            QUERY_TEXT_VALUES,
+            QUERY_TIMESTAMP_VALUES,
+            QUERY_NULL_VALUES,
+            QUERY_VECTOR_VALUE,
+            QUERY_LEXICAL_VALUE);
+    List PARTITION_KEY = List.of(KEY);
+    List OPTIONAL = List.of(QUERY_VECTOR_VALUE, QUERY_LEXICAL_VALUE);
+    List REQUIRED = listDifference(ALL, OPTIONAL);
+    List REQUIRED_NON_PK = listDifference(REQUIRED, PARTITION_KEY);
+  }
+
+  /**
+   * Function for creating the column metadata for a column, only needed with the vector because
+   * we don't know all the info for the column until it is bound to a definition
+   */
+  @FunctionalInterface
+  interface ColumnMetadataFactory {
+    ColumnMetadata columnMetadata(ColumnDef columnDef, SuperShreddingBinding binding);
+  }
+
+  /**
+   * A definition of a column in a super shredding table, which can then be bound to a
+   * super shredding definition to create the ColumnMetadata and schema statements we need
+   * to create a particular table.
+   *

+ * The properties of the record define the general case of a column in super shredding, the methods + * allow objects to be created for the specific case of a specific table. + *

+   */
+  record ColumnDef(CqlIdentifier name, DataType type, ColumnMetadataFactory metadataFactory) {
+
+    /** Creates a definition that uses the default column metadata, i.e. has no factory. */
+    ColumnDef(CqlIdentifier name, DataType type) {
+      this(name, type, null);
+    }
+
+    /**
+     * Builds the {@link ColumnMetadata} for this column in the keyspace and collection of the
+     * binding.
+     *
+     * When no {@code metadataFactory} is set the metadata is built from the record's name and
+     * type, otherwise the factory is called and must return a non null value.
+     *
+     * @param binding the binding that supplies the keyspace and collection, and is handed to the
+     *     factory
+     * @return the column metadata, never null
+     * @throws NullPointerException if the factory returns null
+     */
+    public ColumnMetadata columnMetadata(SuperShreddingBinding binding) {
+      if (metadataFactory == null) {
+        return new DefaultColumnMetadata(binding.keyspace(), binding.collection(), name, type, false);
+      }
+      // the message is only built on failure, and no longer carries an unsubstituted "{}"
+      return Objects.requireNonNull(
+          metadataFactory.columnMetadata(this, binding),
+          () -> "ColumnMetadataFactory returned null for columnDef.name: " + name);
+    }
+
+    /** Adds this column, with the record's name and type, to the create table statement. */
+    public CreateTable addTo(CreateTable createTable) {
+      return createTable.withColumn(name, type);
+    }
+
+    /** Creates a {@link ColumnMetadataPredicate.Basic} for the record's name and type. */
+    public ColumnMetadataPredicate predicate() {
+      return new ColumnMetadataPredicate.Basic(name, type);
+    }
+  }
+
+  /**
+   * The list of {@link ColumnDef} for all the columns in a super shredding table.
+   *

+ * Use the {@link SuperShreddingMetadataBuilder} to build TableMetadata and IndexMetadata, + * use the XXX (TODO:) builder to create statements. + *

+   */
+  interface ColumnDefs {
+
+    // Required columns
+    ColumnDef KEY = new ColumnDef(Identifiers.KEY, DataTypes.tupleOf(DataTypes.TINYINT, DataTypes.TEXT));
+    ColumnDef TX_ID = new ColumnDef(Identifiers.TX_ID, DataTypes.TIMEUUID);
+    ColumnDef DOC_JSON = new ColumnDef(Identifiers.DOC_JSON, DataTypes.TEXT);
+    ColumnDef EXIST_KEYS = new ColumnDef(Identifiers.EXIST_KEYS, DataTypes.setOf(DataTypes.TEXT));
+    ColumnDef ARRAY_SIZE = new ColumnDef(Identifiers.ARRAY_SIZE, DataTypes.mapOf(DataTypes.TEXT, DataTypes.INT));
+    ColumnDef ARRAY_CONTAINS = new ColumnDef(Identifiers.ARRAY_CONTAINS, DataTypes.setOf(DataTypes.TEXT));
+    ColumnDef QUERY_BOOLEAN_VALUES = new ColumnDef(Identifiers.QUERY_BOOLEAN_VALUES, DataTypes.mapOf(DataTypes.TEXT, DataTypes.TINYINT));
+    ColumnDef QUERY_DOUBLE_VALUES = new ColumnDef(Identifiers.QUERY_DOUBLE_VALUES, DataTypes.mapOf(DataTypes.TEXT, DataTypes.DECIMAL));
+    ColumnDef QUERY_TEXT_VALUES = new ColumnDef(Identifiers.QUERY_TEXT_VALUES, DataTypes.mapOf(DataTypes.TEXT, DataTypes.TEXT));
+    ColumnDef QUERY_TIMESTAMP_VALUES = new ColumnDef(Identifiers.QUERY_TIMESTAMP_VALUES, DataTypes.mapOf(DataTypes.TEXT, DataTypes.TIMESTAMP));
+    ColumnDef QUERY_NULL_VALUES = new ColumnDef(Identifiers.QUERY_NULL_VALUES, DataTypes.setOf(DataTypes.TEXT));
+
+    // Optional columns
+    // NOTE: using our extended vector, length is dependent on the vector dimension of the
+    // collection. The length of 1 here is a placeholder, vectorColumnMetadataFactory replaces it
+    // with binding.vectorLength()
+    ColumnDef QUERY_VECTOR_VALUE = new ColumnDef(Identifiers.QUERY_VECTOR_VALUE, new ExtendedVectorType(DataTypes.FLOAT, 1), ColumnDefs::vectorColumnMetadataFactory);
+    ColumnDef QUERY_LEXICAL_VALUE = new ColumnDef(Identifiers.QUERY_LEXICAL_VALUE, DataTypes.TEXT);
+
+    List ALL =
+        List.of(
+            KEY,
+            TX_ID,
+            DOC_JSON,
+            EXIST_KEYS,
+            ARRAY_SIZE,
+            ARRAY_CONTAINS,
+            QUERY_BOOLEAN_VALUES,
+            QUERY_DOUBLE_VALUES,
+            QUERY_TEXT_VALUES,
+            QUERY_TIMESTAMP_VALUES,
+            QUERY_NULL_VALUES,
+            QUERY_VECTOR_VALUE,
+            QUERY_LEXICAL_VALUE);
+    List PARTITION_KEY = List.of(KEY);
+    List OPTIONAL = List.of(QUERY_VECTOR_VALUE, QUERY_LEXICAL_VALUE);
+    List REQUIRED = listDifference(ALL, OPTIONAL);
+    // Required columns without the partition key. REQUIRED already excludes OPTIONAL, so
+    // subtracting OPTIONAL again (as this did before) left the partition key in the list.
+    List REQUIRED_NON_PK = listDifference(REQUIRED, PARTITION_KEY);
+
+    /**
+     * {@link ColumnMetadataFactory} for the vector column, creates the metadata using a vector
+     * type that has the element type of {@link #QUERY_VECTOR_VALUE} and the length from the
+     * binding.
+     *
+     * @throws IllegalArgumentException if the binding does not define the vector column
+     */
+    static ColumnMetadata vectorColumnMetadataFactory(ColumnDef columnDef, SuperShreddingBinding binding){
+
+      if (!binding.isVectorDefined()) {
+        throw new IllegalArgumentException("SuperShreddingBinding does not define the vector column, binding: %s".formatted(binding));
+      }
+      var elementType = ((ExtendedVectorType) ColumnDefs.QUERY_VECTOR_VALUE.type()).getElementType();
+      var vectorWithDimension = new ExtendedVectorType(elementType, binding.vectorLength());
+
+      return new DefaultColumnMetadata(
+          binding.keyspace(),
+          binding.collection(),
+          columnDef.name(),
+          vectorWithDimension,
+          false);
+    }
+
+    /**
+     * Maps each of the column defs to its {@link ColumnMetadata} for the binding, in the order
+     * of the list.
+     *
+     * @throws NullPointerException if the binding is null
+     */
+    static Stream toColumnMetadata(
+        List columnDefs,
+        SuperShreddingBinding binding) {
+
+      Objects.requireNonNull(binding, "binding must not be null");
+      return columnDefs.stream()
+          .map(columnDef -> columnDef.columnMetadata(binding));
+    }
+  }
+
+  /**
+   * Predicates that can be used to test if a ColumnMetadata matches the definition for a
+   * super shredding column. 
Use the {@link SuperShreddingPredicateBuilder} to get a
+   * predicate that can match a specific {@link SuperShreddingBinding}
+   *
+   */
+  interface Predicates {
+
+    // Required columns
+    ColumnMetadataPredicate KEY = ColumnDefs.KEY.predicate();
+    ColumnMetadataPredicate TX_ID = ColumnDefs.TX_ID.predicate();
+    ColumnMetadataPredicate DOC_JSON = ColumnDefs.DOC_JSON.predicate();
+    ColumnMetadataPredicate EXIST_KEYS = ColumnDefs.EXIST_KEYS.predicate();
+    ColumnMetadataPredicate ARRAY_SIZE = ColumnDefs.ARRAY_SIZE.predicate();
+    ColumnMetadataPredicate ARRAY_CONTAINS = ColumnDefs.ARRAY_CONTAINS.predicate();
+    ColumnMetadataPredicate QUERY_BOOLEAN_VALUES = ColumnDefs.QUERY_BOOLEAN_VALUES.predicate();
+    ColumnMetadataPredicate QUERY_DOUBLE_VALUES = ColumnDefs.QUERY_DOUBLE_VALUES.predicate();
+    ColumnMetadataPredicate QUERY_TEXT_VALUES = ColumnDefs.QUERY_TEXT_VALUES.predicate();
+    ColumnMetadataPredicate QUERY_TIMESTAMP_VALUES = ColumnDefs.QUERY_TIMESTAMP_VALUES.predicate();
+    ColumnMetadataPredicate QUERY_NULL_VALUES = ColumnDefs.QUERY_NULL_VALUES.predicate();
+    // Optional columns
+    // NOTE: using our extended vector, length is dependent on the vector dimension of the collection
+    ColumnMetadataPredicate QUERY_VECTOR_VALUE = new ColumnMetadataPredicate.Vector(
+        ColumnDefs.QUERY_VECTOR_VALUE.name(),
+        ((ExtendedVectorType) ColumnDefs.QUERY_VECTOR_VALUE.type()).getElementType());
+    ColumnMetadataPredicate QUERY_LEXICAL_VALUE = ColumnDefs.QUERY_LEXICAL_VALUE.predicate();
+
+    List ALL =
+        List.of(
+            KEY,
+            TX_ID,
+            DOC_JSON,
+            EXIST_KEYS,
+            ARRAY_SIZE,
+            ARRAY_CONTAINS,
+            QUERY_BOOLEAN_VALUES,
+            QUERY_DOUBLE_VALUES,
+            QUERY_TEXT_VALUES,
+            QUERY_TIMESTAMP_VALUES,
+            QUERY_NULL_VALUES,
+            QUERY_VECTOR_VALUE,
+            QUERY_LEXICAL_VALUE);
+    List PARTITION_KEY = List.of(KEY);
+    List OPTIONAL = List.of(QUERY_VECTOR_VALUE, QUERY_LEXICAL_VALUE);
+    List REQUIRED = listDifference(ALL, OPTIONAL);
+    List REQUIRED_NON_PK = listDifference(REQUIRED, PARTITION_KEY);
+
+    /**
+     * Returns the predicates that no column satisfies, i.e. the columns we expect to be
+     * there but are missing.
+     */
+    static List allFailingPredicates(
+        List predicates, Collection columns) {
+      return predicates.stream()
+          .filter(expected -> !columns.stream().anyMatch(expected))
+          .toList();
+    }
+
+    /**
+     * Returns the columns that satisfy none of the supplied predicates, i.e. the columns we
+     * do not expect to see.
+     */
+    static List allUnexpectedColumns(
+        List predicates, Collection columns) {
+      return columns.stream()
+          .filter(candidate -> !predicates.stream().anyMatch(expected -> expected.test(candidate)))
+          .toList();
+    }
+  }
+
+  /**
+   * Function used with the {@link IndexDef} to support extra options from the
+   * binding for use with the index for creating metadata or create statements
+   */
+  @FunctionalInterface
+  interface IndexOptionsFactory{
+    /**
+     * @return Options to apply, must not be null
+     */
+    Map apply(SuperShreddingBinding binding);
+  }
+
+
+  /**
+   * Models an index on a column in a super shredding table, and the function that is used
+   * with the index, e.g. `entries` or `values`.
+   *

+ * The information below is reference material showing what an index looks like when we are creating + * fake TableMetadata (which is built from system_schema.indexes) and when we + * make a CREATE INDEX statement. + *

+ * In the `system_schema.indexes` the options field has the extra class_name and + * target. But in CQL these are not in the WITH OPTIONS + * + *

Example of system_schema.indexes: + * + *

+   * | keyspace_name | table_name | index_name                       | kind   | options                                                                                                                          |
+   * |-------------- | ---------- | -------------------------------- | ------ | ---------------------------------------------------------------------------------------------------------------------------------|
+   * |     askada_01 |  documents |         documents_array_contains | CUSTOM |                                                       {'class_name': 'StorageAttachedIndex', 'target': 'values(array_contains)'} |
+   * |     askada_01 |  documents |             documents_array_size | CUSTOM |                                                          {'class_name': 'StorageAttachedIndex', 'target': 'entries(array_size)'} |
+   * |     askada_01 |  documents |            documents_exists_keys | CUSTOM |                                                           {'class_name': 'StorageAttachedIndex', 'target': 'values(exist_keys)'} |
+   * |     askada_01 |  documents |      documents_query_bool_values | CUSTOM |                                                   {'class_name': 'StorageAttachedIndex', 'target': 'entries(query_bool_values)'} |
+   * |     askada_01 |  documents |       documents_query_dbl_values | CUSTOM |                                                    {'class_name': 'StorageAttachedIndex', 'target': 'entries(query_dbl_values)'} |
+   * |     askada_01 |  documents |    documents_query_lexical_value | CUSTOM |                            {'class_name': 'StorageAttachedIndex', 'index_analyzer': 'standard', 'target': 'query_lexical_value'} |
+   * |     askada_01 |  documents |      documents_query_null_values | CUSTOM |                                                    {'class_name': 'StorageAttachedIndex', 'target': 'values(query_null_values)'} |
+   * |     askada_01 |  documents |      documents_query_text_values | CUSTOM |                                                   {'class_name': 'StorageAttachedIndex', 'target': 'entries(query_text_values)'} |
+   * |     askada_01 |  documents | documents_query_timestamp_values | CUSTOM |                                              {'class_name': 'StorageAttachedIndex', 'target': 'entries(query_timestamp_values)'} |
+   * |     askada_01 |  documents |     documents_query_vector_value | CUSTOM | {'class_name': 'StorageAttachedIndex', 'similarity_function': 'cosine', 'source_model': 'OTHER', 'target': 'query_vector_value'} |
+   * 
+ * + *

Example of CQL: + * + *

+   * CREATE CUSTOM INDEX IF NOT EXISTS documents_exists_keys ON "keyspace".documents (values(exist_keys)) USING 'StorageAttachedIndex';
+   * CREATE CUSTOM INDEX IF NOT EXISTS documents_array_size ON "keyspace".documents (entries(array_size)) USING 'StorageAttachedIndex';
+   * CREATE CUSTOM INDEX IF NOT EXISTS documents_array_contains ON "keyspace".documents (values(array_contains)) USING 'StorageAttachedIndex';
+   * CREATE CUSTOM INDEX IF NOT EXISTS documents_query_bool_values ON "keyspace".documents (entries(query_bool_values)) USING 'StorageAttachedIndex';
+   * CREATE CUSTOM INDEX IF NOT EXISTS documents_query_dbl_values ON "keyspace".documents (entries(query_dbl_values)) USING 'StorageAttachedIndex';
+   * CREATE CUSTOM INDEX IF NOT EXISTS documents_query_text_values ON "keyspace".documents (entries(query_text_values)) USING 'StorageAttachedIndex';
+   * CREATE CUSTOM INDEX IF NOT EXISTS documents_query_timestamp_values ON "keyspace".documents (entries(query_timestamp_values)) USING 'StorageAttachedIndex';
+   * CREATE CUSTOM INDEX IF NOT EXISTS documents_query_null_values ON "keyspace".documents (values(query_null_values)) USING 'StorageAttachedIndex';
+   * CREATE CUSTOM INDEX IF NOT EXISTS documents_query_vector_value ON "keyspace".documents (query_vector_value) USING 'StorageAttachedIndex' WITH OPTIONS = {'similarity_function': 'cosine', 'source_model': 'OTHER'};
+   * CREATE CUSTOM INDEX IF NOT EXISTS documents_query_lexical_value ON "keyspace".documents (query_lexical_value) USING 'StorageAttachedIndex' WITH OPTIONS = {'index_analyzer': 'standard'};
+   * 
+   *
+   */
+  record IndexDef(ColumnDef columnDef, ApiIndexFunction indexFunction, IndexOptionsFactory optionsFactory) {
+
+    /** Creates an index definition that has no per index options, i.e. no options factory. */
+    public IndexDef(ColumnDef columnDef, ApiIndexFunction indexFunction){
+      this(columnDef, indexFunction, null);
+    }
+
+    /**
+     * Get the name to give this index when bound to the SuperShreddingBinding.
+     *
+     * e.g. if the collection is called users, the index on
+     * exist_keys column is called users_exist_keys.
+     *
+     * NOTE(review): the reference listings in the IndexDef javadoc show the index on the
+     * exist_keys column named documents_exists_keys (with an extra "s"), which this method
+     * would not produce - confirm which name existing collections use.
+     */
+    public CqlIdentifier indexName(SuperShreddingBinding binding) {
+      return CqlIdentifier.fromInternal(
+          binding.collection().asInternal() + "_" + columnDef.name().asInternal());
+    }
+
+    /**
+     * Builds {@link IndexMetadata} for this index for the given {@link SuperShreddingBinding},
+     * see the {@link SuperShreddingMetadataBuilder} for how this is used with the table metadata.
+     */
+    public IndexMetadata indexMetadata(SuperShreddingBinding binding) {
+
+      // because this is IndexMetadata read from system_schema.indexes
+      // we need the options for the `class_name` and `target` AND any other cql "OPTIONS" like
+      // the vector index configuration
+      var indexTarget = new CQLSAIIndex.IndexTarget(columnDef.name(), indexFunction);
+      Map fullOptions = new LinkedHashMap<>(indexTarget.indexOptions());
+
+      // any per index options
+      fullOptions.putAll(indexOptions(binding));
+
+      return new DefaultIndexMetadata(
+          binding.keyspace(),
+          binding.collection(),
+          indexName(binding),
+          IndexKind.CUSTOM,
+          indexTarget.toTargetString(),
+          Collections.unmodifiableMap(fullOptions));
+    }
+
+    /**
+     * Gets the per index options from the options factory for the binding.
+     *
+     * @return the options from the factory, or an empty map when there is no factory, never null
+     * @throws NullPointerException if the factory breaks its contract and returns null
+     */
+    Map indexOptions(SuperShreddingBinding binding) {
+      if (optionsFactory == null) {
+        return Collections.emptyMap();
+      }
+      // fail with a message that names the column, same as ColumnDef does for its factory
+      return Objects.requireNonNull(
+          optionsFactory.apply(binding),
+          () -> "IndexOptionsFactory returned null for columnDef.name: " + columnDef.name());
+    }
+
+  }
+
+  /** The {@link IndexDef} for all the indexes on a super shredding table. */
+  interface IndexDefs {
+
+    // Required indexes
+    IndexDef EXIST_KEYS = new IndexDef(ColumnDefs.EXIST_KEYS, ApiIndexFunction.VALUES);
+    IndexDef ARRAY_SIZE = new IndexDef(ColumnDefs.ARRAY_SIZE, ApiIndexFunction.ENTRIES);
+    IndexDef ARRAY_CONTAINS = new IndexDef(ColumnDefs.ARRAY_CONTAINS, ApiIndexFunction.VALUES);
+    IndexDef QUERY_BOOLEAN_VALUES = new IndexDef(ColumnDefs.QUERY_BOOLEAN_VALUES, ApiIndexFunction.ENTRIES);
+    IndexDef QUERY_DOUBLE_VALUES = new IndexDef(ColumnDefs.QUERY_DOUBLE_VALUES, ApiIndexFunction.ENTRIES);
+    IndexDef QUERY_TEXT_VALUES = new IndexDef(ColumnDefs.QUERY_TEXT_VALUES, ApiIndexFunction.ENTRIES);
+    IndexDef QUERY_TIMESTAMP_VALUES = new IndexDef(ColumnDefs.QUERY_TIMESTAMP_VALUES, ApiIndexFunction.ENTRIES);
+    IndexDef QUERY_NULL_VALUES = new IndexDef(ColumnDefs.QUERY_NULL_VALUES, ApiIndexFunction.VALUES);
+    // Optional indexes, no index function and the options come from the binding
+    IndexDef QUERY_VECTOR_VALUE = new IndexDef(ColumnDefs.QUERY_VECTOR_VALUE, null, IndexDefs::vectorIndexOptionsFactory);
+    IndexDef QUERY_LEXICAL_VALUE = new IndexDef(ColumnDefs.QUERY_LEXICAL_VALUE, null, IndexDefs::lexicalIndexOptionsFactory);
+
+    List ALL =
+        List.of(
+            EXIST_KEYS,
+            ARRAY_SIZE,
+            ARRAY_CONTAINS,
+            QUERY_BOOLEAN_VALUES,
+            QUERY_DOUBLE_VALUES,
+            QUERY_TEXT_VALUES,
+            QUERY_TIMESTAMP_VALUES,
+            QUERY_NULL_VALUES,
+            QUERY_VECTOR_VALUE,
+            QUERY_LEXICAL_VALUE);
+    List OPTIONAL = List.of(QUERY_VECTOR_VALUE, QUERY_LEXICAL_VALUE);
+    List REQUIRED = listDifference(ALL, OPTIONAL);
+
+    /**
+     * {@link IndexOptionsFactory} for the vector index, adds the similarity function and the
+     * source model from the binding when they are not null or blank.
+     */
+    static Map vectorIndexOptionsFactory(SuperShreddingBinding binding) {
+
+      // {'similarity_function': '${SIMILARITY_FUNCTION}', 'source_model': '${SOURCE_MODEL}'}
+
+      // preserve order, similarity then source model, important for testing against CQL
+      Map options = new LinkedHashMap<>();
+      if (!isNullOrBlank(binding.similarityFunction())) {
+        options.put(VectorConstants.CQLAnnIndex.SIMILARITY_FUNCTION, binding.similarityFunction());
+      }
+      if (!isNullOrBlank(binding.sourceModel())) {
+        options.put(VectorConstants.CQLAnnIndex.SOURCE_MODEL, binding.sourceModel());
+      }
+      return options;
+    }
+
+    /**
+     * {@link IndexOptionsFactory} for the lexical index, adds the index analyzer from the
+     * binding when it is not null or blank.
+     */
+    static Map lexicalIndexOptionsFactory(SuperShreddingBinding binding) {
+
+      // {'index_analyzer': '${INDEX_ANALYZER}'}
+      // preserve order, we only have one, but hey, we preserve order
+      Map options = new LinkedHashMap<>();
+      if (!isNullOrBlank(binding.indexAnalyzer())){
+        options.put(TableDescConstants.TextIndexCQLOptions.OPTION_ANALYZER, binding.indexAnalyzer());
+      }
+      return options;
+    }
+
+  }
+}
diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/spec/SuperShreddingMetadataBuilder.java 
b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/spec/SuperShreddingMetadataBuilder.java new file mode 100644 index 0000000000..0611d19016 --- /dev/null +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/spec/SuperShreddingMetadataBuilder.java @@ -0,0 +1,99 @@ +package io.stargate.sgv2.jsonapi.service.schema.collections.spec; + +import static io.stargate.sgv2.jsonapi.service.schema.collections.spec.SuperShreddingMetadata.ColumnDefs; + +import com.datastax.oss.driver.api.core.CqlIdentifier; +import com.datastax.oss.driver.api.core.metadata.schema.ColumnMetadata; +import com.datastax.oss.driver.api.core.metadata.schema.Describable; +import com.datastax.oss.driver.api.core.metadata.schema.IndexMetadata; +import com.datastax.oss.driver.internal.core.metadata.schema.DefaultTableMetadata; +import java.util.*; +import java.util.stream.Stream; + +/** + * Builder that will create {@link com.datastax.oss.driver.api.core.metadata.schema.TableMetadata} + * and {@link com.datastax.oss.driver.api.core.metadata.schema.IndexMetadata} instances from the + * {@link SuperShreddingMetadata}. + *

+ * We do not create TableMetadata or IndexMetadata directly in production code, we get that from the + * driver. This class is for creating them for tests to fake info from the driver, and the output of this class is + * ground truthed against CQL. See the {@link SuperShreddingBuilder} for more details on the testing. + *

+ */
+public class SuperShreddingMetadataBuilder
+    extends SuperShreddingBuilder {
+
+  @Override
+  protected SuperShreddingMetadataBuilder self() {
+    return this;
+  }
+
+  /**
+   * Builds the table metadata for the binding, and the metadata for each of its indexes.
+   *
+   * @return the table component first, followed by one component per index
+   */
+  @Override
+  public List> buildInternal() {
+
+    // Primary key, this is the column metadata for the partition key, the columns also need to be
+    // in allColumns to get created
+    var primaryKey = ColumnDefs.toColumnMetadata(ColumnDefs.PARTITION_KEY, binding())
+        .toList();
+
+    // get the columns, including the primary keys
+    // required includes the primary keys
+    // Always work on a private copy: ColumnDefs.REQUIRED is a shared constant, and the adds below
+    // must never reach it, whatever hasAnyOptional() reports.
+    var columnDefs = new ArrayList<>(ColumnDefs.REQUIRED);
+    if (binding().isVectorDefined()) {
+      columnDefs.add(ColumnDefs.QUERY_VECTOR_VALUE);
+    }
+    if (binding().isLexicalDefined()) {
+      columnDefs.add(ColumnDefs.QUERY_LEXICAL_VALUE);
+    }
+
+    // LinkedHashMap to maintain order
+    Map allColumns = new LinkedHashMap<>(ColumnDefs.ALL.size());
+    ColumnDefs.toColumnMetadata(columnDefs, binding())
+        .forEach(col -> allColumns.put(col.getName(), col));
+
+    // map needed for the TableMetadata
+    Map indexMetadata = new LinkedHashMap<>();
+    buildIndexMetadata().forEach(metadata -> indexMetadata.put(metadata.getName(), metadata));
+
+    Map tableOptions = new LinkedHashMap<>();
+    if (comment != null && !comment.isBlank()) {
+      tableOptions.put(TABLE_OPTION_COMMENT_IDENTIFIER, comment);
+    }
+
+    // Metadata classes do not take defensive copies, wrap to reduce the chance of a bug elsewhere
+    // updating table metadata
+    var tableMetadata =
+        new DefaultTableMetadata(
+            binding().keyspace(),
+            binding().collection(),
+            UUID.randomUUID(),
+            false,
+            false,
+            primaryKey,
+            Collections.emptyMap(), // no grouping keys
+            Collections.unmodifiableMap(allColumns),
+            Collections.unmodifiableMap(tableOptions),
+            Collections.unmodifiableMap(indexMetadata));
+
+    // one component for the table, plus one for each index
+    List> components = new ArrayList<>(1 + indexMetadata.size());
+    components.add(
+        new SuperShreddingComponent<>(
+            binding().collection(), SuperShreddingComponentType.TABLE, tableMetadata));
+    indexMetadata
+        .values()
+        .forEach(
+            index ->
+                components.add(
+                    new SuperShreddingComponent<>(
+                        index.getName(), SuperShreddingComponentType.INDEX, index)));
+    return components;
+  }
+
+  /** Builds the index metadata for each of the index defs that apply to the binding. */
+  private Stream buildIndexMetadata() {
+
+    return indexDefs(binding())
+        .map(indexDef -> indexDef.indexMetadata(binding()));
+  }
+}
diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/spec/SuperShreddingPredicateBuilder.java b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/spec/SuperShreddingPredicateBuilder.java new file mode 100644 index 0000000000..87f3199d2d --- /dev/null +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/spec/SuperShreddingPredicateBuilder.java @@ -0,0 +1,36 @@ +package io.stargate.sgv2.jsonapi.service.schema.collections.spec; + +import java.util.List; + +/** + * A {@link SuperShreddingBuilder} to create the {@link SuperShreddingTablePredicate}. + * + *

For now only creates a {@link SuperShreddingComponentType#TABLE} component; creating index
+ * components is future work. See {@link SuperShreddingBuilder} for more details.
+ */
+public class SuperShreddingPredicateBuilder
+    extends SuperShreddingBuilder {
+
+  // strict unless changed via withStrict
+  private boolean strict = true;
+
+  protected SuperShreddingPredicateBuilder() {}
+
+  @Override
+  protected SuperShreddingPredicateBuilder self() {
+    return this;
+  }
+
+  /** Sets the strict flag that is passed to the {@link SuperShreddingTablePredicate}. */
+  public SuperShreddingPredicateBuilder withStrict(boolean strict) {
+    this.strict = strict;
+    return this;
+  }
+
+  /** Creates a single TABLE component that holds the predicate for the binding. */
+  @Override
+  public List> buildInternal() {
+    var tablePredicate = new SuperShreddingTablePredicate(strict, binding());
+
+    return List.of(
+        new SuperShreddingComponent<>(
+            binding().collection(), SuperShreddingComponentType.TABLE, tablePredicate));
+  }
+}
diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/spec/SuperShreddingTablePredicate.java b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/spec/SuperShreddingTablePredicate.java new file mode 100644 index 0000000000..17a214e60e --- /dev/null +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/spec/SuperShreddingTablePredicate.java @@ -0,0 +1,212 @@ +package io.stargate.sgv2.jsonapi.service.schema.collections.spec; + +import static io.stargate.sgv2.jsonapi.service.schema.collections.spec.SuperShreddingMetadata.Predicates.*; + +import com.datastax.oss.driver.api.core.metadata.schema.ColumnMetadata; +import com.datastax.oss.driver.api.core.metadata.schema.TableMetadata; +import com.google.common.collect.Streams; +import io.stargate.sgv2.jsonapi.exception.ErrorFormatters; +import io.stargate.sgv2.jsonapi.util.ColumnMetadataPredicate; +import io.stargate.sgv2.jsonapi.util.CqlIdentifierUtil; +import java.util.*; +import java.util.function.Predicate; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Predicate to test if a {@link 
TableMetadata} is a valid Collection table, one that has the super + * shredding table schema. + * + *

This class is designed to be built via {@link SuperShreddingBuilder#predicate()} and the builder + * it provides, so that there is shared logic between the builders that are used to create the super + * shredding table and the predicate used to test for it. See {@link + * SuperShreddingPredicateBuilder}. + * + *

Uses the shared abstract definition of super shredding in {@link SuperShreddingMetadata} + * + *

Note: How we create the statements for, predicate to test for, and test data to use + * with code that uses a super shredding table starts with the {@link SuperShreddingBuilder} class + * which has some slightly complex tests around it. + * + *

This class used to be called CollectionTableMatcher + * + *

NOTE: As of June 2026, there is no check that the indexes are valid, this will be future
+ * work (aaron)
+ */
+public class SuperShreddingTablePredicate implements Predicate {
+  private static final Logger LOGGER = LoggerFactory.getLogger(SuperShreddingTablePredicate.class);
+
+  // NOTE(review): assigned in the ctor but not read anywhere in this class
+  private final SuperShreddingBinding superShreddingBinding;
+  // predicates for the optional columns the binding says the table has, unmodifiable
+  private final List expectedOptionals;
+
+  // when non null, this is the list of predicates that defines the columns that are ONLY allowed to
+  // exist
+  private final List strictMatch;
+
+  // A def that represents the rules used by the old `CollectionTableMatcher`
+  private static final SuperShreddingBinding BACKWARDS_COMPAT =
+      new SuperShreddingBinding(null, null, false, 0, null, null, false, null);
+
+  /**
+   * Visible for backwards compatibility.
+   *
+   * <p>Creates an instance that does not use strict mode, and does not check for optional columns.
+   */
+  public SuperShreddingTablePredicate() {
+    this(false, BACKWARDS_COMPAT);
+  }
+
+  /**
+   * Creates an instance that checks if the table matches the super shredding definition passed in.
+   *
+   * @param strict if true, the predicate fails (returns false) if unexpected columns are found.
+   * @param superShreddingBinding the super shredding definition to use for the predicate, built via
+   *     builders, must not be null.
+   */
+  SuperShreddingTablePredicate(boolean strict, SuperShreddingBinding superShreddingBinding) {
+
+    this.superShreddingBinding =
+        Objects.requireNonNull(superShreddingBinding, "superShreddingDef must not be null");
+
+    // only expect the optional columns the binding says it has
+    List optionals = new ArrayList<>();
+    if (superShreddingBinding.hasVector()) {
+      optionals.add(SuperShreddingMetadata.Predicates.QUERY_VECTOR_VALUE);
+    }
+    if (superShreddingBinding.hasLexical()) {
+      optionals.add(SuperShreddingMetadata.Predicates.QUERY_LEXICAL_VALUE);
+    }
+    this.expectedOptionals = Collections.unmodifiableList(optionals);
+
+    // strict mode allows the required columns plus the expected optionals, and nothing else
+    this.strictMatch =
+        strict
+            ? Stream.concat(
+                    SuperShreddingMetadata.Predicates.REQUIRED.stream(), expectedOptionals.stream())
+                .toList()
+            : null;
+  }
+
+  /**
+   * Tests if the given table is a valid super shredding.
+   *
+   * <p>Checks run in order and stop at the first failure: partition key, clustering keys,
+   * required columns, expected optional columns, and in strict mode unexpected columns. Each
+   * failure is logged at trace level.
+   *
+   * @param tableMetadata the table to test, a null table is not valid
+   * @return true if the table is a valid super shredding, false otherwise.
+   */
+  @Override
+  public boolean test(TableMetadata tableMetadata) {
+
+    // The trace messages are used in the testing to confirm we are failing the way the test expects
+    if (null == tableMetadata) {
+      if (LOGGER.isTraceEnabled()) {
+        LOGGER.trace("test() - tableMetadata is null");
+      }
+      return false;
+    }
+
+    List failingPredicates;
+    List unexpectedColumns;
+
+    // STEP 1 - Partition Key, in strict or not, must be exactly as we expect
+
+    failingPredicates =
+        allFailingPredicates(
+            SuperShreddingMetadata.Predicates.PARTITION_KEY, tableMetadata.getPartitionKey());
+    if (!failingPredicates.isEmpty()) {
+      if (LOGGER.isTraceEnabled()) {
+        LOGGER.trace(failedPredicates("partition key missing", failingPredicates));
+      }
+      return false;
+    }
+
+    unexpectedColumns =
+        allUnexpectedColumns(
+            SuperShreddingMetadata.Predicates.PARTITION_KEY, tableMetadata.getPartitionKey());
+    if (!unexpectedColumns.isEmpty()) {
+      if (LOGGER.isTraceEnabled()) {
+        LOGGER.trace(unexpectedColumns("unexpected columns in partition key", unexpectedColumns));
+      }
+      return false;
+    }
+
+    // STEP 2 - Clustering Keys, in strict or not, must be exactly as we expect which is empty
+
+    if (!tableMetadata.getClusteringColumns().isEmpty()) {
+      if (LOGGER.isTraceEnabled()) {
+        LOGGER.trace(
+            unexpectedColumns(
+                "unexpected columns in clustering key",
+                tableMetadata.getClusteringColumns().keySet()));
+      }
+      return false;
+    }
+
+    // STEP 3 - Columns - Check for required and optional based on the Def (set in ctor)
+
+    failingPredicates =
+        allFailingPredicates(
+            SuperShreddingMetadata.Predicates.REQUIRED, tableMetadata.getColumns().values());
+    if (!failingPredicates.isEmpty()) {
+      if (LOGGER.isTraceEnabled()) {
+        LOGGER.trace(failedPredicates("required columns missing", failingPredicates));
+      }
+      return false;
+    }
+
+    failingPredicates =
+        allFailingPredicates(expectedOptionals, tableMetadata.getColumns().values());
+    if (!failingPredicates.isEmpty()) {
+      if (LOGGER.isTraceEnabled()) {
+        LOGGER.trace(failedPredicates("optional columns missing", failingPredicates));
+      }
+      return false;
+    }
+
+    // STEP 4 - Strict Columns - If set, then we can only have the expected columns
+
+    if (strictMatch != null) {
+      var allTableColumns =
+          Streams.concat(
+                  tableMetadata.getPartitionKey().stream(),
+                  tableMetadata.getClusteringColumns().keySet().stream(),
+                  tableMetadata.getColumns().values().stream())
+              .toList();
+      unexpectedColumns = allUnexpectedColumns(strictMatch, allTableColumns);
+      if (!unexpectedColumns.isEmpty()) {
+        if (LOGGER.isTraceEnabled()) {
+          LOGGER.trace(unexpectedColumns("unexpected columns in strict mode", unexpectedColumns));
+        }
+        return false;
+      }
+    }
+
+    return true;
+  }
+
+  /** Builds the trace message for predicates that failed, sorted so the message is stable. */
+  private static String failedPredicates(
+      String failure, Collection failingPredicates) {
+
+    // Rely on the toString in the ColumnMetadataPredicate
+    var names =
+        failingPredicates.stream()
+            .sorted(ColumnMetadataPredicate.IDENTIFIER_COMPARATOR)
+            .map(Object::toString)
+            .collect(Collectors.joining(", "));
+    return failureMessages(failure, names);
+  }
+
+  /** Builds the trace message for columns we did not expect, sorted so the message is stable. */
+  private static String unexpectedColumns(String failure, Collection unexpected) {
+
+    var names =
+        unexpected.stream()
+            .sorted(CqlIdentifierUtil.COLUMN_METADATA_COMPARATOR)
+            .map(ErrorFormatters::errFmt)
+            .collect(Collectors.joining(", "));
+    return failureMessages(failure, names);
+  }
+
+  /** Formats the message shared by all the trace logging in test(). */
+  private static String failureMessages(String failure, String names) {
+    // e.g. "test() - required columns missing, columns: exist_keys, key"
+    return "test() - " + failure + ", columns: " + names;
+  }
+}
diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiIndexFunction.java b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiIndexFunction.java index 27bafcb403..2c94293351 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiIndexFunction.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/ApiIndexFunction.java @@ -1,9 +1,15 @@ package io.stargate.sgv2.jsonapi.service.schema.tables; +import static io.stargate.sgv2.jsonapi.util.CqlIdentifierUtil.cqlIdentifierToCQL; + +import com.datastax.oss.driver.api.core.CqlIdentifier; +import com.datastax.oss.driver.api.querybuilder.schema.CreateIndex; +import com.datastax.oss.driver.api.querybuilder.schema.CreateIndexOnTable; import io.stargate.sgv2.jsonapi.api.model.command.table.definition.datatype.MapComponentDesc; import io.stargate.sgv2.jsonapi.exception.checked.UnknownCqlIndexFunctionException; import java.util.HashMap; import java.util.Map; +import java.util.Objects; /** * ApiIndexFunction is a function that is applied in indexes on CQL collection type. @@ -33,6 +39,44 @@ public enum ApiIndexFunction { this.cqlFunction = cqlFunction; } + public String cqlFunction() { + return cqlFunction; + } + + public String toTargetString(CqlIdentifier targetColumn) { + return toTargetString(this, targetColumn); + } + + /** + * Builds the target of the SAI index, which may or maynot have a function in the + * definition. See examples in {@link CQLSAIIndex} + * + * @param indexFunction nullable index function to use in the target string + * @param targetColumn required column to use in the target string + * @return the target string that is used in an index definition. 
+   */
+  public static String toTargetString(ApiIndexFunction indexFunction, CqlIdentifier targetColumn) {
+    Objects.requireNonNull(targetColumn, "targetColumn cannot be null");
+    // no function means the target is just the column, otherwise function(column)
+    return indexFunction == null
+        ? cqlIdentifierToCQL(targetColumn)
+        : indexFunction.cqlFunction() + "(" + cqlIdentifierToCQL(targetColumn) + ")";
+  }
+
+  /**
+   * Adds the target column to the create index statement, using the builder method that matches
+   * the index function.
+   *
+   * @param createIndexOnTable required, the statement to add the column to
+   * @param indexFunction nullable index function, when null the column is added without a
+   *     function
+   * @param targetColumn required column to index
+   * @return the statement with the column added
+   */
+  public static CreateIndex addTo(
+      CreateIndexOnTable createIndexOnTable,
+      ApiIndexFunction indexFunction,
+      CqlIdentifier targetColumn) {
+    Objects.requireNonNull(createIndexOnTable, "createIndexOnTable cannot be null");
+    Objects.requireNonNull(targetColumn, "targetColumn cannot be null");
+
+    // `case null` handles the no function case, without it the switch throws on a null selector
+    return switch (indexFunction) {
+      case KEYS -> createIndexOnTable.andColumnKeys(targetColumn);
+      case VALUES -> createIndexOnTable.andColumnValues(targetColumn);
+      case ENTRIES -> createIndexOnTable.andColumnEntries(targetColumn);
+      case null -> createIndexOnTable.andColumn(targetColumn);
+    };
+  }
+
 public static ApiIndexFunction fromCql(String cqlFunction) throws UnknownCqlIndexFunctionException { if (cqlFunction == null || !FUNCTION_MAP.containsKey(cqlFunction.toLowerCase())) { diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/CQLSAIIndex.java b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/CQLSAIIndex.java index a43b5b5a1c..c1a94784cc 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/CQLSAIIndex.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/tables/CQLSAIIndex.java @@ -5,9 +5,13 @@ import com.datastax.oss.driver.api.core.CqlIdentifier; import com.datastax.oss.driver.api.core.metadata.schema.IndexKind; import com.datastax.oss.driver.api.core.metadata.schema.IndexMetadata; +import com.datastax.oss.driver.api.querybuilder.schema.CreateIndex; +import com.datastax.oss.driver.api.querybuilder.schema.CreateIndexOnTable; import com.datastax.oss.driver.internal.core.adminrequest.AdminRow; import io.stargate.sgv2.jsonapi.exception.checked.UnknownCqlIndexFunctionException; 
import io.stargate.sgv2.jsonapi.exception.checked.UnsupportedCqlIndexException; +import io.stargate.sgv2.jsonapi.service.schema.collections.spec.SuperShreddingMetadata; +import java.util.Map; import java.util.Objects; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -125,6 +129,7 @@ static boolean indexClassIsSai(String className) { public static IndexTarget indexTarget(IndexMetadata indexMetadata) throws UnknownCqlIndexFunctionException, UnsupportedCqlIndexException { Objects.requireNonNull(indexMetadata, "indexMetadata must not be null"); + // TODO: XXX: move to InexTarget as from() // if the regex matches then the target is in the form "keys(foo)", "entries(bar)", // "values("foo")", "full("bar")" etc @@ -160,6 +165,31 @@ public static IndexTarget indexTarget(IndexMetadata indexMetadata) return new IndexTarget(CqlIdentifier.fromInternal(columnName), apiIndexFunction); } - /** For internal to this package use only */ - public record IndexTarget(CqlIdentifier targetColumn, ApiIndexFunction indexFunction) {} + /** Contains the column an index is built on, and the index function if there is one. */ + public record IndexTarget(CqlIdentifier targetColumn, ApiIndexFunction indexFunction) { + + /** + * Add the Index Function we would use in a CREATE INDEX CQL statement. + * + *

Used in tables, and by the {@link SuperShreddingMetadata.IndexDef} + * + * @return + */ + public String toTargetString() { + return ApiIndexFunction.toTargetString(indexFunction, targetColumn); + } + + public CreateIndex addTo(CreateIndexOnTable createIndexOnTable) { + return ApiIndexFunction.addTo(createIndexOnTable, indexFunction, targetColumn); + } + + /** + * Build the options that would be used in IndexMetadata for this + * + * @return + */ + public Map indexOptions() { + return Map.of(Options.CLASS_NAME, SAI_CLASS_NAME, Options.TARGET, toTargetString()); + } + } } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/util/ColumnMetadataPredicate.java b/src/main/java/io/stargate/sgv2/jsonapi/util/ColumnMetadataPredicate.java new file mode 100644 index 0000000000..1a424dc743 --- /dev/null +++ b/src/main/java/io/stargate/sgv2/jsonapi/util/ColumnMetadataPredicate.java @@ -0,0 +1,169 @@ +package io.stargate.sgv2.jsonapi.util; + +import static io.stargate.sgv2.jsonapi.exception.ErrorFormatters.errFmt; +import static io.stargate.sgv2.jsonapi.util.CqlIdentifierUtil.CQL_IDENTIFIER_COMPARATOR; + +import com.datastax.oss.driver.api.core.CqlIdentifier; +import com.datastax.oss.driver.api.core.metadata.schema.ColumnMetadata; +import com.datastax.oss.driver.api.core.type.*; +import com.datastax.oss.driver.internal.core.type.DefaultVectorType; +import java.util.Comparator; +import java.util.Objects; +import java.util.function.Predicate; + +/** + * A predciate for matching {@link ColumnMetadata} against a specified column name and type. + * + *

See implementations for concrete usage. It lives in the general "util" package because, while used a lot + * with Collections, it may also be useful for tables. + * + *

NOTE: This was previously called CqlColumnMatcher + */ +public class ColumnMetadataPredicate implements Predicate { + + // Compare predicates by the identifier name the column must have. + public static final Comparator IDENTIFIER_COMPARATOR = + Comparator.comparing(ColumnMetadataPredicate::name, CQL_IDENTIFIER_COMPARATOR); + + protected final CqlIdentifier name; + protected final DataType type; + + protected ColumnMetadataPredicate(CqlIdentifier name, DataType type) { + // no null checks in the ctor, so a subclass can fully override if they want to. + // null checks when we try to use them. + this.name = name; + this.type = type; + } + + /** + * @return The name the column must have. + */ + public CqlIdentifier name() { + Objects.requireNonNull(name, "name must not be null"); + return name; + } + + /** + * Implementors can override for more complex type matching. + * + * @return Return true if and only if the column type matches the expected types, + * including nested types of CQL collections like a list or map. + */ + protected boolean typeMatches(ColumnMetadata columnMetadata) { + Objects.requireNonNull(type, "type must not be null"); + return Objects.equals(type, columnMetadata.getType()); + } + + /** + * Tests if the supplied column metadata matches the name and type of this matcher. + * + * @param columnMetadata existing column metadata to test. + * @throws NullPointerException if columnMetadata is null. + * @return true if the column metadata matches the name and type of this matcher. + */ + @Override + public boolean test(ColumnMetadata columnMetadata) { + Objects.requireNonNull(columnMetadata, "columnMetadata must not be null"); + + return Objects.equals(columnMetadata.getName(), name()) && typeMatches(columnMetadata); + } + + /** Returns the name and type we match against, e.g. 
tx_id(uuid) */ + @Override + public String toString() { + Objects.requireNonNull(name, "name must not be null"); + Objects.requireNonNull(type, "type must not be null"); + return String.format("%s(%s)", errFmt(name), errFmt(type)); + } + + /** Basic type matcher, for a name and a type. */ + public static class Basic extends ColumnMetadataPredicate { + + public Basic(CqlIdentifier name, DataType type) { + super(name, type); + Objects.requireNonNull(name, "name must not be null"); + Objects.requireNonNull(type, "type must not be null"); + } + } + + /** Matches a map type, including the key and value types. */ + public static class Map extends Basic { + + public Map(CqlIdentifier name, DataType keyType, DataType valueType) { + this(name, keyType, valueType, false); + } + + public Map(CqlIdentifier name, DataType keyType, DataType valueType, boolean frozen) { + super(name, DataTypes.mapOf(keyType, valueType, frozen)); + Objects.requireNonNull(name, "name must not be null"); + Objects.requireNonNull(keyType, "keyType must not be null"); + Objects.requireNonNull(valueType, "valueType must not be null"); + } + } + + /** Matches a tuple type, including the elements of the tuple */ + public static class Tuple extends Basic { + + public Tuple(CqlIdentifier name, DataType... elements) { + super(name, DataTypes.tupleOf(elements)); + Objects.requireNonNull(name, "name must not be null"); + Objects.requireNonNull(elements, "elements must not be null"); + for (int i = 0; i < elements.length; i++) { + Objects.requireNonNull(elements[i], "elements[" + i + "] must not be null"); + } + } + } + + /** Matches a set type, including the element type. 
*/ + public static class Set extends Basic { + + public Set(CqlIdentifier name, DataType elementType) { + super(name, DataTypes.setOf(elementType)); + Objects.requireNonNull(name, "name must not be null"); + Objects.requireNonNull(elementType, "elementType must not be null"); + } + } + + /** + * Matches a vector type, including the element type. + * + *

NOTE: this matches the column as a vector type, and the subtype of the vector; it DOES NOT + * match the Vector Length. The {@link DefaultVectorType#equals} will match vector length, and we do not + * want that in some situations because we do not have the specifics of how long it should be. + * Will add another predicate when that is needed. + * + *

Also, this is not only checks if the column type is an instance of {@link VectorType} + * interface, to account for our {@link + * io.stargate.sgv2.jsonapi.service.cqldriver.override.ExtendedVectorType} + */ + public static class Vector extends ColumnMetadataPredicate { + + private final DataType elementType; + + /** Create a predicate to match a vector with a float element type. */ + public Vector(CqlIdentifier name) { + // lets be honest, they are all floats. + this(name, DataTypes.FLOAT); + } + + public Vector(CqlIdentifier name, DataType elementType) { + super(name, null); + Objects.requireNonNull(name, "name must not be null"); + this.elementType = Objects.requireNonNull(elementType, "elementType must not be null"); + } + + @Override + protected boolean typeMatches(ColumnMetadata columnMetadata) { + // NOTE: checking is instance for reasons above + if (!(columnMetadata.getType() instanceof VectorType vector)) { + return false; + } + return Objects.equals(vector.getElementType(), elementType); + } + + @Override + public String toString() { + return String.format("%s(vector<%s>)", errFmt(name), errFmt(elementType)); + } + } +} diff --git a/src/main/java/io/stargate/sgv2/jsonapi/util/CqlIdentifierUtil.java b/src/main/java/io/stargate/sgv2/jsonapi/util/CqlIdentifierUtil.java index 95978377d3..5f9ff3ae60 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/util/CqlIdentifierUtil.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/util/CqlIdentifierUtil.java @@ -4,6 +4,7 @@ import com.datastax.oss.driver.api.core.metadata.schema.ColumnMetadata; import com.datastax.oss.driver.internal.core.util.Strings; import java.util.Comparator; +import java.util.Objects; public abstract class CqlIdentifierUtil { @@ -41,6 +42,7 @@ public static CqlIdentifier cqlIdentifierFromUserInput(String name) { public static String cqlIdentifierToCQL(CqlIdentifier identifier) { // pretty == false it means we force the double quotes around the internal without checking if // they are needed + 
Objects.requireNonNull(identifier, "identifier must not be null"); return identifier.asCql(false); } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/util/StringUtil.java b/src/main/java/io/stargate/sgv2/jsonapi/util/StringUtil.java index 2515a4fe35..d96d5dd52b 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/util/StringUtil.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/util/StringUtil.java @@ -14,12 +14,16 @@ public static String normalizeOptionalString(Optional string) { return normalizeOptionalString(string.orElse("")); } + public static boolean isNullOrBlank(String string) { + return string == null || string.isBlank(); + } + /** * Returns {@code value} unchanged if it is non-null and not blank; otherwise throws {@link * IllegalArgumentException} naming the offending {@code name}. */ public static String requireNonBlank(String value, String name) { - if (value == null || value.isBlank()) { + if (isNullOrBlank(value)) { throw new IllegalArgumentException(name + " must not be null or blank"); } return value; diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/schema/collections/CqlColumnMatcherTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/schema/collections/CqlColumnMatcherTest.java deleted file mode 100644 index 366c0b5fcc..0000000000 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/schema/collections/CqlColumnMatcherTest.java +++ /dev/null @@ -1,409 +0,0 @@ -package io.stargate.sgv2.jsonapi.service.schema.collections; - -import static org.assertj.core.api.Assertions.assertThat; - -import com.datastax.oss.driver.api.core.CqlIdentifier; -import com.datastax.oss.driver.api.core.metadata.schema.ColumnMetadata; -import com.datastax.oss.driver.api.core.type.DataType; -import com.datastax.oss.driver.internal.core.metadata.schema.DefaultColumnMetadata; -import com.datastax.oss.driver.internal.core.type.DefaultMapType; -import com.datastax.oss.driver.internal.core.type.DefaultSetType; -import 
com.datastax.oss.driver.internal.core.type.DefaultTupleType; -import com.datastax.oss.driver.internal.core.type.PrimitiveType; -import com.datastax.oss.protocol.internal.ProtocolConstants; -import java.util.Arrays; -import java.util.List; -import org.junit.jupiter.api.Nested; -import org.junit.jupiter.api.Test; - -class CqlColumnMatcherTest { - - @Nested - class BasicType { - - @Test - public void happyPath() { - ColumnMetadata spec = - new DefaultColumnMetadata( - CqlIdentifier.fromInternal("keyspace"), - CqlIdentifier.fromInternal("collection"), - CqlIdentifier.fromInternal("column"), - new PrimitiveType(ProtocolConstants.DataType.VARCHAR), - false); - - CqlColumnMatcher.BasicType matcher = - new CqlColumnMatcher.BasicType( - CqlIdentifier.fromInternal("column"), - new PrimitiveType(ProtocolConstants.DataType.VARCHAR)); - boolean result = matcher.test(spec); - - assertThat(result).isTrue(); - } - - @Test - public void wrongType() { - ColumnMetadata spec = - new DefaultColumnMetadata( - CqlIdentifier.fromInternal("keyspace"), - CqlIdentifier.fromInternal("collection"), - CqlIdentifier.fromInternal("column"), - new PrimitiveType(ProtocolConstants.DataType.INT), - false); - - CqlColumnMatcher.BasicType matcher = - new CqlColumnMatcher.BasicType( - CqlIdentifier.fromInternal("column"), - new PrimitiveType(ProtocolConstants.DataType.VARCHAR)); - boolean result = matcher.test(spec); - - assertThat(result).isFalse(); - } - - @Test - public void notBasicType() { - ColumnMetadata spec = - new DefaultColumnMetadata( - CqlIdentifier.fromInternal("keyspace"), - CqlIdentifier.fromInternal("collection"), - CqlIdentifier.fromInternal("column"), - new DefaultMapType( - new PrimitiveType(ProtocolConstants.DataType.INT), - new PrimitiveType(ProtocolConstants.DataType.INT), - false), - false); - - CqlColumnMatcher.BasicType matcher = - new CqlColumnMatcher.BasicType( - CqlIdentifier.fromInternal("column"), - new PrimitiveType(ProtocolConstants.DataType.VARCHAR)); - boolean result = 
matcher.test(spec); - - assertThat(result).isFalse(); - } - - @Test - public void wrongName() { - ColumnMetadata spec = - new DefaultColumnMetadata( - CqlIdentifier.fromInternal("keyspace"), - CqlIdentifier.fromInternal("collection"), - CqlIdentifier.fromInternal("column"), - new PrimitiveType(ProtocolConstants.DataType.VARCHAR), - false); - - CqlColumnMatcher.BasicType matcher = - new CqlColumnMatcher.BasicType( - CqlIdentifier.fromInternal("wrong"), - new PrimitiveType(ProtocolConstants.DataType.VARCHAR)); - boolean result = matcher.test(spec); - - assertThat(result).isFalse(); - } - } - - @Nested - class Tuple { - - @Test - public void happyPath() { - DataType type1 = new PrimitiveType(ProtocolConstants.DataType.VARCHAR); - DataType type2 = new PrimitiveType(ProtocolConstants.DataType.INT); - List list = Arrays.asList(type1, type2); - ColumnMetadata spec = - new DefaultColumnMetadata( - CqlIdentifier.fromInternal("keyspace"), - CqlIdentifier.fromInternal("collection"), - CqlIdentifier.fromInternal("column"), - new DefaultTupleType(list), - false); - - CqlColumnMatcher.Tuple matcher = - new CqlColumnMatcher.Tuple( - CqlIdentifier.fromInternal("column"), - new PrimitiveType(ProtocolConstants.DataType.VARCHAR), - new PrimitiveType(ProtocolConstants.DataType.INT)); - boolean result = matcher.test(spec); - - assertThat(result).isTrue(); - } - - @Test - public void wrongOrder() { - DataType type1 = new PrimitiveType(ProtocolConstants.DataType.VARCHAR); - DataType type2 = new PrimitiveType(ProtocolConstants.DataType.INT); - List list = Arrays.asList(type1, type2); - ColumnMetadata spec = - new DefaultColumnMetadata( - CqlIdentifier.fromInternal("keyspace"), - CqlIdentifier.fromInternal("collection"), - CqlIdentifier.fromInternal("column"), - new DefaultTupleType(list), - false); - - CqlColumnMatcher.Tuple matcher = - new CqlColumnMatcher.Tuple( - CqlIdentifier.fromInternal("column"), - new PrimitiveType(ProtocolConstants.DataType.INT), - new 
PrimitiveType(ProtocolConstants.DataType.VARCHAR)); - boolean result = matcher.test(spec); - - assertThat(result).isFalse(); - } - - @Test - public void wrongTuple() { - DataType type1 = new PrimitiveType(ProtocolConstants.DataType.VARCHAR); - DataType type2 = new PrimitiveType(ProtocolConstants.DataType.INT); - List list = Arrays.asList(type1, type2); - ColumnMetadata spec = - new DefaultColumnMetadata( - CqlIdentifier.fromInternal("keyspace"), - CqlIdentifier.fromInternal("collection"), - CqlIdentifier.fromInternal("column"), - new DefaultTupleType(list), - false); - - CqlColumnMatcher.Tuple matcher = - new CqlColumnMatcher.Tuple( - CqlIdentifier.fromInternal("column"), - new PrimitiveType(ProtocolConstants.DataType.INT)); - boolean result = matcher.test(spec); - - assertThat(result).isFalse(); - } - - @Test - public void notTuple() { - ColumnMetadata spec = - new DefaultColumnMetadata( - CqlIdentifier.fromInternal("keyspace"), - CqlIdentifier.fromInternal("collection"), - CqlIdentifier.fromInternal("column"), - new PrimitiveType(ProtocolConstants.DataType.VARCHAR), - false); - - CqlColumnMatcher.Tuple matcher = - new CqlColumnMatcher.Tuple( - CqlIdentifier.fromInternal("column"), - new PrimitiveType(ProtocolConstants.DataType.INT)); - boolean result = matcher.test(spec); - - assertThat(result).isFalse(); - } - - @Test - public void wrongColumn() { - DataType type1 = new PrimitiveType(ProtocolConstants.DataType.VARCHAR); - DataType type2 = new PrimitiveType(ProtocolConstants.DataType.INT); - List list = Arrays.asList(type1, type2); - ColumnMetadata spec = - new DefaultColumnMetadata( - CqlIdentifier.fromInternal("keyspace"), - CqlIdentifier.fromInternal("collection"), - CqlIdentifier.fromInternal("column"), - new DefaultTupleType(list), - false); - - CqlColumnMatcher.Tuple matcher = - new CqlColumnMatcher.Tuple( - CqlIdentifier.fromInternal("wrong"), - new PrimitiveType(ProtocolConstants.DataType.VARCHAR), - new PrimitiveType(ProtocolConstants.DataType.INT)); - 
boolean result = matcher.test(spec); - - assertThat(result).isFalse(); - } - } - - @Nested - class Map { - - @Test - public void happyPath() { - DataType key = new PrimitiveType(ProtocolConstants.DataType.VARCHAR); - DataType value = new PrimitiveType(ProtocolConstants.DataType.INT); - ColumnMetadata spec = - new DefaultColumnMetadata( - CqlIdentifier.fromInternal("keyspace"), - CqlIdentifier.fromInternal("collection"), - CqlIdentifier.fromInternal("column"), - new DefaultMapType(key, value, false), - false); - - CqlColumnMatcher.Map matcher = - new CqlColumnMatcher.Map( - CqlIdentifier.fromInternal("column"), - new PrimitiveType(ProtocolConstants.DataType.VARCHAR), - new PrimitiveType(ProtocolConstants.DataType.INT)); - boolean result = matcher.test(spec); - - assertThat(result).isTrue(); - } - - @Test - public void wrongValue() { - DataType key = new PrimitiveType(ProtocolConstants.DataType.VARCHAR); - DataType value = new PrimitiveType(ProtocolConstants.DataType.INT); - ColumnMetadata spec = - new DefaultColumnMetadata( - CqlIdentifier.fromInternal("keyspace"), - CqlIdentifier.fromInternal("collection"), - CqlIdentifier.fromInternal("column"), - new DefaultMapType(key, value, false), - false); - - CqlColumnMatcher.Map matcher = - new CqlColumnMatcher.Map( - CqlIdentifier.fromInternal("column"), - new PrimitiveType(ProtocolConstants.DataType.VARCHAR), - new PrimitiveType(ProtocolConstants.DataType.FLOAT)); - boolean result = matcher.test(spec); - - assertThat(result).isFalse(); - } - - @Test - public void wrongKey() { - DataType key = new PrimitiveType(ProtocolConstants.DataType.VARCHAR); - DataType value = new PrimitiveType(ProtocolConstants.DataType.INT); - ColumnMetadata spec = - new DefaultColumnMetadata( - CqlIdentifier.fromInternal("keyspace"), - CqlIdentifier.fromInternal("collection"), - CqlIdentifier.fromInternal("column"), - new DefaultMapType(key, value, false), - false); - - CqlColumnMatcher.Map matcher = - new CqlColumnMatcher.Map( - 
CqlIdentifier.fromInternal("column"), - new PrimitiveType(ProtocolConstants.DataType.INT), - new PrimitiveType(ProtocolConstants.DataType.INT)); - boolean result = matcher.test(spec); - - assertThat(result).isFalse(); - } - - @Test - public void notMap() { - ColumnMetadata spec = - new DefaultColumnMetadata( - CqlIdentifier.fromInternal("keyspace"), - CqlIdentifier.fromInternal("collection"), - CqlIdentifier.fromInternal("column"), - new PrimitiveType(ProtocolConstants.DataType.VARCHAR), - false); - - CqlColumnMatcher.Map matcher = - new CqlColumnMatcher.Map( - CqlIdentifier.fromInternal("column"), - new PrimitiveType(ProtocolConstants.DataType.VARCHAR), - new PrimitiveType(ProtocolConstants.DataType.INT)); - boolean result = matcher.test(spec); - - assertThat(result).isFalse(); - } - - @Test - public void wrongColumn() { - DataType key = new PrimitiveType(ProtocolConstants.DataType.VARCHAR); - DataType value = new PrimitiveType(ProtocolConstants.DataType.INT); - ColumnMetadata spec = - new DefaultColumnMetadata( - CqlIdentifier.fromInternal("keyspace"), - CqlIdentifier.fromInternal("collection"), - CqlIdentifier.fromInternal("column"), - new DefaultMapType(key, value, false), - false); - - CqlColumnMatcher.Map matcher = - new CqlColumnMatcher.Map( - CqlIdentifier.fromInternal("wrong"), - new PrimitiveType(ProtocolConstants.DataType.VARCHAR), - new PrimitiveType(ProtocolConstants.DataType.INT)); - boolean result = matcher.test(spec); - - assertThat(result).isFalse(); - } - } - - @Nested - class Set { - - @Test - public void happyPath() { - ColumnMetadata spec = - new DefaultColumnMetadata( - CqlIdentifier.fromInternal("keyspace"), - CqlIdentifier.fromInternal("collection"), - CqlIdentifier.fromInternal("column"), - new DefaultSetType(new PrimitiveType(ProtocolConstants.DataType.VARCHAR), false), - false); - - CqlColumnMatcher.Set matcher = - new CqlColumnMatcher.Set( - CqlIdentifier.fromInternal("column"), - new PrimitiveType(ProtocolConstants.DataType.VARCHAR)); - 
boolean result = matcher.test(spec); - - assertThat(result).isTrue(); - } - - @Test - public void wrongType() { - ColumnMetadata spec = - new DefaultColumnMetadata( - CqlIdentifier.fromInternal("keyspace"), - CqlIdentifier.fromInternal("collection"), - CqlIdentifier.fromInternal("column"), - new DefaultSetType(new PrimitiveType(ProtocolConstants.DataType.VARCHAR), false), - false); - - CqlColumnMatcher.Set matcher = - new CqlColumnMatcher.Set( - CqlIdentifier.fromInternal("column"), - new PrimitiveType(ProtocolConstants.DataType.INT)); - boolean result = matcher.test(spec); - - assertThat(result).isFalse(); - } - - @Test - public void notSet() { - ColumnMetadata spec = - new DefaultColumnMetadata( - CqlIdentifier.fromInternal("keyspace"), - CqlIdentifier.fromInternal("collection"), - CqlIdentifier.fromInternal("column"), - new PrimitiveType(ProtocolConstants.DataType.VARCHAR), - false); - - CqlColumnMatcher.Set matcher = - new CqlColumnMatcher.Set( - CqlIdentifier.fromInternal("column"), - new PrimitiveType(ProtocolConstants.DataType.INT)); - boolean result = matcher.test(spec); - - assertThat(result).isFalse(); - } - - @Test - public void wrongColumn() { - ColumnMetadata spec = - new DefaultColumnMetadata( - CqlIdentifier.fromInternal("keyspace"), - CqlIdentifier.fromInternal("collection"), - CqlIdentifier.fromInternal("column"), - new DefaultSetType(new PrimitiveType(ProtocolConstants.DataType.VARCHAR), false), - false); - - CqlColumnMatcher.Set matcher = - new CqlColumnMatcher.Set( - CqlIdentifier.fromInternal("wrong"), - new PrimitiveType(ProtocolConstants.DataType.VARCHAR)); - boolean result = matcher.test(spec); - - assertThat(result).isFalse(); - } - } -} diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/schema/collections/spec/SuperShreddingBuilderTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/schema/collections/spec/SuperShreddingBuilderTest.java new file mode 100644 index 0000000000..0b7f750b81 --- /dev/null +++ 
b/src/test/java/io/stargate/sgv2/jsonapi/service/schema/collections/spec/SuperShreddingBuilderTest.java @@ -0,0 +1,189 @@ +package io.stargate.sgv2.jsonapi.service.schema.collections.spec; + +import static io.stargate.sgv2.jsonapi.service.schema.collections.spec.SuperShreddingCQL.collapseWhitespace; +import static org.assertj.core.api.Assertions.assertThat; + +import com.datastax.oss.driver.api.core.CqlIdentifier; +import com.datastax.oss.driver.api.core.metadata.schema.Describable; +import io.stargate.sgv2.jsonapi.TestConstants; +import java.util.*; +import java.util.stream.Collectors; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Base for classes that test a SuperShreddingBuilder subclass. + * + *

The testing process is a little complicated, but here are the reasons: we want to avoid + * repeating the table def in many places, avoid testing raw CQL, we want to be able to test + * high-level things like the statements or the schema of a table actually in the DB. And finally, + * once you get to a real table, it is "bound" with names and real values, which often results in + * using the same name for all tables etc. because it makes the test easier. The end result we want + * is that we can test bound metadata and statements that represent a real named table with values + * without needing hard coded CQL, and without repeated code that builds cql. + * + *

So the testing strategy is below, building up on each layer: + * + *

    + *
  1. A single instance of CQL of a specific example of super shredding is defined in the test + * class {@link SuperShreddingCQLBuilderTest}, which validates that {@link + * SuperShreddingCQLBuilder} can create CQL that matches this specific example. This is our + * base level ground truth. + *
  2. Creating TableMetadata and IndexMetadata objects via the {@link + * SuperShreddingMetadataBuilder} is tested by the test class {@link + * SuperShreddingMetadataBuilderTest} which validates the CQL generated by the driver for + * these objects using the {@link SuperShreddingCQLBuilder}. TableMetadata is "bound" with + * names etc, and we use it as test data to represent what the driver returns about a table. + *
  3. TODO: we generate SimpleStatements via a build, and validate the CQL against the cql builder + *
+ * + * More simply, do the below with minimum duplication: + * + *
    + *
  1. Validate dynamic cql string against static cql string. + *
  2. Validate faked driver metadata against previously validated dynamic cql string + *
  3. Validate super shredding table predicate against previously validated metadata + *
  4. Validate statement objects against previously validated dynamic cql string + *
+ */ +public abstract class SuperShreddingBuilderTest { + + private static final Logger LOGGER = LoggerFactory.getLogger(SuperShreddingBuilderTest.class); + + protected final TestConstants TEST_CONSTANTS = new TestConstants(); + + // see constantIdentifiers + private static final CqlIdentifier KEYSPACE = CqlIdentifier.fromInternal("keyspace"); + private static final CqlIdentifier TABLE = CqlIdentifier.fromInternal("documents"); + + protected static final String COMMENT = + """ + {"collection":{"name":"documents","schema_version":2}}"""; + + protected static final int VECTOR_LENGTH = 1024; + protected static final String VECTOR_SIMILARITY_FUNCTION = "cosine"; + protected static final String VECTOR_SOURCE_MODEL = "OTHER"; + + protected static final String LEXICAL_INDEX_ANALYZER = "standard"; + + // NOTE: For validating the output of CQLBuilder against constant CQL we need + // static keyspace & table names, other tests should use TestConstants. + protected final boolean constantIdentifiers; + + // When creating CQL from Table or Index Metadata they do not add an IF NOT EXISTS + // so when comparing the CQL from one of these we need to set + // ifNotExists to false. + // BUT when testing the ground truth with CqlBuilderTest or testing + // SchmeaBiulder against CqlBUilder will normally want it enabled + protected final boolean ifNotExists; + + protected SuperShreddingBuilderTest() { + this(false, true); + } + + protected SuperShreddingBuilderTest(boolean constantIdentifiers, boolean ifNotExists) { + this.constantIdentifiers = constantIdentifiers; + this.ifNotExists = ifNotExists; + } + + protected CqlIdentifier keyspace() { + return constantIdentifiers ? KEYSPACE : TEST_CONSTANTS.COLLECTION_IDENTIFIER.keyspace(); + } + + protected CqlIdentifier table() { + return constantIdentifiers ? 
TABLE : TEST_CONSTANTS.COLLECTION_IDENTIFIER.table(); + } + + protected > T configDefault(T builder) { + return builder.withKeyspace(keyspace()).withCollection(table()).withIfNotExists(ifNotExists); + } + + protected > T configAllOptional(T builder) { + return configDefault(builder) + .withComment(COMMENT) + .withVector(VECTOR_LENGTH, VECTOR_SIMILARITY_FUNCTION, VECTOR_SOURCE_MODEL) + .withLexical(LEXICAL_INDEX_ANALYZER); + } + + protected > T configNoOptional(T builder) { + return configDefault(builder).withComment(COMMENT); + } + + protected > T configVectorOnly(T builder) { + return configDefault(builder) + .withComment(COMMENT) + .withVector(VECTOR_LENGTH, VECTOR_SIMILARITY_FUNCTION, VECTOR_SOURCE_MODEL); + } + + protected > T configLexicalOnly(T builder) { + return configDefault(builder).withComment(COMMENT).withLexical(LEXICAL_INDEX_ANALYZER); + } + + protected static List> upcastString( + List> components) { + return new ArrayList<>(components); + } + + protected static List> upcastDesc( + List> components) { + return new ArrayList<>(components); + } + + protected void assertComponents( + String testName, + List> expectedComponents, + List> actualComponents) { + + Objects.requireNonNull(expectedComponents, "expectedComponents must be null"); + Objects.requireNonNull(actualComponents, "actualComponents must be null"); + + assertThat(actualComponents) + .as("%s - Components same size as expected", testName) + .hasSize(expectedComponents.size()); + + for (var expected : expectedComponents) { + + var actual = + actualComponents.stream() + .filter(component -> component.identifier().equals(expected.identifier())) + .findFirst() + .orElse(null); + assertThat(actual) + .as("%s - Expected Component '%s' not found in actual", testName, expected.identifier()) + .isNotNull(); + + assertThat(actual.type()) + .as( + "%s - Actual Component with name '%s' should be of type '%s'", + testName, expected.identifier(), expected.type()) + .isEqualTo(expected.type()); + + var 
expectedCQL = collapseWhitespace(expected.asCql()); + var actualCql = collapseWhitespace(actual.asCql()); + + if (LOGGER.isInfoEnabled()) { + // extra spaces to line up for easier reading + LOGGER.info("assertTableCql() - testName: {}, expectedCOL: {}", testName, expectedCQL); + LOGGER.info("assertTableCql() - testName: {}, actualCQL: {}", testName, actualCql); + } + + assertThat(actualCql) + .as( + "%s - Actual CQL for component '%s' should match expected", + testName, expected.identifier()) + .isEqualTo(expectedCQL); + } + + Set expectedIdentifiers = + expectedComponents.stream() + .map(SuperShreddingBuilder.SuperShreddingComponent::identifier) + .collect(Collectors.toSet()); + + var unexpectedComponents = + actualComponents.stream() + .filter(component -> !expectedIdentifiers.contains(component.identifier())) + .toList(); + + assertThat(unexpectedComponents).as("%s - No unexpected components found", testName).isEmpty(); + } +} diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/schema/collections/spec/SuperShreddingCQLBuilderTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/schema/collections/spec/SuperShreddingCQLBuilderTest.java new file mode 100644 index 0000000000..47beda24eb --- /dev/null +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/schema/collections/spec/SuperShreddingCQLBuilderTest.java @@ -0,0 +1,262 @@ +package io.stargate.sgv2.jsonapi.service.schema.collections.spec; + +import com.datastax.oss.driver.api.core.CqlIdentifier; +import java.util.*; +import org.junit.jupiter.api.Test; + +/** + * This is the base ground truth for what the CQL statements an actual instance of a super shredding + * table should look like. This tests that we can build a CQL string to match literal CQL, and then + * we build tests up from there. + * + *

Try to keep as literal as possible, validation of how the super shredding table is built from + * this test. + * + *

See {@link SuperShreddingBuilder} for more details. + */ +public class SuperShreddingCQLBuilderTest extends SuperShreddingBuilderTest { + + private static final String CREATE_TABLE_ALL_OPTIONAL = + """ + CREATE TABLE IF NOT EXISTS "keyspace"."documents" ( + "key" frozen>, + "tx_id" timeuuid, + "doc_json" text, + "exist_keys" set, + "array_size" map, + "array_contains" set, + "query_bool_values" map, + "query_dbl_values" map, + "query_text_values" map, + "query_timestamp_values" map, + "query_null_values" set, + "query_vector_value" vector, + "query_lexical_value" text, + PRIMARY KEY ("key") + ) WITH + comment = '{"collection":{"name":"documents","schema_version":2}}'; + """; + + private static final String CREATE_TABLE_NO_OPTIONAL = + """ + CREATE TABLE IF NOT EXISTS "keyspace"."documents" ( + "key" frozen>, + "tx_id" timeuuid, + "doc_json" text, + "exist_keys" set, + "array_size" map, + "array_contains" set, + "query_bool_values" map, + "query_dbl_values" map, + "query_text_values" map, + "query_timestamp_values" map, + "query_null_values" set, + PRIMARY KEY ("key") + ) WITH + comment = '{"collection":{"name":"documents","schema_version":2}}'; + """; + + private static final String CREATE_TABLE_VECTOR_ONLY = + """ + CREATE TABLE IF NOT EXISTS "keyspace"."documents" ( + "key" frozen>, + "tx_id" timeuuid, + "doc_json" text, + "exist_keys" set, + "array_size" map, + "array_contains" set, + "query_bool_values" map, + "query_dbl_values" map, + "query_text_values" map, + "query_timestamp_values" map, + "query_null_values" set, + "query_vector_value" vector, + PRIMARY KEY ("key") + ) WITH + comment = '{"collection":{"name":"documents","schema_version":2}}'; + """; + + private static final String CREATE_TABLE_LEXICAL_ONLY = + """ + CREATE TABLE IF NOT EXISTS "keyspace"."documents" ( + "key" frozen>, + "tx_id" timeuuid, + "doc_json" text, + "exist_keys" set, + "array_size" map, + "array_contains" set, + "query_bool_values" map, + "query_dbl_values" map, + 
"query_text_values" map, + "query_timestamp_values" map, + "query_null_values" set, + "query_lexical_value" text, + PRIMARY KEY ("key") + ) WITH + comment = '{"collection":{"name":"documents","schema_version":2}}'; + """; + + private static final Map REQUIRED_INDEXES = + Map.of( + "documents_exist_keys", + """ + CREATE CUSTOM INDEX IF NOT EXISTS "documents_exist_keys" + ON "keyspace"."documents" (values("exist_keys")) + USING 'StorageAttachedIndex'; + """, + "documents_array_size", + """ + CREATE CUSTOM INDEX IF NOT EXISTS "documents_array_size" + ON "keyspace"."documents" (entries("array_size")) + USING 'StorageAttachedIndex'; + """, + "documents_array_contains", + """ + CREATE CUSTOM INDEX IF NOT EXISTS "documents_array_contains" + ON "keyspace"."documents" (values("array_contains")) + USING 'StorageAttachedIndex'; + """, + "documents_query_bool_values", + """ + CREATE CUSTOM INDEX IF NOT EXISTS "documents_query_bool_values" + ON "keyspace"."documents" (entries("query_bool_values")) + USING 'StorageAttachedIndex'; + """, + "documents_query_dbl_values", + """ + CREATE CUSTOM INDEX IF NOT EXISTS "documents_query_dbl_values" + ON "keyspace"."documents" (entries("query_dbl_values")) + USING 'StorageAttachedIndex'; + """, + "documents_query_text_values", + """ + CREATE CUSTOM INDEX IF NOT EXISTS "documents_query_text_values" + ON "keyspace"."documents" (entries("query_text_values")) + USING 'StorageAttachedIndex'; + """, + "documents_query_timestamp_values", + """ + CREATE CUSTOM INDEX IF NOT EXISTS "documents_query_timestamp_values" + ON "keyspace"."documents" (entries("query_timestamp_values")) + USING 'StorageAttachedIndex'; + """, + "documents_query_null_values", + """ + CREATE CUSTOM INDEX IF NOT EXISTS "documents_query_null_values" + ON "keyspace"."documents" (values("query_null_values")) + USING 'StorageAttachedIndex'; + """); + + private static final Map OPTIONAL_INDEXES = + Map.of( + "documents_query_vector_value", + """ + CREATE CUSTOM INDEX IF NOT EXISTS 
"documents_query_vector_value" + ON "keyspace"."documents" ("query_vector_value") + USING 'StorageAttachedIndex' + WITH OPTIONS = { 'similarity_function' : 'cosine', 'source_model' : 'OTHER'}; + """, + "documents_query_lexical_value", + """ + CREATE CUSTOM INDEX IF NOT EXISTS "documents_query_lexical_value" + ON "keyspace"."documents" ("query_lexical_value") + USING 'StorageAttachedIndex' + WITH OPTIONS = { 'index_analyzer' : 'standard'}; + """); + + private static final Map ALL_INDEXES; + + static { + var local = new LinkedHashMap<>(REQUIRED_INDEXES); + local.putAll(OPTIONAL_INDEXES); + ALL_INDEXES = Collections.unmodifiableMap(local); + } + + public SuperShreddingCQLBuilderTest() { + super(true, true); + // ^^ need constant names that will match the strings in this class, want IF NOT EXIST + + } + + private List> asComponents( + String tableCql, Map indexCql) { + var components = + new ArrayList>(1 + indexCql.size()); + + components.add( + new SuperShreddingBuilder.SuperShreddingComponent<>( + table(), SuperShreddingBuilder.SuperShreddingComponentType.TABLE, tableCql.trim())); + + for (var indexEntry : indexCql.entrySet()) { + components.add( + new SuperShreddingBuilder.SuperShreddingComponent<>( + CqlIdentifier.fromInternal(indexEntry.getKey()), + SuperShreddingBuilder.SuperShreddingComponentType.INDEX, + indexEntry.getValue().trim())); + } + + return components; + } + + @Test + public void createTableAllOptional() { + + var expectedComponents = asComponents(CREATE_TABLE_ALL_OPTIONAL, ALL_INDEXES); + + var builder = configAllOptional(SuperShreddingCQLBuilder.cql()); + var actualComponents = builder.build(); + + assertComponents( + "createTableAllOptional()", + upcastString(expectedComponents), + upcastString(actualComponents)); + } + + @Test + public void createTableNoOptional() { + + var expectedComponents = asComponents(CREATE_TABLE_NO_OPTIONAL, REQUIRED_INDEXES); + + var builder = configNoOptional(SuperShreddingCQLBuilder.cql()); + var actualComponents = 
builder.build(); + + assertComponents( + "createTableNoOptional()", + upcastString(expectedComponents), + upcastString(actualComponents)); + } + + @Test + public void createTableVectorOnly() { + + var expectedIndexes = new LinkedHashMap<>(REQUIRED_INDEXES); + expectedIndexes.put( + "documents_query_vector_value", OPTIONAL_INDEXES.get("documents_query_vector_value")); + var expectedComponents = asComponents(CREATE_TABLE_VECTOR_ONLY, expectedIndexes); + + var builder = configVectorOnly(SuperShreddingCQLBuilder.cql()); + var actualComponents = builder.build(); + + assertComponents( + "createTableVectorOnly()", + upcastString(expectedComponents), + upcastString(actualComponents)); + } + + @Test + public void createTableLexicalOnly() { + + var expectedIndexes = new LinkedHashMap<>(REQUIRED_INDEXES); + expectedIndexes.put( + "documents_query_lexical_value", OPTIONAL_INDEXES.get("documents_query_lexical_value")); + var expectedComponents = asComponents(CREATE_TABLE_LEXICAL_ONLY, expectedIndexes); + + var builder = configLexicalOnly(SuperShreddingCQLBuilder.cql()); + var actualComponents = builder.build(); + + assertComponents( + "createTableLexicalOnly()", + upcastString(expectedComponents), + upcastString(actualComponents)); + } +} diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/schema/collections/spec/SuperShreddingMetadataBuilderTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/schema/collections/spec/SuperShreddingMetadataBuilderTest.java new file mode 100644 index 0000000000..431aff8baf --- /dev/null +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/schema/collections/spec/SuperShreddingMetadataBuilderTest.java @@ -0,0 +1,68 @@ +package io.stargate.sgv2.jsonapi.service.schema.collections.spec; + +import io.stargate.sgv2.jsonapi.TestConstants; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Testing that when we build TableMetadata and IndexMetadata from + * {@link 
SuperShreddingMetadataBuilder} the CQL it represents matches that from + * {@link SuperShreddingCQLBuilder} , which was ground truthed to constant strings. + */ +public class SuperShreddingMetadataBuilderTest extends SuperShreddingBuilderTest { + + public SuperShreddingMetadataBuilderTest() { + super(false, false); + // ^^ ok to use dynamic schema names, but need to exclude ifNotexists because + // cql from TableMetadata etc does not add it. + } + + @Test + public void createTableAllOptional() { + + var expectedCqlBuilder = configAllOptional(SuperShreddingCQLBuilder.cql()); + var actualMetadataBuilder = configAllOptional(SuperShreddingMetadataBuilder.metadata()); + + assertComponents( + "createTableAllOptional()", + upcastString(expectedCqlBuilder.build()), + upcastDesc(actualMetadataBuilder.build())); + } + + @Test + public void createTableNoOptional() { + + var expectedCqlBuilder = configNoOptional(SuperShreddingCQLBuilder.cql()); + var actualMetadataBuilder = configNoOptional(SuperShreddingMetadataBuilder.metadata()); + + assertComponents( + "createTableNoOptional()", + upcastString(expectedCqlBuilder.build()), + upcastDesc(actualMetadataBuilder.build())); + } + + @Test + public void createTableVectorOnly() { + + var expectedCqlBuilder = configVectorOnly(SuperShreddingCQLBuilder.cql()); + var actualMetadataBuilder = configVectorOnly(SuperShreddingMetadataBuilder.metadata()); + + assertComponents( + "createTableVectorOnly()", + upcastString(expectedCqlBuilder.build()), + upcastDesc(actualMetadataBuilder.build())); + } + + @Test + public void createTableLexicalOnly() { + + var expectedCqlBuilder = configLexicalOnly(SuperShreddingCQLBuilder.cql()); + var actualMetadataBuilder = configLexicalOnly(SuperShreddingMetadataBuilder.metadata()); + + assertComponents( + "createTableLexicalOnly()", + upcastString(expectedCqlBuilder.build()), + upcastDesc(actualMetadataBuilder.build())); + } +} diff --git 
a/src/test/java/io/stargate/sgv2/jsonapi/service/schema/collections/CollectionTableMatcherTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/schema/collections/spec/SuperShreddingTablePredicateTest.java similarity index 95% rename from src/test/java/io/stargate/sgv2/jsonapi/service/schema/collections/CollectionTableMatcherTest.java rename to src/test/java/io/stargate/sgv2/jsonapi/service/schema/collections/spec/SuperShreddingTablePredicateTest.java index 27ca79a925..83e4998ff7 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/schema/collections/CollectionTableMatcherTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/schema/collections/spec/SuperShreddingTablePredicateTest.java @@ -1,4 +1,4 @@ -package io.stargate.sgv2.jsonapi.service.schema.collections; +package io.stargate.sgv2.jsonapi.service.schema.collections.spec; import static org.assertj.core.api.Assertions.assertThat; @@ -17,9 +17,13 @@ import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; -class CollectionTableMatcherTest { +/** + * This is the initial test for the super shredding table predicate when that class was called + * CollectionTableMatcher, left in place to show we are passing the old tests. 
+ */ +class SuperShreddingTablePredicateTest { - CollectionTableMatcher tableMatcher = new CollectionTableMatcher(); + SuperShreddingTablePredicate tableMatcher = new SuperShreddingTablePredicate(); @Nested class BuiltConditionPredicateTest { diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/schema/collections/spec/SuperShreddingTablePredicateTestV2.java b/src/test/java/io/stargate/sgv2/jsonapi/service/schema/collections/spec/SuperShreddingTablePredicateTestV2.java new file mode 100644 index 0000000000..da9d07e3e0 --- /dev/null +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/schema/collections/spec/SuperShreddingTablePredicateTestV2.java @@ -0,0 +1,255 @@ +package io.stargate.sgv2.jsonapi.service.schema.collections.spec; + +import static io.stargate.sgv2.jsonapi.exception.ErrorFormatters.errFmt; +import static io.stargate.sgv2.jsonapi.util.CqlIdentifierUtil.cqlIdentifierToMessageString; +import static io.stargate.sgv2.jsonapi.util.TableMetadataTestUtil.*; +import static org.assertj.core.api.Assertions.assertThat; + +import com.datastax.oss.driver.api.core.metadata.schema.TableMetadata; +import com.datastax.oss.driver.api.core.type.DataTypes; +import io.stargate.sgv2.jsonapi.util.LoggerTestWrapper; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class SuperShreddingTablePredicateTestV2 extends SuperShreddingBuilderTest { + private static final Logger LOGGER = + LoggerFactory.getLogger(SuperShreddingTablePredicateTestV2.class); + + private void assertPredicate( + String testName, + boolean expectedResult, + SuperShreddingPredicateBuilder predicateBuilder, + SuperShreddingMetadataBuilder builder, + String logMessage) { + assertPredicate( + testName, + expectedResult, + predicateBuilder.buildTableOnly(), + (TableMetadata) builder.buildTableOnly(), + logMessage); + } + + private void assertPredicate( + String testName, + boolean expectedResult, + SuperShreddingTablePredicate predicate, + 
TableMetadata tableMetadata, + String logMessage) { + + try (var logWrapper = new LoggerTestWrapper(SuperShreddingTablePredicate.class)) { + + if (LOGGER.isInfoEnabled()) { + LOGGER.info( + "{} - expectedResult:{} , tableMetadata:{}", + testName, + expectedResult, + tableMetadata == null ? "null" : tableMetadata.describe(true)); + } + + var predicateResult = predicate.test(tableMetadata); + LOGGER.info( + "{} - expectedResult:{}, predicateResult:{}", testName, expectedResult, predicateResult); + assertThat(predicateResult) + .as("%s - predicate is %s", testName, expectedResult) + .isEqualTo(expectedResult); + + if (logMessage != null) { + assertThat(logWrapper.logMessages()) + .as("%s - log message: %s", testName, logMessage) + .anyMatch(s -> s.contains(logMessage)); + } + } + } + + @Test + public void nullTableMetadata() { + var predicate = configAllOptional(SuperShreddingPredicateBuilder.predicate()).buildTableOnly(); + + assertPredicate("nullTableMetadata()", false, predicate, null, null); + } + + @Test + public void createTableAllOptional() { + + var metadataBuilder = configAllOptional(SuperShreddingBuilder.metadata()); + var predicateBuilder = configAllOptional(SuperShreddingPredicateBuilder.predicate()); + + assertPredicate("createTableAllOptional()", true, predicateBuilder, metadataBuilder, null); + } + + @Test + public void createTableNoOptional() { + + var metadataBuilder = configNoOptional(SuperShreddingBuilder.metadata()); + var predicateBuilder = configNoOptional(SuperShreddingPredicateBuilder.predicate()); + assertPredicate("createTableNoOptional()", true, predicateBuilder, metadataBuilder, null); + } + + @Test + public void createTableVectorOnly() { + + var metadataBuilder = configVectorOnly(SuperShreddingBuilder.metadata()); + var predicateBuilder = configVectorOnly(SuperShreddingBuilder.predicate()); + assertPredicate("createTableVectorOnly()", true, predicateBuilder, metadataBuilder, null); + } + + @Test + public void createTableLexicalOnly() { + + 
var metadataBuilder = configLexicalOnly(SuperShreddingBuilder.metadata()); + var predicateBuilder = configLexicalOnly(SuperShreddingBuilder.predicate()); + assertPredicate("createTableLexicalOnly()", true, predicateBuilder, metadataBuilder, null); + } + + @Test + public void removeColumns() { + + var metadataBuilder = configAllOptional(SuperShreddingBuilder.metadata()); + var tableMetadata = (TableMetadata) metadataBuilder.buildTableOnly(); + var predicate = configAllOptional(SuperShreddingBuilder.predicate()).buildTableOnly(); + + // we expect all columns to be present, so use that as the list + removeAllColumns(tableMetadata, SuperShreddingMetadata.Identifiers.ALL) + .forEach( + entry -> { + assertPredicate( + "removeColumns(%s)".formatted(entry.column()), + false, + predicate, + entry.tableMetadata(), + "columns missing, columns: " + cqlIdentifierToMessageString(entry.column())); + }); + } + + @Test + public void removePartitionKey() { + + var metadataBuilder = configAllOptional(SuperShreddingBuilder.metadata()); + var tableMetadata = (TableMetadata) metadataBuilder.buildTableOnly(); + var predicate = configAllOptional(SuperShreddingBuilder.predicate()).buildTableOnly(); + + removeAllPartitionKeys(tableMetadata) + .forEach( + entry -> { + assertPredicate( + "removePartitionKey(%s)".formatted(entry.column()), + false, + predicate, + entry.tableMetadata(), + "partition key missing, columns: " + + cqlIdentifierToMessageString(entry.column())); + }); + } + + @Test + public void swapColumnTypes() { + + var metadataBuilder = configAllOptional(SuperShreddingBuilder.metadata()); + var tableMetadata = (TableMetadata) metadataBuilder.buildTableOnly(); + var predicate = configAllOptional(SuperShreddingBuilder.predicate()).buildTableOnly(); + + // we expect all columns to be present, so use that as the list + swapTypesAllColumns( + tableMetadata, + SuperShreddingMetadata.Identifiers.ALL, + DataTypes.TINYINT, + DataTypes.TEXT) + .forEach( + entry -> { + assertPredicate( + 
"swapColumnTypes(%s)".formatted(entry.column()), + false, + predicate, + entry.tableMetadata(), + "columns missing, columns: " + cqlIdentifierToMessageString(entry.column())); + }); + } + + @Test + public void unexpectedPartitionKeys() { + + var metadataBuilder = configAllOptional(SuperShreddingBuilder.metadata()); + var tableMetadata = (TableMetadata) metadataBuilder.buildTableOnly(); + + var columnName = "unexpected_key"; + var updatedTableAppended = addPartitionKey(tableMetadata, false, columnName, DataTypes.TEXT); + var updatedTableClearFirst = addPartitionKey(tableMetadata, true, columnName, DataTypes.TEXT); + + var predicate = configAllOptional(SuperShreddingBuilder.predicate()).buildTableOnly(); + + assertPredicate( + "unexpectedPartitionKeys(%s - %s)".formatted(columnName, "appended"), + false, + predicate, + updatedTableAppended, + "unexpected columns in partition key, columns: %s(%s)" + .formatted(columnName, errFmt(DataTypes.TEXT))); + + // This is really the same as removing the key but testing for completeness + assertPredicate( + "unexpectedPartitionKeys(%s - %s)".formatted(columnName, "clearFirst"), + false, + predicate, + updatedTableClearFirst, + "partition key missing, columns: key"); + } + + @Test + public void unexpectedClusteringColumns() { + + var metadataBuilder = configAllOptional(SuperShreddingBuilder.metadata()); + var tableMetadata = (TableMetadata) metadataBuilder.buildTableOnly(); + var columnName = "unexpected_column"; + var updatedTable = addClusteringColumn(tableMetadata, columnName, DataTypes.TEXT); + + var predicate = configAllOptional(SuperShreddingBuilder.predicate()).buildTableOnly(); + + assertPredicate( + "unexpectedClusteringColumns(%s)".formatted(columnName), + false, + predicate, + updatedTable, + "unexpected columns in clustering key, columns: %s(%s)" + .formatted(columnName, errFmt(DataTypes.TEXT))); + } + + @Test + public void unexpectedColumnsStrictMode() { + + var metadataBuilder = 
configAllOptional(SuperShreddingBuilder.metadata()); + var tableMetadata = (TableMetadata) metadataBuilder.buildTableOnly(); + var columnName = "unexpected_column"; + var updatedTable = addColumn(tableMetadata, columnName, DataTypes.TEXT); + + var predicate = configAllOptional(SuperShreddingBuilder.predicate()).buildTableOnly(); + + assertPredicate( + "unexpectedColumnsStrictMode(%s)".formatted(columnName), + false, + predicate, + updatedTable, + "unexpected columns in strict mode, columns: unexpected_column(text)" + .formatted(columnName, errFmt(DataTypes.TEXT))); + } + + @Test + public void unexpectedColumnsRelaxedMode() { + + var metadataBuilder = configAllOptional(SuperShreddingBuilder.metadata()); + var tableMetadata = (TableMetadata) metadataBuilder.buildTableOnly(); + var columnName = "unexpected_column"; + var updatedTable = addColumn(tableMetadata, columnName, DataTypes.TEXT); + + var predicate = + configAllOptional(SuperShreddingBuilder.predicate()).withStrict(false).buildTableOnly(); + + // in non-strict mode, we can have an extra column + assertPredicate( + "unexpectedColumnsRelaxedMode(%s)".formatted(columnName), + true, + predicate, + updatedTable, + null); + } +} diff --git a/src/test/java/io/stargate/sgv2/jsonapi/util/ColumnMetadataPredicateTest.java b/src/test/java/io/stargate/sgv2/jsonapi/util/ColumnMetadataPredicateTest.java new file mode 100644 index 0000000000..9c7c9fb4b8 --- /dev/null +++ b/src/test/java/io/stargate/sgv2/jsonapi/util/ColumnMetadataPredicateTest.java @@ -0,0 +1,298 @@ +package io.stargate.sgv2.jsonapi.util; + +import static io.stargate.sgv2.jsonapi.util.CqlIdentifierUtil.cqlIdentifierToMessageString; +import static org.assertj.core.api.Assertions.assertThat; + +import com.datastax.oss.driver.api.core.CqlIdentifier; +import com.datastax.oss.driver.api.core.metadata.schema.ColumnMetadata; +import com.datastax.oss.driver.api.core.type.DataType; +import com.datastax.oss.driver.api.core.type.DataTypes; +import 
com.datastax.oss.driver.internal.core.metadata.schema.DefaultColumnMetadata; +import io.stargate.sgv2.jsonapi.TestConstants; +import io.stargate.sgv2.jsonapi.service.cqldriver.override.ExtendedVectorType; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; + +/** + * Tests for the {@link ColumnMetadataPredicate}. + * + *

NOTE: previously called CqlColumnMatcherTest + */ +class ColumnMetadataPredicateTest { + + private final TestConstants TEST_CONSTANTS = new TestConstants(); + + private final CqlIdentifier KEYSPACE = TEST_CONSTANTS.TABLE_IDENTIFIER.keyspace(); + private final CqlIdentifier TABLE = TEST_CONSTANTS.TABLE_IDENTIFIER.table(); + private final CqlIdentifier COLUMN = + CqlIdentifier.fromInternal("column_" + TEST_CONSTANTS.CORRELATION_ID); + private final CqlIdentifier WRONG = + CqlIdentifier.fromInternal("wrong_" + TEST_CONSTANTS.CORRELATION_ID); + + private ColumnMetadata columnMetadata(DataType type) { + return new DefaultColumnMetadata(KEYSPACE, TABLE, COLUMN, type, false); + } + + @Nested + class BasicType { + + @Test + public void correctMatch() { + var columnMetadata = columnMetadata(DataTypes.TEXT); + var matcher = new ColumnMetadataPredicate.Basic(COLUMN, DataTypes.TEXT); + + assertThat(matcher.test(columnMetadata)).isTrue(); + } + + @Test + public void wrongType() { + var columnMetadata = columnMetadata(DataTypes.INT); + var matcher = new ColumnMetadataPredicate.Basic(COLUMN, DataTypes.TEXT); + + assertThat(matcher.test(columnMetadata)).isFalse(); + } + + @Test + public void notBasicType() { + + var columnMetadata = columnMetadata(DataTypes.mapOf(DataTypes.INT, DataTypes.INT, false)); + var matcher = new ColumnMetadataPredicate.Basic(COLUMN, DataTypes.TEXT); + + assertThat(matcher.test(columnMetadata)).isFalse(); + } + + @Test + public void wrongName() { + var columnMetadata = columnMetadata(DataTypes.TEXT); + var matcher = new ColumnMetadataPredicate.Basic(WRONG, DataTypes.TEXT); + + assertThat(matcher.test(columnMetadata)).isFalse(); + } + + @Test + public void toStringFormat() { + var matcher = new ColumnMetadataPredicate.Basic(COLUMN, DataTypes.TEXT); + + assertThat(matcher.toString()).isEqualTo(cqlIdentifierToMessageString(COLUMN) + "(text)"); + } + } + + @Nested + class Tuple { + + @Test + public void correctMatch() { + var columnMetadata = 
columnMetadata(DataTypes.tupleOf(DataTypes.TEXT, DataTypes.INT)); + var matcher = new ColumnMetadataPredicate.Tuple(COLUMN, DataTypes.TEXT, DataTypes.INT); + + assertThat(matcher.test(columnMetadata)).isTrue(); + } + + @Test + public void wrongOrder() { + + var columnMetadata = columnMetadata(DataTypes.tupleOf(DataTypes.TEXT, DataTypes.INT)); + var matcher = new ColumnMetadataPredicate.Tuple(COLUMN, DataTypes.INT, DataTypes.TEXT); + + assertThat(matcher.test(columnMetadata)).isFalse(); + } + + @Test + public void wrongTuple() { + var columnMetadata = columnMetadata(DataTypes.tupleOf(DataTypes.TEXT, DataTypes.INT)); + var matcher = new ColumnMetadataPredicate.Tuple(COLUMN, DataTypes.INT); + + assertThat(matcher.test(columnMetadata)).isFalse(); + } + + @Test + public void notTuple() { + + var columnMetadata = columnMetadata(DataTypes.TEXT); + var matcher = new ColumnMetadataPredicate.Tuple(COLUMN, DataTypes.INT, DataTypes.TEXT); + + assertThat(matcher.test(columnMetadata)).isFalse(); + } + + @Test + public void wrongName() { + + var columnMetadata = columnMetadata(DataTypes.tupleOf(DataTypes.TEXT, DataTypes.INT)); + var matcher = new ColumnMetadataPredicate.Tuple(WRONG, DataTypes.TEXT, DataTypes.INT); + + assertThat(matcher.test(columnMetadata)).isFalse(); + } + + @Test + public void toStringFormat() { + var matcher = new ColumnMetadataPredicate.Tuple(COLUMN, DataTypes.TEXT, DataTypes.INT); + + assertThat(matcher.toString()) + .isEqualTo(cqlIdentifierToMessageString(COLUMN) + "(tuple)"); + } + } + + @Nested + class Map { + + @Test + public void correctMatch() { + + var columnMetadata = columnMetadata(DataTypes.mapOf(DataTypes.TEXT, DataTypes.INT)); + var matcher = new ColumnMetadataPredicate.Map(COLUMN, DataTypes.TEXT, DataTypes.INT); + + assertThat(matcher.test(columnMetadata)).isTrue(); + } + + @Test + public void wrongValue() { + + var columnMetadata = columnMetadata(DataTypes.mapOf(DataTypes.TEXT, DataTypes.TINYINT)); + var matcher = new 
ColumnMetadataPredicate.Map(COLUMN, DataTypes.TEXT, DataTypes.INT); + + assertThat(matcher.test(columnMetadata)).isFalse(); + } + + @Test + public void wrongKey() { + + var columnMetadata = columnMetadata(DataTypes.mapOf(DataTypes.INT, DataTypes.INT)); + var matcher = new ColumnMetadataPredicate.Map(COLUMN, DataTypes.TEXT, DataTypes.INT); + + assertThat(matcher.test(columnMetadata)).isFalse(); + } + + @Test + public void notMap() { + + var columnMetadata = columnMetadata(DataTypes.TEXT); + var matcher = new ColumnMetadataPredicate.Map(COLUMN, DataTypes.TEXT, DataTypes.INT); + + assertThat(matcher.test(columnMetadata)).isFalse(); + } + + @Test + public void wrongName() { + + var columnMetadata = columnMetadata(DataTypes.mapOf(DataTypes.TEXT, DataTypes.INT)); + var matcher = new ColumnMetadataPredicate.Map(WRONG, DataTypes.TEXT, DataTypes.INT); + + assertThat(matcher.test(columnMetadata)).isFalse(); + } + + @Test + public void toStringFormat() { + var matcher = new ColumnMetadataPredicate.Map(COLUMN, DataTypes.TEXT, DataTypes.INT); + + assertThat(matcher.toString()) + .isEqualTo(cqlIdentifierToMessageString(COLUMN) + "(map)"); + } + } + + @Nested + class Set { + + @Test + public void correctMatch() { + + var columnMetadata = columnMetadata(DataTypes.setOf(DataTypes.TEXT)); + var matcher = new ColumnMetadataPredicate.Set(COLUMN, DataTypes.TEXT); + + assertThat(matcher.test(columnMetadata)).isTrue(); + } + + @Test + public void wrongType() { + + var columnMetadata = columnMetadata(DataTypes.setOf(DataTypes.INT)); + var matcher = new ColumnMetadataPredicate.Set(COLUMN, DataTypes.TEXT); + + assertThat(matcher.test(columnMetadata)).isFalse(); + } + + @Test + public void notSet() { + + var columnMetadata = columnMetadata(DataTypes.TEXT); + var matcher = new ColumnMetadataPredicate.Set(COLUMN, DataTypes.TEXT); + + assertThat(matcher.test(columnMetadata)).isFalse(); + } + + @Test + public void wrongName() { + + var columnMetadata = 
columnMetadata(DataTypes.setOf(DataTypes.TEXT)); + var matcher = new ColumnMetadataPredicate.Set(WRONG, DataTypes.TEXT); + + assertThat(matcher.test(columnMetadata)).isFalse(); + } + + @Test + public void toStringFormat() { + var matcher = new ColumnMetadataPredicate.Set(COLUMN, DataTypes.TEXT); + + assertThat(matcher.toString()) + .isEqualTo(cqlIdentifierToMessageString(COLUMN) + "(set)"); + } + } + + @Nested + class Vector { + + @Test + public void correctMatchExtendedVectorType() { + + // making sure it works for both our extended and the default type + var columnMetadata = columnMetadata(new ExtendedVectorType(DataTypes.FLOAT, 1024)); + var matcher = new ColumnMetadataPredicate.Vector(COLUMN); + + assertThat(matcher.test(columnMetadata)).isTrue(); + } + + @Test + public void correctMatchDefaultVectorType() { + + // making sure it works for both our extended and the default type + var columnMetadata = columnMetadata(DataTypes.vectorOf(DataTypes.FLOAT, 1024)); + var matcher = new ColumnMetadataPredicate.Vector(COLUMN); + + assertThat(matcher.test(columnMetadata)).isTrue(); + } + + @Test + public void wrongVectorElementType() { + + var columnMetadata = columnMetadata(DataTypes.vectorOf(DataTypes.INT, 1024)); + var matcher = new ColumnMetadataPredicate.Vector(COLUMN, DataTypes.FLOAT); + + assertThat(matcher.test(columnMetadata)).isFalse(); + } + + @Test + public void notVector() { + + var columnMetadata = columnMetadata(DataTypes.TEXT); + var matcher = new ColumnMetadataPredicate.Vector(COLUMN, DataTypes.FLOAT); + + assertThat(matcher.test(columnMetadata)).isFalse(); + } + + @Test + public void wrongName() { + + var columnMetadata = columnMetadata(DataTypes.vectorOf(DataTypes.FLOAT, 1024)); + var matcher = new ColumnMetadataPredicate.Vector(WRONG, DataTypes.FLOAT); + + assertThat(matcher.test(columnMetadata)).isFalse(); + } + + @Test + public void toStringFormat() { + var matcher = new ColumnMetadataPredicate.Vector(COLUMN, DataTypes.FLOAT); + + 
assertThat(matcher.toString()) + .isEqualTo(cqlIdentifierToMessageString(COLUMN) + "(vector)"); + } + } +} diff --git a/src/test/java/io/stargate/sgv2/jsonapi/util/LoggerTestWrapper.java b/src/test/java/io/stargate/sgv2/jsonapi/util/LoggerTestWrapper.java new file mode 100644 index 0000000000..e7b64dd885 --- /dev/null +++ b/src/test/java/io/stargate/sgv2/jsonapi/util/LoggerTestWrapper.java @@ -0,0 +1,90 @@ +package io.stargate.sgv2.jsonapi.util; + +import java.util.ArrayDeque; +import java.util.Deque; +import java.util.List; +import java.util.Objects; +import java.util.logging.Level; +import java.util.logging.LogRecord; + +/** + * Provides an {@link AutoCloseable} wrapper around a {@link java.util.logging.Logger} that allows + * capturing log records for testing purposes. When exiting the try block the log level is restored + * to its previous value. + * + *

Example usage: + * + *

+ *     try (var logWrapper = new LoggerTestWrapper(SuperShreddingTablePredicate.class)) {
+ *        // do testing
+ *
+ *         assertThat(logWrapper.logMessages())
+ *             .anyMatch(s -> s.contains("the message I expect"));
+ *     }
+ * 
/**
 * Test helper that captures the records logged through one {@link java.util.logging.Logger}.
 *
 * <p>On construction the target logger's level is changed and an in-memory handler is attached;
 * {@link #close()} restores the previous level and detaches the handler, so instances are meant
 * to be used in a try-with-resources block. At most {@code MAX_RECORDS} records are retained,
 * the oldest being discarded first. No synchronization is performed on the captured records.
 */
public class LoggerTestWrapper implements AutoCloseable {

  // NOTE: using the java logger packages so we can change the logging level
  private final java.util.logging.Logger targetLogger;
  // May be null: a null level means the logger inherits its level from its parent, and passing
  // null back to setLevel() in close() restores exactly that.
  private final java.util.logging.Level previousLevel;
  private final java.util.logging.Handler memoryHandler;

  private static final int MAX_RECORDS = 1000;

  /** Captured records, oldest first. Prefer {@link #logRecords()} for a stable snapshot. */
  public final Deque<LogRecord> records = new ArrayDeque<>();

  /**
   * Changes the log level for the logger to {@link Level#FINEST}.
   *
   * @param clazz Class whose fully qualified name identifies the logger to change.
   * @throws NullPointerException if {@code clazz} is null.
   */
  public LoggerTestWrapper(Class<?> clazz) {
    this(clazz, Level.FINEST);
  }

  /**
   * Changes the log level for the logger to the specified level, while inside the auto closeable.
   *
   * @param clazz Class whose fully qualified name identifies the logger to change.
   * @param newLevel The new log level, also applied to the capturing handler.
   * @throws NullPointerException if {@code clazz} or {@code newLevel} is null.
   */
  public LoggerTestWrapper(Class<?> clazz, java.util.logging.Level newLevel) {

    Objects.requireNonNull(clazz, "clazz cannot be null");
    Objects.requireNonNull(newLevel, "newLevel cannot be null");

    this.targetLogger = java.util.logging.Logger.getLogger(clazz.getName());
    this.previousLevel = targetLogger.getLevel();
    targetLogger.setLevel(newLevel);

    this.memoryHandler =
        new java.util.logging.Handler() {
          @Override
          public void publish(LogRecord logRecord) {
            // Handler contract: null records are silently ignored. isLoggable() is false for
            // null and for records below this handler's level (e.g. forwarded by child loggers).
            if (!isLoggable(logRecord)) {
              return;
            }
            // bounded buffer, drop the oldest record to make room
            if (records.size() >= MAX_RECORDS) {
              records.pollFirst();
            }
            records.addLast(logRecord);
          }

          @Override
          public void flush() {}

          @Override
          public void close() {}
        };
    this.memoryHandler.setLevel(newLevel);
    targetLogger.addHandler(memoryHandler);
  }

  /**
   * Returns an immutable snapshot of the captured records, oldest first.
   *
   * @return copy of the captured records; later logging does not change the returned list.
   */
  public List<LogRecord> logRecords() {
    return List.copyOf(records);
  }

  /**
   * Returns the message of every captured record, oldest first.
   *
   * <p>This is {@link LogRecord#getMessage()}, i.e. the raw message before any parameter
   * substitution or localization is applied.
   *
   * @return unmodifiable list of raw messages.
   */
  public List<String> logMessages() {
    return records.stream().map(LogRecord::getMessage).toList();
  }

  /** Restores the logger's previous level and detaches the capturing handler. */
  @Override
  public void close() {
    targetLogger.setLevel(previousLevel);
    targetLogger.removeHandler(memoryHandler);
  }
}
// diff --git a/src/test/java/io/stargate/sgv2/jsonapi/util/TableMetadataTestUtil.java b/src/test/java/io/stargate/sgv2/jsonapi/util/TableMetadataTestUtil.java new file mode
package io.stargate.sgv2.jsonapi.util;

import static io.stargate.sgv2.jsonapi.util.CqlIdentifierUtil.cqlIdentifierFromUserInput;

import com.datastax.oss.driver.api.core.CqlIdentifier;
import com.datastax.oss.driver.api.core.metadata.schema.ClusteringOrder;
import com.datastax.oss.driver.api.core.metadata.schema.ColumnMetadata;
import com.datastax.oss.driver.api.core.metadata.schema.TableMetadata;
import com.datastax.oss.driver.api.core.type.DataType;
import com.datastax.oss.driver.internal.core.metadata.schema.DefaultColumnMetadata;
import com.datastax.oss.driver.internal.core.metadata.schema.DefaultTableMetadata;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Stream;

/**
 * Collection of utilities to make changes to {@link TableMetadata} and {@link
 * com.datastax.oss.driver.api.core.metadata.schema.IndexMetadata} as part of testing.
 *
 * <p>No method here modifies the {@link TableMetadata} it is given: each one copies the relevant
 * collection, applies the change to the copy, and returns a new {@link DefaultTableMetadata} built
 * from the copy plus the untouched properties of the source table.
 *
 * <p>Every method that builds a new table calls {@code tableMetadata.getId().orElseThrow()}, so a
 * source table without an id makes that call throw.
 */
public class TableMetadataTestUtil {

  private TableMetadataTestUtil() {}

  /**
   * Pairs a modified table with the identifier of the column that was changed to produce it.
   *
   * @param tableMetadata the modified table
   * @param column identifier of the column that was removed or altered
   */
  public record TableAndColumn(TableMetadata tableMetadata, CqlIdentifier column) {}

  /**
   * Produces one modified table per column of {@code tableMetadata}, each with that single column
   * removed from the columns map.
   *
   * @param tableMetadata the source table
   * @return a stream of (modified table, removed column) pairs
   */
  public static Stream<TableAndColumn> removeAllColumns(TableMetadata tableMetadata) {
    return removeAllColumns(tableMetadata, tableMetadata.getColumns().keySet());
  }

  /**
   * Produces one modified table per identifier in {@code columns}, each with that single column
   * removed from the columns map.
   *
   * @param tableMetadata the source table
   * @param columns identifiers of the columns to remove, one at a time
   * @return a stream of (modified table, removed column) pairs; {@link IllegalStateException} is
   *     raised while the stream is consumed if an identifier is not a column of the table
   */
  public static Stream<TableAndColumn> removeAllColumns(
      TableMetadata tableMetadata, Collection<CqlIdentifier> columns) {
    return columns.stream()
        .map(column -> new TableAndColumn(removeColumn(tableMetadata, column), column));
  }

  /**
   * Removes the column with the same name as {@code columnMetadata} from the columns map.
   *
   * @param tableMetadata the source table
   * @param columnMetadata the column to remove, matched by name
   * @return a new table without the column
   * @throws IllegalStateException if the table has no column with that name
   */
  public static TableMetadata removeColumn(
      TableMetadata tableMetadata, ColumnMetadata columnMetadata) {
    return removeColumn(tableMetadata, columnMetadata.getName());
  }

  /**
   * Removes the column named {@code identifier} from the columns map. The partition key and
   * clustering columns are passed through unchanged.
   *
   * @param tableMetadata the source table
   * @param identifier name of the column to remove
   * @return a new table without the column
   * @throws IllegalStateException if the table has no column with that name
   */
  public static TableMetadata removeColumn(TableMetadata tableMetadata, CqlIdentifier identifier) {

    Map<CqlIdentifier, ColumnMetadata> columns = new LinkedHashMap<>(tableMetadata.getColumns());
    if (columns.remove(identifier) == null) {
      throw new IllegalStateException(
          "Column not found. identifier:%s, tableMetadata:%s, "
              .formatted(identifier, tableMetadata.describe(true)));
    }
    return rebuild(
        tableMetadata,
        tableMetadata.getPartitionKey(),
        tableMetadata.getClusteringColumns(),
        columns);
  }

  /**
   * Produces one modified table per partition key column of {@code tableMetadata}, each with that
   * single column removed from the partition key.
   *
   * @param tableMetadata the source table
   * @return a stream of (modified table, removed partition key column name) pairs
   */
  public static Stream<TableAndColumn> removeAllPartitionKeys(TableMetadata tableMetadata) {
    return removeAllPartitionKeys(tableMetadata, tableMetadata.getPartitionKey());
  }

  /**
   * Produces one modified table per entry in {@code columns}, each with that single column removed
   * from the partition key.
   *
   * @param tableMetadata the source table
   * @param columns partition key columns to remove, one at a time
   * @return a stream of (modified table, removed partition key column name) pairs; {@link
   *     IllegalStateException} is raised while the stream is consumed if a column is not in the
   *     partition key
   */
  public static Stream<TableAndColumn> removeAllPartitionKeys(
      TableMetadata tableMetadata, Collection<ColumnMetadata> columns) {
    return columns.stream()
        .map(
            column ->
                new TableAndColumn(removePartitionKey(tableMetadata, column), column.getName()));
  }

  /**
   * Removes {@code columnMetadata} from the partition key list. The columns map is passed through
   * unchanged, so the column still appears there.
   *
   * @param tableMetadata the source table
   * @param columnMetadata the partition key column to remove
   * @return a new table whose partition key no longer contains the column
   * @throws IllegalStateException if the column is not in the partition key
   */
  public static TableMetadata removePartitionKey(
      TableMetadata tableMetadata, ColumnMetadata columnMetadata) {
    List<ColumnMetadata> partitionKeys = new ArrayList<>(tableMetadata.getPartitionKey());
    if (!partitionKeys.remove(columnMetadata)) {
      throw new IllegalStateException(
          "PartitionKey not found. columnMetadata:%s, tableMetadata:%s, "
              .formatted(columnMetadata, tableMetadata.describe(true)));
    }
    return rebuild(
        tableMetadata,
        partitionKeys,
        tableMetadata.getClusteringColumns(),
        tableMetadata.getColumns());
  }

  /**
   * Produces one modified table per column of {@code tableMetadata}, each with the type of that
   * single column swapped, see {@link #swapType(TableMetadata, CqlIdentifier, DataType,
   * DataType)}.
   *
   * @param tableMetadata the source table
   * @param swapToType the type to change each column to
   * @param collisionToType the type to use when the column already has {@code swapToType}
   * @return a stream of (modified table, changed column) pairs
   */
  public static Stream<TableAndColumn> swapTypesAllColumns(
      TableMetadata tableMetadata, DataType swapToType, DataType collisionToType) {
    return swapTypesAllColumns(
        tableMetadata, tableMetadata.getColumns().keySet(), swapToType, collisionToType);
  }

  /**
   * Produces one modified table per identifier in {@code columns}, each with the type of that
   * single column swapped, see {@link #swapType(TableMetadata, CqlIdentifier, DataType,
   * DataType)}.
   *
   * @param tableMetadata the source table
   * @param columns identifiers of the columns to change, one at a time
   * @param swapToType the type to change each column to
   * @param collisionToType the type to use when the column already has {@code swapToType}
   * @return a stream of (modified table, changed column) pairs; {@link IllegalStateException} is
   *     raised while the stream is consumed if an identifier is not a column of the table
   */
  public static Stream<TableAndColumn> swapTypesAllColumns(
      TableMetadata tableMetadata,
      Collection<CqlIdentifier> columns,
      DataType swapToType,
      DataType collisionToType) {
    return columns.stream()
        .map(
            column ->
                new TableAndColumn(
                    swapType(tableMetadata, column, swapToType, collisionToType), column));
  }

  /**
   * Replaces the column named {@code identifier} with a copy that has a different type. The
   * keyspace, parent, name and static flag of the column are carried over, and the column keeps
   * its position in the columns map.
   *
   * @param tableMetadata the source table
   * @param identifier name of the column to change
   * @param swapToType the type to change the column to
   * @param collisionToType the type to use when the column already has {@code swapToType}, so the
   *     resulting type differs from the existing one as long as the two arguments differ
   * @return a new table with the re-typed column
   * @throws IllegalStateException if the table has no column with that name
   */
  public static TableMetadata swapType(
      TableMetadata tableMetadata,
      CqlIdentifier identifier,
      DataType swapToType,
      DataType collisionToType) {

    Map<CqlIdentifier, ColumnMetadata> localColumns =
        new LinkedHashMap<>(tableMetadata.getColumns());
    ColumnMetadata existingColumn = localColumns.get(identifier);
    if (existingColumn == null) {
      throw new IllegalStateException(
          "Column not found. identifier:%s, tableMetadata:%s, "
              .formatted(identifier, tableMetadata.describe(true)));
    }

    // Compare by value, not by reference: equal types are not guaranteed to be the same instance
    // (for example collection types created on demand), and a missed collision would leave the
    // column type effectively unchanged.
    DataType newType = existingColumn.getType().equals(swapToType) ? collisionToType : swapToType;
    ColumnMetadata newColumn =
        new DefaultColumnMetadata(
            existingColumn.getKeyspace(),
            existingColumn.getParent(),
            existingColumn.getName(),
            newType,
            existingColumn.isStatic());
    // put() on an existing key replaces the value in place
    localColumns.put(identifier, newColumn);

    return rebuild(
        tableMetadata,
        tableMetadata.getPartitionKey(),
        tableMetadata.getClusteringColumns(),
        localColumns);
  }

  /**
   * Appends a new non-static column to the partition key.
   *
   * @param tableMetadata the source table
   * @param clearFirst when true the existing partition key is discarded before adding the column
   * @param name column name, converted with {@code cqlIdentifierFromUserInput}
   * @param datatype type of the new column
   * @return a new table with the updated partition key
   */
  public static TableMetadata addPartitionKey(
      TableMetadata tableMetadata, boolean clearFirst, String name, DataType datatype) {
    return addPartitionKey(tableMetadata, clearFirst, newColumn(tableMetadata, name, datatype));
  }

  /**
   * Appends {@code columnMetadata} to the partition key list. The columns map is passed through
   * unchanged, so the column is not added there.
   *
   * @param tableMetadata the source table
   * @param clearFirst when true the existing partition key is discarded before adding the column
   * @param columnMetadata the column to append to the partition key
   * @return a new table with the updated partition key
   */
  public static TableMetadata addPartitionKey(
      TableMetadata tableMetadata, boolean clearFirst, ColumnMetadata columnMetadata) {

    List<ColumnMetadata> partitionKeys = new ArrayList<>(tableMetadata.getPartitionKey());
    if (clearFirst) {
      partitionKeys.clear();
    }
    partitionKeys.add(columnMetadata);

    return rebuild(
        tableMetadata,
        partitionKeys,
        tableMetadata.getClusteringColumns(),
        tableMetadata.getColumns());
  }

  /**
   * Adds a new non-static clustering column with {@link ClusteringOrder#ASC} order.
   *
   * @param tableMetadata the source table
   * @param name column name, converted with {@code cqlIdentifierFromUserInput}
   * @param datatype type of the new column
   * @return a new table with the additional clustering column
   */
  public static TableMetadata addClusteringColumn(
      TableMetadata tableMetadata, String name, DataType datatype) {
    return addClusteringColumn(
        tableMetadata, newColumn(tableMetadata, name, datatype), ClusteringOrder.ASC);
  }

  /**
   * Adds {@code columnMetadata} to the clustering columns, or updates its order if it is already
   * a clustering column. The columns map is passed through unchanged.
   *
   * @param tableMetadata the source table
   * @param columnMetadata the clustering column to add
   * @param clusteringOrder the order for the clustering column
   * @return a new table with the updated clustering columns
   */
  public static TableMetadata addClusteringColumn(
      TableMetadata tableMetadata, ColumnMetadata columnMetadata, ClusteringOrder clusteringOrder) {

    Map<ColumnMetadata, ClusteringOrder> clusteringColumns =
        new LinkedHashMap<>(tableMetadata.getClusteringColumns());
    clusteringColumns.put(columnMetadata, clusteringOrder);

    return rebuild(
        tableMetadata,
        tableMetadata.getPartitionKey(),
        clusteringColumns,
        tableMetadata.getColumns());
  }

  /**
   * Adds a new non-static regular column.
   *
   * @param tableMetadata the source table
   * @param name column name, converted with {@code cqlIdentifierFromUserInput}
   * @param datatype type of the new column
   * @return a new table with the additional column
   */
  public static TableMetadata addColumn(
      TableMetadata tableMetadata, String name, DataType datatype) {
    return addColumn(tableMetadata, newColumn(tableMetadata, name, datatype));
  }

  /**
   * Adds {@code columnMetadata} to the columns map under its own name, replacing any existing
   * column with the same name.
   *
   * @param tableMetadata the source table
   * @param columnMetadata the column to add
   * @return a new table with the additional column
   */
  public static TableMetadata addColumn(
      TableMetadata tableMetadata, ColumnMetadata columnMetadata) {

    Map<CqlIdentifier, ColumnMetadata> columns = new LinkedHashMap<>(tableMetadata.getColumns());
    columns.put(columnMetadata.getName(), columnMetadata);

    return rebuild(
        tableMetadata,
        tableMetadata.getPartitionKey(),
        tableMetadata.getClusteringColumns(),
        columns);
  }

  /** Builds a non-static column that belongs to the keyspace and table of {@code tableMetadata}. */
  private static ColumnMetadata newColumn(
      TableMetadata tableMetadata, String name, DataType datatype) {
    return new DefaultColumnMetadata(
        tableMetadata.getKeyspace(),
        tableMetadata.getName(),
        cqlIdentifierFromUserInput(name),
        datatype,
        false);
  }

  /**
   * Builds a new table that copies every property of {@code source} except the three structural
   * collections supplied by the caller. Calls {@code source.getId().orElseThrow()}, so a source
   * table without an id makes this throw.
   */
  private static TableMetadata rebuild(
      TableMetadata source,
      List<ColumnMetadata> partitionKey,
      Map<ColumnMetadata, ClusteringOrder> clusteringColumns,
      Map<CqlIdentifier, ColumnMetadata> columns) {
    return new DefaultTableMetadata(
        source.getKeyspace(),
        source.getName(),
        source.getId().orElseThrow(),
        source.isCompactStorage(),
        source.isVirtual(),
        partitionKey,
        clusteringColumns,
        columns,
        source.getOptions(),
        source.getIndexes());
  }
}