diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/configuration/CommandObjectMapperHandler.java b/src/main/java/io/stargate/sgv2/jsonapi/api/configuration/CommandObjectMapperHandler.java index b63fc9e763..8676cf1743 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/configuration/CommandObjectMapperHandler.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/configuration/CommandObjectMapperHandler.java @@ -67,6 +67,13 @@ public JavaType handleUnknownTypeId( int ix = baseCommand.indexOf("Command"); if (ix > 0) { baseCommand = baseCommand.substring(0, ix) + " " + "Command"; + } else { + // Also handle nested polymorphic operations like "AlterCollectionOperation" -> + // "AlterCollection Operation" so the error message reads more naturally. + int opIx = baseCommand.indexOf("Operation"); + if (opIx > 0) { + baseCommand = baseCommand.substring(0, opIx) + " " + "Operation"; + } } throw RequestException.Code.COMMAND_UNKNOWN.get( diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/CollectionCommand.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/CollectionCommand.java index 0c456b4acc..138a9e94db 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/CollectionCommand.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/CollectionCommand.java @@ -21,6 +21,7 @@ @JsonSubTypes.Type(value = InsertOneCommand.class), @JsonSubTypes.Type(value = UpdateManyCommand.class), @JsonSubTypes.Type(value = UpdateOneCommand.class), + @JsonSubTypes.Type(value = AlterCollectionCommand.class), // We have only collection resource that is used for API Tables @JsonSubTypes.Type(value = AlterTableCommand.class), @JsonSubTypes.Type(value = CreateIndexCommand.class), diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/CommandName.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/CommandName.java index 5c522d5fd2..68998b2346 100644 --- 
a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/CommandName.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/CommandName.java @@ -14,6 +14,7 @@ public enum CommandName { // they should not be DDL, they are not changing schema, we should add an CommandType.ADMIN for // them ? + ALTER_COLLECTION(Names.ALTER_COLLECTION, CommandType.DDL, CommandTarget.COLLECTION), ALTER_TABLE(Names.ALTER_TABLE, CommandType.DDL, CommandTarget.TABLE), ALTER_TYPE(Names.ALTER_TYPE, CommandType.DDL, CommandTarget.TABLE), COUNT_DOCUMENTS(Names.COUNT_DOCUMENTS, CommandType.DML, CommandTarget.COLLECTION), @@ -107,6 +108,7 @@ public static List filterByTarget(CommandTarget target) { } public interface Names { + String ALTER_COLLECTION = "alterCollection"; String ALTER_TABLE = "alterTable"; String ALTER_TYPE = "alterType"; String COUNT_DOCUMENTS = "countDocuments"; diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/impl/AlterCollectionCommand.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/impl/AlterCollectionCommand.java new file mode 100644 index 0000000000..d7a8049a73 --- /dev/null +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/impl/AlterCollectionCommand.java @@ -0,0 +1,20 @@ +package io.stargate.sgv2.jsonapi.api.model.command.impl; + +import com.fasterxml.jackson.annotation.JsonTypeName; +import io.stargate.sgv2.jsonapi.api.model.command.CollectionCommand; +import io.stargate.sgv2.jsonapi.api.model.command.CommandName; +import io.stargate.sgv2.jsonapi.api.model.command.NoOptionsCommand; +import org.eclipse.microprofile.openapi.annotations.media.Schema; + +@Schema( + description = + "Command that alters mutable settings of an existing collection. 
Currently supports enabling the 'lexical' feature.") +@JsonTypeName(CommandName.Names.ALTER_COLLECTION) +public record AlterCollectionCommand(AlterCollectionOperation operation) + implements CollectionCommand, NoOptionsCommand { + + @Override + public CommandName commandName() { + return CommandName.ALTER_COLLECTION; + } +} diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/impl/AlterCollectionOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/impl/AlterCollectionOperation.java new file mode 100644 index 0000000000..aad9212475 --- /dev/null +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/impl/AlterCollectionOperation.java @@ -0,0 +1,14 @@ +package io.stargate.sgv2.jsonapi.api.model.command.impl; + +import com.fasterxml.jackson.annotation.JsonSubTypes; +import com.fasterxml.jackson.annotation.JsonTypeInfo; + +/** + * Polymorphic operation payload for {@link AlterCollectionCommand}. Each operation is represented + * by a record implementing this interface; Jackson selects the concrete subtype by the wrapper key + * (e.g. {@code "enableLexical"}). Mirrors {@link AlterTableOperation}. 
+ */ +@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.WRAPPER_OBJECT) +@JsonSubTypes({@JsonSubTypes.Type(value = AlterCollectionOperationImpl.EnableLexical.class)}) +public sealed interface AlterCollectionOperation + permits AlterCollectionOperationImpl.EnableLexical {} diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/impl/AlterCollectionOperationImpl.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/impl/AlterCollectionOperationImpl.java new file mode 100644 index 0000000000..87a40a7bbe --- /dev/null +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/impl/AlterCollectionOperationImpl.java @@ -0,0 +1,27 @@ +package io.stargate.sgv2.jsonapi.api.model.command.impl; + +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonTypeName; +import com.fasterxml.jackson.databind.JsonNode; +import java.util.Map; +import javax.annotation.Nullable; +import org.eclipse.microprofile.openapi.annotations.media.Schema; + +/** Each operation that {@link AlterCollectionCommand} understands is represented by a record. */ +public class AlterCollectionOperationImpl { + + @Schema(description = "Operation to enable the lexical search feature on a collection.") + @JsonTypeName("enableLexical") + public record EnableLexical( + @Schema( + description = + "Analyzer to use for '$lexical' field: either String (name of a pre-defined analyzer), or JSON Object to specify custom one. 
Default: 'standard'.", + defaultValue = "standard", + oneOf = {String.class, Map.class}) + @JsonInclude(JsonInclude.Include.NON_NULL) + @JsonProperty("analyzer") + @Nullable + JsonNode analyzerDef) + implements AlterCollectionOperation {} +} diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/v1/CollectionResource.java b/src/main/java/io/stargate/sgv2/jsonapi/api/v1/CollectionResource.java index 7fbbe62563..34b622fd1e 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/v1/CollectionResource.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/v1/CollectionResource.java @@ -7,6 +7,7 @@ import io.smallrye.mutiny.Uni; import io.stargate.sgv2.jsonapi.ConfigPreLoader; import io.stargate.sgv2.jsonapi.api.model.command.*; +import io.stargate.sgv2.jsonapi.api.model.command.impl.AlterCollectionCommand; import io.stargate.sgv2.jsonapi.api.model.command.impl.AlterTableCommand; import io.stargate.sgv2.jsonapi.api.model.command.impl.CountDocumentsCommand; import io.stargate.sgv2.jsonapi.api.model.command.impl.CreateIndexCommand; @@ -138,6 +139,7 @@ public CollectionResource( InsertManyCommand.class, UpdateManyCommand.class, UpdateOneCommand.class, + AlterCollectionCommand.class, // Table Only commands AlterTableCommand.class, CreateIndexCommand.class, diff --git a/src/main/java/io/stargate/sgv2/jsonapi/exception/SchemaException.java b/src/main/java/io/stargate/sgv2/jsonapi/exception/SchemaException.java index be262773fb..ca7fdfbf73 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/exception/SchemaException.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/exception/SchemaException.java @@ -42,6 +42,7 @@ public enum Code implements ErrorCode { EXISTING_COLLECTION_DIFFERENT_SETTINGS, EXISTING_TABLE_NOT_DATA_API_COLLECTION, // converted from ErrorCodeV1 + INVALID_ALTER_COLLECTION_OPTIONS, INVALID_CREATE_COLLECTION_OPTIONS, INVALID_FORMAT_FOR_INDEX_CREATION_COLUMN, INVALID_INDEXING_DEFINITION, diff --git 
a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/AlterCollectionLexicalOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/AlterCollectionLexicalOperation.java new file mode 100644 index 0000000000..dc6f157bc3 --- /dev/null +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/AlterCollectionLexicalOperation.java @@ -0,0 +1,238 @@ +package io.stargate.sgv2.jsonapi.service.operation.collections; + +import static io.stargate.sgv2.jsonapi.exception.ErrorFormatters.errVars; + +import com.datastax.oss.driver.api.core.CqlIdentifier; +import com.datastax.oss.driver.api.core.cql.AsyncResultSet; +import com.datastax.oss.driver.api.core.cql.SimpleStatement; +import com.datastax.oss.driver.api.core.metadata.Metadata; +import com.datastax.oss.driver.api.core.metadata.schema.TableMetadata; +import com.fasterxml.jackson.core.JacksonException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ObjectNode; +import io.smallrye.mutiny.Uni; +import io.stargate.sgv2.jsonapi.api.model.command.CommandContext; +import io.stargate.sgv2.jsonapi.api.model.command.CommandResult; +import io.stargate.sgv2.jsonapi.api.request.RequestContext; +import io.stargate.sgv2.jsonapi.config.DatabaseLimitsConfig; +import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants; +import io.stargate.sgv2.jsonapi.config.constants.TableCommentConstants; +import io.stargate.sgv2.jsonapi.exception.DatabaseException; +import io.stargate.sgv2.jsonapi.exception.SchemaException; +import io.stargate.sgv2.jsonapi.service.cqldriver.executor.QueryExecutor; +import io.stargate.sgv2.jsonapi.service.operation.Operation; +import io.stargate.sgv2.jsonapi.service.schema.collections.CollectionLexicalDef; +import io.stargate.sgv2.jsonapi.service.schema.collections.CollectionSchemaObject; +import io.stargate.sgv2.jsonapi.service.schema.collections.CollectionTableComment; +import java.time.Duration; 
+import java.util.Optional; +import java.util.function.Supplier; + +/** + * Operation that enables the lexical feature on an existing collection by adding the {@code + * query_lexical_value} column, creating an analyzed SAI index on it, and updating the table + * "comment" JSON to record the new lexical config. + * + *

<p>When {@link #noOp} is true the operation returns success without executing any DDL: this is + * used for the "already enabled with same settings" case. + * + *

<p>No rollback on partial failure. If e.g. ADD COLUMN succeeds but CREATE INDEX fails, the + * column is left in place and the failure is propagated to the caller. This matches {@link + * CreateCollectionOperation}'s behavior and is intentional: + * + *

+ * + *

The comment is updated last, so an interrupted run can leave the column/index present while + * {@code findCollections} still reports lexical as disabled; a successful retry reconciles this + * (see {@code trulyEnabled} in {@code AlterCollectionCommandResolver}). + */ +public record AlterCollectionLexicalOperation( + CommandContext commandContext, + ObjectMapper objectMapper, + DatabaseLimitsConfig dbLimitsConfig, + int ddlDelayMillis, + CollectionLexicalDef newLexicalConfig, + boolean noOp) + implements Operation { + + private static final CqlIdentifier LEXICAL_COLUMN = + CqlIdentifier.fromInternal(DocumentConstants.Columns.LEXICAL_INDEX_COLUMN_NAME); + + @Override + public Uni> execute( + RequestContext requestContext, QueryExecutor queryExecutor) { + + if (noOp) { + return Uni.createFrom().>item(new SchemaChangeResult(true)); + } + + final CollectionSchemaObject schemaObject = commandContext.schemaObject(); + final String keyspace = schemaObject.tableMetadata().getKeyspace().asInternal(); + final String table = schemaObject.tableMetadata().getName().asInternal(); + + final String newComment; + try { + newComment = buildUpdatedComment(schemaObject); + } catch (JacksonException | RuntimeException e) { + // Resolver guarantees a V1 comment; if reading/updating still fails, surface a clean error + // rather than a raw JacksonException/IllegalStateException. + return Uni.createFrom() + .failure( + DatabaseException.Code.CORRUPTED_COLLECTION_SCHEMA.get( + errVars( + schemaObject, + map -> + map.put( + "errorMessage", + "Unable to update collection 'comment' to enable lexical: " + + e.getMessage())))); + } + + // Base all existence decisions on freshly-fetched metadata rather than the resolve-time + // snapshot, so a column/index left by an interrupted prior run (or a concurrent op) is seen + // here. This is also where we pre-flight the DB-wide index limit, before running any DDL. 
+ return queryExecutor + .getDriverMetadata(requestContext) + .map(Metadata::getKeyspaces) + .flatMap( + allKeyspaces -> { + final TableMetadata currentTable = + Optional.ofNullable(allKeyspaces.get(schemaObject.tableMetadata().getKeyspace())) + .flatMap(ks -> ks.getTable(schemaObject.tableMetadata().getName())) + .orElse(schemaObject.tableMetadata()); + + final boolean columnExists = currentTable.getColumn(LEXICAL_COLUMN).isPresent(); + final boolean indexExists = + currentTable + .getIndexes() + .containsKey( + CqlIdentifier.fromInternal( + CreateCollectionOperation.lexicalIndexName(table))); + + // Only an absent index is net-new, so only then enforce the limit (mirrors + // CreateCollectionOperation): going over fails with TOO_MANY_INDEXES_FOR_COLLECTION + // before any DDL, not a generic error from a failed CREATE INDEX. + if (!indexExists) { + final int saisUsed = + allKeyspaces.values().stream() + .flatMap(ks -> ks.getTables().values().stream()) + .mapToInt(t -> t.getIndexes().size()) + .sum(); + // enableLexical adds exactly one SAI (the analyzed lexical index). + if (saisUsed + 1 > dbLimitsConfig.indexesAvailablePerDatabase()) { + return Uni.createFrom() + .>failure( + SchemaException.Code.TOO_MANY_INDEXES_FOR_COLLECTION.get( + errVars(schemaObject, map -> map.put("indexesPerCollection", "1")))); + } + } + + return executeLexicalDdl( + requestContext, queryExecutor, keyspace, table, newComment, columnExists); + }); + } + + /** + * Runs the enable-lexical DDL: ADD COLUMN (skipped when it already exists), CREATE CUSTOM INDEX + * IF NOT EXISTS, then ALTER TABLE WITH comment, spaced by {@link #ddlDelayMillis}. The {@code + * columnAlreadyExists} flag is derived from freshly-fetched metadata so a leftover column is + * skipped rather than failing the (plain) ADD — the backend does not support {@code ADD IF NOT + * EXISTS}. 
+ */ + private Uni> executeLexicalDdl( + RequestContext requestContext, + QueryExecutor queryExecutor, + String keyspace, + String table, + String newComment, + boolean columnAlreadyExists) { + + SimpleStatement createIndexStmt = + CreateCollectionOperation.buildLexicalIndexStatement( + keyspace, table, newLexicalConfig, /* ifNotExists */ true); + + // Cassandra does not accept bind parameters for table options like `comment`, so the comment + // JSON is embedded directly into the CQL (as createCollection does); single quotes are doubled + // to keep the string literal valid. + SimpleStatement alterCommentStmt = + SimpleStatement.newInstance( + "ALTER TABLE \"%s\".\"%s\" WITH comment = '%s'" + .formatted(keyspace, table, newComment.replace("'", "''"))); + + final Duration delay = Duration.ofMillis(ddlDelayMillis > 0 ? ddlDelayMillis : 100); + + Uni pipeline; + if (columnAlreadyExists) { + pipeline = queryExecutor.executeCreateSchemaChange(requestContext, createIndexStmt); + } else { + SimpleStatement addColumnStmt = + SimpleStatement.newInstance( + "ALTER TABLE \"%s\".\"%s\" ADD %s text" + .formatted(keyspace, table, DocumentConstants.Columns.LEXICAL_INDEX_COLUMN_NAME)); + pipeline = + queryExecutor + .executeCreateSchemaChange(requestContext, addColumnStmt) + .onItem() + .delayIt() + .by(delay) + .onItem() + .transformToUni( + r1 -> queryExecutor.executeCreateSchemaChange(requestContext, createIndexStmt)); + } + + return pipeline + .onItem() + .delayIt() + .by(delay) + .onItem() + .transformToUni( + r2 -> queryExecutor.executeCreateSchemaChange(requestContext, alterCommentStmt)) + .map(r3 -> new SchemaChangeResult(true)); + } + + /** + * Reads the current table comment JSON and surgically replaces the {@code + * collection.options.lexical} sub-node, leaving all other options (vector / indexing / id / + * rerank / unknown fields) untouched. + * + *

The resolver guarantees we are operating on a V1-shaped comment (legacy/V0 collections are + * rejected before reaching the operation). + */ + private String buildUpdatedComment(CollectionSchemaObject schemaObject) throws JacksonException { + final String comment = CollectionTableComment.rawComment(schemaObject.tableMetadata()); + if (comment == null || comment.isBlank()) { + // Defensive: resolver should have rejected this case. + throw new IllegalStateException( + "Cannot alter collection: table comment is empty; expected V1 schema"); + } + + final ObjectNode rootNode = (ObjectNode) objectMapper.readTree(comment); + final ObjectNode collectionNode = + (ObjectNode) rootNode.get(TableCommentConstants.TOP_LEVEL_KEY); + if (collectionNode == null) { + // Defensive: resolver should have rejected this case. + throw new IllegalStateException( + "Cannot alter collection: comment does not have '" + + TableCommentConstants.TOP_LEVEL_KEY + + "' node"); + } + ObjectNode optionsNode = (ObjectNode) collectionNode.get(TableCommentConstants.OPTIONS_KEY); + if (optionsNode == null) { + optionsNode = objectMapper.createObjectNode(); + collectionNode.set(TableCommentConstants.OPTIONS_KEY, optionsNode); + } + optionsNode.putPOJO(TableCommentConstants.COLLECTION_LEXICAL_CONFIG_KEY, newLexicalConfig); + return objectMapper.writeValueAsString(rootNode); + } +} diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/CollectionDriverExceptionHandler.java b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/CollectionDriverExceptionHandler.java index 8a02ad61cc..9738aa19b4 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/CollectionDriverExceptionHandler.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/CollectionDriverExceptionHandler.java @@ -5,6 +5,7 @@ import com.datastax.oss.driver.api.core.cql.SimpleStatement; import 
com.datastax.oss.driver.api.core.servererrors.InvalidQueryException; +import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants; import io.stargate.sgv2.jsonapi.exception.*; import io.stargate.sgv2.jsonapi.service.cqldriver.executor.DefaultDriverExceptionHandler; import io.stargate.sgv2.jsonapi.service.operation.tables.CreateIndexExceptionHandler; @@ -52,7 +53,9 @@ public RuntimeException handle(InvalidQueryException exception) { if (exception .getMessage() .contains( - "analyzed size for column query_lexical_value exceeds the cumulative limit for index")) { + "analyzed size for column " + + DocumentConstants.Columns.LEXICAL_INDEX_COLUMN_NAME + + " exceeds the cumulative limit for index")) { return DocumentException.Code.LEXICAL_CONTENT_TOO_LONG.get(errVars(schemaObject, exception)); } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/CreateCollectionOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/CreateCollectionOperation.java index 7776d6ba74..a3f5db62a0 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/CreateCollectionOperation.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/CreateCollectionOperation.java @@ -25,6 +25,7 @@ import io.stargate.sgv2.jsonapi.api.model.command.tracing.RequestTracing; import io.stargate.sgv2.jsonapi.api.request.RequestContext; import io.stargate.sgv2.jsonapi.config.DatabaseLimitsConfig; +import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants; import io.stargate.sgv2.jsonapi.config.constants.TableCommentConstants; import io.stargate.sgv2.jsonapi.exception.DatabaseException; import io.stargate.sgv2.jsonapi.exception.SchemaException; @@ -612,15 +613,12 @@ private List getIndexStatements( } if (overrideLexicalDef.enabled()) { - var analyzerDef = overrideLexicalDef.analyzerDefinition(); - var analyzerString = analyzerDef.isTextual() ? 
analyzerDef.asText() : analyzerDef.toString(); statements.add( - buildSaiIndex( - collectionExisted, - "query_lexical_value", - "query_lexical_value", - false, - Map.of("index_analyzer", analyzerString))); + buildLexicalIndexStatement( + commandContext.schemaObject().identifier().keyspace().asInternal(), + collectionName.asInternal(), + overrideLexicalDef, + collectionExisted)); } if (LOGGER.isTraceEnabled()) { @@ -666,4 +664,52 @@ private SimpleStatement buildSaiIndex( return new ExtendedCreateIndex((DefaultCreateIndex) createIndex).build(); } + + /** + * Builds the {@code CREATE CUSTOM INDEX} statement for the lexical column, used both by + * createCollection (when the table is fresh or being recreated) and by alterCollection (when + * enabling lexical on an existing collection). + * + * <p>The analyzer is embedded in a single-quoted CQL string literal, so single quotes in it are + * doubled first; without that a user-supplied analyzer could terminate the literal early. + * + * @param keyspace keyspace of the collection table, emitted double-quoted + * @param table collection table name, emitted double-quoted and used to derive the index name + * @param lexicalConfig lexical definition supplying the {@code index_analyzer} option + * @param ifNotExists when true, emits {@code IF NOT EXISTS} for idempotent retries + * @return the {@code CREATE CUSTOM INDEX} statement for the lexical column + */ + public static SimpleStatement buildLexicalIndexStatement( + String keyspace, String table, CollectionLexicalDef lexicalConfig, boolean ifNotExists) { + var analyzerDef = lexicalConfig.analyzerDefinition(); + // Note: needs to be either plain (unquoted) String (NOT quoted JSON String) OR JSON Object + final String analyzerString = + analyzerDef.isTextual() ? analyzerDef.asText() : analyzerDef.toString(); + // Double single quotes so the value stays inside its CQL string literal (same escaping the + // alterCollection comment statement uses). + final String analyzerLiteral = analyzerString.replace("'", "''"); + final String lexicalCol = DocumentConstants.Columns.LEXICAL_INDEX_COLUMN_NAME; + final String prefix = ifNotExists ? "CREATE CUSTOM INDEX IF NOT EXISTS" : "CREATE CUSTOM INDEX"; + return SimpleStatement.newInstance( + """ + %s "%s" ON "%s"."%s" (%s) + USING 'StorageAttachedIndex' WITH OPTIONS = { 'index_analyzer': '%s' } + """ + .formatted( + prefix, lexicalIndexName(table), keyspace, table, lexicalCol, analyzerLiteral)); + } + + /** + * Name of the lexical SAI: {@code "<table>_<column>"}. Shared with {@link + * #buildLexicalIndexStatement} so callers referencing the index by name stay in sync with how it + * is created. + * + *
<p>The {@code "<table>_<column>"} format is part of the on-disk schema: existing collections + * have indexes named this way, and recovery paths (e.g. {@code alterCollection}'s already-exists + * check) match by this exact name. Do not change the format without a migration for existing + * collections. + */ + public static String lexicalIndexName(String table) { + return table + "_" + DocumentConstants.Columns.LEXICAL_INDEX_COLUMN_NAME; + } } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/InsertCollectionOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/InsertCollectionOperation.java index 32e6c72470..61e38022dd 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/InsertCollectionOperation.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/InsertCollectionOperation.java @@ -8,6 +8,7 @@ import io.stargate.sgv2.jsonapi.api.model.command.CommandContext; import io.stargate.sgv2.jsonapi.api.model.command.CommandResult; import io.stargate.sgv2.jsonapi.api.request.RequestContext; +import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants; import io.stargate.sgv2.jsonapi.exception.DocumentException; import io.stargate.sgv2.jsonapi.exception.SchemaException; import io.stargate.sgv2.jsonapi.service.cqldriver.executor.QueryExecutor; @@ -215,7 +216,7 @@ public String buildInsertQuery(boolean vectorEnabled) { insertQuery.append(", query_vector_value"); } if (lexicalEnabled) { - insertQuery.append(", query_lexical_value"); + insertQuery.append(", ").append(DocumentConstants.Columns.LEXICAL_INDEX_COLUMN_NAME); } insertQuery.append(") VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?"); diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/AlterCollectionCommandResolver.java b/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/AlterCollectionCommandResolver.java new file mode 100644 index 0000000000..5667353ca6 --- /dev/null +++ 
b/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/AlterCollectionCommandResolver.java @@ -0,0 +1,120 @@ +package io.stargate.sgv2.jsonapi.service.resolver; + +import com.datastax.oss.driver.api.core.CqlIdentifier; +import com.fasterxml.jackson.databind.ObjectMapper; +import io.stargate.sgv2.jsonapi.api.model.command.CommandContext; +import io.stargate.sgv2.jsonapi.api.model.command.impl.AlterCollectionCommand; +import io.stargate.sgv2.jsonapi.api.model.command.impl.AlterCollectionOperationImpl; +import io.stargate.sgv2.jsonapi.api.model.command.impl.CreateCollectionCommand; +import io.stargate.sgv2.jsonapi.config.DatabaseLimitsConfig; +import io.stargate.sgv2.jsonapi.config.OperationsConfig; +import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants; +import io.stargate.sgv2.jsonapi.exception.SchemaException; +import io.stargate.sgv2.jsonapi.service.operation.Operation; +import io.stargate.sgv2.jsonapi.service.operation.collections.AlterCollectionLexicalOperation; +import io.stargate.sgv2.jsonapi.service.schema.collections.CollectionLexicalDef; +import io.stargate.sgv2.jsonapi.service.schema.collections.CollectionSchemaObject; +import io.stargate.sgv2.jsonapi.service.schema.collections.CollectionTableComment; +import jakarta.enterprise.context.ApplicationScoped; +import jakarta.inject.Inject; +import java.util.Map; +import java.util.Objects; + +@ApplicationScoped +public class AlterCollectionCommandResolver implements CommandResolver { + + private static final CqlIdentifier LEXICAL_COLUMN = + CqlIdentifier.fromInternal(DocumentConstants.Columns.LEXICAL_INDEX_COLUMN_NAME); + + private final ObjectMapper objectMapper; + private final DatabaseLimitsConfig dbLimitsConfig; + + @Inject + public AlterCollectionCommandResolver( + ObjectMapper objectMapper, DatabaseLimitsConfig dbLimitsConfig) { + this.objectMapper = objectMapper; + this.dbLimitsConfig = dbLimitsConfig; + } + + @Override + public Class getCommandClass() { + return 
AlterCollectionCommand.class; + } + + @Override + public Operation resolveCollectionCommand( + CommandContext ctx, AlterCollectionCommand command) { + + if (command.operation() == null) { + throw badOptions("must specify 'operation' field"); + } + + // Sealed interface: switch is exhaustive, so a new operation subtype fails to compile until + // handled here. + return switch (command.operation()) { + case AlterCollectionOperationImpl.EnableLexical op -> handleEnableLexical(ctx, op); + }; + } + + private Operation handleEnableLexical( + CommandContext ctx, AlterCollectionOperationImpl.EnableLexical op) { + + // Reject legacy / pre-lexical collections up front: must have a V1 comment with + // collection.options. Doing this before analyzer validation gives users the actionable + // "recreate the collection" error on legacy schemas instead of an analyzer-validation + // error they can't act on. + if (!CollectionTableComment.hasV1Options(objectMapper, ctx.schemaObject().tableMetadata())) { + throw badOptions( + "collection has legacy metadata (pre-lexical schema); recreate the collection with lexical enabled"); + } + + // Synthesize a LexicalDesc with enabled=true so we can reuse the existing + // validation pipeline that createCollection uses. 
+ final var lexicalDesc = + new CreateCollectionCommand.Options.LexicalDesc( + /* enabled */ Boolean.TRUE, op.analyzerDef()); + + // fromApiDesc throws: + // - LEXICAL_FEATURE_NOT_ENABLED via the SchemaFactory if the feature is disabled + // - INVALID_ALTER_COLLECTION_OPTIONS for malformed analyzer + final CollectionLexicalDef requested = + CollectionLexicalDef.fromApiDesc( + objectMapper, + lexicalDesc, + ctx.versionedSchema().lexicalDef(), + SchemaException.Code.INVALID_ALTER_COLLECTION_OPTIONS) + .runningValue(); + + final CollectionLexicalDef current = ctx.schemaObject().lexicalDef(); + final int ddlDelayMillis = + ctx.config().get(OperationsConfig.class).databaseConfig().ddlDelayMillis(); + + // "Truly enabled" means both the stored comment claims lexical is on AND the underlying + // column actually exists. If the comment says enabled but the column is missing (an + // inconsistent state from manual surgery or an interrupted prior alter), treat it as + // not-enabled and run the full DDL pipeline so the table catches up to the comment. + final boolean trulyEnabled = + current.enabled() + && ctx.schemaObject().tableMetadata().getColumn(LEXICAL_COLUMN).isPresent(); + + if (!trulyEnabled) { + return new AlterCollectionLexicalOperation( + ctx, objectMapper, dbLimitsConfig, ddlDelayMillis, requested, /* noOp */ false); + } + + // Both analyzer definitions are guaranteed non-null here (CollectionLexicalDef's + // constructor requires non-null analyzer when enabled=true). JsonNode.equals is value-based, + // so this gives strict structural comparison for both string and object analyzers. + if (!Objects.equals(current.analyzerDefinition(), requested.analyzerDefinition())) { + throw badOptions( + "lexical is already enabled for this collection with a different analyzer configuration"); + } + // Same settings already in effect: no-op success. 
+ return new AlterCollectionLexicalOperation( + ctx, objectMapper, dbLimitsConfig, ddlDelayMillis, requested, /* noOp */ true); + } + + private static SchemaException badOptions(String message) { + return SchemaException.Code.INVALID_ALTER_COLLECTION_OPTIONS.get(Map.of("message", message)); + } +} diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/CreateCollectionCommandResolver.java b/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/CreateCollectionCommandResolver.java index 4041163fa8..571eee0521 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/CreateCollectionCommandResolver.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/CreateCollectionCommandResolver.java @@ -90,7 +90,8 @@ public Operation resolveKeyspaceCommand( CollectionLexicalDef.fromApiDesc( objectMapper, getOrDefault(command.options(), CreateCollectionCommand.Options::lexical, null), - context.versionedSchema().lexicalDef()); + context.versionedSchema().lexicalDef(), + SchemaException.Code.INVALID_CREATE_COLLECTION_OPTIONS); var rerankDef = CollectionRerankDef.fromApiDesc( diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/CollectionLexicalDef.java b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/CollectionLexicalDef.java index 3c4323ec2d..ceb4eff109 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/CollectionLexicalDef.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/CollectionLexicalDef.java @@ -115,14 +115,18 @@ public CollectionLexicalDef(boolean enabled, JsonNode analyzerDefinition) { } /** - * Validate the configuration passed from the user and create the internal representation + * Validates the lexical config passed and constructs the runtime configuration object to use. + * Invalid-option errors are reported with {@code optionsErrorCode} so they get attributed to the + * invoking command (e.g. 
{@code INVALID_CREATE_COLLECTION_OPTIONS} from {@code createCollection}, + * {@code INVALID_ALTER_COLLECTION_OPTIONS} from {@code alterCollection}). * - * @return Valid CollectionLexicalConfig object + * @return Valid CollectionLexicalDef object */ public static SchemaHolder fromApiDesc( ObjectMapper mapper, CreateCollectionCommand.Options.LexicalDesc lexicalDesc, - SchemaFactory schemaFactory) { + SchemaFactory schemaFactory, + SchemaException.Code optionsErrorCode) { // Case 1: No lexical body provided - so no value from the user if (lexicalDesc == null) { @@ -132,7 +136,7 @@ public static SchemaHolder fromApiDesc( // Case 2: Validate 'enabled' flag is present var enabled = lexicalDesc.enabled(); if (enabled == null) { - throw SchemaException.Code.INVALID_CREATE_COLLECTION_OPTIONS.get( + throw optionsErrorCode.get( "message", "'enabled' is required property for 'lexical' Object value"); } @@ -149,7 +153,7 @@ public static SchemaHolder fromApiDesc( if (!enabled) { if (!analyzerNotDefined) { String nodeType = JsonUtil.nodeTypeAsString(lexicalDesc.analyzerDef()); - throw SchemaException.Code.INVALID_CREATE_COLLECTION_OPTIONS.get( + throw optionsErrorCode.get( "message", ("'lexical' is disabled, but 'lexical.analyzer' property was provided with an unexpected type: %s. " + "When 'lexical' is disabled, 'lexical.analyzer' must either be omitted or be JSON null, or an empty Object '{ }'.") @@ -183,7 +187,7 @@ public static SchemaHolder fromApiDesc( // First: check top level members for any invalid (misspelled etc) fields foundNames.removeAll(VALID_ANALYZER_FIELDS); if (!foundNames.isEmpty()) { - throw SchemaException.Code.INVALID_CREATE_COLLECTION_OPTIONS.get( + throw optionsErrorCode.get( "message", "Invalid field%s for 'lexical.analyzer'. 
Valid fields are: %s, found: %s" .formatted( @@ -212,7 +216,7 @@ public static SchemaHolder fromApiDesc( } }; if (!valueOk) { - throw SchemaException.Code.INVALID_CREATE_COLLECTION_OPTIONS.get( + throw optionsErrorCode.get( "message", "'%s' property of 'lexical.analyzer' must be JSON %s, is: %s" .formatted(entry.getKey(), expectedType, JsonUtil.nodeTypeAsString(fieldValue))); @@ -223,7 +227,7 @@ public static SchemaHolder fromApiDesc( cleanedAnalyzerDef = lexicalDesc.analyzerDef(); } else { // Otherwise, invalid definition - throw SchemaException.Code.INVALID_CREATE_COLLECTION_OPTIONS.get( + throw optionsErrorCode.get( "message", "'analyzer' property of 'lexical' must be either JSON Object or String, is: %s" .formatted(JsonUtil.nodeTypeAsString(lexicalDesc.analyzerDef()))); diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/CollectionSchemaObject.java b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/CollectionSchemaObject.java index 7be5777d5b..fb8afae46d 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/CollectionSchemaObject.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/CollectionSchemaObject.java @@ -136,7 +136,7 @@ public static CollectionSchemaObject getCollectionSettings( final Optional vectorColumn = table.getColumn(DocumentConstants.Columns.VECTOR_SEARCH_INDEX_COLUMN_NAME); boolean vectorEnabled = vectorColumn.isPresent(); - final String comment = (String) table.getOptions().get(CqlIdentifier.fromInternal("comment")); + final String comment = CollectionTableComment.rawComment(table); // if vector column exists if (vectorEnabled) { diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/CollectionTableComment.java b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/CollectionTableComment.java new file mode 100644 index 0000000000..110a5e8057 --- /dev/null +++ 
b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/CollectionTableComment.java @@ -0,0 +1,46 @@ +package io.stargate.sgv2.jsonapi.service.schema.collections; + +import com.datastax.oss.driver.api.core.CqlIdentifier; +import com.datastax.oss.driver.api.core.metadata.schema.TableMetadata; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import io.stargate.sgv2.jsonapi.config.constants.TableCommentConstants; + +/** + * Helpers for the JSON stored as the CQL {@code comment} table option of a Collection's backing + * table. Centralizes "where the comment lives" and "what a V1 comment looks like" so callers (e.g. + * createCollection / alterCollection / settings parsing) do not each re-derive it. + */ +public final class CollectionTableComment { + + private static final CqlIdentifier COMMENT_OPTION = CqlIdentifier.fromInternal("comment"); + + private CollectionTableComment() {} + + /** The raw comment string stored on the table, or {@code null} if there is none. */ + public static String rawComment(TableMetadata table) { + Object comment = table.getOptions().get(COMMENT_OPTION); + return comment == null ? null : comment.toString(); + } + + /** + * Whether the table carries a V1-shaped comment, i.e. one with a {@code collection.options} JSON + * object. Legacy / pre-V1 comments (and missing or malformed ones) return {@code false}. 
+ */ + public static boolean hasV1Options(ObjectMapper mapper, TableMetadata table) { + String comment = rawComment(table); + if (comment == null || comment.isBlank()) { + return false; + } + try { + JsonNode options = + mapper + .readTree(comment) + .path(TableCommentConstants.TOP_LEVEL_KEY) + .path(TableCommentConstants.OPTIONS_KEY); + return options.isObject(); + } catch (Exception e) { + return false; + } + } +} diff --git a/src/main/resources/errors.yaml b/src/main/resources/errors.yaml index 6d267f0457..b8d9f2cdbe 100644 --- a/src/main/resources/errors.yaml +++ b/src/main/resources/errors.yaml @@ -1368,12 +1368,20 @@ request-errors: Resend the command using only columns that use the `vector` type. + - scope: SCHEMA + code: INVALID_ALTER_COLLECTION_OPTIONS + title: Invalid options for alterCollection + body: |- + 'alterCollection' command option(s) invalid: ${message} + + Resend 'alterCollection' with valid options. + - scope: SCHEMA code: INVALID_CREATE_COLLECTION_OPTIONS title: Invalid options for createCollection body: |- 'createCollection' command option(s) invalid: ${message} - + Resend 'createCollection' with valid options. - scope: SCHEMA @@ -2025,13 +2033,13 @@ request-errors: - scope: SCHEMA code: TOO_MANY_INDEXES_FOR_COLLECTION - title: Cannot create collection due to number of existing indexes + title: Cannot create indexes for collection due to number of existing indexes body: |- - The command attempted to create an collection, however the number of indexes in the database has reached the maximum allowed. - - Failed to create Collection: ${keyspace}.${table}. - The number of indexes needed for each collection is: ${indexesPerCollection}. - + The command attempted to create one or more indexes for a collection, however the number of indexes in the database has reached the maximum allowed. + + Failed for Collection: ${keyspace}.${table}. + The number of indexes the command needs is: ${indexesPerCollection}. 
+ Reduce the number of indexes in the database and resend the command. - scope: SCHEMA diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/AlterCollectionTooManyIndexesIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/AlterCollectionTooManyIndexesIntegrationTest.java new file mode 100644 index 0000000000..0daf8a8cae --- /dev/null +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/AlterCollectionTooManyIndexesIntegrationTest.java @@ -0,0 +1,102 @@ +package io.stargate.sgv2.jsonapi.api.v1; + +import static io.stargate.sgv2.jsonapi.api.v1.ResponseAssertions.responseIsDDLSuccess; +import static io.stargate.sgv2.jsonapi.api.v1.ResponseAssertions.responseIsError; +import static org.hamcrest.Matchers.is; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import io.quarkus.test.common.QuarkusTestResource; +import io.quarkus.test.junit.QuarkusIntegrationTest; +import io.stargate.sgv2.jsonapi.exception.SchemaException; +import io.stargate.sgv2.jsonapi.testresource.DseTestResource; +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.ClassOrderer; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestClassOrder; + +/** + * Separate integration test (its own container with a deliberately small DB-wide index budget) that + * verifies the index-limit pre-flight of {@code alterCollection}: enabling lexical adds one SAI, + * and if that would exceed the database index limit it must be rejected with {@code + * TOO_MANY_INDEXES_FOR_COLLECTION} before any DDL runs. + * + *
<p>Companion to {@link CreateCollectionTooManyIndexesIntegrationTest}, which covers the same + * limit for {@code createCollection}. + */ +@QuarkusIntegrationTest +@QuarkusTestResource( + value = AlterCollectionTooManyIndexesIntegrationTest.LowIndexBudgetTestResource.class, + restrictToAnnotatedClass = true) +@TestClassOrder(ClassOrderer.OrderAnnotation.class) +class AlterCollectionTooManyIndexesIntegrationTest extends AbstractKeyspaceIntegrationTestBase { + + // A lexical-disabled, non-vector collection uses 9 SAIs (a lexical-enabled one uses 10 — see + // CreateCollectionTooManyIndexesIntegrationTest). A DB budget of 10 lets the disabled collection + // be created; padding SAIs added via CQL then make enabling lexical exceed that budget. + private static final int INDEXES_PER_DB = 10; + + public static class LowIndexBudgetTestResource extends DseTestResource { + public LowIndexBudgetTestResource() {} + + @Override + public int getIndexesPerDBOverride() { + return INDEXES_PER_DB; + } + } + + @Test + public void enableLexicalRejectedWhenIndexBudgetExhausted() { + Assumptions.assumeTrue(isLexicalAvailableForDB()); + + final String coll = "alter_lex_limit"; + + // 1) Create a collection with lexical disabled (uses 9 of the 10 available SAIs). + String create = + """ + { + "createCollection": { + "name": "%s", + "options": { "lexical": { "enabled": false } } + } + } + """ + .formatted(coll); + givenHeadersAndJson(create) + .when() + .post(KeyspaceResource.BASE_PATH, keyspaceName) + .then() + .statusCode(200) + .body("$", responseIsDDLSuccess()) + .body("status.ok", is(1)); + + // 2) Push the DB past its index ceiling out-of-band (via CQL, to bypass the API's own create-time + // limit check). 9 (collection) + 2 (padding) = 11 SAIs, already over the limit of 10.
+ boolean padded = + executeCqlStatement( + "CREATE TABLE \"%s\".\"alter_lex_pad\" (id int PRIMARY KEY, c0 int, c1 int)" + .formatted(keyspaceName), + "CREATE CUSTOM INDEX alter_lex_pad_c0 ON \"%s\".\"alter_lex_pad\" (c0) USING 'StorageAttachedIndex'" + .formatted(keyspaceName), + "CREATE CUSTOM INDEX alter_lex_pad_c1 ON \"%s\".\"alter_lex_pad\" (c1) USING 'StorageAttachedIndex'" + .formatted(keyspaceName)); + assertTrue(padded, "Pre-condition: padding table and indexes should be created"); + + // 3) enableLexical needs one more SAI -> over the limit -> rejected by the pre-flight, no DDL. + String alter = + """ + { + "alterCollection": { + "operation": { "enableLexical": { } } + } + } + """; + givenHeadersAndJson(alter) + .when() + .post(CollectionResource.BASE_PATH, keyspaceName, coll) + .then() + .statusCode(200) + .body("$", responseIsError()) + .body( + "errors[0].errorCode", is(SchemaException.Code.TOO_MANY_INDEXES_FOR_COLLECTION.name())); + } +} diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/AlterCollectionWithLexicalIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/AlterCollectionWithLexicalIntegrationTest.java new file mode 100644 index 0000000000..7ca724d55e --- /dev/null +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/AlterCollectionWithLexicalIntegrationTest.java @@ -0,0 +1,563 @@ +package io.stargate.sgv2.jsonapi.api.v1; + +import static io.restassured.RestAssured.given; +import static io.stargate.sgv2.jsonapi.api.v1.ResponseAssertions.responseIsDDLSuccess; +import static io.stargate.sgv2.jsonapi.api.v1.ResponseAssertions.responseIsError; +import static net.javacrumbs.jsonunit.JsonMatchers.jsonEquals; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.nullValue; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import io.quarkus.test.common.WithTestResource; +import io.quarkus.test.junit.QuarkusIntegrationTest; +import
io.restassured.http.ContentType; +import io.restassured.response.ValidatableResponse; +import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants; +import io.stargate.sgv2.jsonapi.exception.RequestException; +import io.stargate.sgv2.jsonapi.exception.SchemaException; +import io.stargate.sgv2.jsonapi.testresource.DseTestResource; +import org.apache.commons.lang3.RandomStringUtils; +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.ClassOrderer; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Order; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestClassOrder; + +@QuarkusIntegrationTest +@WithTestResource(value = DseTestResource.class) +@TestClassOrder(ClassOrderer.OrderAnnotation.class) +class AlterCollectionWithLexicalIntegrationTest extends AbstractKeyspaceIntegrationTestBase { + + @Nested + @Order(1) + class AlterCollectionEnableLexicalHappyPath { + + @Test + void enableLexicalDefaultAnalyzerOnDisabledCollection() { + Assumptions.assumeTrue(isLexicalAvailableForDB()); + + final String name = freshCollectionName(); + createCollectionWithLexicalDisabled(name); + + String json = + """ + { + "alterCollection": { + "operation": { + "enableLexical": { } + } + } + } + """; + postToCollection(name, json) + .statusCode(200) + .body("$", responseIsDDLSuccess()) + .body("status.ok", is(1)); + + // Sanity check: lexical insert/find should now work via $lexical sort. 
+ String insertOk = + """ + { + "insertOne": { + "document": { "_id": "doc1", "$lexical": "hello world" } + } + } + """; + postToCollection(name, insertOk).statusCode(200).body("errors", is(nullValue())); + + String find = + """ + { + "findOne": { + "sort": { "$lexical": "hello" } + } + } + """; + postToCollection(name, find) + .statusCode(200) + .body("errors", is(nullValue())) + .body("data.document._id", is("doc1")); + + deleteCollection(name); + } + + @Test + void enableLexicalCustomAnalyzerOnDisabledCollection() { + Assumptions.assumeTrue(isLexicalAvailableForDB()); + + final String name = freshCollectionName(); + createCollectionWithLexicalDisabled(name); + + String json = + """ + { + "alterCollection": { + "operation": { + "enableLexical": { + "analyzer": { "tokenizer": { "name": "whitespace" } } + } + } + } + } + """; + postToCollection(name, json) + .statusCode(200) + .body("$", responseIsDDLSuccess()) + .body("status.ok", is(1)); + + deleteCollection(name); + } + + // Locks in the surgical-replace contract of buildUpdatedComment: when alterCollection enables + // lexical, all other previously-configured collection options (vector, indexing, defaultId, + // rerank) must remain unchanged in the stored comment. 
+ @Test + void preservesOtherOptionsAcrossAlter() { + Assumptions.assumeTrue(isLexicalAvailableForDB()); + + final String name = freshCollectionName(); + String createBody = + """ + { + "createCollection": { + "name": "%s", + "options": { + "defaultId": { "type": "objectId" }, + "vector": { "dimension": 5, "metric": "cosine" }, + "indexing": { "deny": ["comment"] }, + "lexical": { "enabled": false }, + "rerank": { "enabled": false } + } + } + } + """ + .formatted(name); + given() + .port(getTestPort()) + .headers(getHeaders()) + .contentType(ContentType.JSON) + .body(createBody) + .when() + .post(KeyspaceResource.BASE_PATH, keyspaceName) + .then() + .statusCode(200) + .body("$", responseIsDDLSuccess()) + .body("status.ok", is(1)); + + // Enable lexical via alterCollection. + String alterBody = + """ + { + "alterCollection": { + "operation": { + "enableLexical": { } + } + } + } + """; + postToCollection(name, alterBody) + .statusCode(200) + .body("$", responseIsDDLSuccess()) + .body("status.ok", is(1)); + + // Verify via findCollections + explain that everything except lexical is unchanged, + // and that lexical has flipped to enabled with the default analyzer. 
+ String expected = + """ + { + "name": "%s", + "options": { + "defaultId": { "type": "objectId" }, + "vector": { "dimension": 5, "metric": "cosine", "sourceModel": "other" }, + "indexing": { "deny": ["comment"] }, + "lexical": { "enabled": true, "analyzer": "standard" }, + "rerank": { "enabled": false } + } + } + """ + .formatted(name); + given() + .port(getTestPort()) + .headers(getHeaders()) + .contentType(ContentType.JSON) + .body( + """ + { + "findCollections": { + "options": { "explain": true } + } + } + """) + .when() + .post(KeyspaceResource.BASE_PATH, keyspaceName) + .then() + .statusCode(200) + .body("$", responseIsDDLSuccess()) + .body( + "status.collections.find { it.name == '%s' }".formatted(name), jsonEquals(expected)); + + deleteCollection(name); + } + + @Test + void enableLexicalAlreadyEnabledSameSettingsIsNoOp() { + Assumptions.assumeTrue(isLexicalAvailableForDB()); + + final String name = freshCollectionName(); + // Create with lexical enabled, default analyzer. + createCollectionWithLexical(name, "{ \"enabled\": true, \"analyzer\": \"standard\" }"); + + String json = + """ + { + "alterCollection": { + "operation": { + "enableLexical": { "analyzer": "standard" } + } + } + } + """; + postToCollection(name, json) + .statusCode(200) + .body("$", responseIsDDLSuccess()) + .body("status.ok", is(1)); + + deleteCollection(name); + } + } + + @Nested + @Order(2) + class AlterCollectionLexicalFail { + + @Test + void failEnableLexicalDifferentAnalyzer() { + Assumptions.assumeTrue(isLexicalAvailableForDB()); + + final String name = freshCollectionName(); + createCollectionWithLexical(name, "{ \"enabled\": true, \"analyzer\": \"standard\" }"); + + String json = + """ + { + "alterCollection": { + "operation": { + "enableLexical": { + "analyzer": { "tokenizer": { "name": "whitespace" } } + } + } + } + } + """; + postToCollection(name, json) + .statusCode(200) + .body("$", responseIsError()) + .body( + "errors[0].errorCode", + 
is(SchemaException.Code.INVALID_ALTER_COLLECTION_OPTIONS.name())) + .body("errors[0].message", containsString("different analyzer configuration")); + + deleteCollection(name); + } + + @Test + void failMissingOperation() { + Assumptions.assumeTrue(isLexicalAvailableForDB()); + + final String name = freshCollectionName(); + createCollectionWithLexicalDisabled(name); + + String json = + """ + { + "alterCollection": { } + } + """; + postToCollection(name, json) + .statusCode(200) + .body("$", responseIsError()) + .body( + "errors[0].errorCode", + is(SchemaException.Code.INVALID_ALTER_COLLECTION_OPTIONS.name())) + .body("errors[0].message", containsString("must specify 'operation' field")); + + deleteCollection(name); + } + + // Unknown operation key under "operation" — Jackson surfaces this via the global + // CommandObjectMapperHandler.handleUnknownTypeId path which throws COMMAND_UNKNOWN. + @Test + void failUnknownOperation() { + Assumptions.assumeTrue(isLexicalAvailableForDB()); + + final String name = freshCollectionName(); + createCollectionWithLexicalDisabled(name); + + String json = + """ + { + "alterCollection": { + "operation": { + "unknownOp": { } + } + } + } + """; + postToCollection(name, json) + .statusCode(200) + .body("$", responseIsError()) + .body("errors[0].errorCode", is(RequestException.Code.COMMAND_UNKNOWN.name())) + .body( + "errors[0].message", + containsString("Command 'unknownOp' is not a AlterCollection Operation recognized")) + .body("errors[0].message", containsString("AlterCollection Operations: [enableLexical]")); + + deleteCollection(name); + } + + @Test + void failAnalyzerWrongJsonType() { + Assumptions.assumeTrue(isLexicalAvailableForDB()); + + final String name = freshCollectionName(); + createCollectionWithLexicalDisabled(name); + + String json = + """ + { + "alterCollection": { + "operation": { + "enableLexical": { + "analyzer": [1, 2, 3] + } + } + } + } + """; + postToCollection(name, json) + .statusCode(200) + .body("$", 
responseIsError()) + .body( + "errors[0].errorCode", + is(SchemaException.Code.INVALID_ALTER_COLLECTION_OPTIONS.name())) + .body( + "errors[0].message", + containsString( + "'analyzer' property of 'lexical' must be either JSON Object or String, is: Array")); + + deleteCollection(name); + } + + @Test + void failAnalyzerMisspelledField() { + Assumptions.assumeTrue(isLexicalAvailableForDB()); + + final String name = freshCollectionName(); + createCollectionWithLexicalDisabled(name); + + String json = + """ + { + "alterCollection": { + "operation": { + "enableLexical": { + "analyzer": { + "tokeniser": { "name": "standard" } + } + } + } + } + } + """; + postToCollection(name, json) + .statusCode(200) + .body("$", responseIsError()) + .body( + "errors[0].errorCode", + is(SchemaException.Code.INVALID_ALTER_COLLECTION_OPTIONS.name())) + .body( + "errors[0].message", + containsString( + "Invalid field for 'lexical.analyzer'. Valid fields are: [charFilters, filters, tokenizer], found: [tokeniser]")); + + deleteCollection(name); + } + + // Locks in that the `enableLexical` body rejects unknown fields via Jackson's + // FAIL_ON_UNKNOWN_PROPERTIES setting. 
+ @Test + void failEnableLexicalUnknownField() { + Assumptions.assumeTrue(isLexicalAvailableForDB()); + + final String name = freshCollectionName(); + createCollectionWithLexicalDisabled(name); + + String json = + """ + { + "alterCollection": { + "operation": { + "enableLexical": { + "analyzer": "standard", + "foo": "bar" + } + } + } + } + """; + postToCollection(name, json) + .statusCode(200) + .body("$", responseIsError()) + .body("errors[0].errorCode", is(RequestException.Code.COMMAND_FIELD_UNKNOWN.name())) + .body("errors[0].message", containsString("Command field 'foo' not recognized")); + + deleteCollection(name); + } + + @Test + void failEnableWhenLexicalNotAvailableForDB() { + Assumptions.assumeFalse(isLexicalAvailableForDB()); + + final String name = freshCollectionName(); + createCollectionWithLexicalDisabled(name); + + String json = + """ + { + "alterCollection": { + "operation": { + "enableLexical": { } + } + } + } + """; + postToCollection(name, json) + .statusCode(200) + .body("$", responseIsError()) + .body("errors[0].errorCode", is(SchemaException.Code.LEXICAL_FEATURE_NOT_ENABLED.name())); + + deleteCollection(name); + } + } + + @Nested + @Order(3) + class AlterCollectionEnableLexicalIdempotency { + + // An interrupted prior run (or a concurrent op) can leave the lexical column present while the + // stored comment still says lexical is disabled. The resolver then treats the collection as + // "not truly enabled" and re-runs the full DDL pipeline. The operation checks freshly-fetched + // metadata, sees the column already exists, and skips ADD COLUMN (the backend does not support + // ADD IF NOT EXISTS), so enableLexical still succeeds and reconciles the state instead of + // failing with "column already exists". 
+ @Test + void enableLexicalWhenColumnAlreadyPresent() { + Assumptions.assumeTrue(isLexicalAvailableForDB()); + + final String name = freshCollectionName(); + createCollectionWithLexicalDisabled(name); + + // Simulate the leftover column from an interrupted alter, bypassing the Data API. + boolean applied = + executeCqlStatement( + "ALTER TABLE \"%s\".\"%s\" ADD %s text" + .formatted( + keyspaceName, name, DocumentConstants.Columns.LEXICAL_INDEX_COLUMN_NAME)); + assertTrue(applied, "Pre-condition: manual ADD COLUMN should apply"); + + // enableLexical forces a schema refresh, so the orphan column is seen and ADD COLUMN is + // skipped (no ADD IF NOT EXISTS support); then CREATE INDEX IF NOT EXISTS + comment update. + String alter = + """ + { + "alterCollection": { + "operation": { + "enableLexical": { } + } + } + } + """; + postToCollection(name, alter) + .statusCode(200) + .body("$", responseIsDDLSuccess()) + .body("status.ok", is(1)); + + // Lexical must actually work now (column + index + comment all consistent).
+ String insertOk = + """ + { + "insertOne": { + "document": { "_id": "doc1", "$lexical": "hello world" } + } + } + """; + postToCollection(name, insertOk).statusCode(200).body("errors", is(nullValue())); + + String find = + """ + { + "findOne": { + "sort": { "$lexical": "hello" } + } + } + """; + postToCollection(name, find) + .statusCode(200) + .body("errors", is(nullValue())) + .body("data.document._id", is("doc1")); + + deleteCollection(name); + } + } + + // ----------------------------------------------------------------- + // Helpers + // ----------------------------------------------------------------- + + private static String freshCollectionName() { + return "alter_lex_" + RandomStringUtils.insecure().nextAlphanumeric(12); + } + + private void createCollectionWithLexicalDisabled(String collectionName) { + createCollectionWithLexical(collectionName, "{ \"enabled\": false }"); + } + + private void createCollectionWithLexical(String collectionName, String lexicalDef) { + String body = + """ + { + "createCollection": { + "name": "%s", + "options": { + "lexical": %s + } + } + } + """ + .formatted(collectionName, lexicalDef); + given() + .port(getTestPort()) + .headers(getHeaders()) + .contentType(ContentType.JSON) + .body(body) + .when() + .post(KeyspaceResource.BASE_PATH, keyspaceName) + .then() + .statusCode(200) + .body("$", responseIsDDLSuccess()) + .body("status.ok", is(1)); + } + + private ValidatableResponse postToCollection(String collectionName, String json) { + return given() + .port(getTestPort()) + .headers(getHeaders()) + .contentType(ContentType.JSON) + .body(json) + .when() + .post(CollectionResource.BASE_PATH, keyspaceName, collectionName) + .then(); + } +} diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/CollectionResourceIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/CollectionResourceIntegrationTest.java index c2f9c2e49e..4a00a37cf3 100644 ---
a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/CollectionResourceIntegrationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/CollectionResourceIntegrationTest.java @@ -77,7 +77,8 @@ public void unknownCommand() { "Command 'unknownCommand' is not a Collection Command recognized by Data API.")) .body( "errors[0].message", - containsString("Data API supports following Collection Commands: [alterTable,")); + containsString( + "Data API supports following Collection Commands: [alterCollection, alterTable,")); } @Test