diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/CollectionCommand.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/CollectionCommand.java index 0c456b4acc..138a9e94db 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/CollectionCommand.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/CollectionCommand.java @@ -21,6 +21,7 @@ @JsonSubTypes.Type(value = InsertOneCommand.class), @JsonSubTypes.Type(value = UpdateManyCommand.class), @JsonSubTypes.Type(value = UpdateOneCommand.class), + @JsonSubTypes.Type(value = AlterCollectionCommand.class), // We have only collection resource that is used for API Tables @JsonSubTypes.Type(value = AlterTableCommand.class), @JsonSubTypes.Type(value = CreateIndexCommand.class), diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/CommandName.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/CommandName.java index 5c522d5fd2..68998b2346 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/CommandName.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/CommandName.java @@ -14,6 +14,7 @@ public enum CommandName { // they should not be DDL, they are not changing schema, we should add an CommandType.ADMIN for // them ? + ALTER_COLLECTION(Names.ALTER_COLLECTION, CommandType.DDL, CommandTarget.COLLECTION), ALTER_TABLE(Names.ALTER_TABLE, CommandType.DDL, CommandTarget.TABLE), ALTER_TYPE(Names.ALTER_TYPE, CommandType.DDL, CommandTarget.TABLE), COUNT_DOCUMENTS(Names.COUNT_DOCUMENTS, CommandType.DML, CommandTarget.COLLECTION), @@ -107,6 +108,7 @@ public static List filterByTarget(CommandTarget target) { } public interface Names { + String ALTER_COLLECTION = "alterCollection"; String ALTER_TABLE = "alterTable"; String ALTER_TYPE = "alterType"; String COUNT_DOCUMENTS = "countDocuments"; diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/impl/AlterCollectionCommand.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/impl/AlterCollectionCommand.java new file mode 100644 index 0000000000..e09104ab53 --- /dev/null +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/impl/AlterCollectionCommand.java @@ -0,0 +1,37 @@ +package io.stargate.sgv2.jsonapi.api.model.command.impl; + +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonTypeName; +import io.stargate.sgv2.jsonapi.api.model.command.CollectionCommand; +import io.stargate.sgv2.jsonapi.api.model.command.CommandName; +import jakarta.validation.Valid; +import javax.annotation.Nullable; +import org.eclipse.microprofile.openapi.annotations.enums.SchemaType; +import org.eclipse.microprofile.openapi.annotations.media.Schema; + +@Schema( + description = + "Command that alters mutable settings of an existing collection. Currently supports enabling the 'lexical' feature.") +@JsonTypeName(CommandName.Names.ALTER_COLLECTION) +public record AlterCollectionCommand( + @Valid + @JsonInclude(JsonInclude.Include.NON_NULL) + @Nullable + @Schema( + description = + "Lexical configuration to apply. Currently only enabling is supported ('enabled' must be true).", + type = SchemaType.OBJECT, + implementation = CreateCollectionCommand.Options.LexicalDesc.class) + CreateCollectionCommand.Options.LexicalDesc lexical) + implements CollectionCommand { + + @Override + public CommandName commandName() { + return CommandName.ALTER_COLLECTION; + } + + @Override + public boolean isForceSchemaRefresh() { + return true; + } +} diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/v1/CollectionResource.java b/src/main/java/io/stargate/sgv2/jsonapi/api/v1/CollectionResource.java index 7fbbe62563..34b622fd1e 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/v1/CollectionResource.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/v1/CollectionResource.java @@ -7,6 +7,7 @@ import io.smallrye.mutiny.Uni; import io.stargate.sgv2.jsonapi.ConfigPreLoader; import io.stargate.sgv2.jsonapi.api.model.command.*; +import io.stargate.sgv2.jsonapi.api.model.command.impl.AlterCollectionCommand; import io.stargate.sgv2.jsonapi.api.model.command.impl.AlterTableCommand; import io.stargate.sgv2.jsonapi.api.model.command.impl.CountDocumentsCommand; import io.stargate.sgv2.jsonapi.api.model.command.impl.CreateIndexCommand; @@ -138,6 +139,7 @@ public CollectionResource( InsertManyCommand.class, UpdateManyCommand.class, UpdateOneCommand.class, + AlterCollectionCommand.class, // Table Only commands AlterTableCommand.class, CreateIndexCommand.class, diff --git a/src/main/java/io/stargate/sgv2/jsonapi/exception/SchemaException.java b/src/main/java/io/stargate/sgv2/jsonapi/exception/SchemaException.java index be262773fb..ca7fdfbf73 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/exception/SchemaException.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/exception/SchemaException.java @@ -42,6 +42,7 @@ public enum Code implements ErrorCode { EXISTING_COLLECTION_DIFFERENT_SETTINGS, EXISTING_TABLE_NOT_DATA_API_COLLECTION, // converted from ErrorCodeV1 + INVALID_ALTER_COLLECTION_OPTIONS, INVALID_CREATE_COLLECTION_OPTIONS, INVALID_FORMAT_FOR_INDEX_CREATION_COLUMN, INVALID_INDEXING_DEFINITION, diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/AlterCollectionLexicalOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/AlterCollectionLexicalOperation.java new file mode 100644 index 0000000000..eaab9bd7c0 --- /dev/null +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/AlterCollectionLexicalOperation.java @@ -0,0 +1,162 @@ +package io.stargate.sgv2.jsonapi.service.operation.collections; + +import com.datastax.oss.driver.api.core.CqlIdentifier; +import com.datastax.oss.driver.api.core.cql.AsyncResultSet; +import com.datastax.oss.driver.api.core.cql.SimpleStatement; +import com.fasterxml.jackson.core.JacksonException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ObjectNode; +import io.smallrye.mutiny.Uni; +import io.stargate.sgv2.jsonapi.api.model.command.CommandContext; +import io.stargate.sgv2.jsonapi.api.model.command.CommandResult; +import io.stargate.sgv2.jsonapi.api.request.RequestContext; +import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants; +import io.stargate.sgv2.jsonapi.config.constants.TableCommentConstants; +import io.stargate.sgv2.jsonapi.service.cqldriver.executor.QueryExecutor; +import io.stargate.sgv2.jsonapi.service.operation.Operation; +import io.stargate.sgv2.jsonapi.service.schema.collections.CollectionLexicalDef; +import io.stargate.sgv2.jsonapi.service.schema.collections.CollectionSchemaObject; +import java.time.Duration; +import java.util.function.Supplier; + +/** + * Operation that enables the lexical feature on an existing collection by adding the {@code + * query_lexical_value} column, creating an analyzed SAI index on it, and updating the table + * "comment" JSON to record the new lexical config. + * + *

When {@link #noOp} is true the operation returns success without executing any DDL: this is + * the "already enabled with same settings" case. + * + *

No rollback on partial failure. If e.g. ADD COLUMN succeeds but CREATE INDEX fails, the + * column is left in place and the failure is propagated to the caller. This matches {@link + * CreateCollectionOperation}'s behavior and is intentional: + * + *

+ */ +public record AlterCollectionLexicalOperation( + CommandContext commandContext, + ObjectMapper objectMapper, + int ddlDelayMillis, + CollectionLexicalDef newLexicalDef, + boolean noOp) + implements Operation { + + private static final CqlIdentifier COMMENT_OPTION = CqlIdentifier.fromInternal("comment"); + + private static final CqlIdentifier LEXICAL_COLUMN = + CqlIdentifier.fromInternal(DocumentConstants.Columns.LEXICAL_INDEX_COLUMN_NAME); + + @Override + public Uni> execute( + RequestContext requestContext, QueryExecutor queryExecutor) { + + if (noOp) { + // Type witness needed: Mutiny's item(T) and item(Supplier) overloads otherwise + // both match SchemaChangeResult (which is a Supplier), and inference picks + // the wrong T. + return Uni.createFrom().>item(new SchemaChangeResult(true)); + } + + final CollectionSchemaObject schemaObject = commandContext.schemaObject(); + final String keyspace = schemaObject.tableMetadata().getKeyspace().asInternal(); + final String table = schemaObject.tableMetadata().getName().asInternal(); + + final String newComment; + try { + newComment = buildUpdatedComment(schemaObject); + } catch (JacksonException e) { + return Uni.createFrom().failure(e); + } + + // Idempotent for retry after partial failure: skip ADD COLUMN if the column already exists. + final boolean columnAlreadyExists = + schemaObject.tableMetadata().getColumn(LEXICAL_COLUMN).isPresent(); + + SimpleStatement createIndexStmt = + CreateCollectionOperation.buildLexicalIndexStatement( + keyspace, table, newLexicalDef, /* ifNotExists */ true); + + // Cassandra does not accept bind parameters for table options like `comment`; embed the + // JSON directly with CQL single-quote escaping (matches + // CreateCollectionOperation.getCreateTable). + SimpleStatement alterCommentStmt = + SimpleStatement.newInstance( + "ALTER TABLE \"%s\".\"%s\" WITH comment = '%s'" + .formatted(keyspace, table, newComment.replace("'", "''"))); + + final Duration delay = Duration.ofMillis(ddlDelayMillis > 0 ? ddlDelayMillis : 100); + + Uni pipeline; + if (columnAlreadyExists) { + pipeline = queryExecutor.executeCreateSchemaChange(requestContext, createIndexStmt); + } else { + SimpleStatement addColumnStmt = + SimpleStatement.newInstance( + "ALTER TABLE \"%s\".\"%s\" ADD %s text" + .formatted(keyspace, table, DocumentConstants.Columns.LEXICAL_INDEX_COLUMN_NAME)); + pipeline = + queryExecutor + .executeCreateSchemaChange(requestContext, addColumnStmt) + .onItem() + .delayIt() + .by(delay) + .onItem() + .transformToUni( + r1 -> queryExecutor.executeCreateSchemaChange(requestContext, createIndexStmt)); + } + + return pipeline + .onItem() + .delayIt() + .by(delay) + .onItem() + .transformToUni( + r2 -> queryExecutor.executeCreateSchemaChange(requestContext, alterCommentStmt)) + .map(r3 -> new SchemaChangeResult(true)); + } + + /** + * Reads the current table comment JSON and surgically replaces the {@code + * collection.options.lexical} sub-node, leaving all other options (vector / indexing / id / + * rerank / unknown fields) untouched. + * + *

The resolver guarantees we are operating on a V1-shaped comment (legacy/V0 collections are + * rejected before reaching the operation). + */ + private String buildUpdatedComment(CollectionSchemaObject schemaObject) throws JacksonException { + final Object commentObj = schemaObject.tableMetadata().getOptions().get(COMMENT_OPTION); + final String comment = commentObj == null ? null : commentObj.toString(); + if (comment == null || comment.isBlank()) { + // Defensive: resolver should have rejected this case. + throw new IllegalStateException( + "Cannot alter collection: table comment is empty; expected V1 schema"); + } + + final ObjectNode rootNode = (ObjectNode) objectMapper.readTree(comment); + final ObjectNode collectionNode = + (ObjectNode) rootNode.get(TableCommentConstants.TOP_LEVEL_KEY); + if (collectionNode == null) { + // Defensive: resolver should have rejected this case. + throw new IllegalStateException( + "Cannot alter collection: comment does not have '" + + TableCommentConstants.TOP_LEVEL_KEY + + "' node"); + } + ObjectNode optionsNode = (ObjectNode) collectionNode.get(TableCommentConstants.OPTIONS_KEY); + if (optionsNode == null) { + optionsNode = objectMapper.createObjectNode(); + collectionNode.set(TableCommentConstants.OPTIONS_KEY, optionsNode); + } + optionsNode.putPOJO(TableCommentConstants.COLLECTION_LEXICAL_CONFIG_KEY, newLexicalDef); + return objectMapper.writeValueAsString(rootNode); + } +} diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/CollectionDriverExceptionHandler.java b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/CollectionDriverExceptionHandler.java index 8a02ad61cc..9738aa19b4 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/CollectionDriverExceptionHandler.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/CollectionDriverExceptionHandler.java @@ -5,6 +5,7 @@ import com.datastax.oss.driver.api.core.cql.SimpleStatement; import com.datastax.oss.driver.api.core.servererrors.InvalidQueryException; +import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants; import io.stargate.sgv2.jsonapi.exception.*; import io.stargate.sgv2.jsonapi.service.cqldriver.executor.DefaultDriverExceptionHandler; import io.stargate.sgv2.jsonapi.service.operation.tables.CreateIndexExceptionHandler; @@ -52,7 +53,9 @@ public RuntimeException handle(InvalidQueryException exception) { if (exception .getMessage() .contains( - "analyzed size for column query_lexical_value exceeds the cumulative limit for index")) { + "analyzed size for column " + + DocumentConstants.Columns.LEXICAL_INDEX_COLUMN_NAME + + " exceeds the cumulative limit for index")) { return DocumentException.Code.LEXICAL_CONTENT_TOO_LONG.get(errVars(schemaObject, exception)); } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/CreateCollectionOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/CreateCollectionOperation.java index 7776d6ba74..3c0492f4b1 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/CreateCollectionOperation.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/CreateCollectionOperation.java @@ -25,6 +25,7 @@ import io.stargate.sgv2.jsonapi.api.model.command.tracing.RequestTracing; import io.stargate.sgv2.jsonapi.api.request.RequestContext; import io.stargate.sgv2.jsonapi.config.DatabaseLimitsConfig; +import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants; import io.stargate.sgv2.jsonapi.config.constants.TableCommentConstants; import io.stargate.sgv2.jsonapi.exception.DatabaseException; import io.stargate.sgv2.jsonapi.exception.SchemaException; @@ -612,15 +613,10 @@ private List getIndexStatements( } if (overrideLexicalDef.enabled()) { - var analyzerDef = overrideLexicalDef.analyzerDefinition(); - var analyzerString = analyzerDef.isTextual() ? analyzerDef.asText() : analyzerDef.toString(); + var keyspace = commandContext.schemaObject().identifier().keyspace(); statements.add( - buildSaiIndex( - collectionExisted, - "query_lexical_value", - "query_lexical_value", - false, - Map.of("index_analyzer", analyzerString))); + buildLexicalIndexStatement( + keyspace, collectionName, overrideLexicalDef, collectionExisted)); } if (LOGGER.isTraceEnabled()) { @@ -666,4 +662,44 @@ private SimpleStatement buildSaiIndex( return new ExtendedCreateIndex((DefaultCreateIndex) createIndex).build(); } + + /** + * Builds the {@code CREATE CUSTOM INDEX} statement for the lexical column, used both by + * createCollection (when the table is fresh or being recreated) and by alterCollection (when + * enabling lexical on an existing collection). + * + * @param ifNotExists when true, emits {@code IF NOT EXISTS} for idempotent retries + */ + public static SimpleStatement buildLexicalIndexStatement( + String keyspace, String table, CollectionLexicalDef lexicalDef, boolean ifNotExists) { + return buildLexicalIndexStatement( + CqlIdentifier.fromInternal(keyspace), + CqlIdentifier.fromInternal(table), + lexicalDef, + ifNotExists); + } + + private static SimpleStatement buildLexicalIndexStatement( + CqlIdentifier keyspace, + CqlIdentifier table, + CollectionLexicalDef lexicalDef, + boolean ifNotExists) { + var analyzerDef = lexicalDef.analyzerDefinition(); + // Note: needs to be either plain (unquoted) String (NOT quoted JSON String) OR JSON Object + var analyzerString = analyzerDef.isTextual() ? analyzerDef.asText() : analyzerDef.toString(); + var lexicalCol = DocumentConstants.Columns.LEXICAL_INDEX_COLUMN_NAME; + var index = CqlIdentifier.fromInternal(table.asInternal() + "_" + lexicalCol); + var column = CqlIdentifier.fromInternal(lexicalCol); + + var start = SchemaBuilder.createIndex(index).custom(CQLSAIIndex.SAI_CLASS_NAME); + if (ifNotExists) { + start = start.ifNotExists(); + } + var createIndex = + start + .onTable(keyspace, table) + .andColumn(column) + .withSASIOptions(Map.of("index_analyzer", analyzerString)); + return new ExtendedCreateIndex((DefaultCreateIndex) createIndex).build(); + } } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/InsertCollectionOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/InsertCollectionOperation.java index 32e6c72470..61e38022dd 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/InsertCollectionOperation.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/InsertCollectionOperation.java @@ -8,6 +8,7 @@ import io.stargate.sgv2.jsonapi.api.model.command.CommandContext; import io.stargate.sgv2.jsonapi.api.model.command.CommandResult; import io.stargate.sgv2.jsonapi.api.request.RequestContext; +import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants; import io.stargate.sgv2.jsonapi.exception.DocumentException; import io.stargate.sgv2.jsonapi.exception.SchemaException; import io.stargate.sgv2.jsonapi.service.cqldriver.executor.QueryExecutor; @@ -215,7 +216,7 @@ public String buildInsertQuery(boolean vectorEnabled) { insertQuery.append(", query_vector_value"); } if (lexicalEnabled) { - insertQuery.append(", query_lexical_value"); + insertQuery.append(", ").append(DocumentConstants.Columns.LEXICAL_INDEX_COLUMN_NAME); } insertQuery.append(") VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?"); diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/AlterCollectionCommandResolver.java b/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/AlterCollectionCommandResolver.java new file mode 100644 index 0000000000..39211ecb40 --- /dev/null +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/AlterCollectionCommandResolver.java @@ -0,0 +1,122 @@ +package io.stargate.sgv2.jsonapi.service.resolver; + +import com.datastax.oss.driver.api.core.CqlIdentifier; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import io.stargate.sgv2.jsonapi.api.model.command.CommandContext; +import io.stargate.sgv2.jsonapi.api.model.command.impl.AlterCollectionCommand; +import io.stargate.sgv2.jsonapi.config.OperationsConfig; +import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants; +import io.stargate.sgv2.jsonapi.config.constants.TableCommentConstants; +import io.stargate.sgv2.jsonapi.exception.SchemaException; +import io.stargate.sgv2.jsonapi.service.operation.Operation; +import io.stargate.sgv2.jsonapi.service.operation.collections.AlterCollectionLexicalOperation; +import io.stargate.sgv2.jsonapi.service.schema.collections.CollectionLexicalDef; +import io.stargate.sgv2.jsonapi.service.schema.collections.CollectionSchemaObject; +import jakarta.enterprise.context.ApplicationScoped; +import jakarta.inject.Inject; +import java.util.Map; +import java.util.Objects; + +@ApplicationScoped +public class AlterCollectionCommandResolver implements CommandResolver { + + private static final CqlIdentifier COMMENT_OPTION = CqlIdentifier.fromInternal("comment"); + + private static final CqlIdentifier LEXICAL_COLUMN = + CqlIdentifier.fromInternal(DocumentConstants.Columns.LEXICAL_INDEX_COLUMN_NAME); + + private final ObjectMapper objectMapper; + + @Inject + public AlterCollectionCommandResolver(ObjectMapper objectMapper) { + this.objectMapper = objectMapper; + } + + @Override + public Class getCommandClass() { + return AlterCollectionCommand.class; + } + + @Override + public Operation resolveCollectionCommand( + CommandContext ctx, AlterCollectionCommand command) { + + if (command.lexical() == null) { + throw badOptions("must specify 'lexical' field"); + } + + // fromApiDesc throws: + // - LEXICAL_FEATURE_NOT_ENABLED if requested.enabled && feature is disabled (via + // SchemaFactory) + // - INVALID_ALTER_COLLECTION_OPTIONS for malformed analyzer / missing 'enabled' / etc. + final CollectionLexicalDef requested = + CollectionLexicalDef.fromApiDesc( + objectMapper, + command.lexical(), + ctx.versionedSchema().lexicalDef(), + SchemaException.Code.INVALID_ALTER_COLLECTION_OPTIONS) + .runningValue(); + + // Phase 1: disabling lexical is not supported. + if (!requested.enabled()) { + throw badOptions( + "'lexical.enabled' must be true; alterCollection cannot disable lexical search"); + } + + // Reject legacy / pre-lexical collections: must have a V1 comment with collection.options. + if (isLegacyComment(ctx.schemaObject())) { + throw badOptions( + "collection has legacy metadata (pre-lexical schema); recreate the collection with lexical enabled"); + } + + final CollectionLexicalDef current = ctx.schemaObject().lexicalDef(); + final int ddlDelayMillis = + ctx.config().get(OperationsConfig.class).databaseConfig().ddlDelayMillis(); + + // "Truly enabled" means both the stored comment claims lexical is on AND the underlying + // column actually exists. If the comment says enabled but the column is missing (an + // inconsistent state from manual surgery or an interrupted prior alter), treat it as + // not-enabled and run the full DDL pipeline so the table catches up to the comment. + final boolean trulyEnabled = + current.enabled() + && ctx.schemaObject().tableMetadata().getColumn(LEXICAL_COLUMN).isPresent(); + + if (!trulyEnabled) { + return new AlterCollectionLexicalOperation( + ctx, objectMapper, ddlDelayMillis, requested, /* noOp */ false); + } + + // Both analyzer definitions are guaranteed non-null here (CollectionLexicalDef's + // constructor requires non-null analyzer when enabled=true). JsonNode.equals is value-based, + // so this gives strict structural comparison for both string and object analyzers. + if (!Objects.equals(current.analyzerDefinition(), requested.analyzerDefinition())) { + throw badOptions( + "lexical is already enabled for this collection with a different analyzer configuration"); + } + // Same settings already in effect: no-op success. + return new AlterCollectionLexicalOperation( + ctx, objectMapper, ddlDelayMillis, requested, /* noOp */ true); + } + + private static SchemaException badOptions(String message) { + return SchemaException.Code.INVALID_ALTER_COLLECTION_OPTIONS.get(Map.of("message", message)); + } + + private boolean isLegacyComment(CollectionSchemaObject schemaObject) { + final Object commentObj = schemaObject.tableMetadata().getOptions().get(COMMENT_OPTION); + if (commentObj == null) { + return true; + } + try { + JsonNode optionsNode = + objectMapper + .readTree(commentObj.toString()) + .path(TableCommentConstants.TOP_LEVEL_KEY) + .path(TableCommentConstants.OPTIONS_KEY); + return !optionsNode.isObject(); + } catch (Exception e) { + return true; + } + } +} diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/CreateCollectionCommandResolver.java b/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/CreateCollectionCommandResolver.java index 4041163fa8..571eee0521 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/CreateCollectionCommandResolver.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/CreateCollectionCommandResolver.java @@ -90,7 +90,8 @@ public Operation resolveKeyspaceCommand( CollectionLexicalDef.fromApiDesc( objectMapper, getOrDefault(command.options(), CreateCollectionCommand.Options::lexical, null), - context.versionedSchema().lexicalDef()); + context.versionedSchema().lexicalDef(), + SchemaException.Code.INVALID_CREATE_COLLECTION_OPTIONS); var rerankDef = CollectionRerankDef.fromApiDesc( diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/CollectionLexicalDef.java b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/CollectionLexicalDef.java index 3c4323ec2d..ceb4eff109 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/CollectionLexicalDef.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/schema/collections/CollectionLexicalDef.java @@ -115,14 +115,18 @@ public CollectionLexicalDef(boolean enabled, JsonNode analyzerDefinition) { } /** - * Validate the configuration passed from the user and create the internal representation + * Validates the lexical config passed and constructs the runtime configuration object to use. + * Invalid-option errors are reported with {@code optionsErrorCode} so they get attributed to the + * invoking command (e.g. {@code INVALID_CREATE_COLLECTION_OPTIONS} from {@code createCollection}, + * {@code INVALID_ALTER_COLLECTION_OPTIONS} from {@code alterCollection}). * - * @return Valid CollectionLexicalConfig object + * @return Valid CollectionLexicalDef object */ public static SchemaHolder fromApiDesc( ObjectMapper mapper, CreateCollectionCommand.Options.LexicalDesc lexicalDesc, - SchemaFactory schemaFactory) { + SchemaFactory schemaFactory, + SchemaException.Code optionsErrorCode) { // Case 1: No lexical body provided - so no value from the user if (lexicalDesc == null) { @@ -132,7 +136,7 @@ public static SchemaHolder fromApiDesc( // Case 2: Validate 'enabled' flag is present var enabled = lexicalDesc.enabled(); if (enabled == null) { - throw SchemaException.Code.INVALID_CREATE_COLLECTION_OPTIONS.get( + throw optionsErrorCode.get( "message", "'enabled' is required property for 'lexical' Object value"); } @@ -149,7 +153,7 @@ public static SchemaHolder fromApiDesc( if (!enabled) { if (!analyzerNotDefined) { String nodeType = JsonUtil.nodeTypeAsString(lexicalDesc.analyzerDef()); - throw SchemaException.Code.INVALID_CREATE_COLLECTION_OPTIONS.get( + throw optionsErrorCode.get( "message", ("'lexical' is disabled, but 'lexical.analyzer' property was provided with an unexpected type: %s. " + "When 'lexical' is disabled, 'lexical.analyzer' must either be omitted or be JSON null, or an empty Object '{ }'.") @@ -183,7 +187,7 @@ public static SchemaHolder fromApiDesc( // First: check top level members for any invalid (misspelled etc) fields foundNames.removeAll(VALID_ANALYZER_FIELDS); if (!foundNames.isEmpty()) { - throw SchemaException.Code.INVALID_CREATE_COLLECTION_OPTIONS.get( + throw optionsErrorCode.get( "message", "Invalid field%s for 'lexical.analyzer'. Valid fields are: %s, found: %s" .formatted( @@ -212,7 +216,7 @@ public static SchemaHolder fromApiDesc( } }; if (!valueOk) { - throw SchemaException.Code.INVALID_CREATE_COLLECTION_OPTIONS.get( + throw optionsErrorCode.get( "message", "'%s' property of 'lexical.analyzer' must be JSON %s, is: %s" .formatted(entry.getKey(), expectedType, JsonUtil.nodeTypeAsString(fieldValue))); @@ -223,7 +227,7 @@ public static SchemaHolder fromApiDesc( cleanedAnalyzerDef = lexicalDesc.analyzerDef(); } else { // Otherwise, invalid definition - throw SchemaException.Code.INVALID_CREATE_COLLECTION_OPTIONS.get( + throw optionsErrorCode.get( "message", "'analyzer' property of 'lexical' must be either JSON Object or String, is: %s" .formatted(JsonUtil.nodeTypeAsString(lexicalDesc.analyzerDef()))); diff --git a/src/main/resources/errors.yaml b/src/main/resources/errors.yaml index 6d267f0457..2b046435a7 100644 --- a/src/main/resources/errors.yaml +++ b/src/main/resources/errors.yaml @@ -1368,12 +1368,20 @@ request-errors: Resend the command using only columns that use the `vector` type. + - scope: SCHEMA + code: INVALID_ALTER_COLLECTION_OPTIONS + title: Invalid options for alterCollection + body: |- + 'alterCollection' command option(s) invalid: ${message} + + Resend 'alterCollection' with valid options. + - scope: SCHEMA code: INVALID_CREATE_COLLECTION_OPTIONS title: Invalid options for createCollection body: |- 'createCollection' command option(s) invalid: ${message} - + Resend 'createCollection' with valid options. - scope: SCHEMA diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/AlterCollectionWithLexicalIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/AlterCollectionWithLexicalIntegrationTest.java new file mode 100644 index 0000000000..8fa8e6d8cc --- /dev/null +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/AlterCollectionWithLexicalIntegrationTest.java @@ -0,0 +1,496 @@ +package io.stargate.sgv2.jsonapi.api.v1; + +import static io.restassured.RestAssured.given; +import static io.stargate.sgv2.jsonapi.api.v1.ResponseAssertions.responseIsDDLSuccess; +import static io.stargate.sgv2.jsonapi.api.v1.ResponseAssertions.responseIsError; +import static net.javacrumbs.jsonunit.JsonMatchers.jsonEquals; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.is; + +import io.quarkus.test.common.WithTestResource; +import io.quarkus.test.junit.QuarkusIntegrationTest; +import io.restassured.http.ContentType; +import io.restassured.response.ValidatableResponse; +import io.stargate.sgv2.jsonapi.exception.SchemaException; +import io.stargate.sgv2.jsonapi.testresource.DseTestResource; +import org.apache.commons.lang3.RandomStringUtils; +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.ClassOrderer; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Order; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestClassOrder; + +@QuarkusIntegrationTest +@WithTestResource(value = DseTestResource.class) +@TestClassOrder(ClassOrderer.OrderAnnotation.class) +class AlterCollectionWithLexicalIntegrationTest extends AbstractKeyspaceIntegrationTestBase { + + @Nested + @Order(1) + class AlterCollectionEnableLexicalHappyPath { + + @Test + void enableLexicalDefaultAnalyzerOnDisabledCollection() { + Assumptions.assumeTrue(isLexicalAvailableForDB()); + + final String name = freshCollectionName(); + createCollectionWithLexicalDisabled(name); + + String json = + """ + { + "alterCollection": { + "lexical": { "enabled": true } + } + } + """; + postToCollection(name, json) + .statusCode(200) + .body("$", responseIsDDLSuccess()) + .body("status.ok", is(1)); + + // Sanity check: lexical insert/find should now work via $lexical sort. + String insertOk = + """ + { + "insertOne": { + "document": { "_id": "doc1", "$lexical": "hello world" } + } + } + """; + postToCollection(name, insertOk) + .statusCode(200) + .body("errors", is(org.hamcrest.Matchers.nullValue())); + + String find = + """ + { + "findOne": { + "sort": { "$lexical": "hello" } + } + } + """; + postToCollection(name, find) + .statusCode(200) + .body("errors", is(org.hamcrest.Matchers.nullValue())) + .body("data.document._id", is("doc1")); + + deleteCollection(name); + } + + @Test + void enableLexicalCustomAnalyzerOnDisabledCollection() { + Assumptions.assumeTrue(isLexicalAvailableForDB()); + + final String name = freshCollectionName(); + createCollectionWithLexicalDisabled(name); + + String json = + """ + { + "alterCollection": { + "lexical": { + "enabled": true, + "analyzer": { "tokenizer": { "name": "whitespace" } } + } + } + } + """; + postToCollection(name, json) + .statusCode(200) + .body("$", responseIsDDLSuccess()) + .body("status.ok", is(1)); + + deleteCollection(name); + } + + // Locks in the surgical-replace contract of buildUpdatedComment: when alterCollection enables + // lexical, all other previously-configured collection options (vector, indexing, defaultId, + // rerank) must remain unchanged in the stored comment. + @Test + void preservesOtherOptionsAcrossAlter() { + Assumptions.assumeTrue(isLexicalAvailableForDB()); + + final String name = freshCollectionName(); + String createBody = + """ + { + "createCollection": { + "name": "%s", + "options": { + "defaultId": { "type": "objectId" }, + "vector": { "dimension": 5, "metric": "cosine" }, + "indexing": { "deny": ["comment"] }, + "lexical": { "enabled": false }, + "rerank": { "enabled": false } + } + } + } + """ + .formatted(name); + given() + .port(getTestPort()) + .headers(getHeaders()) + .contentType(ContentType.JSON) + .body(createBody) + .when() + .post(KeyspaceResource.BASE_PATH, keyspaceName) + .then() + .statusCode(200) + .body("$", responseIsDDLSuccess()) + .body("status.ok", is(1)); + + // Enable lexical via alterCollection. + String alterBody = + """ + { + "alterCollection": { + "lexical": { "enabled": true } + } + } + """; + postToCollection(name, alterBody) + .statusCode(200) + .body("$", responseIsDDLSuccess()) + .body("status.ok", is(1)); + + // Verify via findCollections + explain that everything except lexical is unchanged, + // and that lexical has flipped to enabled with the default analyzer. + String expected = + """ + { + "name": "%s", + "options": { + "defaultId": { "type": "objectId" }, + "vector": { "dimension": 5, "metric": "cosine", "sourceModel": "other" }, + "indexing": { "deny": ["comment"] }, + "lexical": { "enabled": true, "analyzer": "standard" }, + "rerank": { "enabled": false } + } + } + """ + .formatted(name); + given() + .port(getTestPort()) + .headers(getHeaders()) + .contentType(ContentType.JSON) + .body( + """ + { + "findCollections": { + "options": { "explain": true } + } + } + """) + .when() + .post(KeyspaceResource.BASE_PATH, keyspaceName) + .then() + .statusCode(200) + .body("$", responseIsDDLSuccess()) + .body( + "status.collections.find { it.name == '%s' }".formatted(name), jsonEquals(expected)); + + deleteCollection(name); + } + + @Test + void enableLexicalAlreadyEnabledSameSettingsIsNoOp() { + Assumptions.assumeTrue(isLexicalAvailableForDB()); + + final String name = freshCollectionName(); + // Create with lexical enabled, default analyzer. + createCollectionWithLexical(name, "{ \"enabled\": true, \"analyzer\": \"standard\" }"); + + String json = + """ + { + "alterCollection": { + "lexical": { "enabled": true, "analyzer": "standard" } + } + } + """; + postToCollection(name, json) + .statusCode(200) + .body("$", responseIsDDLSuccess()) + .body("status.ok", is(1)); + + deleteCollection(name); + } + } + + @Nested + @Order(2) + class AlterCollectionLexicalFail { + + @Test + void failEnableLexicalDifferentAnalyzer() { + Assumptions.assumeTrue(isLexicalAvailableForDB()); + + final String name = freshCollectionName(); + createCollectionWithLexical(name, "{ \"enabled\": true, \"analyzer\": \"standard\" }"); + + String json = + """ + { + "alterCollection": { + "lexical": { + "enabled": true, + "analyzer": { "tokenizer": { "name": "whitespace" } } + } + } + } + """; + postToCollection(name, json) + .statusCode(200) + .body("$", responseIsError()) + .body( + "errors[0].errorCode", + is(SchemaException.Code.INVALID_ALTER_COLLECTION_OPTIONS.name())) + .body("errors[0].message", containsString("different analyzer configuration")); + + deleteCollection(name); + } + + @Test + void failDisableLexical() { + Assumptions.assumeTrue(isLexicalAvailableForDB()); + + final String name = freshCollectionName(); + createCollectionWithLexical(name, "{ \"enabled\": true }"); + + String json = + """ + { + "alterCollection": { + "lexical": { "enabled": false } + } + } + """; + postToCollection(name, json) + .statusCode(200) + .body("$", responseIsError()) + .body( + "errors[0].errorCode", + is(SchemaException.Code.INVALID_ALTER_COLLECTION_OPTIONS.name())) + .body("errors[0].message", containsString("cannot disable lexical")); + + deleteCollection(name); + } + + @Test + void failUnknownRootField() { + Assumptions.assumeTrue(isLexicalAvailableForDB()); + + final String name = freshCollectionName(); + createCollectionWithLexicalDisabled(name); + + String json = + """ + { + "alterCollection": { + "lexical": { "enabled": true }, + "unknownField": 1 + } + } + """; + // Jackson rejects the unknown root property; we just assert an error is returned. + postToCollection(name, json).statusCode(200).body("$", responseIsError()); + + deleteCollection(name); + } + + @Test + void failMissingLexical() { + Assumptions.assumeTrue(isLexicalAvailableForDB()); + + final String name = freshCollectionName(); + createCollectionWithLexicalDisabled(name); + + String json = + """ + { + "alterCollection": { } + } + """; + postToCollection(name, json) + .statusCode(200) + .body("$", responseIsError()) + .body( + "errors[0].errorCode", + is(SchemaException.Code.INVALID_ALTER_COLLECTION_OPTIONS.name())) + .body("errors[0].message", containsString("must specify 'lexical' field")); + + deleteCollection(name); + } + + // Malformed `lexical` body must yield INVALID_ALTER_COLLECTION_OPTIONS (not the + // createCollection variant). Mirrors the equivalent CreateCollectionWithLexicalIntegrationTest + // failure cases, but with the alterCollection-specific error code. + @Test + void failMissingEnabledFlag() { + Assumptions.assumeTrue(isLexicalAvailableForDB()); + + final String name = freshCollectionName(); + createCollectionWithLexicalDisabled(name); + + String json = + """ + { + "alterCollection": { + "lexical": { } + } + } + """; + postToCollection(name, json) + .statusCode(200) + .body("$", responseIsError()) + .body( + "errors[0].errorCode", + is(SchemaException.Code.INVALID_ALTER_COLLECTION_OPTIONS.name())) + .body( + "errors[0].message", + containsString("'enabled' is required property for 'lexical' Object value")); + + deleteCollection(name); + } + + @Test + void failAnalyzerWrongJsonType() { + Assumptions.assumeTrue(isLexicalAvailableForDB()); + + final String name = freshCollectionName(); + createCollectionWithLexicalDisabled(name); + + String json = + """ + { + "alterCollection": { + "lexical": { + "enabled": true, + "analyzer": [1, 2, 3] + } + } + } + """; + postToCollection(name, json) + .statusCode(200) + .body("$", responseIsError()) + .body( + "errors[0].errorCode", + is(SchemaException.Code.INVALID_ALTER_COLLECTION_OPTIONS.name())) + .body( + "errors[0].message", + containsString( + "'analyzer' property of 'lexical' must be either JSON Object or String, is: Array")); + + deleteCollection(name); + } + + @Test + void failAnalyzerMisspelledField() { + Assumptions.assumeTrue(isLexicalAvailableForDB()); + + final String name = freshCollectionName(); + createCollectionWithLexicalDisabled(name); + + String json = + """ + { + "alterCollection": { + "lexical": { + "enabled": true, + "analyzer": { + "tokeniser": { "name": "standard" } + } + } + } + } + """; + postToCollection(name, json) + .statusCode(200) + .body("$", responseIsError()) + .body( + "errors[0].errorCode", + is(SchemaException.Code.INVALID_ALTER_COLLECTION_OPTIONS.name())) + .body( + "errors[0].message", + containsString( + "Invalid field for 'lexical.analyzer'. Valid fields are: [charFilters, filters, tokenizer], found: [tokeniser]")); + + deleteCollection(name); + } + + @Test + void failEnableWhenLexicalNotAvailableForDB() { + Assumptions.assumeFalse(isLexicalAvailableForDB()); + + final String name = freshCollectionName(); + createCollectionWithLexicalDisabled(name); + + String json = + """ + { + "alterCollection": { + "lexical": { "enabled": true } + } + } + """; + postToCollection(name, json) + .statusCode(200) + .body("$", responseIsError()) + .body("errors[0].errorCode", is(SchemaException.Code.LEXICAL_FEATURE_NOT_ENABLED.name())); + + deleteCollection(name); + } + } + + // ----------------------------------------------------------------- + // Helpers + // ----------------------------------------------------------------- + + private static String freshCollectionName() { + return "alter_lex_" + RandomStringUtils.insecure().nextAlphanumeric(12); + } + + private void createCollectionWithLexicalDisabled(String collectionName) { + createCollectionWithLexical(collectionName, "{ \"enabled\": false }"); + } + + private void createCollectionWithLexical(String collectionName, String lexicalDef) { + String body = + """ + { + "createCollection": { + "name": "%s", + "options": { + "lexical": %s + } + } + } + """ + .formatted(collectionName, lexicalDef); + given() + .port(getTestPort()) + .headers(getHeaders()) + .contentType(ContentType.JSON) + .body(body) + .when() + .post(KeyspaceResource.BASE_PATH, keyspaceName) + .then() + .statusCode(200) + .body("$", responseIsDDLSuccess()) + .body("status.ok", is(1)); + } + + private ValidatableResponse postToCollection(String collectionName, String json) { + return given() + .port(getTestPort()) + .headers(getHeaders()) + .contentType(ContentType.JSON) + .body(json) + .when() + .post(CollectionResource.BASE_PATH, keyspaceName, collectionName) + .then(); + } +} diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/CollectionResourceIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/CollectionResourceIntegrationTest.java index c2f9c2e49e..4a00a37cf3 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/CollectionResourceIntegrationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/CollectionResourceIntegrationTest.java @@ -77,7 +77,8 @@ public void unknownCommand() { "Command 'unknownCommand' is not a Collection Command recognized by Data API.")) .body( "errors[0].message", - containsString("Data API supports following Collection Commands: [alterTable,")); + containsString( + "Data API supports following Collection Commands: [alterCollection, alterTable,")); } @Test