From 8f084948ecab2dffe01610dac494e01b51da3199 Mon Sep 17 00:00:00 2001 From: Rhett CfZhuang Date: Tue, 14 Apr 2026 16:55:10 +0800 Subject: [PATCH] [core] Add validation to prevent primary key in sequence-group (#7052) When a primary key field is configured in a sequence-group of partial-update merge engine, it causes Parquet decoding failures during compaction because the key field may be set to null. This commit adds early validation at configuration parsing time to reject such invalid configurations with a clear error message. --- .../compact/PartialUpdateMergeFunction.java | 19 +++++- .../PartialUpdateMergeFunctionTest.java | 63 +++++++++++++++++++ 2 files changed, 80 insertions(+), 2 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/mergetree/compact/PartialUpdateMergeFunction.java b/paimon-core/src/main/java/org/apache/paimon/mergetree/compact/PartialUpdateMergeFunction.java index 96c337169226..a290369db9af 100644 --- a/paimon-core/src/main/java/org/apache/paimon/mergetree/compact/PartialUpdateMergeFunction.java +++ b/paimon-core/src/main/java/org/apache/paimon/mergetree/compact/PartialUpdateMergeFunction.java @@ -420,6 +420,14 @@ private Factory(Options options, RowType rowType, List primaryKeys) { .map(fieldName -> requireField(fieldName, fieldNames)) .forEach( field -> { + String protectedFieldName = fieldNames.get(field); + if (primaryKeys.contains(protectedFieldName)) { + throw new IllegalArgumentException( + String.format( + "The sequence-group '%s' contains primary key field '%s', " + + "which is not allowed. Primary key columns cannot be put in sequence-group.", + k, protectedFieldName)); + } if (fieldSeqComparators.containsKey(field)) { throw new IllegalArgumentException( String.format( @@ -427,13 +435,20 @@ private Factory(Options options, RowType rowType, List primaryKeys) { fieldNames.get(field), k)); } fieldSeqComparators.put(field, userDefinedSeqComparator); - fieldsProtectedBySequenceGroup.add(fieldNames.get(field)); + fieldsProtectedBySequenceGroup.add(protectedFieldName); }); // add self for (int index : sequenceFields) { - allSequenceFields.add(fieldNames.get(index)); String fieldName = fieldNames.get(index); + if (primaryKeys.contains(fieldName)) { + throw new IllegalArgumentException( + String.format( + "The sequence-group '%s' contains primary key field '%s', " + + "which is not allowed. Primary key columns cannot be put in sequence-group.", + k, fieldName)); + } + allSequenceFields.add(fieldName); fieldSeqComparators.put(index, userDefinedSeqComparator); sequenceGroupMap.put(fieldName, index); } diff --git a/paimon-core/src/test/java/org/apache/paimon/mergetree/compact/PartialUpdateMergeFunctionTest.java b/paimon-core/src/test/java/org/apache/paimon/mergetree/compact/PartialUpdateMergeFunctionTest.java index f4f2d28f7578..350431458173 100644 --- a/paimon-core/src/test/java/org/apache/paimon/mergetree/compact/PartialUpdateMergeFunctionTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/mergetree/compact/PartialUpdateMergeFunctionTest.java @@ -887,6 +887,69 @@ public void testAggregationWithoutSequenceGroup() { "Must use sequence group for aggregation functions but not found for field f1.")); } + @Test + public void testSequenceGroupCannotContainPrimaryKey() { + // Issue #7052: Putting a primary key column in sequence-group should be forbidden + // as it causes Parquet decoding failures during compaction + Options options = new Options(); + options.set("fields.f0.sequence-group", "f1,f2"); + RowType rowType = + RowType.of(DataTypes.INT(), DataTypes.INT(), DataTypes.INT(), DataTypes.INT()); + assertThatThrownBy( + () -> + PartialUpdateMergeFunction.factory( + options, rowType, ImmutableList.of("f0"))) + .hasMessageContaining( + "The sequence-group 'fields.f0.sequence-group' contains primary key field 'f0', " + + "which is not allowed. Primary key columns cannot be put in sequence-group."); + } + + @Test + public void testMultiSequenceFieldsCannotContainPrimaryKey() { + // Issue #7052: Multi-field sequence-group also cannot contain primary key columns + // The sequence fields (f2,f3) are the "self" part, they must not contain PKs + Options options = new Options(); + options.set("fields.f2,f3.sequence-group", "f0,f4"); + RowType rowType = + RowType.of( + DataTypes.INT(), + DataTypes.INT(), + DataTypes.INT(), + DataTypes.INT(), + DataTypes.INT(), + DataTypes.INT()); + assertThatThrownBy( + () -> + PartialUpdateMergeFunction.factory( + options, rowType, ImmutableList.of("f2"))) + .hasMessageContaining( + "The sequence-group 'fields.f2,f3.sequence-group' contains primary key field 'f2', " + + "which is not allowed. Primary key columns cannot be put in sequence-group."); + } + + @Test + public void testMultiPrimaryKeyCannotBeInSequenceGroup() { + // Issue #7052: With composite primary keys, none of them can be in sequence-group + // f2 is a PK and appears in the value part of the sequence-group + Options options = new Options(); + options.set("fields.f4.sequence-group", "f1,f2"); + RowType rowType = + RowType.of( + DataTypes.INT(), + DataTypes.INT(), + DataTypes.INT(), + DataTypes.INT(), + DataTypes.INT(), + DataTypes.INT()); + assertThatThrownBy( + () -> + PartialUpdateMergeFunction.factory( + options, rowType, ImmutableList.of("f2"))) + .hasMessageContaining( + "The sequence-group 'fields.f4.sequence-group' contains primary key field 'f2', " + + "which is not allowed. Primary key columns cannot be put in sequence-group."); + } + @Test public void testDeleteReproduceCorrectSequenceNumber() { Options options = new Options();