From fcd8c0a632b66806b06aeff619c13f93fe32b691 Mon Sep 17 00:00:00 2001 From: kanthi subramanian Date: Mon, 26 Jan 2026 12:33:18 -0600 Subject: [PATCH 1/7] Added logic to support bucket and truncate partitioning. --- .../altinity/ice/cli/internal/iceberg/Partitioning.java | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/ice/src/main/java/com/altinity/ice/cli/internal/iceberg/Partitioning.java b/ice/src/main/java/com/altinity/ice/cli/internal/iceberg/Partitioning.java index 3a8e8267..b106bb1f 100644 --- a/ice/src/main/java/com/altinity/ice/cli/internal/iceberg/Partitioning.java +++ b/ice/src/main/java/com/altinity/ice/cli/internal/iceberg/Partitioning.java @@ -281,6 +281,15 @@ public static Map> partition( continue; } String transformName = transform.toString(); + + // Handle parameterized transforms (bucket[N], truncate[N]) + // PartitionKey.partition() applies the transform, so we store the original value + if (transformName.startsWith("bucket[") || transformName.startsWith("truncate[")) { + partitionRecord.setField( + sourceFieldName, toGenericRecordFieldValue(value, fieldSpec.type())); + continue; + } + switch (transformName) { case "hour", "day", "month", "year": if (fieldSpec.type().typeId() != Type.TypeID.DATE) { From 98d5c7a9bf37a2fea7de890e2f756d4573f774e0 Mon Sep 17 00:00:00 2001 From: kanthi subramanian Date: Mon, 26 Jan 2026 12:34:40 -0600 Subject: [PATCH 2/7] Added logic to support bucket and truncate partitioning. --- .../com/altinity/ice/cli/internal/iceberg/Partitioning.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/ice/src/main/java/com/altinity/ice/cli/internal/iceberg/Partitioning.java b/ice/src/main/java/com/altinity/ice/cli/internal/iceberg/Partitioning.java index b106bb1f..d65abc4f 100644 --- a/ice/src/main/java/com/altinity/ice/cli/internal/iceberg/Partitioning.java +++ b/ice/src/main/java/com/altinity/ice/cli/internal/iceberg/Partitioning.java @@ -282,8 +282,6 @@ public static Map> partition( } String transformName = transform.toString(); - // Handle parameterized transforms (bucket[N], truncate[N]) - // PartitionKey.partition() applies the transform, so we store the original value if (transformName.startsWith("bucket[") || transformName.startsWith("truncate[")) { partitionRecord.setField( sourceFieldName, toGenericRecordFieldValue(value, fieldSpec.type())); From 1003f7f648b0de78d734dc41f707b496b833d6a5 Mon Sep 17 00:00:00 2001 From: kanthi subramanian Date: Mon, 23 Mar 2026 12:11:57 -0500 Subject: [PATCH 3/7] Removed truncate partition --- .gitignore | 1 + .../com/altinity/ice/cli/internal/iceberg/Partitioning.java | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 52ecf198..4ae9503a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +.idea/ target /demo/data /demo/clickhouse-udfs.xml diff --git a/ice/src/main/java/com/altinity/ice/cli/internal/iceberg/Partitioning.java b/ice/src/main/java/com/altinity/ice/cli/internal/iceberg/Partitioning.java index d65abc4f..080813f1 100644 --- a/ice/src/main/java/com/altinity/ice/cli/internal/iceberg/Partitioning.java +++ b/ice/src/main/java/com/altinity/ice/cli/internal/iceberg/Partitioning.java @@ -282,7 +282,7 @@ public static Map> partition( } String transformName = transform.toString(); - if (transformName.startsWith("bucket[") || transformName.startsWith("truncate[")) { + if (transformName.startsWith("bucket[")) { partitionRecord.setField( sourceFieldName, toGenericRecordFieldValue(value, fieldSpec.type())); continue; From 4f7a96cda2e63489034b4bfd3fb2ab8b16dc7a34 Mon Sep 17 00:00:00 2001 From: kanthi subramanian Date: Mon, 23 Mar 2026 12:50:10 -0500 Subject: [PATCH 4/7] Added test for bucket partitioning --- .../scenarios/insert-partitioned/run.sh.tmpl | 31 +++++++++++++++++++ .../insert-partitioned/scenario.yaml | 4 +++ 2 files changed, 35 insertions(+) diff --git a/ice-rest-catalog/src/test/resources/scenarios/insert-partitioned/run.sh.tmpl b/ice-rest-catalog/src/test/resources/scenarios/insert-partitioned/run.sh.tmpl index ca7a076d..fdf95306 100644 --- a/ice-rest-catalog/src/test/resources/scenarios/insert-partitioned/run.sh.tmpl +++ b/ice-rest-catalog/src/test/resources/scenarios/insert-partitioned/run.sh.tmpl @@ -62,11 +62,42 @@ if [[ "${FILES_OUT_ONTIME}" != *${EXPECTED_DATA_PATH_ONTIME}* ]]; then fi echo "OK Validated correct partitioned data file path for ${TABLE_NAME_ONTIME}" +# Create table with bucket partition and insert data +{{ICE_CLI}} --config {{CLI_CONFIG}} insert --create-table ${TABLE_NAME_BUCKET} ${INPUT_PATH_IRIS} --partition="${PARTITION_SPEC_BUCKET}" +echo "OK Inserted data with bucket partitioning into table ${TABLE_NAME_BUCKET}" + +# List partitions and validate output +LIST_PARTITIONS_OUT_BUCKET=$(mktemp) +trap "rm -f '${LIST_PARTITIONS_OUT_BUCKET}'" EXIT +{{ICE_CLI}} --config {{CLI_CONFIG}} list-partitions ${TABLE_NAME_BUCKET} > "${LIST_PARTITIONS_OUT_BUCKET}" +if ! grep -q "partitions:" "${LIST_PARTITIONS_OUT_BUCKET}"; then + echo "FAIL: list-partitions output missing 'partitions:' section" + cat "${LIST_PARTITIONS_OUT_BUCKET}" + exit 1 +fi +if ! grep -qE -- "- *[^=]+=" "${LIST_PARTITIONS_OUT_BUCKET}"; then + echo "FAIL: list-partitions output has no partition entries" + cat "${LIST_PARTITIONS_OUT_BUCKET}" + exit 1 +fi +echo "OK Listed and validated bucket partitions for ${TABLE_NAME_BUCKET}" + +# Validate data file was inserted to correct bucket-partitioned path +FILES_OUT_BUCKET=$({{ICE_CLI}} --config {{CLI_CONFIG}} files ${TABLE_NAME_BUCKET}) + +if [[ "${FILES_OUT_BUCKET}" != *${EXPECTED_DATA_PATH_BUCKET}* ]]; then + echo "FAIL: expected substring '${EXPECTED_DATA_PATH_BUCKET}' not found in files command output: ${FILES_OUT_BUCKET}" + exit 1 +fi +echo "OK Validated correct bucket-partitioned data file path for ${TABLE_NAME_BUCKET}" + # Cleanup {{ICE_CLI}} --config {{CLI_CONFIG}} delete-table ${TABLE_NAME_IRIS} echo "OK Deleted table: ${TABLE_NAME_IRIS}" {{ICE_CLI}} --config {{CLI_CONFIG}} delete-table ${TABLE_NAME_ONTIME} echo "OK Deleted table: ${TABLE_NAME_ONTIME}" +{{ICE_CLI}} --config {{CLI_CONFIG}} delete-table ${TABLE_NAME_BUCKET} +echo "OK Deleted table: ${TABLE_NAME_BUCKET}" {{ICE_CLI}} --config {{CLI_CONFIG}} delete-namespace ${NAMESPACE_NAME} echo "OK Deleted namespace: ${NAMESPACE_NAME}" diff --git a/ice-rest-catalog/src/test/resources/scenarios/insert-partitioned/scenario.yaml b/ice-rest-catalog/src/test/resources/scenarios/insert-partitioned/scenario.yaml index ed4fe7a4..16e23826 100644 --- a/ice-rest-catalog/src/test/resources/scenarios/insert-partitioned/scenario.yaml +++ b/ice-rest-catalog/src/test/resources/scenarios/insert-partitioned/scenario.yaml @@ -13,4 +13,8 @@ env: PARTITION_SPEC_IRIS: '[{"column":"variety","transform":"identity"}]' PARTITION_SPEC_ONTIME: '[{"column":"Year"}]' EXPECTED_DATA_PATH_ONTIME: "s3://test-bucket/warehouse/test_insert_partitioned/ontime_partitioned/data/Year=2010/*.parquet" + TABLE_NAME_BUCKET: "test_insert_partitioned.iris_bucket_partitioned" + PARTITION_SPEC_BUCKET: '[{"column":"variety","transform":"bucket[3]"}]' + # Scheme-agnostic: files output may use s3:// or s3a:// depending on Hadoop FS + EXPECTED_DATA_PATH_BUCKET: "iris_bucket_partitioned/data/variety_bucket=" From 2f7501bb4deb67a09d40db5837900f62e47eea43 Mon Sep 17 00:00:00 2001 From: kanthi subramanian Date: Mon, 23 Mar 2026 13:19:55 -0500 Subject: [PATCH 5/7] Replaced trap with variables --- .../scenarios/insert-partitioned/run.sh.tmpl | 36 ++++++++----------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/ice-rest-catalog/src/test/resources/scenarios/insert-partitioned/run.sh.tmpl b/ice-rest-catalog/src/test/resources/scenarios/insert-partitioned/run.sh.tmpl index fdf95306..b330c871 100644 --- a/ice-rest-catalog/src/test/resources/scenarios/insert-partitioned/run.sh.tmpl +++ b/ice-rest-catalog/src/test/resources/scenarios/insert-partitioned/run.sh.tmpl @@ -17,17 +17,15 @@ INPUT_PATH_ONTIME="${SCENARIO_DIR}/${INPUT_FILE_ONTIME}" echo "OK Inserted data with partitioning into table ${TABLE_NAME_IRIS}" # List partitions and validate output -LIST_PARTITIONS_OUT_IRIS=$(mktemp) -trap "rm -f '${LIST_PARTITIONS_OUT_IRIS}'" EXIT -{{ICE_CLI}} --config {{CLI_CONFIG}} list-partitions ${TABLE_NAME_IRIS} > "${LIST_PARTITIONS_OUT_IRIS}" -if ! grep -q "partitions:" "${LIST_PARTITIONS_OUT_IRIS}"; then +LIST_PARTITIONS_OUT_IRIS=$({{ICE_CLI}} --config {{CLI_CONFIG}} list-partitions "${TABLE_NAME_IRIS}") +if ! grep -q "partitions:" <<<"${LIST_PARTITIONS_OUT_IRIS}"; then echo "FAIL: list-partitions output missing 'partitions:' section" - cat "${LIST_PARTITIONS_OUT_IRIS}" + printf '%s\n' "${LIST_PARTITIONS_OUT_IRIS}" exit 1 fi -if ! grep -qE -- "- *[^=]+=" "${LIST_PARTITIONS_OUT_IRIS}"; then +if ! grep -qE -- "- *[^=]+=" <<<"${LIST_PARTITIONS_OUT_IRIS}"; then echo "FAIL: list-partitions output has no partition entries (expected at least one key=value)" - cat "${LIST_PARTITIONS_OUT_IRIS}" + printf '%s\n' "${LIST_PARTITIONS_OUT_IRIS}" exit 1 fi echo "OK Listed and validated partitions for ${TABLE_NAME_IRIS}" @@ -37,17 +35,15 @@ echo "OK Listed and validated partitions for ${TABLE_NAME_IRIS}" echo "OK Inserted data with partitioning into table ${TABLE_NAME_ONTIME}" # List partitions and validate output -LIST_PARTITIONS_OUT_ONTIME=$(mktemp) -trap "rm -f '${LIST_PARTITIONS_OUT_ONTIME}'" EXIT -{{ICE_CLI}} --config {{CLI_CONFIG}} list-partitions ${TABLE_NAME_ONTIME} > "${LIST_PARTITIONS_OUT_ONTIME}" -if ! grep -q "partitions:" "${LIST_PARTITIONS_OUT_ONTIME}"; then +LIST_PARTITIONS_OUT_ONTIME=$({{ICE_CLI}} --config {{CLI_CONFIG}} list-partitions "${TABLE_NAME_ONTIME}") +if ! grep -q "partitions:" <<<"${LIST_PARTITIONS_OUT_ONTIME}"; then echo "FAIL: list-partitions output missing 'partitions:' section" - cat "${LIST_PARTITIONS_OUT_ONTIME}" + printf '%s\n' "${LIST_PARTITIONS_OUT_ONTIME}" exit 1 fi -if ! grep -qE -- "- *[^=]+=" "${LIST_PARTITIONS_OUT_ONTIME}"; then +if ! grep -qE -- "- *[^=]+=" <<<"${LIST_PARTITIONS_OUT_ONTIME}"; then echo "FAIL: list-partitions output has no partition entries (expected at least one key=value)" - cat "${LIST_PARTITIONS_OUT_ONTIME}" + printf '%s\n' "${LIST_PARTITIONS_OUT_ONTIME}" exit 1 fi echo "OK Listed and validated partitions for ${TABLE_NAME_ONTIME}" @@ -67,17 +63,15 @@ echo "OK Validated correct partitioned data file path for ${TABLE_NAME_ONTIME}" echo "OK Inserted data with bucket partitioning into table ${TABLE_NAME_BUCKET}" # List partitions and validate output -LIST_PARTITIONS_OUT_BUCKET=$(mktemp) -trap "rm -f '${LIST_PARTITIONS_OUT_BUCKET}'" EXIT -{{ICE_CLI}} --config {{CLI_CONFIG}} list-partitions ${TABLE_NAME_BUCKET} > "${LIST_PARTITIONS_OUT_BUCKET}" -if ! grep -q "partitions:" "${LIST_PARTITIONS_OUT_BUCKET}"; then +LIST_PARTITIONS_OUT_BUCKET=$({{ICE_CLI}} --config {{CLI_CONFIG}} list-partitions "${TABLE_NAME_BUCKET}") +if ! grep -q "partitions:" <<<"${LIST_PARTITIONS_OUT_BUCKET}"; then echo "FAIL: list-partitions output missing 'partitions:' section" - cat "${LIST_PARTITIONS_OUT_BUCKET}" + printf '%s\n' "${LIST_PARTITIONS_OUT_BUCKET}" exit 1 fi -if ! grep -qE -- "- *[^=]+=" "${LIST_PARTITIONS_OUT_BUCKET}"; then +if ! grep -qE -- "- *[^=]+=" <<<"${LIST_PARTITIONS_OUT_BUCKET}"; then echo "FAIL: list-partitions output has no partition entries" - cat "${LIST_PARTITIONS_OUT_BUCKET}" + printf '%s\n' "${LIST_PARTITIONS_OUT_BUCKET}" exit 1 fi echo "OK Listed and validated bucket partitions for ${TABLE_NAME_BUCKET}" From 149b874d4ed8c3a800ac306c41fe7e834a08c662 Mon Sep 17 00:00:00 2001 From: kanthi subramanian Date: Mon, 23 Mar 2026 13:53:35 -0500 Subject: [PATCH 6/7] Changed to use list-partitions --- .../test/resources/scenarios/insert-partitioned/run.sh.tmpl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ice-rest-catalog/src/test/resources/scenarios/insert-partitioned/run.sh.tmpl b/ice-rest-catalog/src/test/resources/scenarios/insert-partitioned/run.sh.tmpl index b330c871..eee5a06d 100644 --- a/ice-rest-catalog/src/test/resources/scenarios/insert-partitioned/run.sh.tmpl +++ b/ice-rest-catalog/src/test/resources/scenarios/insert-partitioned/run.sh.tmpl @@ -69,8 +69,8 @@ if ! grep -q "partitions:" <<<"${LIST_PARTITIONS_OUT_BUCKET}"; then printf '%s\n' "${LIST_PARTITIONS_OUT_BUCKET}" exit 1 fi -if ! grep -qE -- "- *[^=]+=" <<<"${LIST_PARTITIONS_OUT_BUCKET}"; then - echo "FAIL: list-partitions output has no partition entries" +if ! grep -q "variety_bucket=" <<<"${LIST_PARTITIONS_OUT_BUCKET}"; then + echo "FAIL: list-partitions output missing bucket partition entries (expected 'variety_bucket=')" printf '%s\n' "${LIST_PARTITIONS_OUT_BUCKET}" exit 1 fi From 41e66b9e57e22cd5b821f50fbfa0897c88c1b83d Mon Sep 17 00:00:00 2001 From: kanthi subramanian Date: Mon, 23 Mar 2026 14:49:54 -0500 Subject: [PATCH 7/7] Removed duplicate partitioning logic of bucket/truncate --- .../scenarios/partition-custom-name/run.sh.tmpl | 10 +++++----- .../scenarios/partition-custom-name/scenario.yaml | 2 +- .../ice/cli/internal/iceberg/Partitioning.java | 8 +------- 3 files changed, 7 insertions(+), 13 deletions(-) diff --git a/ice-rest-catalog/src/test/resources/scenarios/partition-custom-name/run.sh.tmpl b/ice-rest-catalog/src/test/resources/scenarios/partition-custom-name/run.sh.tmpl index 0fdc69a4..e9492248 100644 --- a/ice-rest-catalog/src/test/resources/scenarios/partition-custom-name/run.sh.tmpl +++ b/ice-rest-catalog/src/test/resources/scenarios/partition-custom-name/run.sh.tmpl @@ -13,14 +13,14 @@ echo "OK Created namespace" --partition="${PARTITION_SPEC}" echo "OK Inserted data with custom partition name" -{{ICE_CLI}} --config {{CLI_CONFIG}} describe -s ${TABLE_NAME} > /tmp/custom_part_describe.txt +DESCRIBE_OUT=$({{ICE_CLI}} --config {{CLI_CONFIG}} describe -s "${TABLE_NAME}") -if ! grep -q "var_trunc" /tmp/custom_part_describe.txt; then - echo "FAIL describe -s output missing custom partition name 'var_trunc'" - cat /tmp/custom_part_describe.txt +if ! grep -q "var_bucket" <<<"${DESCRIBE_OUT}"; then + echo "FAIL describe -s output missing custom partition name 'var_bucket'" + printf '%s\n' "${DESCRIBE_OUT}" exit 1 fi -echo "OK Custom partition name 'var_trunc' found in describe output" +echo "OK Custom partition name 'var_bucket' found in describe output" {{ICE_CLI}} --config {{CLI_CONFIG}} delete-table ${TABLE_NAME} {{ICE_CLI}} --config {{CLI_CONFIG}} delete-namespace ${NAMESPACE_NAME} diff --git a/ice-rest-catalog/src/test/resources/scenarios/partition-custom-name/scenario.yaml b/ice-rest-catalog/src/test/resources/scenarios/partition-custom-name/scenario.yaml index 47982dea..87139637 100644 --- a/ice-rest-catalog/src/test/resources/scenarios/partition-custom-name/scenario.yaml +++ b/ice-rest-catalog/src/test/resources/scenarios/partition-custom-name/scenario.yaml @@ -8,4 +8,4 @@ env: NAMESPACE_NAME: "test_custom_part" TABLE_NAME: "test_custom_part.iris_custom" INPUT_FILE: "input.parquet" - PARTITION_SPEC: '[{"column":"variety","transform":"truncate[3]","name":"var_trunc"}]' + PARTITION_SPEC: '[{"column":"variety","transform":"bucket[3]","name":"var_bucket"}]' diff --git a/ice/src/main/java/com/altinity/ice/cli/internal/iceberg/Partitioning.java b/ice/src/main/java/com/altinity/ice/cli/internal/iceberg/Partitioning.java index 3dd482fb..25b89b7b 100644 --- a/ice/src/main/java/com/altinity/ice/cli/internal/iceberg/Partitioning.java +++ b/ice/src/main/java/com/altinity/ice/cli/internal/iceberg/Partitioning.java @@ -344,12 +344,6 @@ public static Map> partition( } String transformName = transform.toString(); - if (transformName.startsWith("bucket[")) { - partitionRecord.setField( - sourceFieldName, toGenericRecordFieldValue(value, fieldSpec.type())); - continue; - } - switch (transformName) { case "hour", "day", "month", "year": if (fieldSpec.type().typeId() != Type.TypeID.DATE) { @@ -359,7 +353,7 @@ public static Map> partition( sourceFieldName, toGenericRecordFieldValue(value, fieldSpec.type())); break; default: - if (transformName.startsWith("truncate[") || transformName.startsWith("bucket[")) { + if (transformName.startsWith("bucket[")) { partitionRecord.setField( sourceFieldName, toGenericRecordFieldValue(value, fieldSpec.type())); } else {