Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,15 @@ INPUT_PATH_ONTIME="${SCENARIO_DIR}/${INPUT_FILE_ONTIME}"
echo "OK Inserted data with partitioning into table ${TABLE_NAME_IRIS}"

# List partitions and validate output.
# The listing is captured in a variable rather than a mktemp file, so there is
# nothing to clean up and no EXIT trap to install (each `trap ... EXIT`
# replaces the previous handler, which would leave an earlier temp file behind).
LIST_PARTITIONS_OUT_IRIS=$({{ICE_CLI}} --config {{CLI_CONFIG}} list-partitions "${TABLE_NAME_IRIS}")

# The listing must contain the 'partitions:' marker.
if ! grep -q "partitions:" <<<"${LIST_PARTITIONS_OUT_IRIS}"; then
  echo "FAIL: list-partitions output missing 'partitions:' section"
  printf '%s\n' "${LIST_PARTITIONS_OUT_IRIS}"
  exit 1
fi

# At least one "- key=value" entry must be present; `--` stops grep from
# parsing the pattern's leading "-" as an option.
if ! grep -qE -- "- *[^=]+=" <<<"${LIST_PARTITIONS_OUT_IRIS}"; then
  echo "FAIL: list-partitions output has no partition entries (expected at least one key=value)"
  printf '%s\n' "${LIST_PARTITIONS_OUT_IRIS}"
  exit 1
fi
echo "OK Listed and validated partitions for ${TABLE_NAME_IRIS}"
Expand All @@ -37,17 +35,15 @@ echo "OK Listed and validated partitions for ${TABLE_NAME_IRIS}"
echo "OK Inserted data with partitioning into table ${TABLE_NAME_ONTIME}"

# List partitions and validate output.
# The listing is captured in a variable rather than a mktemp file, so no temp
# file is created and no EXIT trap is installed (a new `trap ... EXIT` would
# replace any handler registered earlier in the script).
LIST_PARTITIONS_OUT_ONTIME=$({{ICE_CLI}} --config {{CLI_CONFIG}} list-partitions "${TABLE_NAME_ONTIME}")

# The listing must contain the 'partitions:' marker.
if ! grep -q "partitions:" <<<"${LIST_PARTITIONS_OUT_ONTIME}"; then
  echo "FAIL: list-partitions output missing 'partitions:' section"
  printf '%s\n' "${LIST_PARTITIONS_OUT_ONTIME}"
  exit 1
fi

# At least one "- key=value" entry must be present; `--` stops grep from
# parsing the pattern's leading "-" as an option.
if ! grep -qE -- "- *[^=]+=" <<<"${LIST_PARTITIONS_OUT_ONTIME}"; then
  echo "FAIL: list-partitions output has no partition entries (expected at least one key=value)"
  printf '%s\n' "${LIST_PARTITIONS_OUT_ONTIME}"
  exit 1
fi
echo "OK Listed and validated partitions for ${TABLE_NAME_ONTIME}"
Expand All @@ -62,11 +58,40 @@ if [[ "${FILES_OUT_ONTIME}" != *${EXPECTED_DATA_PATH_ONTIME}* ]]; then
fi
echo "OK Validated correct partitioned data file path for ${TABLE_NAME_ONTIME}"

# Create table with bucket partition and insert data.
# Every expansion is quoted so a value containing whitespace or glob
# characters is passed to the CLI as a single, unmodified argument.
{{ICE_CLI}} --config {{CLI_CONFIG}} insert --create-table "${TABLE_NAME_BUCKET}" "${INPUT_PATH_IRIS}" \
  --partition="${PARTITION_SPEC_BUCKET}"
echo "OK Inserted data with bucket partitioning into table ${TABLE_NAME_BUCKET}"

# Fetch the partition listing for the bucket-partitioned table and verify it.
LIST_PARTITIONS_OUT_BUCKET=$({{ICE_CLI}} --config {{CLI_CONFIG}} list-partitions "${TABLE_NAME_BUCKET}")

# Both needles are fixed strings, so a quoted [[ ]] substring match is used
# in place of spawning grep.
if [[ "${LIST_PARTITIONS_OUT_BUCKET}" != *"partitions:"* ]]; then
  echo "FAIL: list-partitions output missing 'partitions:' section"
  printf '%s\n' "${LIST_PARTITIONS_OUT_BUCKET}"
  exit 1
fi

if [[ "${LIST_PARTITIONS_OUT_BUCKET}" != *"variety_bucket="* ]]; then
  echo "FAIL: list-partitions output missing bucket partition entries (expected 'variety_bucket=')"
  printf '%s\n' "${LIST_PARTITIONS_OUT_BUCKET}"
  exit 1
fi

echo "OK Listed and validated bucket partitions for ${TABLE_NAME_BUCKET}"

# Validate data file was inserted to correct bucket-partitioned path.
# The table name is quoted so it always reaches the CLI as one argument.
FILES_OUT_BUCKET=$({{ICE_CLI}} --config {{CLI_CONFIG}} files "${TABLE_NAME_BUCKET}")

# The expected path is deliberately left unquoted inside [[ ]]: it is matched
# as a pattern anywhere within the `files` output.
if [[ "${FILES_OUT_BUCKET}" != *${EXPECTED_DATA_PATH_BUCKET}* ]]; then
  echo "FAIL: expected substring '${EXPECTED_DATA_PATH_BUCKET}' not found in files command output: ${FILES_OUT_BUCKET}"
  exit 1
fi
echo "OK Validated correct bucket-partitioned data file path for ${TABLE_NAME_BUCKET}"

# Cleanup: drop each table created by this scenario, then the namespace.
# Arguments are quoted so each name is passed to the CLI as a single word.
{{ICE_CLI}} --config {{CLI_CONFIG}} delete-table "${TABLE_NAME_IRIS}"
echo "OK Deleted table: ${TABLE_NAME_IRIS}"
{{ICE_CLI}} --config {{CLI_CONFIG}} delete-table "${TABLE_NAME_ONTIME}"
echo "OK Deleted table: ${TABLE_NAME_ONTIME}"
{{ICE_CLI}} --config {{CLI_CONFIG}} delete-table "${TABLE_NAME_BUCKET}"
echo "OK Deleted table: ${TABLE_NAME_BUCKET}"

# The namespace is removed last, after all of its tables are gone.
{{ICE_CLI}} --config {{CLI_CONFIG}} delete-namespace "${NAMESPACE_NAME}"
echo "OK Deleted namespace: ${NAMESPACE_NAME}"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,8 @@ env:
PARTITION_SPEC_IRIS: '[{"column":"variety","transform":"identity"}]'
PARTITION_SPEC_ONTIME: '[{"column":"Year"}]'
EXPECTED_DATA_PATH_ONTIME: "s3://test-bucket/warehouse/test_insert_partitioned/ontime_partitioned/data/Year=2010/*.parquet"
TABLE_NAME_BUCKET: "test_insert_partitioned.iris_bucket_partitioned"
PARTITION_SPEC_BUCKET: '[{"column":"variety","transform":"bucket[3]"}]'
# Scheme-agnostic substring: `files` output may use s3:// or s3a:// depending on the Hadoop filesystem in use
EXPECTED_DATA_PATH_BUCKET: "iris_bucket_partitioned/data/variety_bucket="

Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,14 @@ echo "OK Created namespace"
--partition="${PARTITION_SPEC}"
echo "OK Inserted data with custom partition name"

# Capture the `describe -s` output in a variable instead of a fixed,
# predictable file under /tmp, so concurrent runs cannot clobber each other
# and nothing is left behind on disk.
DESCRIBE_OUT=$({{ICE_CLI}} --config {{CLI_CONFIG}} describe -s "${TABLE_NAME}")

# The custom partition field name must appear in the describe output.
if ! grep -q "var_bucket" <<<"${DESCRIBE_OUT}"; then
  echo "FAIL describe -s output missing custom partition name 'var_bucket'"
  printf '%s\n' "${DESCRIBE_OUT}"
  exit 1
fi
echo "OK Custom partition name 'var_bucket' found in describe output"

# Drop the scenario's table, then its namespace; names are quoted so each is
# passed to the CLI as a single argument.
{{ICE_CLI}} --config {{CLI_CONFIG}} delete-table "${TABLE_NAME}"
{{ICE_CLI}} --config {{CLI_CONFIG}} delete-namespace "${NAMESPACE_NAME}"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ env:
NAMESPACE_NAME: "test_custom_part"
TABLE_NAME: "test_custom_part.iris_custom"
INPUT_FILE: "input.parquet"
# Single definition of the partition spec: bucket transform on "variety" with
# the custom partition field name "var_bucket".
PARTITION_SPEC: '[{"column":"variety","transform":"bucket[3]","name":"var_bucket"}]'
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,7 @@ public static Map<PartitionKey, List<org.apache.iceberg.data.Record>> partition(
continue;
}
String transformName = transform.toString();

switch (transformName) {
case "hour", "day", "month", "year":
if (fieldSpec.type().typeId() != Type.TypeID.DATE) {
Expand All @@ -352,7 +353,7 @@ public static Map<PartitionKey, List<org.apache.iceberg.data.Record>> partition(
sourceFieldName, toGenericRecordFieldValue(value, fieldSpec.type()));
break;
default:
if (transformName.startsWith("truncate[") || transformName.startsWith("bucket[")) {
if (transformName.startsWith("bucket[")) {
partitionRecord.setField(
sourceFieldName, toGenericRecordFieldValue(value, fieldSpec.type()));
} else {
Expand Down
Loading