@@ -74,9 +74,6 @@ pub(crate) struct KeyValueWriteConfig {
7474 pub sequence_field_indices : Vec < usize > ,
7575 /// Merge engine for deduplication.
7676 pub merge_engine : MergeEngine ,
77- /// Column index in user schema that provides the row kind value.
78- /// Resolved from: `rowkind.field` option > `_VALUE_KIND` column > None (all INSERT).
79- pub value_kind_col_index : Option < usize > ,
8077}
8178
8279impl KeyValueFileWriter {
@@ -200,23 +197,8 @@ impl KeyValueFileWriter {
200197 let min_key = self . extract_key_binary_row ( & combined, first_row) ?;
201198 let max_key = self . extract_key_binary_row ( & combined, last_row) ?;
202199
203- // Build physical schema (thin-mode): [_SEQUENCE_NUMBER, _VALUE_KIND, all_user_cols...]
204- let user_fields = user_schema. fields ( ) ;
205- let mut physical_fields: Vec < Arc < ArrowField > > = Vec :: new ( ) ;
206- physical_fields. push ( Arc :: new ( ArrowField :: new (
207- SEQUENCE_NUMBER_FIELD_NAME ,
208- ArrowDataType :: Int64 ,
209- false ,
210- ) ) ) ;
211- physical_fields. push ( Arc :: new ( ArrowField :: new (
212- VALUE_KIND_FIELD_NAME ,
213- ArrowDataType :: Int8 ,
214- false ,
215- ) ) ) ;
216- for field in user_fields. iter ( ) {
217- physical_fields. push ( field. clone ( ) ) ;
218- }
219- let physical_schema = Arc :: new ( ArrowSchema :: new ( physical_fields) ) ;
200+ // Build physical schema and open writer.
201+ let physical_schema = build_physical_schema ( & user_schema) ;
220202
221203 // Open parquet writer.
222204 let file_name = format ! (
@@ -262,8 +244,13 @@ impl KeyValueFileWriter {
262244 } ,
263245 ) ?,
264246 ) ;
265- // Value kind column.
266- match self . config . value_kind_col_index {
247+ // Value kind column — resolve from batch schema.
248+ let vk_idx = combined
249+ . schema ( )
250+ . fields ( )
251+ . iter ( )
252+ . position ( |f| f. name ( ) == crate :: spec:: VALUE_KIND_FIELD_NAME ) ;
253+ match vk_idx {
267254 Some ( vk_idx) => {
268255 physical_columns. push (
269256 arrow_select:: take:: take (
@@ -282,8 +269,11 @@ impl KeyValueFileWriter {
282269 physical_columns. push ( Arc :: new ( Int8Array :: from ( vec ! [ 0i8 ; chunk_len] ) ) ) ;
283270 }
284271 }
285- // All user columns.
272+ // All user columns (skip _VALUE_KIND if present — already handled above) .
286273 for idx in 0 ..combined. num_columns ( ) {
274+ if Some ( idx) == vk_idx {
275+ continue ;
276+ }
287277 physical_columns. push (
288278 arrow_select:: take:: take ( combined. column ( idx) . as_ref ( ) , & chunk_indices, None )
289279 . map_err ( |e| crate :: Error :: DataInvalid {
@@ -459,3 +449,24 @@ impl KeyValueFileWriter {
459449 Ok ( builder. build_serialized ( ) )
460450 }
461451}
452+
453+ /// Build the physical schema: [_SEQUENCE_NUMBER, _VALUE_KIND, user_cols (excluding _VALUE_KIND)...]
454+ pub ( crate ) fn build_physical_schema ( user_schema : & ArrowSchema ) -> Arc < ArrowSchema > {
455+ let mut physical_fields: Vec < Arc < ArrowField > > = Vec :: new ( ) ;
456+ physical_fields. push ( Arc :: new ( ArrowField :: new (
457+ SEQUENCE_NUMBER_FIELD_NAME ,
458+ ArrowDataType :: Int64 ,
459+ false ,
460+ ) ) ) ;
461+ physical_fields. push ( Arc :: new ( ArrowField :: new (
462+ VALUE_KIND_FIELD_NAME ,
463+ ArrowDataType :: Int8 ,
464+ false ,
465+ ) ) ) ;
466+ for field in user_schema. fields ( ) . iter ( ) {
467+ if field. name ( ) != VALUE_KIND_FIELD_NAME {
468+ physical_fields. push ( field. clone ( ) ) ;
469+ }
470+ }
471+ Arc :: new ( ArrowSchema :: new ( physical_fields) )
472+ }
0 commit comments