From 7d57ac4dc78040b527d1b7ada719c6b084063f93 Mon Sep 17 00:00:00 2001 From: xiaohongbo Date: Fri, 27 Feb 2026 17:37:38 +0800 Subject: [PATCH 01/20] support read after data evolution updating by shard --- docs/content/pypaimon/data-evolution.md | 5 + .../read/reader/concat_batch_reader.py | 15 +- .../read/reader/data_file_batch_reader.py | 181 ++++++++++-- paimon-python/pypaimon/read/split_read.py | 265 +++++++++++++++-- .../pypaimon/tests/data_evolution_test.py | 74 ++++- .../tests/shard_table_updator_test.py | 275 +++++++++++++++++- 6 files changed, 759 insertions(+), 56 deletions(-) diff --git a/docs/content/pypaimon/data-evolution.md b/docs/content/pypaimon/data-evolution.md index 91714e52c72f..073f939382c0 100644 --- a/docs/content/pypaimon/data-evolution.md +++ b/docs/content/pypaimon/data-evolution.md @@ -204,3 +204,8 @@ commit.close() - **Row order matters**: the batches you write must have the **same number of rows** as the batches you read, in the same order for that shard. - **Parallelism**: run multiple shards by calling `new_shard_updator(shard_idx, num_shards)` for each shard. + +## Read After Partial Shard Update + +- **Full table read**: rows from updated shards have the new column; rows from other shards have null for that column. +- **Per-shard read** (`with_shard(shard_idx, num_shards)`): read only the shard(s) you need; the new column is populated where it was written and null elsewhere. diff --git a/paimon-python/pypaimon/read/reader/concat_batch_reader.py b/paimon-python/pypaimon/read/reader/concat_batch_reader.py index 4318f883eb2e..367ef4deea5d 100644 --- a/paimon-python/pypaimon/read/reader/concat_batch_reader.py +++ b/paimon-python/pypaimon/read/reader/concat_batch_reader.py @@ -141,6 +141,8 @@ class DataEvolutionMergeReader(RecordBatchReader): - The fourth field comes from batch1, and it is at offset 1 in batch1. - The fifth field comes from batch2, and it is at offset 1 in batch2. - The sixth field comes from batch1, and it is at offset 0 in batch1. 
+ + When row_offsets[i] == -1 (no file provides that field), output a column of nulls using schema. """ def __init__( @@ -207,9 +209,20 @@ def read_arrow_batch(self) -> Optional[RecordBatch]: for i in range(len(self.row_offsets)): batch_index = self.row_offsets[i] field_index = self.field_offsets[i] + field_name = self.schema.field(i).name + if batch_index >= 0 and batches[batch_index] is not None: - columns.append(batches[batch_index].column(field_index).slice(0, min_rows)) + src_batch = batches[batch_index] + if field_name in src_batch.schema.names: + column = src_batch.column( + src_batch.schema.get_field_index(field_name) + ).slice(0, min_rows) + columns.append(column) + else: + # Field doesn't exist in this batch, fill with nulls + columns.append(pa.nulls(min_rows, type=self.schema.field(i).type)) else: + # No batch provides this field, fill with nulls columns.append(pa.nulls(min_rows, type=self.schema.field(i).type)) for i in range(len(self.readers)): diff --git a/paimon-python/pypaimon/read/reader/data_file_batch_reader.py b/paimon-python/pypaimon/read/reader/data_file_batch_reader.py index 7f2e1c61e1f4..ce0db60ae3a5 100644 --- a/paimon-python/pypaimon/read/reader/data_file_batch_reader.py +++ b/paimon-python/pypaimon/read/reader/data_file_batch_reader.py @@ -16,7 +16,7 @@ # limitations under the License. 
################################################################################ -from typing import List, Optional +from typing import List, Optional, Tuple import pyarrow as pa from pyarrow import RecordBatch @@ -53,6 +53,7 @@ def __init__(self, format_reader: RecordBatchReader, index_mapping: List[int], p self.first_row_id = first_row_id self.max_sequence_number = max_sequence_number self.system_fields = system_fields +<<<<<<< HEAD self.blob_as_descriptor = blob_as_descriptor self.blob_descriptor_fields = blob_descriptor_fields or set() self.file_io = file_io @@ -66,6 +67,35 @@ def __init__(self, format_reader: RecordBatchReader, index_mapping: List[int], p for field_name in self.blob_descriptor_fields if field_name in self.blob_field_names } +======= + self.requested_field_names = [field.name for field in fields] if fields else None + self.fields = fields + + def _align_to_requested_names( + self, + inter_arrays: List, + inter_names: List, + requested_field_names: List[str], + num_rows: int, + ) -> Tuple[List, List]: + name_to_idx = {n: i for i, n in enumerate(inter_names)} + ordered_arrays = [] + ordered_names = [] + for name in requested_field_names: + idx = name_to_idx.get(name) + if idx is None and name.startswith("_KEY_") and name[5:] in name_to_idx: + idx = name_to_idx[name[5:]] + if idx is not None: + ordered_arrays.append(inter_arrays[idx]) + ordered_names.append(name) + else: + field = self.schema_map.get(name) + ordered_arrays.append( + pa.nulls(num_rows, type=field.type) if field is not None else pa.nulls(num_rows) + ) + ordered_names.append(name) + return ordered_arrays, ordered_names +>>>>>>> 277fef48c (support shards read of data evolution) def read_arrow_batch(self, start_idx=None, end_idx=None) -> Optional[RecordBatch]: if isinstance(self.format_reader, FormatBlobReader): @@ -75,11 +105,27 @@ def read_arrow_batch(self, start_idx=None, end_idx=None) -> Optional[RecordBatch if record_batch is None: return None + num_rows = record_batch.num_rows if 
self.partition_info is None and self.index_mapping is None: if self.row_tracking_enabled and self.system_fields: record_batch = self._assign_row_tracking(record_batch) + if self.requested_field_names is not None: + inter_arrays = list(record_batch.columns) + inter_names = list(record_batch.schema.names) + ordered_arrays, ordered_names = self._align_to_requested_names( + inter_arrays, inter_names, self.requested_field_names, num_rows + ) + record_batch = pa.RecordBatch.from_arrays(ordered_arrays, ordered_names) return record_batch + if (self.partition_info is None and self.index_mapping is not None + and not self.requested_field_names): + ncol = record_batch.num_columns + if len(self.index_mapping) == ncol and self.index_mapping == list(range(ncol)): + if self.row_tracking_enabled and self.system_fields: + record_batch = self._assign_row_tracking(record_batch) + return record_batch + inter_arrays = [] inter_names = [] num_rows = record_batch.num_rows @@ -93,32 +139,123 @@ def read_arrow_batch(self, start_idx=None, end_idx=None) -> Optional[RecordBatch inter_names.append(partition_field.name) else: real_index = self.partition_info.get_real_index(i) - if real_index < record_batch.num_columns: + name = ( + self.requested_field_names[i] + if self.requested_field_names and i < len(self.requested_field_names) + else f"_col_{i}" + ) + batch_names = record_batch.schema.names + col_idx = None + if name in batch_names: + col_idx = record_batch.schema.get_field_index(name) + elif name.startswith("_KEY_") and name[5:] in batch_names: + col_idx = record_batch.schema.get_field_index(name[5:]) + if col_idx is not None: + inter_arrays.append(record_batch.column(col_idx)) + inter_names.append(name) + elif real_index < record_batch.num_columns: inter_arrays.append(record_batch.column(real_index)) - inter_names.append(record_batch.schema.field(real_index).name) + inter_names.append(name) + else: + field = self.schema_map.get(name) + inter_arrays.append( + pa.nulls(num_rows, 
type=field.type) if field is not None else pa.nulls(num_rows) + ) + inter_names.append(name) else: - inter_arrays = record_batch.columns - inter_names = record_batch.schema.names + inter_arrays = list(record_batch.columns) + inter_names = list(record_batch.schema.names) - if self.index_mapping is not None: + if self.requested_field_names is not None: + inter_arrays, inter_names = self._align_to_requested_names( + inter_arrays, inter_names, self.requested_field_names, num_rows + ) + + if self.index_mapping is not None and not ( + self.requested_field_names is not None and inter_names == self.requested_field_names): mapped_arrays = [] mapped_names = [] + partition_names = ( + set(pf.name for pf in self.partition_info.partition_fields) + if self.partition_info else set() + ) + non_partition_indices = [idx for idx, name in enumerate(inter_names) if name not in partition_names] for i, real_index in enumerate(self.index_mapping): - if 0 <= real_index < len(inter_arrays): - mapped_arrays.append(inter_arrays[real_index]) - mapped_names.append(inter_names[real_index]) + if 0 <= real_index < len(non_partition_indices): + actual_index = non_partition_indices[real_index] + mapped_arrays.append(inter_arrays[actual_index]) + mapped_names.append(inter_names[actual_index]) else: - null_array = pa.nulls(num_rows) + name = ( + self.requested_field_names[i] + if self.requested_field_names and i < len(self.requested_field_names) + else f"null_col_{i}" + ) + field = self.schema_map.get(name) + null_array = pa.nulls(num_rows, type=field.type) if field is not None else pa.nulls(num_rows) mapped_arrays.append(null_array) - mapped_names.append(f"null_col_{i}") + mapped_names.append(name) + + if self.partition_info: + partition_arrays_map = { + inter_names[i]: inter_arrays[i] + for i in range(len(inter_names)) + if inter_names[i] in partition_names + } + + if self.requested_field_names: + final_arrays = [] + final_names = [] + mapped_name_to_array = {name: arr for name, arr in 
zip(mapped_names, mapped_arrays)} + + for name in self.requested_field_names: + if name in mapped_name_to_array: + final_arrays.append(mapped_name_to_array[name]) + final_names.append(name) + elif name in partition_arrays_map: + final_arrays.append(partition_arrays_map[name]) + final_names.append(name) + else: + # Field not in file (e.g. index_mapping -1): output null column + field = self.schema_map.get(name) + null_arr = pa.nulls(num_rows, type=field.type) if field is not None else pa.nulls(num_rows) + final_arrays.append(null_arr) + final_names.append(name) + + inter_arrays = final_arrays + inter_names = final_names + else: + mapped_name_set = set(mapped_names) + for name, arr in partition_arrays_map.items(): + if name not in mapped_name_set: + mapped_arrays.append(arr) + mapped_names.append(name) + inter_arrays = mapped_arrays + inter_names = mapped_names + else: + inter_arrays = mapped_arrays + inter_names = mapped_names if self.system_primary_key: for i in range(len(self.system_primary_key)): - if not mapped_names[i].startswith("_KEY_"): - mapped_names[i] = f"_KEY_{mapped_names[i]}" + if i < len(inter_names) and not inter_names[i].startswith("_KEY_"): + inter_names[i] = f"_KEY_{inter_names[i]}" + + if self.requested_field_names is not None and len(inter_arrays) < len(self.requested_field_names): + for name in self.requested_field_names[len(inter_arrays):]: + field = self.schema_map.get(name) + inter_arrays.append( + pa.nulls(num_rows, type=field.type) if field is not None else pa.nulls(num_rows) + ) + inter_names.append(name) - inter_arrays = mapped_arrays - inter_names = mapped_names + for i, name in enumerate(inter_names): + target_field = self.schema_map.get(name) + if target_field is not None and inter_arrays[i].type != target_field.type: + try: + inter_arrays[i] = inter_arrays[i].cast(target_field.type) + except (pa.ArrowInvalid, pa.ArrowNotImplementedError): + inter_arrays[i] = pa.nulls(num_rows, type=target_field.type) # to contains 'not null' property 
final_fields = [] @@ -205,18 +342,28 @@ def _deserialize_descriptor_or_none(raw: bytes): def _assign_row_tracking(self, record_batch: RecordBatch) -> RecordBatch: """Assign row tracking meta fields (_ROW_ID and _SEQUENCE_NUMBER).""" arrays = list(record_batch.columns) + num_cols = len(arrays) # Handle _ROW_ID field if SpecialFields.ROW_ID.name in self.system_fields.keys(): idx = self.system_fields[SpecialFields.ROW_ID.name] # Create a new array that fills with computed row IDs - arrays[idx] = pa.array(range(self.first_row_id, self.first_row_id + record_batch.num_rows), type=pa.int64()) + if idx < num_cols: + if self.first_row_id is None: + raise ValueError( + "Row tracking requires first_row_id on the file; " + "got None. Ensure file metadata has first_row_id when reading _ROW_ID." + ) + arrays[idx] = pa.array( + range(self.first_row_id, self.first_row_id + record_batch.num_rows), + type=pa.int64()) # Handle _SEQUENCE_NUMBER field if SpecialFields.SEQUENCE_NUMBER.name in self.system_fields.keys(): idx = self.system_fields[SpecialFields.SEQUENCE_NUMBER.name] # Create a new array that fills with max_sequence_number - arrays[idx] = pa.repeat(self.max_sequence_number, record_batch.num_rows) + if idx < num_cols: + arrays[idx] = pa.repeat(self.max_sequence_number, record_batch.num_rows) names = record_batch.schema.names table = None diff --git a/paimon-python/pypaimon/read/split_read.py b/paimon-python/pypaimon/read/split_read.py index d76a71682b17..af365d438d2a 100644 --- a/paimon-python/pypaimon/read/split_read.py +++ b/paimon-python/pypaimon/read/split_read.py @@ -114,13 +114,20 @@ def create_reader(self) -> RecordReader: """Create a record reader for the given split.""" def file_reader_supplier(self, file: DataFileMeta, for_merge_read: bool, - read_fields: List[str], row_tracking_enabled: bool) -> RecordBatchReader: + read_fields: List[str], row_tracking_enabled: bool, + use_requested_field_names: bool = True) -> RecordBatchReader: (read_file_fields, 
read_arrow_predicate) = self._get_fields_and_predicate(file.schema_id, read_fields) - # Use external_path if available, otherwise use file_path file_path = file.external_path if file.external_path else file.file_path _, extension = os.path.splitext(file_path) file_format = extension[1:] + is_blob_file = file_format == CoreOptions.FILE_FORMAT_BLOB + + if getattr(file, "write_cols", None): + read_file_fields = list(read_file_fields) + for col in file.write_cols: + if col in read_fields and col not in read_file_fields: + read_file_fields.append(col) batch_size = self.table.options.read_batch_size() @@ -142,23 +149,49 @@ def file_reader_supplier(self, file: DataFileMeta, for_merge_read: bool, else: raise ValueError(f"Unexpected file format: {file_format}") +<<<<<<< HEAD blob_as_descriptor = CoreOptions.blob_as_descriptor(self.table.options) blob_descriptor_fields = CoreOptions.blob_descriptor_fields(self.table.options) index_mapping = self.create_index_mapping() partition_info = self._create_partition_info() system_fields = SpecialFields.find_system_fields(self.read_fields) +======= + index_mapping = self.create_index_mapping( + file=file, read_file_fields=read_file_fields, read_fields=read_fields, is_blob_file=is_blob_file + ) + +>>>>>>> 277fef48c (support shards read of data evolution) table_schema_fields = ( SpecialFields.row_type_with_row_tracking(self.table.table_schema.fields) if row_tracking_enabled else self.table.table_schema.fields ) + write_cols = getattr(file, "write_cols", None) + fields = self._output_fields_for_file_reader( + for_merge_read, is_blob_file, use_requested_field_names, write_cols, + read_file_fields, read_fields, table_schema_fields + ) + + system_fields = SpecialFields.find_system_fields(fields) + + actual_read_fields_for_partition = self._actual_read_fields_for_partition( + read_file_fields, table_schema_fields + ) + fields = self._output_fields_for_partition_info( + for_merge_read, fields, actual_read_fields_for_partition, 
table_schema_fields + ) + partition_info = self._create_partition_info( + actual_read_fields=actual_read_fields_for_partition or None, + output_fields=fields + ) + if for_merge_read: return DataFileBatchReader( format_reader, index_mapping, partition_info, self.trimmed_primary_key, - table_schema_fields, + fields, file.max_sequence_number, file.first_row_id, row_tracking_enabled, @@ -172,7 +205,7 @@ def file_reader_supplier(self, file: DataFileMeta, for_merge_read: bool, index_mapping, partition_info, None, - table_schema_fields, + fields, file.max_sequence_number, file.first_row_id, row_tracking_enabled, @@ -199,6 +232,28 @@ def _get_fields_and_predicate(self, schema_id: int, read_fields): self.schema_id_2_fields[key] = (read_file_fields, read_arrow_predicate) return self.schema_id_2_fields[key] + def _output_fields_for_file_reader( + self, + for_merge_read: bool, + is_blob_file: bool, + use_requested_field_names: bool, + write_cols, + read_file_fields: List[str], + read_fields: List[str], + table_schema_fields: List[DataField], + ) -> List[DataField]: + if for_merge_read: + return self.read_fields + if is_blob_file: + names = read_file_fields + elif use_requested_field_names and write_cols: + names = read_fields + else: + names = [f.name for f in self.read_fields] + field_map = {f.name: f for f in table_schema_fields} + requested = [field_map[n] for n in names if n in field_map] + return requested if requested else table_schema_fields + @abstractmethod def _get_all_data_fields(self): """Get all data fields""" @@ -230,7 +285,12 @@ def _create_key_value_fields(self, value_field: List[DataField]): return all_data_fields - def create_index_mapping(self): + def create_index_mapping(self, file: Optional[DataFileMeta] = None, read_file_fields: Optional[List[str]] = None, + read_fields: Optional[List[str]] = None, is_blob_file: bool = False): + write_cols = getattr(file, "write_cols", None) if file else None + if write_cols and read_file_fields is not None and 
read_fields is not None: + num_cols = len(read_file_fields) if is_blob_file else len(read_fields) + return list(range(num_cols)) if num_cols > 0 else None base_index_mapping = self._create_base_index_mapping(self.read_fields, self._get_read_data_fields()) trimmed_key_mapping, _ = self._get_trimmed_fields(self._get_read_data_fields(), self._get_all_data_fields()) if base_index_mapping is None: @@ -317,30 +377,79 @@ def _get_trimmed_fields(self, read_data_fields: List[DataField], return trimmed_mapping, trimmed_fields - def _create_partition_info(self): + def _actual_read_fields_for_partition( + self, + read_file_fields: List[str], + table_schema_fields: List[DataField], + ) -> List[DataField]: + """Fields actually read from this file (for partition mapping).""" + field_map = {f.name: f for f in table_schema_fields} + return [field_map[fn] for fn in read_file_fields if fn in field_map] + + def _output_fields_for_partition_info( + self, + for_merge_read: bool, + fields: List[DataField], + actual_read_fields_for_partition: List[DataField], + table_schema_fields: List[DataField], + ) -> List[DataField]: + """ + Output field list for this file's partition mapping. + When partition + data evolution, narrow to partition + actual read columns + so mapping indices match the record batch from this file. 
+ """ + if ( + for_merge_read + or not self.table.partition_keys + or not actual_read_fields_for_partition + or fields is not table_schema_fields + ): + return fields + partition_row = self.split.partition + full_partition_and_file = list(partition_row.fields) + actual_read_fields_for_partition + available_names = {f.name for f in full_partition_and_file} + out = [f for f in self.read_fields if f.name in available_names] + return out if out else full_partition_and_file + + def _create_partition_info( + self, + actual_read_fields: Optional[List[DataField]] = None, + output_fields: Optional[List[DataField]] = None): if not self.table.partition_keys: return None - partition_mapping = self._construct_partition_mapping() + partition_mapping = self._construct_partition_mapping(actual_read_fields, output_fields) if not partition_mapping: return None return PartitionInfo(partition_mapping, self.split.partition) - def _construct_partition_mapping(self) -> List[int]: - _, trimmed_fields = self._get_trimmed_fields( - self._get_read_data_fields(), self._get_all_data_fields() - ) + def _construct_partition_mapping( + self, + actual_read_fields: Optional[List[DataField]] = None, + output_fields: Optional[List[DataField]] = None) -> List[int]: + if actual_read_fields is not None: + read_data_fields = actual_read_fields + else: + read_data_fields = self._get_read_data_fields() + + if output_fields is not None: + fields_to_map = output_fields + else: + fields_to_map = read_data_fields + + actual_read_field_names = {field.name: idx for idx, field in enumerate(read_data_fields)} partition_names = self.table.partition_keys + num_record_batch_cols = len(read_data_fields) - mapping = [0] * (len(trimmed_fields) + 1) - p_count = 0 + mapping = [0] * (len(fields_to_map) + 1) - for i, field in enumerate(trimmed_fields): + for i, field in enumerate(fields_to_map): if field.name in partition_names: partition_index = partition_names.index(field.name) mapping[i] = -(partition_index + 1) - 
p_count += 1 + elif field.name in actual_read_field_names: + mapping[i] = actual_read_field_names[field.name] + 1 else: - mapping[i] = (i - p_count) + 1 + mapping[i] = num_record_batch_cols + 1 return mapping @@ -483,7 +592,9 @@ def create_reader(self) -> RecordReader: if len(need_merge_files) == 1 or not self.read_fields: # No need to merge fields, just create a single file reader suppliers.append( - lambda f=need_merge_files[0]: self._create_file_reader(f, self._get_final_read_data_fields()) + lambda f=need_merge_files[0]: self._create_file_reader( + f, self._get_final_read_data_fields(), use_requested_field_names=False + ) ) else: suppliers.append( @@ -547,6 +658,14 @@ def _create_union_reader(self, need_merge_files: List[DataFileMeta]) -> RecordRe # Split field bunches fields_files = self._split_field_bunches(need_merge_files) + def _bunch_sort_key(bunch: FieldBunch) -> tuple: + first_file = bunch.files()[0] + max_seq = max(f.max_sequence_number for f in bunch.files()) + is_partial = 1 if (first_file.write_cols and len(first_file.write_cols) > 0) else 0 + return (max_seq, is_partial) + + fields_files = sorted(fields_files, key=_bunch_sort_key, reverse=True) + # Validate row counts and first row IDs row_count = fields_files[0].row_count() first_row_id = fields_files[0].files()[0].first_row_id @@ -564,10 +683,26 @@ def _create_union_reader(self, need_merge_files: List[DataFileMeta]) -> RecordRe file_record_readers = [None] * len(fields_files) read_field_index = [field.id for field in all_read_fields] - # Initialize offsets + # Initialize offsets and per-bunch read_fields (built in two passes) row_offsets = [-1] * len(all_read_fields) field_offsets = [-1] * len(all_read_fields) + read_fields_per_bunch = [[] for _ in range(len(fields_files))] + # Pass 1: Assign from partial bunches (write_cols) by name first. 
This ensures columns + for i, bunch in enumerate(fields_files): + first_file = bunch.files()[0] + if not (first_file.write_cols and len(first_file.write_cols) > 0): + continue + for j, field in enumerate(all_read_fields): + if row_offsets[j] == -1 and field.name in first_file.write_cols: + # Do not assign non-blob fields to a blob bunch (blob file only has blob column) + if DataFileMeta.is_blob_file(first_file.file_name) and field.name != first_file.write_cols[0]: + continue + row_offsets[j] = i + field_offsets[j] = len(read_fields_per_bunch[i]) + read_fields_per_bunch[i].append(field) + + # Pass 2: Assign remaining fields by field id (full-schema base and system fields) for i, bunch in enumerate(fields_files): first_file = bunch.files()[0] @@ -578,10 +713,13 @@ def _create_union_reader(self, need_merge_files: List[DataFileMeta]) -> RecordRe elif first_file.write_cols: field_ids = self._get_field_ids_from_write_cols(first_file.write_cols) else: - # For regular files, get all field IDs from the schema - field_ids = [field.id for field in self.table.fields] - - read_fields = [] + schema = self.table.schema_manager.get_schema(first_file.schema_id) + schema_fields = ( + SpecialFields.row_type_with_row_tracking(schema.fields) + if self.row_tracking_enabled else schema.fields + ) + field_ids = [field.id for field in schema_fields] + read_fields = list(read_fields_per_bunch[i]) for j, read_field_id in enumerate(read_field_index): for field_id in field_ids: if read_field_id == field_id: @@ -590,25 +728,49 @@ def _create_union_reader(self, need_merge_files: List[DataFileMeta]) -> RecordRe field_offsets[j] = len(read_fields) read_fields.append(all_read_fields[j]) break + read_fields_per_bunch[i] = read_fields + self._assign_remaining_fields_by_write_cols( + all_read_fields, row_offsets, field_offsets, read_fields_per_bunch, fields_files + ) + use_requested_field_names = self._use_requested_field_names_for_merge(fields_files) + + table_field_names_set = {f.name for f in 
self.table.fields} + for i, bunch in enumerate(fields_files): + read_fields = list(read_fields_per_bunch[i]) if not read_fields: file_record_readers[i] = None else: + if not DataFileMeta.is_blob_file(bunch.files()[0].file_name): + schema = self.table.schema_manager.get_schema(bunch.files()[0].schema_id) + schema_fields = ( + SpecialFields.row_type_with_row_tracking(schema.fields) + if self.row_tracking_enabled else schema.fields + ) + read_field_names_set = {f.name for f in read_fields} + for f in schema_fields: + if f.name in table_field_names_set and f.name not in read_field_names_set: + read_fields.append(f) + read_field_names_set.add(f.name) read_field_names = self._remove_partition_fields(read_fields) table_fields = self.read_fields self.read_fields = read_fields # create reader based on read_fields batch_size = self.table.options.read_batch_size() # Create reader for this bunch if len(bunch.files()) == 1: - suppliers = [lambda r=self._create_file_reader( - bunch.files()[0], read_field_names - ): r] + suppliers = [ + partial(self._create_file_reader, file=bunch.files()[0], + read_fields=read_field_names, + use_requested_field_names=use_requested_field_names) + ] file_record_readers[i] = MergeAllBatchReader(suppliers, batch_size=batch_size) else: # Create concatenated reader for multiple files suppliers = [ partial(self._create_file_reader, file=file, - read_fields=read_field_names) for file in bunch.files() + read_fields=read_field_names, + use_requested_field_names=use_requested_field_names) + for file in bunch.files() ] file_record_readers[i] = MergeAllBatchReader(suppliers, batch_size=batch_size) self.read_fields = table_fields @@ -622,20 +784,61 @@ def _create_union_reader(self, need_merge_files: List[DataFileMeta]) -> RecordRe output_schema = PyarrowFieldParser.from_paimon_schema(all_read_fields) return DataEvolutionMergeReader(row_offsets, field_offsets, file_record_readers, schema=output_schema) - def _create_file_reader(self, file: DataFileMeta, 
read_fields: [str]) -> Optional[RecordReader]: + def _assign_remaining_fields_by_write_cols( + self, + all_read_fields: List[DataField], + row_offsets: List[int], + field_offsets: List[int], + read_fields_per_bunch: List[List[DataField]], + fields_files: List[FieldBunch], + ) -> None: + """Assign any still-unassigned table field to a bunch that has it in write_cols (by name).""" + table_field_names = {f.name for f in self.table.fields} + for i, field in enumerate(all_read_fields): + if row_offsets[i] != -1 or field.name not in table_field_names: + continue + for bi, bunch in enumerate(fields_files): + first_file = bunch.files()[0] + if not first_file.write_cols or field.name not in first_file.write_cols: + continue + if DataFileMeta.is_blob_file(first_file.file_name) and field.name != first_file.write_cols[0]: + continue + row_offsets[i] = bi + field_offsets[i] = len(read_fields_per_bunch[bi]) + read_fields_per_bunch[bi].append(field) + break + + def _use_requested_field_names_for_merge(self, fields_files: List[FieldBunch]) -> bool: + """True when non-blob bunches have different write_cols, so output column order must be unified.""" + write_cols_tuples = [ + tuple(f.files()[0].write_cols or ()) + for f in fields_files + if not DataFileMeta.is_blob_file(f.files()[0].file_name) + ] + all_same = len(set(write_cols_tuples)) <= 1 if write_cols_tuples else True + return not all_same + + def _create_file_reader(self, file: DataFileMeta, read_fields: [str], + use_requested_field_names: bool = True) -> Optional[RecordReader]: """Create a file reader for a single file.""" def create_record_reader(): return self.file_reader_supplier( file=file, for_merge_read=False, read_fields=read_fields, - row_tracking_enabled=True) + row_tracking_enabled=True, + use_requested_field_names=use_requested_field_names) + + base = create_record_reader() if self.row_ranges is None: - return create_record_reader() - row_ranges = Range.and_(self.row_ranges, [file.row_id_range()]) + return base + 
file_range = file.row_id_range() + if file_range is None: + return base + row_ranges = Range.and_(self.row_ranges, [file_range]) if len(row_ranges) == 0: return EmptyRecordBatchReader() - return RowIdFilterRecordBatchReader(create_record_reader(), file.first_row_id, row_ranges) + return RowIdFilterRecordBatchReader(base, file.first_row_id, row_ranges) def _split_field_bunches(self, need_merge_files: List[DataFileMeta]) -> List[FieldBunch]: """Split files into field bunches.""" diff --git a/paimon-python/pypaimon/tests/data_evolution_test.py b/paimon-python/pypaimon/tests/data_evolution_test.py index 3759bfdb46af..9fd0a35e077b 100644 --- a/paimon-python/pypaimon/tests/data_evolution_test.py +++ b/paimon-python/pypaimon/tests/data_evolution_test.py @@ -27,8 +27,10 @@ from pypaimon import CatalogFactory, Schema from pypaimon.common.predicate import Predicate +from pypaimon.common.predicate_builder import PredicateBuilder from pypaimon.manifest.manifest_list_manager import ManifestListManager -from pypaimon.read.read_builder import ReadBuilder +from pypaimon.read.reader.iface.record_batch_reader import RecordBatchReader +from pypaimon.schema.data_types import AtomicType, DataField from pypaimon.snapshot.snapshot_manager import SnapshotManager from pypaimon.table.row.offset_row import OffsetRow @@ -141,13 +143,63 @@ def test_basic(self): ('f1', pa.int16()), ])) self.assertEqual(actual_data, expect_data) + self.assertEqual( + len(actual_data.schema), len(expect_data.schema), + 'Read output column count must match schema') + self.assertEqual( + actual_data.schema.names, expect_data.schema.names, + 'Read output column names must match schema') - # assert manifest file meta contains min and max row id + def test_partitioned_read_requested_column_missing_in_file(self): + pa_schema = pa.schema([('f0', pa.int32()), ('f1', pa.string()), ('dt', pa.string())]) + schema = Schema.from_pyarrow_schema( + pa_schema, + partition_keys=['dt'], + options={'row-tracking.enabled': 'true', 
'data-evolution.enabled': 'true'} + ) + self.catalog.create_table('default.test_partition_missing_col', schema, False) + table = self.catalog.get_table('default.test_partition_missing_col') + wb = table.new_batch_write_builder() + + tw1 = wb.new_write() + tc1 = wb.new_commit() + tw1.write_arrow(pa.Table.from_pydict( + {'f0': [1, 2], 'f1': ['a', 'b'], 'dt': ['p1', 'p1']}, + schema=pa_schema + )) + tc1.commit(tw1.prepare_commit()) + tw1.close() + tc1.close() + + tw2 = wb.new_write().with_write_type(['f0', 'dt']) + tc2 = wb.new_commit() + # Row key extractor uses table column indices; pass table-ordered data with null for f1 + tw2.write_arrow(pa.Table.from_pydict( + {'f0': [3, 4], 'f1': [None, None], 'dt': ['p1', 'p1']}, + schema=pa_schema + )) + tc2.commit(tw2.prepare_commit()) + tw2.close() + tc2.close() + + actual = table.new_read_builder().new_read().to_arrow(table.new_read_builder().new_scan().plan().splits()) + self.assertEqual(len(actual.schema), 3, 'Must have f0, f1, dt (no silent drop when f1 missing in file)') + self.assertEqual(actual.schema.names, ['f0', 'f1', 'dt']) + self.assertEqual(actual.num_rows, 4) + f1_col = actual.column('f1') + self.assertEqual(f1_col[0].as_py(), 'a') + self.assertEqual(f1_col[1].as_py(), 'b') + self.assertIsNone(f1_col[2].as_py()) + self.assertIsNone(f1_col[3].as_py()) + + # Assert manifest file meta contains min and max row id manifest_list_manager = ManifestListManager(table) snapshot_manager = SnapshotManager(table) - manifest = manifest_list_manager.read(snapshot_manager.get_latest_snapshot().delta_manifest_list)[0] - self.assertEqual(0, manifest.min_row_id) - self.assertEqual(1, manifest.max_row_id) + all_manifests = manifest_list_manager.read_all(snapshot_manager.get_latest_snapshot()) + first_commit = next((m for m in all_manifests if m.min_row_id == 0 and m.max_row_id == 1), None) + self.assertIsNotNone(first_commit, "Should have a manifest with min_row_id=0, max_row_id=1") + second_commit = next((m for m in 
all_manifests if m.min_row_id == 2 and m.max_row_id == 3), None) + self.assertIsNotNone(second_commit, "Should have a manifest with min_row_id=2, max_row_id=3") def test_merge_reader(self): from pypaimon.read.reader.concat_batch_reader import MergeAllBatchReader @@ -280,6 +332,14 @@ def test_with_slice(self): [2, 1001, 2001], "with_slice(1, 4) should return id in (2, 1001, 2001). Got ids=%s" % ids, ) + scan_oob = rb.new_scan().with_slice(10, 12) + splits_oob = scan_oob.plan().splits() + result_oob = rb.new_read().to_pandas(splits_oob) + self.assertEqual( + len(result_oob), + 0, + "with_slice(10, 12) on 6 rows should return 0 rows (out of bounds), got %d" % len(result_oob), + ) # Out-of-bounds slice: 6 rows total, slice(10, 12) should return 0 rows scan_oob = rb.new_scan().with_slice(10, 12) @@ -439,6 +499,8 @@ def test_multiple_appends(self): 'f2': ['b'] * 100 + ['y'] + ['d'], }, schema=simple_pa_schema) self.assertEqual(actual, expect) + self.assertEqual(len(actual.schema), len(expect.schema), 'Merge read output column count must match schema') + self.assertEqual(actual.schema.names, expect.schema.names, 'Merge read output column names must match schema') def test_disorder_cols_append(self): simple_pa_schema = pa.schema([ @@ -1175,6 +1237,7 @@ def test_read_row_tracking_metadata(self): pa.field('_SEQUENCE_NUMBER', pa.int64(), nullable=False), ])) self.assertEqual(actual_data, expect_data) + self.assertEqual(len(actual_data.schema), len(expect_data.schema), 'Read output column count must match schema') # write 2 table_write = write_builder.new_write().with_write_type(['f0']) @@ -1210,6 +1273,7 @@ def test_read_row_tracking_metadata(self): pa.field('_SEQUENCE_NUMBER', pa.int64(), nullable=False), ])) self.assertEqual(actual_data, expect_data) + self.assertEqual(len(actual_data.schema), len(expect_data.schema), 'Read output column count must match schema') def test_from_arrays_without_schema(self): schema = pa.schema([ diff --git 
a/paimon-python/pypaimon/tests/shard_table_updator_test.py b/paimon-python/pypaimon/tests/shard_table_updator_test.py index 967dfbcd6ecd..1ff658c609ad 100644 --- a/paimon-python/pypaimon/tests/shard_table_updator_test.py +++ b/paimon-python/pypaimon/tests/shard_table_updator_test.py @@ -85,7 +85,7 @@ def test_compute_column_d_equals_c_plus_b_minus_a(self): # Step 3: Use ShardTableUpdator to compute d = c + b - a table_update = write_builder.new_update() - table_update.with_read_projection(['a', 'b', 'c']) + table_update.with_read_projection(['a', 'b', 'c', '_ROW_ID']) table_update.with_update_type(['d']) shard_updator = table_update.new_shard_updator(0, 1) @@ -98,7 +98,13 @@ def test_compute_column_d_equals_c_plus_b_minus_a(self): a_values = batch.column('a').to_pylist() b_values = batch.column('b').to_pylist() c_values = batch.column('c').to_pylist() - + row_id_values = batch.column('_ROW_ID').to_pylist() + self.assertEqual( + row_id_values, + list(range(len(a_values))), + '_ROW_ID should be [0, 1, 2, ...] for sequential rows', + ) + d_values = [c + b - a for a, b, c in zip(a_values, b_values, c_values)] # Create batch with d column @@ -321,5 +327,270 @@ def test_compute_column_with_existing_column(self): self.assertEqual(actual, expected) print("\n✅ Test passed! 
Column d = c + b - a computed correctly!") + def test_partial_shard_update_full_read_schema_unified(self): + table_schema = pa.schema([ + ('a', pa.int32()), + ('b', pa.int32()), + ('c', pa.int32()), + ('d', pa.int32()), + ]) + schema = Schema.from_pyarrow_schema( + table_schema, + options={'row-tracking.enabled': 'true', 'data-evolution.enabled': 'true'}, + ) + name = self._create_unique_table_name() + self.catalog.create_table(name, schema, False) + table = self.catalog.get_table(name) + + # Two commits => two files (two first_row_id ranges) + for start, end in [(1, 10), (10, 20)]: + wb = table.new_batch_write_builder() + tw = wb.new_write().with_write_type(['a', 'b', 'c']) + tc = wb.new_commit() + data = pa.Table.from_pydict({ + 'a': list(range(start, end + 1)), + 'b': [i * 10 for i in range(start, end + 1)], + 'c': [i * 100 for i in range(start, end + 1)], + }, schema=pa.schema([ + ('a', pa.int32()), ('b', pa.int32()), ('c', pa.int32()), + ])) + tw.write_arrow(data) + tc.commit(tw.prepare_commit()) + tw.close() + tc.close() + + # Only shard 0 runs => only first file gets d + wb = table.new_batch_write_builder() + upd = wb.new_update() + upd.with_read_projection(['a', 'b', 'c']) + upd.with_update_type(['d']) + shard0 = upd.new_shard_updator(0, 2) + reader = shard0.arrow_reader() + for batch in iter(reader.read_next_batch, None): + a_ = batch.column('a').to_pylist() + b_ = batch.column('b').to_pylist() + c_ = batch.column('c').to_pylist() + d_ = [c + b - a for a, b, c in zip(a_, b_, c_)] + shard0.update_by_arrow_batch(pa.RecordBatch.from_pydict( + {'d': d_}, schema=pa.schema([('d', pa.int32())]), + )) + tc = wb.new_commit() + tc.commit(shard0.prepare_commit()) + tc.close() + + rb = table.new_read_builder() + tr = rb.new_read() + actual = tr.to_arrow(rb.new_scan().plan().splits()) + self.assertEqual(actual.num_rows, 21) + d_col = actual.column('d') + # First 10 rows (shard 0): d = c+b-a + for i in range(10): + self.assertEqual(d_col[i].as_py(), (i + 1) * 100 + (i + 
1) * 10 - (i + 1)) + # Rows 10-20 (shard 1 not run): d is null + for i in range(10, 21): + self.assertIsNone(d_col[i].as_py()) + + def test_with_shard_read_after_partial_shard_update(self): + table_schema = pa.schema([ + ('a', pa.int32()), + ('b', pa.int32()), + ('c', pa.int32()), + ('d', pa.int32()), + ]) + schema = Schema.from_pyarrow_schema( + table_schema, + options={'row-tracking.enabled': 'true', 'data-evolution.enabled': 'true'}, + ) + name = self._create_unique_table_name() + self.catalog.create_table(name, schema, False) + table = self.catalog.get_table(name) + + for start, end in [(1, 10), (10, 20)]: + wb = table.new_batch_write_builder() + tw = wb.new_write().with_write_type(['a', 'b', 'c']) + tc = wb.new_commit() + data = pa.Table.from_pydict({ + 'a': list(range(start, end + 1)), + 'b': [i * 10 for i in range(start, end + 1)], + 'c': [i * 100 for i in range(start, end + 1)], + }, schema=pa.schema([ + ('a', pa.int32()), ('b', pa.int32()), ('c', pa.int32()), + ])) + tw.write_arrow(data) + tc.commit(tw.prepare_commit()) + tw.close() + tc.close() + + wb = table.new_batch_write_builder() + upd = wb.new_update() + upd.with_read_projection(['a', 'b', 'c']) + upd.with_update_type(['d']) + shard0 = upd.new_shard_updator(0, 2) + reader = shard0.arrow_reader() + for batch in iter(reader.read_next_batch, None): + a_ = batch.column('a').to_pylist() + b_ = batch.column('b').to_pylist() + c_ = batch.column('c').to_pylist() + d_ = [c + b - a for a, b, c in zip(a_, b_, c_)] + shard0.update_by_arrow_batch(pa.RecordBatch.from_pydict( + {'d': d_}, schema=pa.schema([('d', pa.int32())]), + )) + tc = wb.new_commit() + tc.commit(shard0.prepare_commit()) + tc.close() + + rb = table.new_read_builder() + tr = rb.new_read() + + splits_0 = rb.new_scan().with_shard(0, 2).plan().splits() + result_0 = tr.to_arrow(splits_0) + self.assertEqual(result_0.num_rows, 11) + d_col_0 = result_0.column('d') + for i in range(10): + self.assertEqual( + d_col_0[i].as_py(), + (i + 1) * 100 + (i + 1) 
* 10 - (i + 1), + "Shard 0 row %d: d should be c+b-a" % i, + ) + self.assertIsNone(d_col_0[10].as_py(), "Shard 0 row 10: d not updated, should be null") + + splits_1 = rb.new_scan().with_shard(1, 2).plan().splits() + result_1 = tr.to_arrow(splits_1) + self.assertEqual(result_1.num_rows, 10) + d_col_1 = result_1.column('d') + for i in range(10): + self.assertIsNone(d_col_1[i].as_py(), "Shard 1 row %d: d should be null" % i) + + full_splits = rb.new_scan().plan().splits() + full_result = tr.to_arrow(full_splits) + self.assertEqual( + result_0.num_rows + result_1.num_rows, + full_result.num_rows, + "Shard 0 + Shard 1 row count should equal full scan (21)", + ) + + rb_filter = table.new_read_builder() + rb_filter.with_projection(['a', 'b', 'c', 'd', '_ROW_ID']) + pb = rb_filter.new_predicate_builder() + pred_row_id = pb.is_in('_ROW_ID', [0, 1, 2, 3, 4]) + rb_filter.with_filter(pred_row_id) + tr_filter = rb_filter.new_read() + splits_row_id = rb_filter.new_scan().plan().splits() + result_row_id = tr_filter.to_arrow(splits_row_id) + self.assertEqual(result_row_id.num_rows, 5, "Filter _ROW_ID in [0..4] should return 5 rows") + a_col = result_row_id.column('a') + d_col_r = result_row_id.column('d') + for i in range(5): + self.assertEqual(a_col[i].as_py(), i + 1) + self.assertEqual( + d_col_r[i].as_py(), + (i + 1) * 100 + (i + 1) * 10 - (i + 1), + "Filter-by-_row_id row %d: d should be c+b-a" % i, + ) + + rb_slice = table.new_read_builder() + tr_slice = rb_slice.new_read() + slice_0 = rb_slice.new_scan().with_slice(0, 10).plan().splits() + result_slice_0 = tr_slice.to_arrow(slice_0) + self.assertEqual(result_slice_0.num_rows, 10, "with_slice(0, 10) should return 10 rows") + d_s0 = result_slice_0.column('d') + for i in range(10): + self.assertEqual( + d_s0[i].as_py(), + (i + 1) * 100 + (i + 1) * 10 - (i + 1), + "Slice [0,10) row %d: d should be c+b-a" % i, + ) + slice_1 = rb_slice.new_scan().with_slice(10, 21).plan().splits() + result_slice_1 = tr_slice.to_arrow(slice_1) + 
self.assertEqual(result_slice_1.num_rows, 11, "with_slice(10, 21) should return 11 rows") + d_s1 = result_slice_1.column('d') + for i in range(11): + self.assertIsNone(d_s1[i].as_py(), "Slice [10,21) row %d: d should be null" % i) + + cross_slice = rb_slice.new_scan().with_slice(5, 16).plan().splits() + result_cross = tr_slice.to_arrow(cross_slice) + self.assertEqual( + result_cross.num_rows, 11, + "Cross-shard with_slice(5, 16) should return 11 rows (5 from file1 + 6 from file2)", + ) + a_cross = result_cross.column('a') + d_cross = result_cross.column('d') + for i in range(5): + self.assertEqual(a_cross[i].as_py(), 6 + i) + self.assertEqual( + d_cross[i].as_py(), + (6 + i) * 100 + (6 + i) * 10 - (6 + i), + "Cross-shard slice row %d (from file1): d should be c+b-a" % i, + ) + for i in range(5, 11): + self.assertEqual(a_cross[i].as_py(), 10 + (i - 5)) + self.assertIsNone(d_cross[i].as_py(), "Cross-shard slice row %d (from file2): d null" % i) + + rb_col = table.new_read_builder() + rb_col.with_projection(['a', 'b', 'c', 'd']) + pb_col = rb_col.new_predicate_builder() + pred_d = pb_col.is_in('d', [109, 218]) # d = c+b-a for a=1,2 + rb_col.with_filter(pred_d) + tr_col = rb_col.new_read() + splits_d = rb_col.new_scan().plan().splits() + result_d = tr_col.to_arrow(splits_d) + self.assertEqual(result_d.num_rows, 2, "Filter d in [109, 218] should return 2 rows") + a_d = result_d.column('a') + d_d = result_d.column('d') + self.assertEqual(a_d[0].as_py(), 1) + self.assertEqual(d_d[0].as_py(), 109) + self.assertEqual(a_d[1].as_py(), 2) + self.assertEqual(d_d[1].as_py(), 218) + + def test_read_projection(self): + table_schema = pa.schema([ + ('a', pa.int32()), + ('b', pa.int32()), + ('c', pa.int32()), + ]) + schema = Schema.from_pyarrow_schema( + table_schema, + options={'row-tracking.enabled': 'true', 'data-evolution.enabled': 'true'} + ) + name = self._create_unique_table_name('read_proj') + self.catalog.create_table(name, schema, False) + table = 
self.catalog.get_table(name) + + write_builder = table.new_batch_write_builder() + table_write = write_builder.new_write().with_write_type(['a', 'b', 'c']) + table_commit = write_builder.new_commit() + init_data = pa.Table.from_pydict( + {'a': [1, 2, 3], 'b': [10, 20, 30], 'c': [100, 200, 300]}, + schema=pa.schema([('a', pa.int32()), ('b', pa.int32()), ('c', pa.int32())]) + ) + table_write.write_arrow(init_data) + cmts = table_write.prepare_commit() + for cmt in cmts: + for nf in cmt.new_files: + nf.first_row_id = 0 + table_commit.commit(cmts) + table_write.close() + table_commit.close() + + table_update = write_builder.new_update() + table_update.with_read_projection(['a', 'b', 'c']) + table_update.with_update_type(['a']) + shard_updator = table_update.new_shard_updator(0, 1) + reader = shard_updator.arrow_reader() + + batch = reader.read_next_batch() + self.assertIsNotNone(batch, "Should have at least one batch") + actual_columns = set(batch.schema.names) + + expected_columns = {'a', 'b', 'c'} + self.assertEqual( + actual_columns, + expected_columns, + "with_read_projection(['a','b','c']) should return only a,b,c; " + "got %s. _ROW_ID and _SEQUENCE_NUMBER should NOT be returned when not in projection." 
+ % actual_columns + ) + + if __name__ == '__main__': unittest.main() From a7e703080c1399cc5d5c2d981b71389ce3760ed7 Mon Sep 17 00:00:00 2001 From: xiaohongbo Date: Fri, 27 Feb 2026 18:06:49 +0800 Subject: [PATCH 02/20] fix merge issue --- .../pypaimon/read/reader/concat_batch_reader.py | 2 -- .../pypaimon/read/reader/data_file_batch_reader.py | 3 --- paimon-python/pypaimon/read/split_read.py | 13 ++----------- 3 files changed, 2 insertions(+), 16 deletions(-) diff --git a/paimon-python/pypaimon/read/reader/concat_batch_reader.py b/paimon-python/pypaimon/read/reader/concat_batch_reader.py index 367ef4deea5d..be4bd731234a 100644 --- a/paimon-python/pypaimon/read/reader/concat_batch_reader.py +++ b/paimon-python/pypaimon/read/reader/concat_batch_reader.py @@ -219,10 +219,8 @@ def read_arrow_batch(self) -> Optional[RecordBatch]: ).slice(0, min_rows) columns.append(column) else: - # Field doesn't exist in this batch, fill with nulls columns.append(pa.nulls(min_rows, type=self.schema.field(i).type)) else: - # No batch provides this field, fill with nulls columns.append(pa.nulls(min_rows, type=self.schema.field(i).type)) for i in range(len(self.readers)): diff --git a/paimon-python/pypaimon/read/reader/data_file_batch_reader.py b/paimon-python/pypaimon/read/reader/data_file_batch_reader.py index ce0db60ae3a5..919e52197364 100644 --- a/paimon-python/pypaimon/read/reader/data_file_batch_reader.py +++ b/paimon-python/pypaimon/read/reader/data_file_batch_reader.py @@ -53,7 +53,6 @@ def __init__(self, format_reader: RecordBatchReader, index_mapping: List[int], p self.first_row_id = first_row_id self.max_sequence_number = max_sequence_number self.system_fields = system_fields -<<<<<<< HEAD self.blob_as_descriptor = blob_as_descriptor self.blob_descriptor_fields = blob_descriptor_fields or set() self.file_io = file_io @@ -67,7 +66,6 @@ def __init__(self, format_reader: RecordBatchReader, index_mapping: List[int], p for field_name in self.blob_descriptor_fields if field_name in 
self.blob_field_names } -======= self.requested_field_names = [field.name for field in fields] if fields else None self.fields = fields @@ -95,7 +93,6 @@ def _align_to_requested_names( ) ordered_names.append(name) return ordered_arrays, ordered_names ->>>>>>> 277fef48c (support shards read of data evolution) def read_arrow_batch(self, start_idx=None, end_idx=None) -> Optional[RecordBatch]: if isinstance(self.format_reader, FormatBlobReader): diff --git a/paimon-python/pypaimon/read/split_read.py b/paimon-python/pypaimon/read/split_read.py index af365d438d2a..9a13d719c036 100644 --- a/paimon-python/pypaimon/read/split_read.py +++ b/paimon-python/pypaimon/read/split_read.py @@ -130,13 +130,14 @@ def file_reader_supplier(self, file: DataFileMeta, for_merge_read: bool, read_file_fields.append(col) batch_size = self.table.options.read_batch_size() + blob_as_descriptor = CoreOptions.blob_as_descriptor(self.table.options) + blob_descriptor_fields = CoreOptions.blob_descriptor_fields(self.table.options) format_reader: RecordBatchReader if file_format == CoreOptions.FILE_FORMAT_AVRO: format_reader = FormatAvroReader(self.table.file_io, file_path, read_file_fields, self.read_fields, read_arrow_predicate, batch_size=batch_size) elif file_format == CoreOptions.FILE_FORMAT_BLOB: - blob_as_descriptor = CoreOptions.blob_as_descriptor(self.table.options) format_reader = FormatBlobReader(self.table.file_io, file_path, read_file_fields, self.read_fields, read_arrow_predicate, blob_as_descriptor, batch_size=batch_size) @@ -149,19 +150,9 @@ def file_reader_supplier(self, file: DataFileMeta, for_merge_read: bool, else: raise ValueError(f"Unexpected file format: {file_format}") -<<<<<<< HEAD - blob_as_descriptor = CoreOptions.blob_as_descriptor(self.table.options) - blob_descriptor_fields = CoreOptions.blob_descriptor_fields(self.table.options) - - index_mapping = self.create_index_mapping() - partition_info = self._create_partition_info() - system_fields = 
SpecialFields.find_system_fields(self.read_fields) -======= index_mapping = self.create_index_mapping( file=file, read_file_fields=read_file_fields, read_fields=read_fields, is_blob_file=is_blob_file ) - ->>>>>>> 277fef48c (support shards read of data evolution) table_schema_fields = ( SpecialFields.row_type_with_row_tracking(self.table.table_schema.fields) if row_tracking_enabled else self.table.table_schema.fields From 93897a9d2af0a992c23b59450b16da9b9a3534b6 Mon Sep 17 00:00:00 2001 From: xiaohongbo Date: Fri, 27 Feb 2026 18:25:48 +0800 Subject: [PATCH 03/20] clean code --- .../read/reader/data_file_batch_reader.py | 57 +++++++++---------- 1 file changed, 26 insertions(+), 31 deletions(-) diff --git a/paimon-python/pypaimon/read/reader/data_file_batch_reader.py b/paimon-python/pypaimon/read/reader/data_file_batch_reader.py index 919e52197364..02ee54e96341 100644 --- a/paimon-python/pypaimon/read/reader/data_file_batch_reader.py +++ b/paimon-python/pypaimon/read/reader/data_file_batch_reader.py @@ -29,6 +29,8 @@ from pypaimon.table.row.blob import Blob, BlobDescriptor from pypaimon.table.special_fields import SpecialFields +_KEY_PREFIX = "_KEY_" + class DataFileBatchReader(RecordBatchReader): """ @@ -81,8 +83,8 @@ def _align_to_requested_names( ordered_names = [] for name in requested_field_names: idx = name_to_idx.get(name) - if idx is None and name.startswith("_KEY_") and name[5:] in name_to_idx: - idx = name_to_idx[name[5:]] + if idx is None and name.startswith(_KEY_PREFIX) and name[len(_KEY_PREFIX):] in name_to_idx: + idx = name_to_idx[name[len(_KEY_PREFIX):]] if idx is not None: ordered_arrays.append(inter_arrays[idx]) ordered_names.append(name) @@ -115,14 +117,6 @@ def read_arrow_batch(self, start_idx=None, end_idx=None) -> Optional[RecordBatch record_batch = pa.RecordBatch.from_arrays(ordered_arrays, ordered_names) return record_batch - if (self.partition_info is None and self.index_mapping is not None - and not self.requested_field_names): - ncol = 
record_batch.num_columns - if len(self.index_mapping) == ncol and self.index_mapping == list(range(ncol)): - if self.row_tracking_enabled and self.system_fields: - record_batch = self._assign_row_tracking(record_batch) - return record_batch - inter_arrays = [] inter_names = [] num_rows = record_batch.num_rows @@ -136,21 +130,18 @@ def read_arrow_batch(self, start_idx=None, end_idx=None) -> Optional[RecordBatch inter_names.append(partition_field.name) else: real_index = self.partition_info.get_real_index(i) - name = ( - self.requested_field_names[i] - if self.requested_field_names and i < len(self.requested_field_names) - else f"_col_{i}" - ) - batch_names = record_batch.schema.names - col_idx = None - if name in batch_names: - col_idx = record_batch.schema.get_field_index(name) - elif name.startswith("_KEY_") and name[5:] in batch_names: - col_idx = record_batch.schema.get_field_index(name[5:]) - if col_idx is not None: - inter_arrays.append(record_batch.column(col_idx)) - inter_names.append(name) + if self.requested_field_names and i < len(self.requested_field_names): + name = self.requested_field_names[i] elif real_index < record_batch.num_columns: + name = record_batch.schema.field(real_index).name + elif self.fields and i < len(self.fields): + name = self.fields[i].name + else: + raise ValueError( + f"Cannot resolve name for output column i={i}: " + "need requested_field_names, batch column, or fields" + ) + if real_index < record_batch.num_columns: inter_arrays.append(record_batch.column(real_index)) inter_names.append(name) else: @@ -183,11 +174,15 @@ def read_arrow_batch(self, start_idx=None, end_idx=None) -> Optional[RecordBatch mapped_arrays.append(inter_arrays[actual_index]) mapped_names.append(inter_names[actual_index]) else: - name = ( - self.requested_field_names[i] - if self.requested_field_names and i < len(self.requested_field_names) - else f"null_col_{i}" - ) + if self.requested_field_names and i < len(self.requested_field_names): + name = 
self.requested_field_names[i] + elif self.fields and i < len(self.fields): + name = self.fields[i].name + else: + raise ValueError( + f"Cannot resolve name for null column at i={i}: " + "need requested_field_names or fields" + ) field = self.schema_map.get(name) null_array = pa.nulls(num_rows, type=field.type) if field is not None else pa.nulls(num_rows) mapped_arrays.append(null_array) @@ -235,8 +230,8 @@ def read_arrow_batch(self, start_idx=None, end_idx=None) -> Optional[RecordBatch if self.system_primary_key: for i in range(len(self.system_primary_key)): - if i < len(inter_names) and not inter_names[i].startswith("_KEY_"): - inter_names[i] = f"_KEY_{inter_names[i]}" + if i < len(inter_names) and not inter_names[i].startswith(_KEY_PREFIX): + inter_names[i] = f"{_KEY_PREFIX}{inter_names[i]}" if self.requested_field_names is not None and len(inter_arrays) < len(self.requested_field_names): for name in self.requested_field_names[len(inter_arrays):]: From 6d5f8c41e48eb99377288f929fd51c7cbddb0352 Mon Sep 17 00:00:00 2001 From: xiaohongbo Date: Fri, 27 Feb 2026 18:54:16 +0800 Subject: [PATCH 04/20] add create_index_mapping back and fix code format --- .../read/reader/concat_batch_reader.py | 1 - .../read/reader/data_file_batch_reader.py | 25 +++++++++++-------- paimon-python/pypaimon/read/split_read.py | 6 ++++- .../pypaimon/tests/data_evolution_test.py | 3 --- 4 files changed, 19 insertions(+), 16 deletions(-) diff --git a/paimon-python/pypaimon/read/reader/concat_batch_reader.py b/paimon-python/pypaimon/read/reader/concat_batch_reader.py index be4bd731234a..71934c5f1241 100644 --- a/paimon-python/pypaimon/read/reader/concat_batch_reader.py +++ b/paimon-python/pypaimon/read/reader/concat_batch_reader.py @@ -208,7 +208,6 @@ def read_arrow_batch(self) -> Optional[RecordBatch]: columns = [] for i in range(len(self.row_offsets)): batch_index = self.row_offsets[i] - field_index = self.field_offsets[i] field_name = self.schema.field(i).name if batch_index >= 0 and 
batches[batch_index] is not None: diff --git a/paimon-python/pypaimon/read/reader/data_file_batch_reader.py b/paimon-python/pypaimon/read/reader/data_file_batch_reader.py index 02ee54e96341..465a21541ccb 100644 --- a/paimon-python/pypaimon/read/reader/data_file_batch_reader.py +++ b/paimon-python/pypaimon/read/reader/data_file_batch_reader.py @@ -130,18 +130,21 @@ def read_arrow_batch(self, start_idx=None, end_idx=None) -> Optional[RecordBatch inter_names.append(partition_field.name) else: real_index = self.partition_info.get_real_index(i) - if self.requested_field_names and i < len(self.requested_field_names): - name = self.requested_field_names[i] + name = ( + self.requested_field_names[i] + if self.requested_field_names and i < len(self.requested_field_names) + else f"_col_{i}" + ) + batch_names = record_batch.schema.names + col_idx = None + if name in batch_names: + col_idx = record_batch.schema.get_field_index(name) + elif name.startswith(_KEY_PREFIX) and name[len(_KEY_PREFIX):] in batch_names: + col_idx = record_batch.schema.get_field_index(name[len(_KEY_PREFIX):]) + if col_idx is not None: + inter_arrays.append(record_batch.column(col_idx)) + inter_names.append(name) elif real_index < record_batch.num_columns: - name = record_batch.schema.field(real_index).name - elif self.fields and i < len(self.fields): - name = self.fields[i].name - else: - raise ValueError( - f"Cannot resolve name for output column i={i}: " - "need requested_field_names, batch column, or fields" - ) - if real_index < record_batch.num_columns: inter_arrays.append(record_batch.column(real_index)) inter_names.append(name) else: diff --git a/paimon-python/pypaimon/read/split_read.py b/paimon-python/pypaimon/read/split_read.py index 9a13d719c036..b240047f8cd5 100644 --- a/paimon-python/pypaimon/read/split_read.py +++ b/paimon-python/pypaimon/read/split_read.py @@ -512,8 +512,12 @@ def _get_all_data_fields(self): class MergeFileSplitRead(SplitRead): + def create_index_mapping(self, file=None, 
read_file_fields=None, read_fields=None, is_blob_file=False): + return None + def kv_reader_supplier(self, file: DataFileMeta, dv_factory: Optional[Callable] = None) -> RecordReader: - file_batch_reader = self.file_reader_supplier(file, True, self._get_final_read_data_fields(), False) + merge_read_fields = [f.name for f in self._get_read_data_fields()] + file_batch_reader = self.file_reader_supplier(file, True, merge_read_fields, False) dv = dv_factory() if dv_factory else None if dv: return ApplyDeletionVectorReader( diff --git a/paimon-python/pypaimon/tests/data_evolution_test.py b/paimon-python/pypaimon/tests/data_evolution_test.py index 9fd0a35e077b..c6b994cf571e 100644 --- a/paimon-python/pypaimon/tests/data_evolution_test.py +++ b/paimon-python/pypaimon/tests/data_evolution_test.py @@ -27,10 +27,7 @@ from pypaimon import CatalogFactory, Schema from pypaimon.common.predicate import Predicate -from pypaimon.common.predicate_builder import PredicateBuilder from pypaimon.manifest.manifest_list_manager import ManifestListManager -from pypaimon.read.reader.iface.record_batch_reader import RecordBatchReader -from pypaimon.schema.data_types import AtomicType, DataField from pypaimon.snapshot.snapshot_manager import SnapshotManager from pypaimon.table.row.offset_row import OffsetRow From 5a0d223a8e2b564c027031b9d2b1cc9b223d3087 Mon Sep 17 00:00:00 2001 From: xiaohongbo Date: Sat, 28 Feb 2026 11:17:50 +0800 Subject: [PATCH 05/20] revert change --- docs/content/pypaimon/data-evolution.md | 5 - .../read/reader/concat_batch_reader.py | 14 +- .../read/reader/data_file_batch_reader.py | 176 ++--------- paimon-python/pypaimon/read/split_read.py | 276 +++--------------- 4 files changed, 58 insertions(+), 413 deletions(-) diff --git a/docs/content/pypaimon/data-evolution.md b/docs/content/pypaimon/data-evolution.md index 073f939382c0..91714e52c72f 100644 --- a/docs/content/pypaimon/data-evolution.md +++ b/docs/content/pypaimon/data-evolution.md @@ -204,8 +204,3 @@ 
commit.close() - **Row order matters**: the batches you write must have the **same number of rows** as the batches you read, in the same order for that shard. - **Parallelism**: run multiple shards by calling `new_shard_updator(shard_idx, num_shards)` for each shard. - -## Read After Partial Shard Update - -- **Full table read**: rows from updated shards have the new column; rows from other shards have null for that column. -- **Per-shard read** (`with_shard(shard_idx, num_shards)`): read only the shard(s) you need. (new column where written, null elsewhere). diff --git a/paimon-python/pypaimon/read/reader/concat_batch_reader.py b/paimon-python/pypaimon/read/reader/concat_batch_reader.py index 71934c5f1241..4318f883eb2e 100644 --- a/paimon-python/pypaimon/read/reader/concat_batch_reader.py +++ b/paimon-python/pypaimon/read/reader/concat_batch_reader.py @@ -141,8 +141,6 @@ class DataEvolutionMergeReader(RecordBatchReader): - The fourth field comes from batch1, and it is at offset 1 in batch1. - The fifth field comes from batch2, and it is at offset 1 in batch2. - The sixth field comes from batch1, and it is at offset 0 in batch1. - - When row_offsets[i] == -1 (no file provides that field), output a column of nulls using schema. 
""" def __init__( @@ -208,17 +206,9 @@ def read_arrow_batch(self) -> Optional[RecordBatch]: columns = [] for i in range(len(self.row_offsets)): batch_index = self.row_offsets[i] - field_name = self.schema.field(i).name - + field_index = self.field_offsets[i] if batch_index >= 0 and batches[batch_index] is not None: - src_batch = batches[batch_index] - if field_name in src_batch.schema.names: - column = src_batch.column( - src_batch.schema.get_field_index(field_name) - ).slice(0, min_rows) - columns.append(column) - else: - columns.append(pa.nulls(min_rows, type=self.schema.field(i).type)) + columns.append(batches[batch_index].column(field_index).slice(0, min_rows)) else: columns.append(pa.nulls(min_rows, type=self.schema.field(i).type)) diff --git a/paimon-python/pypaimon/read/reader/data_file_batch_reader.py b/paimon-python/pypaimon/read/reader/data_file_batch_reader.py index 465a21541ccb..7f2e1c61e1f4 100644 --- a/paimon-python/pypaimon/read/reader/data_file_batch_reader.py +++ b/paimon-python/pypaimon/read/reader/data_file_batch_reader.py @@ -16,7 +16,7 @@ # limitations under the License. 
################################################################################ -from typing import List, Optional, Tuple +from typing import List, Optional import pyarrow as pa from pyarrow import RecordBatch @@ -29,8 +29,6 @@ from pypaimon.table.row.blob import Blob, BlobDescriptor from pypaimon.table.special_fields import SpecialFields -_KEY_PREFIX = "_KEY_" - class DataFileBatchReader(RecordBatchReader): """ @@ -68,33 +66,6 @@ def __init__(self, format_reader: RecordBatchReader, index_mapping: List[int], p for field_name in self.blob_descriptor_fields if field_name in self.blob_field_names } - self.requested_field_names = [field.name for field in fields] if fields else None - self.fields = fields - - def _align_to_requested_names( - self, - inter_arrays: List, - inter_names: List, - requested_field_names: List[str], - num_rows: int, - ) -> Tuple[List, List]: - name_to_idx = {n: i for i, n in enumerate(inter_names)} - ordered_arrays = [] - ordered_names = [] - for name in requested_field_names: - idx = name_to_idx.get(name) - if idx is None and name.startswith(_KEY_PREFIX) and name[len(_KEY_PREFIX):] in name_to_idx: - idx = name_to_idx[name[len(_KEY_PREFIX):]] - if idx is not None: - ordered_arrays.append(inter_arrays[idx]) - ordered_names.append(name) - else: - field = self.schema_map.get(name) - ordered_arrays.append( - pa.nulls(num_rows, type=field.type) if field is not None else pa.nulls(num_rows) - ) - ordered_names.append(name) - return ordered_arrays, ordered_names def read_arrow_batch(self, start_idx=None, end_idx=None) -> Optional[RecordBatch]: if isinstance(self.format_reader, FormatBlobReader): @@ -104,17 +75,9 @@ def read_arrow_batch(self, start_idx=None, end_idx=None) -> Optional[RecordBatch if record_batch is None: return None - num_rows = record_batch.num_rows if self.partition_info is None and self.index_mapping is None: if self.row_tracking_enabled and self.system_fields: record_batch = self._assign_row_tracking(record_batch) - if 
self.requested_field_names is not None: - inter_arrays = list(record_batch.columns) - inter_names = list(record_batch.schema.names) - ordered_arrays, ordered_names = self._align_to_requested_names( - inter_arrays, inter_names, self.requested_field_names, num_rows - ) - record_batch = pa.RecordBatch.from_arrays(ordered_arrays, ordered_names) return record_batch inter_arrays = [] @@ -130,127 +93,32 @@ def read_arrow_batch(self, start_idx=None, end_idx=None) -> Optional[RecordBatch inter_names.append(partition_field.name) else: real_index = self.partition_info.get_real_index(i) - name = ( - self.requested_field_names[i] - if self.requested_field_names and i < len(self.requested_field_names) - else f"_col_{i}" - ) - batch_names = record_batch.schema.names - col_idx = None - if name in batch_names: - col_idx = record_batch.schema.get_field_index(name) - elif name.startswith(_KEY_PREFIX) and name[len(_KEY_PREFIX):] in batch_names: - col_idx = record_batch.schema.get_field_index(name[len(_KEY_PREFIX):]) - if col_idx is not None: - inter_arrays.append(record_batch.column(col_idx)) - inter_names.append(name) - elif real_index < record_batch.num_columns: + if real_index < record_batch.num_columns: inter_arrays.append(record_batch.column(real_index)) - inter_names.append(name) - else: - field = self.schema_map.get(name) - inter_arrays.append( - pa.nulls(num_rows, type=field.type) if field is not None else pa.nulls(num_rows) - ) - inter_names.append(name) + inter_names.append(record_batch.schema.field(real_index).name) else: - inter_arrays = list(record_batch.columns) - inter_names = list(record_batch.schema.names) + inter_arrays = record_batch.columns + inter_names = record_batch.schema.names - if self.requested_field_names is not None: - inter_arrays, inter_names = self._align_to_requested_names( - inter_arrays, inter_names, self.requested_field_names, num_rows - ) - - if self.index_mapping is not None and not ( - self.requested_field_names is not None and inter_names == 
self.requested_field_names): + if self.index_mapping is not None: mapped_arrays = [] mapped_names = [] - partition_names = ( - set(pf.name for pf in self.partition_info.partition_fields) - if self.partition_info else set() - ) - non_partition_indices = [idx for idx, name in enumerate(inter_names) if name not in partition_names] for i, real_index in enumerate(self.index_mapping): - if 0 <= real_index < len(non_partition_indices): - actual_index = non_partition_indices[real_index] - mapped_arrays.append(inter_arrays[actual_index]) - mapped_names.append(inter_names[actual_index]) + if 0 <= real_index < len(inter_arrays): + mapped_arrays.append(inter_arrays[real_index]) + mapped_names.append(inter_names[real_index]) else: - if self.requested_field_names and i < len(self.requested_field_names): - name = self.requested_field_names[i] - elif self.fields and i < len(self.fields): - name = self.fields[i].name - else: - raise ValueError( - f"Cannot resolve name for null column at i={i}: " - "need requested_field_names or fields" - ) - field = self.schema_map.get(name) - null_array = pa.nulls(num_rows, type=field.type) if field is not None else pa.nulls(num_rows) + null_array = pa.nulls(num_rows) mapped_arrays.append(null_array) - mapped_names.append(name) - - if self.partition_info: - partition_arrays_map = { - inter_names[i]: inter_arrays[i] - for i in range(len(inter_names)) - if inter_names[i] in partition_names - } - - if self.requested_field_names: - final_arrays = [] - final_names = [] - mapped_name_to_array = {name: arr for name, arr in zip(mapped_names, mapped_arrays)} - - for name in self.requested_field_names: - if name in mapped_name_to_array: - final_arrays.append(mapped_name_to_array[name]) - final_names.append(name) - elif name in partition_arrays_map: - final_arrays.append(partition_arrays_map[name]) - final_names.append(name) - else: - # Field not in file (e.g. 
index_mapping -1): output null column - field = self.schema_map.get(name) - null_arr = pa.nulls(num_rows, type=field.type) if field is not None else pa.nulls(num_rows) - final_arrays.append(null_arr) - final_names.append(name) - - inter_arrays = final_arrays - inter_names = final_names - else: - mapped_name_set = set(mapped_names) - for name, arr in partition_arrays_map.items(): - if name not in mapped_name_set: - mapped_arrays.append(arr) - mapped_names.append(name) - inter_arrays = mapped_arrays - inter_names = mapped_names - else: - inter_arrays = mapped_arrays - inter_names = mapped_names + mapped_names.append(f"null_col_{i}") if self.system_primary_key: for i in range(len(self.system_primary_key)): - if i < len(inter_names) and not inter_names[i].startswith(_KEY_PREFIX): - inter_names[i] = f"{_KEY_PREFIX}{inter_names[i]}" - - if self.requested_field_names is not None and len(inter_arrays) < len(self.requested_field_names): - for name in self.requested_field_names[len(inter_arrays):]: - field = self.schema_map.get(name) - inter_arrays.append( - pa.nulls(num_rows, type=field.type) if field is not None else pa.nulls(num_rows) - ) - inter_names.append(name) + if not mapped_names[i].startswith("_KEY_"): + mapped_names[i] = f"_KEY_{mapped_names[i]}" - for i, name in enumerate(inter_names): - target_field = self.schema_map.get(name) - if target_field is not None and inter_arrays[i].type != target_field.type: - try: - inter_arrays[i] = inter_arrays[i].cast(target_field.type) - except (pa.ArrowInvalid, pa.ArrowNotImplementedError): - inter_arrays[i] = pa.nulls(num_rows, type=target_field.type) + inter_arrays = mapped_arrays + inter_names = mapped_names # to contains 'not null' property final_fields = [] @@ -337,28 +205,18 @@ def _deserialize_descriptor_or_none(raw: bytes): def _assign_row_tracking(self, record_batch: RecordBatch) -> RecordBatch: """Assign row tracking meta fields (_ROW_ID and _SEQUENCE_NUMBER).""" arrays = list(record_batch.columns) - num_cols = 
len(arrays) # Handle _ROW_ID field if SpecialFields.ROW_ID.name in self.system_fields.keys(): idx = self.system_fields[SpecialFields.ROW_ID.name] # Create a new array that fills with computed row IDs - if idx < num_cols: - if self.first_row_id is None: - raise ValueError( - "Row tracking requires first_row_id on the file; " - "got None. Ensure file metadata has first_row_id when reading _ROW_ID." - ) - arrays[idx] = pa.array( - range(self.first_row_id, self.first_row_id + record_batch.num_rows), - type=pa.int64()) + arrays[idx] = pa.array(range(self.first_row_id, self.first_row_id + record_batch.num_rows), type=pa.int64()) # Handle _SEQUENCE_NUMBER field if SpecialFields.SEQUENCE_NUMBER.name in self.system_fields.keys(): idx = self.system_fields[SpecialFields.SEQUENCE_NUMBER.name] # Create a new array that fills with max_sequence_number - if idx < num_cols: - arrays[idx] = pa.repeat(self.max_sequence_number, record_batch.num_rows) + arrays[idx] = pa.repeat(self.max_sequence_number, record_batch.num_rows) names = record_batch.schema.names table = None diff --git a/paimon-python/pypaimon/read/split_read.py b/paimon-python/pypaimon/read/split_read.py index b240047f8cd5..d76a71682b17 100644 --- a/paimon-python/pypaimon/read/split_read.py +++ b/paimon-python/pypaimon/read/split_read.py @@ -114,30 +114,22 @@ def create_reader(self) -> RecordReader: """Create a record reader for the given split.""" def file_reader_supplier(self, file: DataFileMeta, for_merge_read: bool, - read_fields: List[str], row_tracking_enabled: bool, - use_requested_field_names: bool = True) -> RecordBatchReader: + read_fields: List[str], row_tracking_enabled: bool) -> RecordBatchReader: (read_file_fields, read_arrow_predicate) = self._get_fields_and_predicate(file.schema_id, read_fields) + # Use external_path if available, otherwise use file_path file_path = file.external_path if file.external_path else file.file_path _, extension = os.path.splitext(file_path) file_format = extension[1:] - 
is_blob_file = file_format == CoreOptions.FILE_FORMAT_BLOB - - if getattr(file, "write_cols", None): - read_file_fields = list(read_file_fields) - for col in file.write_cols: - if col in read_fields and col not in read_file_fields: - read_file_fields.append(col) batch_size = self.table.options.read_batch_size() - blob_as_descriptor = CoreOptions.blob_as_descriptor(self.table.options) - blob_descriptor_fields = CoreOptions.blob_descriptor_fields(self.table.options) format_reader: RecordBatchReader if file_format == CoreOptions.FILE_FORMAT_AVRO: format_reader = FormatAvroReader(self.table.file_io, file_path, read_file_fields, self.read_fields, read_arrow_predicate, batch_size=batch_size) elif file_format == CoreOptions.FILE_FORMAT_BLOB: + blob_as_descriptor = CoreOptions.blob_as_descriptor(self.table.options) format_reader = FormatBlobReader(self.table.file_io, file_path, read_file_fields, self.read_fields, read_arrow_predicate, blob_as_descriptor, batch_size=batch_size) @@ -150,39 +142,23 @@ def file_reader_supplier(self, file: DataFileMeta, for_merge_read: bool, else: raise ValueError(f"Unexpected file format: {file_format}") - index_mapping = self.create_index_mapping( - file=file, read_file_fields=read_file_fields, read_fields=read_fields, is_blob_file=is_blob_file - ) + blob_as_descriptor = CoreOptions.blob_as_descriptor(self.table.options) + blob_descriptor_fields = CoreOptions.blob_descriptor_fields(self.table.options) + + index_mapping = self.create_index_mapping() + partition_info = self._create_partition_info() + system_fields = SpecialFields.find_system_fields(self.read_fields) table_schema_fields = ( SpecialFields.row_type_with_row_tracking(self.table.table_schema.fields) if row_tracking_enabled else self.table.table_schema.fields ) - write_cols = getattr(file, "write_cols", None) - fields = self._output_fields_for_file_reader( - for_merge_read, is_blob_file, use_requested_field_names, write_cols, - read_file_fields, read_fields, table_schema_fields - ) - 
- system_fields = SpecialFields.find_system_fields(fields) - - actual_read_fields_for_partition = self._actual_read_fields_for_partition( - read_file_fields, table_schema_fields - ) - fields = self._output_fields_for_partition_info( - for_merge_read, fields, actual_read_fields_for_partition, table_schema_fields - ) - partition_info = self._create_partition_info( - actual_read_fields=actual_read_fields_for_partition or None, - output_fields=fields - ) - if for_merge_read: return DataFileBatchReader( format_reader, index_mapping, partition_info, self.trimmed_primary_key, - fields, + table_schema_fields, file.max_sequence_number, file.first_row_id, row_tracking_enabled, @@ -196,7 +172,7 @@ def file_reader_supplier(self, file: DataFileMeta, for_merge_read: bool, index_mapping, partition_info, None, - fields, + table_schema_fields, file.max_sequence_number, file.first_row_id, row_tracking_enabled, @@ -223,28 +199,6 @@ def _get_fields_and_predicate(self, schema_id: int, read_fields): self.schema_id_2_fields[key] = (read_file_fields, read_arrow_predicate) return self.schema_id_2_fields[key] - def _output_fields_for_file_reader( - self, - for_merge_read: bool, - is_blob_file: bool, - use_requested_field_names: bool, - write_cols, - read_file_fields: List[str], - read_fields: List[str], - table_schema_fields: List[DataField], - ) -> List[DataField]: - if for_merge_read: - return self.read_fields - if is_blob_file: - names = read_file_fields - elif use_requested_field_names and write_cols: - names = read_fields - else: - names = [f.name for f in self.read_fields] - field_map = {f.name: f for f in table_schema_fields} - requested = [field_map[n] for n in names if n in field_map] - return requested if requested else table_schema_fields - @abstractmethod def _get_all_data_fields(self): """Get all data fields""" @@ -276,12 +230,7 @@ def _create_key_value_fields(self, value_field: List[DataField]): return all_data_fields - def create_index_mapping(self, file: 
Optional[DataFileMeta] = None, read_file_fields: Optional[List[str]] = None, - read_fields: Optional[List[str]] = None, is_blob_file: bool = False): - write_cols = getattr(file, "write_cols", None) if file else None - if write_cols and read_file_fields is not None and read_fields is not None: - num_cols = len(read_file_fields) if is_blob_file else len(read_fields) - return list(range(num_cols)) if num_cols > 0 else None + def create_index_mapping(self): base_index_mapping = self._create_base_index_mapping(self.read_fields, self._get_read_data_fields()) trimmed_key_mapping, _ = self._get_trimmed_fields(self._get_read_data_fields(), self._get_all_data_fields()) if base_index_mapping is None: @@ -368,79 +317,30 @@ def _get_trimmed_fields(self, read_data_fields: List[DataField], return trimmed_mapping, trimmed_fields - def _actual_read_fields_for_partition( - self, - read_file_fields: List[str], - table_schema_fields: List[DataField], - ) -> List[DataField]: - """Fields actually read from this file (for partition mapping).""" - field_map = {f.name: f for f in table_schema_fields} - return [field_map[fn] for fn in read_file_fields if fn in field_map] - - def _output_fields_for_partition_info( - self, - for_merge_read: bool, - fields: List[DataField], - actual_read_fields_for_partition: List[DataField], - table_schema_fields: List[DataField], - ) -> List[DataField]: - """ - Output field list for this file's partition mapping. - When partition + data evolution, narrow to partition + actual read columns - so mapping indices match the record batch from this file. 
- """ - if ( - for_merge_read - or not self.table.partition_keys - or not actual_read_fields_for_partition - or fields is not table_schema_fields - ): - return fields - partition_row = self.split.partition - full_partition_and_file = list(partition_row.fields) + actual_read_fields_for_partition - available_names = {f.name for f in full_partition_and_file} - out = [f for f in self.read_fields if f.name in available_names] - return out if out else full_partition_and_file - - def _create_partition_info( - self, - actual_read_fields: Optional[List[DataField]] = None, - output_fields: Optional[List[DataField]] = None): + def _create_partition_info(self): if not self.table.partition_keys: return None - partition_mapping = self._construct_partition_mapping(actual_read_fields, output_fields) + partition_mapping = self._construct_partition_mapping() if not partition_mapping: return None return PartitionInfo(partition_mapping, self.split.partition) - def _construct_partition_mapping( - self, - actual_read_fields: Optional[List[DataField]] = None, - output_fields: Optional[List[DataField]] = None) -> List[int]: - if actual_read_fields is not None: - read_data_fields = actual_read_fields - else: - read_data_fields = self._get_read_data_fields() - - if output_fields is not None: - fields_to_map = output_fields - else: - fields_to_map = read_data_fields - - actual_read_field_names = {field.name: idx for idx, field in enumerate(read_data_fields)} + def _construct_partition_mapping(self) -> List[int]: + _, trimmed_fields = self._get_trimmed_fields( + self._get_read_data_fields(), self._get_all_data_fields() + ) partition_names = self.table.partition_keys - num_record_batch_cols = len(read_data_fields) - mapping = [0] * (len(fields_to_map) + 1) + mapping = [0] * (len(trimmed_fields) + 1) + p_count = 0 - for i, field in enumerate(fields_to_map): + for i, field in enumerate(trimmed_fields): if field.name in partition_names: partition_index = partition_names.index(field.name) 
mapping[i] = -(partition_index + 1) - elif field.name in actual_read_field_names: - mapping[i] = actual_read_field_names[field.name] + 1 + p_count += 1 else: - mapping[i] = num_record_batch_cols + 1 + mapping[i] = (i - p_count) + 1 return mapping @@ -512,12 +412,8 @@ def _get_all_data_fields(self): class MergeFileSplitRead(SplitRead): - def create_index_mapping(self, file=None, read_file_fields=None, read_fields=None, is_blob_file=False): - return None - def kv_reader_supplier(self, file: DataFileMeta, dv_factory: Optional[Callable] = None) -> RecordReader: - merge_read_fields = [f.name for f in self._get_read_data_fields()] - file_batch_reader = self.file_reader_supplier(file, True, merge_read_fields, False) + file_batch_reader = self.file_reader_supplier(file, True, self._get_final_read_data_fields(), False) dv = dv_factory() if dv_factory else None if dv: return ApplyDeletionVectorReader( @@ -587,9 +483,7 @@ def create_reader(self) -> RecordReader: if len(need_merge_files) == 1 or not self.read_fields: # No need to merge fields, just create a single file reader suppliers.append( - lambda f=need_merge_files[0]: self._create_file_reader( - f, self._get_final_read_data_fields(), use_requested_field_names=False - ) + lambda f=need_merge_files[0]: self._create_file_reader(f, self._get_final_read_data_fields()) ) else: suppliers.append( @@ -653,14 +547,6 @@ def _create_union_reader(self, need_merge_files: List[DataFileMeta]) -> RecordRe # Split field bunches fields_files = self._split_field_bunches(need_merge_files) - def _bunch_sort_key(bunch: FieldBunch) -> tuple: - first_file = bunch.files()[0] - max_seq = max(f.max_sequence_number for f in bunch.files()) - is_partial = 1 if (first_file.write_cols and len(first_file.write_cols) > 0) else 0 - return (max_seq, is_partial) - - fields_files = sorted(fields_files, key=_bunch_sort_key, reverse=True) - # Validate row counts and first row IDs row_count = fields_files[0].row_count() first_row_id = 
fields_files[0].files()[0].first_row_id @@ -678,26 +564,10 @@ def _bunch_sort_key(bunch: FieldBunch) -> tuple: file_record_readers = [None] * len(fields_files) read_field_index = [field.id for field in all_read_fields] - # Initialize offsets and per-bunch read_fields (built in two passes) + # Initialize offsets row_offsets = [-1] * len(all_read_fields) field_offsets = [-1] * len(all_read_fields) - read_fields_per_bunch = [[] for _ in range(len(fields_files))] - # Pass 1: Assign from partial bunches (write_cols) by name first. This ensures columns - for i, bunch in enumerate(fields_files): - first_file = bunch.files()[0] - if not (first_file.write_cols and len(first_file.write_cols) > 0): - continue - for j, field in enumerate(all_read_fields): - if row_offsets[j] == -1 and field.name in first_file.write_cols: - # Do not assign non-blob fields to a blob bunch (blob file only has blob column) - if DataFileMeta.is_blob_file(first_file.file_name) and field.name != first_file.write_cols[0]: - continue - row_offsets[j] = i - field_offsets[j] = len(read_fields_per_bunch[i]) - read_fields_per_bunch[i].append(field) - - # Pass 2: Assign remaining fields by field id (full-schema base and system fields) for i, bunch in enumerate(fields_files): first_file = bunch.files()[0] @@ -708,13 +578,10 @@ def _bunch_sort_key(bunch: FieldBunch) -> tuple: elif first_file.write_cols: field_ids = self._get_field_ids_from_write_cols(first_file.write_cols) else: - schema = self.table.schema_manager.get_schema(first_file.schema_id) - schema_fields = ( - SpecialFields.row_type_with_row_tracking(schema.fields) - if self.row_tracking_enabled else schema.fields - ) - field_ids = [field.id for field in schema_fields] - read_fields = list(read_fields_per_bunch[i]) + # For regular files, get all field IDs from the schema + field_ids = [field.id for field in self.table.fields] + + read_fields = [] for j, read_field_id in enumerate(read_field_index): for field_id in field_ids: if read_field_id == 
field_id: @@ -723,49 +590,25 @@ def _bunch_sort_key(bunch: FieldBunch) -> tuple: field_offsets[j] = len(read_fields) read_fields.append(all_read_fields[j]) break - read_fields_per_bunch[i] = read_fields - - self._assign_remaining_fields_by_write_cols( - all_read_fields, row_offsets, field_offsets, read_fields_per_bunch, fields_files - ) - use_requested_field_names = self._use_requested_field_names_for_merge(fields_files) - table_field_names_set = {f.name for f in self.table.fields} - for i, bunch in enumerate(fields_files): - read_fields = list(read_fields_per_bunch[i]) if not read_fields: file_record_readers[i] = None else: - if not DataFileMeta.is_blob_file(bunch.files()[0].file_name): - schema = self.table.schema_manager.get_schema(bunch.files()[0].schema_id) - schema_fields = ( - SpecialFields.row_type_with_row_tracking(schema.fields) - if self.row_tracking_enabled else schema.fields - ) - read_field_names_set = {f.name for f in read_fields} - for f in schema_fields: - if f.name in table_field_names_set and f.name not in read_field_names_set: - read_fields.append(f) - read_field_names_set.add(f.name) read_field_names = self._remove_partition_fields(read_fields) table_fields = self.read_fields self.read_fields = read_fields # create reader based on read_fields batch_size = self.table.options.read_batch_size() # Create reader for this bunch if len(bunch.files()) == 1: - suppliers = [ - partial(self._create_file_reader, file=bunch.files()[0], - read_fields=read_field_names, - use_requested_field_names=use_requested_field_names) - ] + suppliers = [lambda r=self._create_file_reader( + bunch.files()[0], read_field_names + ): r] file_record_readers[i] = MergeAllBatchReader(suppliers, batch_size=batch_size) else: # Create concatenated reader for multiple files suppliers = [ partial(self._create_file_reader, file=file, - read_fields=read_field_names, - use_requested_field_names=use_requested_field_names) - for file in bunch.files() + read_fields=read_field_names) for 
file in bunch.files() ] file_record_readers[i] = MergeAllBatchReader(suppliers, batch_size=batch_size) self.read_fields = table_fields @@ -779,61 +622,20 @@ def _bunch_sort_key(bunch: FieldBunch) -> tuple: output_schema = PyarrowFieldParser.from_paimon_schema(all_read_fields) return DataEvolutionMergeReader(row_offsets, field_offsets, file_record_readers, schema=output_schema) - def _assign_remaining_fields_by_write_cols( - self, - all_read_fields: List[DataField], - row_offsets: List[int], - field_offsets: List[int], - read_fields_per_bunch: List[List[DataField]], - fields_files: List[FieldBunch], - ) -> None: - """Assign any still-unassigned table field to a bunch that has it in write_cols (by name).""" - table_field_names = {f.name for f in self.table.fields} - for i, field in enumerate(all_read_fields): - if row_offsets[i] != -1 or field.name not in table_field_names: - continue - for bi, bunch in enumerate(fields_files): - first_file = bunch.files()[0] - if not first_file.write_cols or field.name not in first_file.write_cols: - continue - if DataFileMeta.is_blob_file(first_file.file_name) and field.name != first_file.write_cols[0]: - continue - row_offsets[i] = bi - field_offsets[i] = len(read_fields_per_bunch[bi]) - read_fields_per_bunch[bi].append(field) - break - - def _use_requested_field_names_for_merge(self, fields_files: List[FieldBunch]) -> bool: - """True when non-blob bunches have different write_cols, so output column order must be unified.""" - write_cols_tuples = [ - tuple(f.files()[0].write_cols or ()) - for f in fields_files - if not DataFileMeta.is_blob_file(f.files()[0].file_name) - ] - all_same = len(set(write_cols_tuples)) <= 1 if write_cols_tuples else True - return not all_same - - def _create_file_reader(self, file: DataFileMeta, read_fields: [str], - use_requested_field_names: bool = True) -> Optional[RecordReader]: + def _create_file_reader(self, file: DataFileMeta, read_fields: [str]) -> Optional[RecordReader]: """Create a file reader 
for a single file.""" def create_record_reader(): return self.file_reader_supplier( file=file, for_merge_read=False, read_fields=read_fields, - row_tracking_enabled=True, - use_requested_field_names=use_requested_field_names) - - base = create_record_reader() + row_tracking_enabled=True) if self.row_ranges is None: - return base - file_range = file.row_id_range() - if file_range is None: - return base - row_ranges = Range.and_(self.row_ranges, [file_range]) + return create_record_reader() + row_ranges = Range.and_(self.row_ranges, [file.row_id_range()]) if len(row_ranges) == 0: return EmptyRecordBatchReader() - return RowIdFilterRecordBatchReader(base, file.first_row_id, row_ranges) + return RowIdFilterRecordBatchReader(create_record_reader(), file.first_row_id, row_ranges) def _split_field_bunches(self, need_merge_files: List[DataFileMeta]) -> List[FieldBunch]: """Split files into field bunches.""" From a320c38ef9e6ac382fe760e4650a12e1c73f5985 Mon Sep 17 00:00:00 2001 From: xiaohongbo Date: Sat, 28 Feb 2026 11:37:19 +0800 Subject: [PATCH 06/20] support read after data evolution updating by shard --- .../read/reader/concat_batch_reader.py | 3 +- .../read/reader/data_file_batch_reader.py | 8 +-- .../read/reader/format_pyarrow_reader.py | 20 +++++-- paimon-python/pypaimon/read/split_read.py | 59 +++++++++++++------ 4 files changed, 62 insertions(+), 28 deletions(-) diff --git a/paimon-python/pypaimon/read/reader/concat_batch_reader.py b/paimon-python/pypaimon/read/reader/concat_batch_reader.py index 4318f883eb2e..71391e57ba8b 100644 --- a/paimon-python/pypaimon/read/reader/concat_batch_reader.py +++ b/paimon-python/pypaimon/read/reader/concat_batch_reader.py @@ -206,9 +206,8 @@ def read_arrow_batch(self) -> Optional[RecordBatch]: columns = [] for i in range(len(self.row_offsets)): batch_index = self.row_offsets[i] - field_index = self.field_offsets[i] if batch_index >= 0 and batches[batch_index] is not None: - 
columns.append(batches[batch_index].column(field_index).slice(0, min_rows)) + columns.append(batches[batch_index].column(i).slice(0, min_rows)) else: columns.append(pa.nulls(min_rows, type=self.schema.field(i).type)) diff --git a/paimon-python/pypaimon/read/reader/data_file_batch_reader.py b/paimon-python/pypaimon/read/reader/data_file_batch_reader.py index 7f2e1c61e1f4..b172ad378051 100644 --- a/paimon-python/pypaimon/read/reader/data_file_batch_reader.py +++ b/paimon-python/pypaimon/read/reader/data_file_batch_reader.py @@ -221,10 +221,10 @@ def _assign_row_tracking(self, record_batch: RecordBatch) -> RecordBatch: names = record_batch.schema.names table = None for i, name in enumerate(names): - field = pa.field( - name, arrays[i].type, - nullable=record_batch.schema.field(name).nullable - ) + nullable = record_batch.schema.field(name).nullable + if SpecialFields.is_system_field(name): + nullable = False + field = pa.field(name, arrays[i].type, nullable=nullable) if table is None: table = pa.table({name: arrays[i]}, schema=pa.schema([field])) else: diff --git a/paimon-python/pypaimon/read/reader/format_pyarrow_reader.py b/paimon-python/pypaimon/read/reader/format_pyarrow_reader.py index dd5330227ded..c06b607b3e91 100644 --- a/paimon-python/pypaimon/read/reader/format_pyarrow_reader.py +++ b/paimon-python/pypaimon/read/reader/format_pyarrow_reader.py @@ -33,10 +33,12 @@ class FormatPyArrowReader(RecordBatchReader): """ def __init__(self, file_io: FileIO, file_format: str, file_path: str, read_fields: List[str], - push_down_predicate: Any, batch_size: int = 1024): + push_down_predicate: Any, batch_size: int = 1024, + output_schema: Optional[pa.Schema] = None): file_path_for_pyarrow = file_io.to_filesystem_path(file_path) self.dataset = ds.dataset(file_path_for_pyarrow, format=file_format, filesystem=file_io.filesystem) self.read_fields = read_fields + self.output_schema = output_schema # Identify which fields exist in the file and which are missing file_schema_names 
= set(self.dataset.schema.names) @@ -57,8 +59,17 @@ def read_arrow_batch(self) -> Optional[RecordBatch]: if not self.missing_fields: return batch - # Create columns for missing fields with null values - missing_columns = [pa.nulls(batch.num_rows, type=pa.null()) for _ in self.missing_fields] + def _type_for_missing(name: str) -> pa.DataType: + if self.output_schema is not None: + idx = self.output_schema.get_field_index(name) + if idx >= 0: + return self.output_schema.field(idx).type + return pa.null() + + missing_columns = [ + pa.nulls(batch.num_rows, type=_type_for_missing(name)) + for name in self.missing_fields + ] # Reconstruct the batch with all fields in the correct order all_columns = [] @@ -72,8 +83,9 @@ def read_arrow_batch(self) -> Optional[RecordBatch]: else: # Get the column from missing fields column_idx = self.missing_fields.index(field_name) + col_type = _type_for_missing(field_name) all_columns.append(missing_columns[column_idx]) - out_fields.append(pa.field(field_name, pa.null(), nullable=True)) + out_fields.append(pa.field(field_name, col_type, nullable=True)) # Create a new RecordBatch with all columns return pa.RecordBatch.from_arrays(all_columns, schema=pa.schema(out_fields)) diff --git a/paimon-python/pypaimon/read/split_read.py b/paimon-python/pypaimon/read/split_read.py index d76a71682b17..de69780e8c86 100644 --- a/paimon-python/pypaimon/read/split_read.py +++ b/paimon-python/pypaimon/read/split_read.py @@ -114,13 +114,23 @@ def create_reader(self) -> RecordReader: """Create a record reader for the given split.""" def file_reader_supplier(self, file: DataFileMeta, for_merge_read: bool, - read_fields: List[str], row_tracking_enabled: bool) -> RecordBatchReader: + read_fields: List[str], row_tracking_enabled: bool, + merge_output_fields: Optional[List[DataField]] = None) -> RecordBatchReader: (read_file_fields, read_arrow_predicate) = self._get_fields_and_predicate(file.schema_id, read_fields) # Use external_path if available, otherwise use 
file_path file_path = file.external_path if file.external_path else file.file_path _, extension = os.path.splitext(file_path) file_format = extension[1:] + is_blob_file = file_format == CoreOptions.FILE_FORMAT_BLOB + + if merge_output_fields is not None and not is_blob_file: + partition_keys = set(self.table.partition_keys or []) + columns_for_format = [f.name for f in merge_output_fields if f.name not in partition_keys] + output_schema_pa = PyarrowFieldParser.from_paimon_schema(merge_output_fields) + else: + columns_for_format = read_file_fields + output_schema_pa = None batch_size = self.table.options.read_batch_size() @@ -137,28 +147,35 @@ def file_reader_supplier(self, file: DataFileMeta, for_merge_read: bool, format_reader = FormatLanceReader(self.table.file_io, file_path, read_file_fields, read_arrow_predicate, batch_size=batch_size) elif file_format == CoreOptions.FILE_FORMAT_PARQUET or file_format == CoreOptions.FILE_FORMAT_ORC: - format_reader = FormatPyArrowReader(self.table.file_io, file_format, file_path, - read_file_fields, read_arrow_predicate, batch_size=batch_size) + format_reader = FormatPyArrowReader( + self.table.file_io, file_format, file_path, + columns_for_format, read_arrow_predicate, batch_size=batch_size, + output_schema=output_schema_pa + ) else: raise ValueError(f"Unexpected file format: {file_format}") blob_as_descriptor = CoreOptions.blob_as_descriptor(self.table.options) blob_descriptor_fields = CoreOptions.blob_descriptor_fields(self.table.options) - index_mapping = self.create_index_mapping() + index_mapping = None if merge_output_fields is not None else self.create_index_mapping() partition_info = self._create_partition_info() - system_fields = SpecialFields.find_system_fields(self.read_fields) - table_schema_fields = ( - SpecialFields.row_type_with_row_tracking(self.table.table_schema.fields) - if row_tracking_enabled else self.table.table_schema.fields + output_fields = ( + merge_output_fields + if merge_output_fields is not None 
+ else ( + SpecialFields.row_type_with_row_tracking(self.table.table_schema.fields) + if row_tracking_enabled else self.table.table_schema.fields + ) ) + system_fields = SpecialFields.find_system_fields(output_fields) if for_merge_read: return DataFileBatchReader( format_reader, index_mapping, partition_info, self.trimmed_primary_key, - table_schema_fields, + output_fields, file.max_sequence_number, file.first_row_id, row_tracking_enabled, @@ -172,7 +189,7 @@ def file_reader_supplier(self, file: DataFileMeta, for_merge_read: bool, index_mapping, partition_info, None, - table_schema_fields, + output_fields, file.max_sequence_number, file.first_row_id, row_tracking_enabled, @@ -481,9 +498,10 @@ def create_reader(self) -> RecordReader: for need_merge_files in split_by_row_id: if len(need_merge_files) == 1 or not self.read_fields: - # No need to merge fields, just create a single file reader suppliers.append( - lambda f=need_merge_files[0]: self._create_file_reader(f, self._get_final_read_data_fields()) + lambda f=need_merge_files[0], mof=self.read_fields: self._create_file_reader( + f, self._get_final_read_data_fields(), merge_output_fields=mof + ) ) else: suppliers.append( @@ -598,17 +616,20 @@ def _create_union_reader(self, need_merge_files: List[DataFileMeta]) -> RecordRe table_fields = self.read_fields self.read_fields = read_fields # create reader based on read_fields batch_size = self.table.options.read_batch_size() + merge_output_fields = all_read_fields # Create reader for this bunch if len(bunch.files()) == 1: - suppliers = [lambda r=self._create_file_reader( - bunch.files()[0], read_field_names - ): r] + suppliers = [ + lambda f=bunch.files()[0], rn=read_field_names, mof=merge_output_fields: self._create_file_reader(f, rn, merge_output_fields=mof) + ] file_record_readers[i] = MergeAllBatchReader(suppliers, batch_size=batch_size) else: # Create concatenated reader for multiple files suppliers = [ partial(self._create_file_reader, file=file, - 
read_fields=read_field_names) for file in bunch.files() + read_fields=read_field_names, + merge_output_fields=merge_output_fields) + for file in bunch.files() ] file_record_readers[i] = MergeAllBatchReader(suppliers, batch_size=batch_size) self.read_fields = table_fields @@ -622,14 +643,16 @@ def _create_union_reader(self, need_merge_files: List[DataFileMeta]) -> RecordRe output_schema = PyarrowFieldParser.from_paimon_schema(all_read_fields) return DataEvolutionMergeReader(row_offsets, field_offsets, file_record_readers, schema=output_schema) - def _create_file_reader(self, file: DataFileMeta, read_fields: [str]) -> Optional[RecordReader]: + def _create_file_reader(self, file: DataFileMeta, read_fields: [str], + merge_output_fields: Optional[List[DataField]] = None) -> Optional[RecordReader]: """Create a file reader for a single file.""" def create_record_reader(): return self.file_reader_supplier( file=file, for_merge_read=False, read_fields=read_fields, - row_tracking_enabled=True) + row_tracking_enabled=True, + merge_output_fields=merge_output_fields) if self.row_ranges is None: return create_record_reader() row_ranges = Range.and_(self.row_ranges, [file.row_id_range()]) From 55b82fab34773b3c6ca85191683567397ac63c0c Mon Sep 17 00:00:00 2001 From: xiaohongbo Date: Sat, 28 Feb 2026 11:51:25 +0800 Subject: [PATCH 07/20] fix system field not nullable issue --- .../pypaimon/read/reader/data_file_batch_reader.py | 8 ++++---- .../pypaimon/read/reader/format_pyarrow_reader.py | 4 +++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/paimon-python/pypaimon/read/reader/data_file_batch_reader.py b/paimon-python/pypaimon/read/reader/data_file_batch_reader.py index b172ad378051..7f2e1c61e1f4 100644 --- a/paimon-python/pypaimon/read/reader/data_file_batch_reader.py +++ b/paimon-python/pypaimon/read/reader/data_file_batch_reader.py @@ -221,10 +221,10 @@ def _assign_row_tracking(self, record_batch: RecordBatch) -> RecordBatch: names = record_batch.schema.names 
table = None for i, name in enumerate(names): - nullable = record_batch.schema.field(name).nullable - if SpecialFields.is_system_field(name): - nullable = False - field = pa.field(name, arrays[i].type, nullable=nullable) + field = pa.field( + name, arrays[i].type, + nullable=record_batch.schema.field(name).nullable + ) if table is None: table = pa.table({name: arrays[i]}, schema=pa.schema([field])) else: diff --git a/paimon-python/pypaimon/read/reader/format_pyarrow_reader.py b/paimon-python/pypaimon/read/reader/format_pyarrow_reader.py index c06b607b3e91..6ac7ef9b1403 100644 --- a/paimon-python/pypaimon/read/reader/format_pyarrow_reader.py +++ b/paimon-python/pypaimon/read/reader/format_pyarrow_reader.py @@ -24,6 +24,7 @@ from pypaimon.common.file_io import FileIO from pypaimon.read.reader.iface.record_batch_reader import RecordBatchReader +from pypaimon.table.special_fields import SpecialFields class FormatPyArrowReader(RecordBatchReader): @@ -85,7 +86,8 @@ def _type_for_missing(name: str) -> pa.DataType: column_idx = self.missing_fields.index(field_name) col_type = _type_for_missing(field_name) all_columns.append(missing_columns[column_idx]) - out_fields.append(pa.field(field_name, col_type, nullable=True)) + nullable = not SpecialFields.is_system_field(field_name) + out_fields.append(pa.field(field_name, col_type, nullable=nullable)) # Create a new RecordBatch with all columns return pa.RecordBatch.from_arrays(all_columns, schema=pa.schema(out_fields)) From f60182b38805ae6e93523e19833e35ee6c44b50d Mon Sep 17 00:00:00 2001 From: xiaohongbo Date: Sat, 28 Feb 2026 14:07:22 +0800 Subject: [PATCH 08/20] clean code --- paimon-python/pypaimon/read/split_read.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/paimon-python/pypaimon/read/split_read.py b/paimon-python/pypaimon/read/split_read.py index de69780e8c86..918ae28a8b56 100644 --- a/paimon-python/pypaimon/read/split_read.py +++ b/paimon-python/pypaimon/read/split_read.py @@ 
-160,22 +160,18 @@ def file_reader_supplier(self, file: DataFileMeta, for_merge_read: bool, index_mapping = None if merge_output_fields is not None else self.create_index_mapping() partition_info = self._create_partition_info() - output_fields = ( - merge_output_fields - if merge_output_fields is not None - else ( - SpecialFields.row_type_with_row_tracking(self.table.table_schema.fields) - if row_tracking_enabled else self.table.table_schema.fields - ) + system_fields = SpecialFields.find_system_fields(self.read_fields) + table_schema_fields = ( + SpecialFields.row_type_with_row_tracking(self.table.table_schema.fields) + if row_tracking_enabled else self.table.table_schema.fields ) - system_fields = SpecialFields.find_system_fields(output_fields) if for_merge_read: return DataFileBatchReader( format_reader, index_mapping, partition_info, self.trimmed_primary_key, - output_fields, + table_schema_fields, file.max_sequence_number, file.first_row_id, row_tracking_enabled, @@ -189,7 +185,7 @@ def file_reader_supplier(self, file: DataFileMeta, for_merge_read: bool, index_mapping, partition_info, None, - output_fields, + table_schema_fields, file.max_sequence_number, file.first_row_id, row_tracking_enabled, @@ -495,7 +491,7 @@ def create_reader(self) -> RecordReader: # Split files by row ID split_by_row_id = self._split_by_row_id(files) - + # No need to merge fields, just create a single file reader for need_merge_files in split_by_row_id: if len(need_merge_files) == 1 or not self.read_fields: suppliers.append( From c737850ada7c83de19774c323a230d4f0abcb7f5 Mon Sep 17 00:00:00 2001 From: xiaohongbo Date: Sat, 28 Feb 2026 14:08:43 +0800 Subject: [PATCH 09/20] clean code --- paimon-python/pypaimon/read/split_read.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/paimon-python/pypaimon/read/split_read.py b/paimon-python/pypaimon/read/split_read.py index 918ae28a8b56..7a9eeaa855a0 100644 --- a/paimon-python/pypaimon/read/split_read.py +++ 
b/paimon-python/pypaimon/read/split_read.py @@ -616,7 +616,9 @@ def _create_union_reader(self, need_merge_files: List[DataFileMeta]) -> RecordRe # Create reader for this bunch if len(bunch.files()) == 1: suppliers = [ - lambda f=bunch.files()[0], rn=read_field_names, mof=merge_output_fields: self._create_file_reader(f, rn, merge_output_fields=mof) + lambda f=bunch.files()[0], rn=read_field_names, mof=merge_output_fields: ( + self._create_file_reader(f, rn, merge_output_fields=mof) + ) ] file_record_readers[i] = MergeAllBatchReader(suppliers, batch_size=batch_size) else: From 347b16d89f27e5066450174b7995f30449e1d464 Mon Sep 17 00:00:00 2001 From: xiaohongbo Date: Sat, 28 Feb 2026 14:10:55 +0800 Subject: [PATCH 10/20] clean code --- paimon-python/pypaimon/read/split_read.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/paimon-python/pypaimon/read/split_read.py b/paimon-python/pypaimon/read/split_read.py index 7a9eeaa855a0..08d7483fe49c 100644 --- a/paimon-python/pypaimon/read/split_read.py +++ b/paimon-python/pypaimon/read/split_read.py @@ -491,9 +491,10 @@ def create_reader(self) -> RecordReader: # Split files by row ID split_by_row_id = self._split_by_row_id(files) - # No need to merge fields, just create a single file reader + for need_merge_files in split_by_row_id: if len(need_merge_files) == 1 or not self.read_fields: + # No need to merge fields, just create a single file reader suppliers.append( lambda f=need_merge_files[0], mof=self.read_fields: self._create_file_reader( f, self._get_final_read_data_fields(), merge_output_fields=mof From 36618f9441fa2472aeb235469b882d1643d6d3e4 Mon Sep 17 00:00:00 2001 From: xiaohongbo Date: Sat, 28 Feb 2026 14:55:02 +0800 Subject: [PATCH 11/20] fix index_mapping when blob --- paimon-python/pypaimon/read/split_read.py | 6 +- .../pypaimon/tests/data_evolution_test.py | 59 +++++++++++++++++++ 2 files changed, 64 insertions(+), 1 deletion(-) diff --git a/paimon-python/pypaimon/read/split_read.py 
b/paimon-python/pypaimon/read/split_read.py index 08d7483fe49c..b6351ae0b831 100644 --- a/paimon-python/pypaimon/read/split_read.py +++ b/paimon-python/pypaimon/read/split_read.py @@ -158,7 +158,11 @@ def file_reader_supplier(self, file: DataFileMeta, for_merge_read: bool, blob_as_descriptor = CoreOptions.blob_as_descriptor(self.table.options) blob_descriptor_fields = CoreOptions.blob_descriptor_fields(self.table.options) - index_mapping = None if merge_output_fields is not None else self.create_index_mapping() + index_mapping = ( + None + if (merge_output_fields is not None and not is_blob_file) + else self.create_index_mapping() + ) partition_info = self._create_partition_info() system_fields = SpecialFields.find_system_fields(self.read_fields) table_schema_fields = ( diff --git a/paimon-python/pypaimon/tests/data_evolution_test.py b/paimon-python/pypaimon/tests/data_evolution_test.py index c6b994cf571e..afb12cd94876 100644 --- a/paimon-python/pypaimon/tests/data_evolution_test.py +++ b/paimon-python/pypaimon/tests/data_evolution_test.py @@ -1272,6 +1272,65 @@ def test_read_row_tracking_metadata(self): self.assertEqual(actual_data, expect_data) self.assertEqual(len(actual_data.schema), len(expect_data.schema), 'Read output column count must match schema') + def test_with_blob(self): + from pypaimon.table.row.blob import BlobDescriptor + + pa_schema = pa.schema([ + ('id', pa.int32()), + ('picture', pa.large_binary()), + ]) + schema = Schema.from_pyarrow_schema( + pa_schema, + options={ + 'row-tracking.enabled': 'true', + 'data-evolution.enabled': 'true', + 'blob-as-descriptor': 'true', + }, + ) + self.catalog.create_table('default.test_with_blob', schema, False) + table = self.catalog.get_table('default.test_with_blob') + + blob_path = os.path.join(self.tempdir, 'blob_ev') + with open(blob_path, 'wb') as f: + f.write(b'x') + descriptor = BlobDescriptor(blob_path, 0, 1) + + wb = table.new_batch_write_builder() + tw = wb.new_write() + tc = wb.new_commit() + 
tw.write_arrow(pa.Table.from_pydict( + {'id': [1], 'picture': [descriptor.serialize()]}, + schema=pa_schema, + )) + cmts = tw.prepare_commit() + if cmts and cmts[0].new_files: + for nf in cmts[0].new_files: + nf.first_row_id = 0 + tc.commit(cmts) + tw.close() + tc.close() + + tw = wb.new_write() + tc = wb.new_commit() + tw.write_arrow(pa.Table.from_pydict( + {'id': [2], 'picture': [descriptor.serialize()]}, + schema=pa_schema, + )) + cmts = tw.prepare_commit() + if cmts and cmts[0].new_files: + for nf in cmts[0].new_files: + nf.first_row_id = 1 + tc.commit(cmts) + tw.close() + tc.close() + + rb = table.new_read_builder() + rb.with_projection(['id', '_ROW_ID', 'picture', '_SEQUENCE_NUMBER']) + actual = rb.new_read().to_arrow(rb.new_scan().plan().splits()) + self.assertEqual(actual.num_rows, 2) + self.assertEqual(actual.column('id').to_pylist(), [1, 2]) + self.assertEqual(actual.column('_ROW_ID').to_pylist(), [0, 1]) + def test_from_arrays_without_schema(self): schema = pa.schema([ ('f0', pa.int8()), From 76d55b8882430dccba337ad4ebebb77534798291 Mon Sep 17 00:00:00 2001 From: xiaohongbo Date: Sat, 28 Feb 2026 15:52:10 +0800 Subject: [PATCH 12/20] fix blob_table_test --- .../read/reader/concat_batch_reader.py | 6 ++- .../read/reader/data_file_batch_reader.py | 12 ++++-- paimon-python/pypaimon/read/split_read.py | 37 +++++++++++++------ paimon-python/pypaimon/read/table_read.py | 3 -- 4 files changed, 40 insertions(+), 18 deletions(-) diff --git a/paimon-python/pypaimon/read/reader/concat_batch_reader.py b/paimon-python/pypaimon/read/reader/concat_batch_reader.py index 71391e57ba8b..4003f67c1597 100644 --- a/paimon-python/pypaimon/read/reader/concat_batch_reader.py +++ b/paimon-python/pypaimon/read/reader/concat_batch_reader.py @@ -207,7 +207,11 @@ def read_arrow_batch(self) -> Optional[RecordBatch]: for i in range(len(self.row_offsets)): batch_index = self.row_offsets[i] if batch_index >= 0 and batches[batch_index] is not None: - 
columns.append(batches[batch_index].column(i).slice(0, min_rows)) + batch = batches[batch_index] + if i < batch.num_columns: + columns.append(batch.column(i).slice(0, min_rows)) + else: + columns.append(pa.nulls(min_rows, type=self.schema.field(i).type)) else: columns.append(pa.nulls(min_rows, type=self.schema.field(i).type)) diff --git a/paimon-python/pypaimon/read/reader/data_file_batch_reader.py b/paimon-python/pypaimon/read/reader/data_file_batch_reader.py index 7f2e1c61e1f4..87f025ee7956 100644 --- a/paimon-python/pypaimon/read/reader/data_file_batch_reader.py +++ b/paimon-python/pypaimon/read/reader/data_file_batch_reader.py @@ -43,7 +43,8 @@ def __init__(self, format_reader: RecordBatchReader, index_mapping: List[int], p system_fields: dict, blob_as_descriptor: bool = False, blob_descriptor_fields: Optional[set] = None, - file_io: Optional[FileIO] = None): + file_io: Optional[FileIO] = None, + output_field_names: Optional[List[str]] = None): self.format_reader = format_reader self.index_mapping = index_mapping self.partition_info = partition_info @@ -56,6 +57,7 @@ def __init__(self, format_reader: RecordBatchReader, index_mapping: List[int], p self.blob_as_descriptor = blob_as_descriptor self.blob_descriptor_fields = blob_descriptor_fields or set() self.file_io = file_io + self.output_field_names = output_field_names self.blob_field_names = { field.name for field in fields @@ -110,7 +112,9 @@ def read_arrow_batch(self, start_idx=None, end_idx=None) -> Optional[RecordBatch else: null_array = pa.nulls(num_rows) mapped_arrays.append(null_array) - mapped_names.append(f"null_col_{i}") + name = (self.output_field_names[i] if self.output_field_names and i < len(self.output_field_names) + else f"null_col_{i}") + mapped_names.append(name) if self.system_primary_key: for i in range(len(self.system_primary_key)): @@ -155,7 +159,9 @@ def _convert_descriptor_stored_blob_columns(self, record_batch: RecordBatch) -> field_idx = 
record_batch.schema.get_field_index(field_name) values = record_batch.column(field_idx).to_pylist() - if self.blob_as_descriptor: + if field_name in self.descriptor_blob_fields: + converted = [self._blob_cell_to_data(v) for v in values] + elif self.blob_as_descriptor: converted = [self._normalize_blob_cell(v) for v in values] else: converted = [self._blob_cell_to_data(v) for v in values] diff --git a/paimon-python/pypaimon/read/split_read.py b/paimon-python/pypaimon/read/split_read.py index b6351ae0b831..6b83df2af7a7 100644 --- a/paimon-python/pypaimon/read/split_read.py +++ b/paimon-python/pypaimon/read/split_read.py @@ -158,17 +158,30 @@ def file_reader_supplier(self, file: DataFileMeta, for_merge_read: bool, blob_as_descriptor = CoreOptions.blob_as_descriptor(self.table.options) blob_descriptor_fields = CoreOptions.blob_descriptor_fields(self.table.options) - index_mapping = ( - None - if (merge_output_fields is not None and not is_blob_file) - else self.create_index_mapping() - ) + if merge_output_fields is not None and is_blob_file: + full_names = [f.name for f in merge_output_fields] + index_mapping = [ + read_file_fields.index(name) if name in read_file_fields else NULL_FIELD_INDEX + for name in full_names + ] + output_field_names = full_names + table_schema_fields = merge_output_fields + elif merge_output_fields is not None and not is_blob_file: + index_mapping = None + output_field_names = None + table_schema_fields = ( + SpecialFields.row_type_with_row_tracking(self.table.table_schema.fields) + if row_tracking_enabled else self.table.table_schema.fields + ) + else: + index_mapping = self.create_index_mapping() + output_field_names = None + table_schema_fields = ( + SpecialFields.row_type_with_row_tracking(self.table.table_schema.fields) + if row_tracking_enabled else self.table.table_schema.fields + ) partition_info = self._create_partition_info() system_fields = SpecialFields.find_system_fields(self.read_fields) - table_schema_fields = ( - 
SpecialFields.row_type_with_row_tracking(self.table.table_schema.fields) - if row_tracking_enabled else self.table.table_schema.fields - ) if for_merge_read: return DataFileBatchReader( format_reader, @@ -182,7 +195,8 @@ def file_reader_supplier(self, file: DataFileMeta, for_merge_read: bool, system_fields, blob_as_descriptor=blob_as_descriptor, blob_descriptor_fields=blob_descriptor_fields, - file_io=self.table.file_io) + file_io=self.table.file_io, + output_field_names=output_field_names) else: return DataFileBatchReader( format_reader, @@ -196,7 +210,8 @@ def file_reader_supplier(self, file: DataFileMeta, for_merge_read: bool, system_fields, blob_as_descriptor=blob_as_descriptor, blob_descriptor_fields=blob_descriptor_fields, - file_io=self.table.file_io) + file_io=self.table.file_io, + output_field_names=output_field_names) def _get_fields_and_predicate(self, schema_id: int, read_fields): key = (schema_id, tuple(read_fields)) diff --git a/paimon-python/pypaimon/read/table_read.py b/paimon-python/pypaimon/read/table_read.py index 5206147f80a8..79984901a812 100644 --- a/paimon-python/pypaimon/read/table_read.py +++ b/paimon-python/pypaimon/read/table_read.py @@ -92,9 +92,6 @@ def to_arrow(self, splits: List[Split]) -> Optional[pyarrow.Table]: return self._convert_descriptor_stored_fields_for_read(table) def _convert_descriptor_stored_fields_for_read(self, table: pyarrow.Table) -> pyarrow.Table: - if CoreOptions.blob_as_descriptor(self.table.options): - return table - descriptor_fields = CoreOptions.blob_descriptor_fields(self.table.options) if not descriptor_fields: return table From 61b0e4c6c7758c82bbab23b6cbc5bd9a4f7c78c5 Mon Sep 17 00:00:00 2001 From: xiaohongbo Date: Sat, 28 Feb 2026 17:00:50 +0800 Subject: [PATCH 13/20] fix --- .../read/reader/concat_batch_reader.py | 32 ++++++++- .../read/reader/data_file_batch_reader.py | 13 ++-- .../read/reader/format_pyarrow_reader.py | 2 +- paimon-python/pypaimon/read/split_read.py | 68 ++++++++++--------- 4 files 
changed, 72 insertions(+), 43 deletions(-) diff --git a/paimon-python/pypaimon/read/reader/concat_batch_reader.py b/paimon-python/pypaimon/read/reader/concat_batch_reader.py index 4003f67c1597..558fcd51afde 100644 --- a/paimon-python/pypaimon/read/reader/concat_batch_reader.py +++ b/paimon-python/pypaimon/read/reader/concat_batch_reader.py @@ -149,6 +149,8 @@ def __init__( field_offsets: List[int], readers: List[Optional[RecordBatchReader]], schema: pa.Schema, + first_row_id: Optional[int] = None, + max_sequence_number: Optional[int] = None, ): if row_offsets is None: raise ValueError("Row offsets must not be null") @@ -164,6 +166,8 @@ def __init__( self.field_offsets = field_offsets self.readers = readers self.schema = schema + self.first_row_id = first_row_id + self.max_sequence_number = max_sequence_number self._buffers: List[Optional[RecordBatch]] = [None] * len(readers) def read_arrow_batch(self) -> Optional[RecordBatch]: @@ -206,10 +210,11 @@ def read_arrow_batch(self) -> Optional[RecordBatch]: columns = [] for i in range(len(self.row_offsets)): batch_index = self.row_offsets[i] + field_offset = self.field_offsets[i] if batch_index >= 0 and batches[batch_index] is not None: batch = batches[batch_index] - if i < batch.num_columns: - columns.append(batch.column(i).slice(0, min_rows)) + if 0 <= field_offset < batch.num_columns: + columns.append(batch.column(field_offset).slice(0, min_rows)) else: columns.append(pa.nulls(min_rows, type=self.schema.field(i).type)) else: @@ -219,7 +224,28 @@ def read_arrow_batch(self) -> Optional[RecordBatch]: if batches[i] is not None and batches[i].num_rows > min_rows: self._buffers[i] = batches[i].slice(min_rows, batches[i].num_rows - min_rows) - return pa.RecordBatch.from_arrays(columns, schema=self.schema) + batch = pa.RecordBatch.from_arrays(columns, schema=self.schema) + if self.first_row_id is not None and self.max_sequence_number is not None: + batch = self._fill_row_tracking(batch) + return batch + + def 
_fill_row_tracking(self, batch: RecordBatch) -> RecordBatch: + nrows = batch.num_rows + arrays = list(batch.columns) + filled = False + for i in range(len(batch.schema)): + name = batch.schema.field(i).name + if name == "_ROW_ID": + arrays[i] = pa.array( + range(self.first_row_id, self.first_row_id + nrows), type=pa.int64() + ) + filled = True + elif name == "_SEQUENCE_NUMBER": + arrays[i] = pa.repeat(self.max_sequence_number, nrows) + filled = True + if not filled: + return batch + return pa.RecordBatch.from_arrays(arrays, schema=batch.schema) def close(self) -> None: try: diff --git a/paimon-python/pypaimon/read/reader/data_file_batch_reader.py b/paimon-python/pypaimon/read/reader/data_file_batch_reader.py index 87f025ee7956..7b40eb92f83f 100644 --- a/paimon-python/pypaimon/read/reader/data_file_batch_reader.py +++ b/paimon-python/pypaimon/read/reader/data_file_batch_reader.py @@ -211,18 +211,19 @@ def _deserialize_descriptor_or_none(raw: bytes): def _assign_row_tracking(self, record_batch: RecordBatch) -> RecordBatch: """Assign row tracking meta fields (_ROW_ID and _SEQUENCE_NUMBER).""" arrays = list(record_batch.columns) + num_rows = record_batch.num_rows - # Handle _ROW_ID field + # Handle _ROW_ID field (only if batch has that column index) if SpecialFields.ROW_ID.name in self.system_fields.keys(): idx = self.system_fields[SpecialFields.ROW_ID.name] - # Create a new array that fills with computed row IDs - arrays[idx] = pa.array(range(self.first_row_id, self.first_row_id + record_batch.num_rows), type=pa.int64()) + if idx < len(arrays): + arrays[idx] = pa.array(range(self.first_row_id, self.first_row_id + num_rows), type=pa.int64()) - # Handle _SEQUENCE_NUMBER field + # Handle _SEQUENCE_NUMBER field (only if batch has that column index) if SpecialFields.SEQUENCE_NUMBER.name in self.system_fields.keys(): idx = self.system_fields[SpecialFields.SEQUENCE_NUMBER.name] - # Create a new array that fills with max_sequence_number - arrays[idx] = 
pa.repeat(self.max_sequence_number, record_batch.num_rows) + if idx < len(arrays): + arrays[idx] = pa.repeat(self.max_sequence_number, num_rows) names = record_batch.schema.names table = None diff --git a/paimon-python/pypaimon/read/reader/format_pyarrow_reader.py b/paimon-python/pypaimon/read/reader/format_pyarrow_reader.py index 6ac7ef9b1403..bca143c3313f 100644 --- a/paimon-python/pypaimon/read/reader/format_pyarrow_reader.py +++ b/paimon-python/pypaimon/read/reader/format_pyarrow_reader.py @@ -86,7 +86,7 @@ def _type_for_missing(name: str) -> pa.DataType: column_idx = self.missing_fields.index(field_name) col_type = _type_for_missing(field_name) all_columns.append(missing_columns[column_idx]) - nullable = not SpecialFields.is_system_field(field_name) + nullable = True if col_type == pa.null() else not SpecialFields.is_system_field(field_name) out_fields.append(pa.field(field_name, col_type, nullable=nullable)) # Create a new RecordBatch with all columns return pa.RecordBatch.from_arrays(all_columns, schema=pa.schema(out_fields)) diff --git a/paimon-python/pypaimon/read/split_read.py b/paimon-python/pypaimon/read/split_read.py index 6b83df2af7a7..fa9feb619317 100644 --- a/paimon-python/pypaimon/read/split_read.py +++ b/paimon-python/pypaimon/read/split_read.py @@ -124,7 +124,12 @@ def file_reader_supplier(self, file: DataFileMeta, for_merge_read: bool, file_format = extension[1:] is_blob_file = file_format == CoreOptions.FILE_FORMAT_BLOB - if merge_output_fields is not None and not is_blob_file: + # Only when used inside a merge (for_merge_read): each reader returns just its columns. + # Single-file read still expands to full schema so row tracking has enough columns. 
+ if for_merge_read and merge_output_fields is not None and not is_blob_file: + columns_for_format = read_file_fields + output_schema_pa = None + elif merge_output_fields is not None and not is_blob_file: partition_keys = set(self.table.partition_keys or []) columns_for_format = [f.name for f in merge_output_fields if f.name not in partition_keys] output_schema_pa = PyarrowFieldParser.from_paimon_schema(merge_output_fields) @@ -158,30 +163,17 @@ def file_reader_supplier(self, file: DataFileMeta, for_merge_read: bool, blob_as_descriptor = CoreOptions.blob_as_descriptor(self.table.options) blob_descriptor_fields = CoreOptions.blob_descriptor_fields(self.table.options) - if merge_output_fields is not None and is_blob_file: - full_names = [f.name for f in merge_output_fields] - index_mapping = [ - read_file_fields.index(name) if name in read_file_fields else NULL_FIELD_INDEX - for name in full_names - ] - output_field_names = full_names - table_schema_fields = merge_output_fields - elif merge_output_fields is not None and not is_blob_file: - index_mapping = None - output_field_names = None - table_schema_fields = ( - SpecialFields.row_type_with_row_tracking(self.table.table_schema.fields) - if row_tracking_enabled else self.table.table_schema.fields - ) - else: - index_mapping = self.create_index_mapping() - output_field_names = None - table_schema_fields = ( - SpecialFields.row_type_with_row_tracking(self.table.table_schema.fields) - if row_tracking_enabled else self.table.table_schema.fields - ) + index_mapping = ( + None + if (merge_output_fields is not None and not is_blob_file) + else self.create_index_mapping() + ) partition_info = self._create_partition_info() system_fields = SpecialFields.find_system_fields(self.read_fields) + table_schema_fields = ( + SpecialFields.row_type_with_row_tracking(self.table.table_schema.fields) + if row_tracking_enabled else self.table.table_schema.fields + ) if for_merge_read: return DataFileBatchReader( format_reader, @@ -195,8 
+187,7 @@ def file_reader_supplier(self, file: DataFileMeta, for_merge_read: bool, system_fields, blob_as_descriptor=blob_as_descriptor, blob_descriptor_fields=blob_descriptor_fields, - file_io=self.table.file_io, - output_field_names=output_field_names) + file_io=self.table.file_io) else: return DataFileBatchReader( format_reader, @@ -210,8 +201,7 @@ def file_reader_supplier(self, file: DataFileMeta, for_merge_read: bool, system_fields, blob_as_descriptor=blob_as_descriptor, blob_descriptor_fields=blob_descriptor_fields, - file_io=self.table.file_io, - output_field_names=output_field_names) + file_io=self.table.file_io) def _get_fields_and_predicate(self, schema_id: int, read_fields): key = (schema_id, tuple(read_fields)) @@ -617,6 +607,10 @@ def _create_union_reader(self, need_merge_files: List[DataFileMeta]) -> RecordRe read_fields = [] for j, read_field_id in enumerate(read_field_index): + # In merge, _ROW_ID and _SEQUENCE_NUMBER are filled from merge metadata, not from file + if (SpecialFields.ROW_ID.name == all_read_fields[j].name or + SpecialFields.SEQUENCE_NUMBER.name == all_read_fields[j].name): + continue for field_id in field_ids: if read_field_id == field_id: if row_offsets[j] == -1: @@ -637,7 +631,7 @@ def _create_union_reader(self, need_merge_files: List[DataFileMeta]) -> RecordRe if len(bunch.files()) == 1: suppliers = [ lambda f=bunch.files()[0], rn=read_field_names, mof=merge_output_fields: ( - self._create_file_reader(f, rn, merge_output_fields=mof) + self._create_file_reader(f, rn, merge_output_fields=mof, for_merge_read=True) ) ] file_record_readers[i] = MergeAllBatchReader(suppliers, batch_size=batch_size) @@ -646,28 +640,36 @@ def _create_union_reader(self, need_merge_files: List[DataFileMeta]) -> RecordRe suppliers = [ partial(self._create_file_reader, file=file, read_fields=read_field_names, - merge_output_fields=merge_output_fields) + merge_output_fields=merge_output_fields, + for_merge_read=True) for file in bunch.files() ] 
file_record_readers[i] = MergeAllBatchReader(suppliers, batch_size=batch_size) self.read_fields = table_fields - # Validate that all required fields are found + # Validate that all required fields are found (system fields are filled by merge reader) for i, field in enumerate(all_read_fields): if row_offsets[i] == -1: + if field.name in (SpecialFields.ROW_ID.name, SpecialFields.SEQUENCE_NUMBER.name): + continue if not field.type.nullable: raise ValueError(f"Field {field} is not null but can't find any file contains it.") + max_sequence_number = max(f.max_sequence_number for f in need_merge_files) output_schema = PyarrowFieldParser.from_paimon_schema(all_read_fields) - return DataEvolutionMergeReader(row_offsets, field_offsets, file_record_readers, schema=output_schema) + return DataEvolutionMergeReader( + row_offsets, field_offsets, file_record_readers, schema=output_schema, + first_row_id=first_row_id, max_sequence_number=max_sequence_number + ) def _create_file_reader(self, file: DataFileMeta, read_fields: [str], - merge_output_fields: Optional[List[DataField]] = None) -> Optional[RecordReader]: + merge_output_fields: Optional[List[DataField]] = None, + for_merge_read: bool = False) -> Optional[RecordReader]: """Create a file reader for a single file.""" def create_record_reader(): return self.file_reader_supplier( file=file, - for_merge_read=False, + for_merge_read=for_merge_read, read_fields=read_fields, row_tracking_enabled=True, merge_output_fields=merge_output_fields) From cd695af827321f0133cca6a9279a4202f7cc29d8 Mon Sep 17 00:00:00 2001 From: xiaohongbo Date: Sat, 28 Feb 2026 17:24:38 +0800 Subject: [PATCH 14/20] fix --- .../read/reader/concat_batch_reader.py | 27 +------------------ .../read/reader/data_file_batch_reader.py | 24 +++++++++++------ paimon-python/pypaimon/read/split_read.py | 14 +++------- 3 files changed, 20 insertions(+), 45 deletions(-) diff --git a/paimon-python/pypaimon/read/reader/concat_batch_reader.py 
b/paimon-python/pypaimon/read/reader/concat_batch_reader.py index 558fcd51afde..ae5f8f6b9a8f 100644 --- a/paimon-python/pypaimon/read/reader/concat_batch_reader.py +++ b/paimon-python/pypaimon/read/reader/concat_batch_reader.py @@ -149,8 +149,6 @@ def __init__( field_offsets: List[int], readers: List[Optional[RecordBatchReader]], schema: pa.Schema, - first_row_id: Optional[int] = None, - max_sequence_number: Optional[int] = None, ): if row_offsets is None: raise ValueError("Row offsets must not be null") @@ -166,8 +164,6 @@ def __init__( self.field_offsets = field_offsets self.readers = readers self.schema = schema - self.first_row_id = first_row_id - self.max_sequence_number = max_sequence_number self._buffers: List[Optional[RecordBatch]] = [None] * len(readers) def read_arrow_batch(self) -> Optional[RecordBatch]: @@ -224,28 +220,7 @@ def read_arrow_batch(self) -> Optional[RecordBatch]: if batches[i] is not None and batches[i].num_rows > min_rows: self._buffers[i] = batches[i].slice(min_rows, batches[i].num_rows - min_rows) - batch = pa.RecordBatch.from_arrays(columns, schema=self.schema) - if self.first_row_id is not None and self.max_sequence_number is not None: - batch = self._fill_row_tracking(batch) - return batch - - def _fill_row_tracking(self, batch: RecordBatch) -> RecordBatch: - nrows = batch.num_rows - arrays = list(batch.columns) - filled = False - for i in range(len(batch.schema)): - name = batch.schema.field(i).name - if name == "_ROW_ID": - arrays[i] = pa.array( - range(self.first_row_id, self.first_row_id + nrows), type=pa.int64() - ) - filled = True - elif name == "_SEQUENCE_NUMBER": - arrays[i] = pa.repeat(self.max_sequence_number, nrows) - filled = True - if not filled: - return batch - return pa.RecordBatch.from_arrays(arrays, schema=batch.schema) + return pa.RecordBatch.from_arrays(columns, schema=self.schema) def close(self) -> None: try: diff --git a/paimon-python/pypaimon/read/reader/data_file_batch_reader.py 
b/paimon-python/pypaimon/read/reader/data_file_batch_reader.py index 7b40eb92f83f..dbb2ff05c04d 100644 --- a/paimon-python/pypaimon/read/reader/data_file_batch_reader.py +++ b/paimon-python/pypaimon/read/reader/data_file_batch_reader.py @@ -212,17 +212,25 @@ def _assign_row_tracking(self, record_batch: RecordBatch) -> RecordBatch: """Assign row tracking meta fields (_ROW_ID and _SEQUENCE_NUMBER).""" arrays = list(record_batch.columns) num_rows = record_batch.num_rows - - # Handle _ROW_ID field (only if batch has that column index) - if SpecialFields.ROW_ID.name in self.system_fields.keys(): - idx = self.system_fields[SpecialFields.ROW_ID.name] - if idx < len(arrays): + schema_names = record_batch.schema.names + + def _idx(name: str) -> int: + if name not in self.system_fields.keys(): + return -1 + if name in schema_names: + return schema_names.index(name) + return -1 + + # Handle _ROW_ID field + if self.first_row_id is not None and SpecialFields.ROW_ID.name in self.system_fields.keys(): + idx = _idx(SpecialFields.ROW_ID.name) + if 0 <= idx < len(arrays): arrays[idx] = pa.array(range(self.first_row_id, self.first_row_id + num_rows), type=pa.int64()) - # Handle _SEQUENCE_NUMBER field (only if batch has that column index) + # Handle _SEQUENCE_NUMBER field if SpecialFields.SEQUENCE_NUMBER.name in self.system_fields.keys(): - idx = self.system_fields[SpecialFields.SEQUENCE_NUMBER.name] - if idx < len(arrays): + idx = _idx(SpecialFields.SEQUENCE_NUMBER.name) + if 0 <= idx < len(arrays): arrays[idx] = pa.repeat(self.max_sequence_number, num_rows) names = record_batch.schema.names diff --git a/paimon-python/pypaimon/read/split_read.py b/paimon-python/pypaimon/read/split_read.py index fa9feb619317..ce83bcea48d1 100644 --- a/paimon-python/pypaimon/read/split_read.py +++ b/paimon-python/pypaimon/read/split_read.py @@ -168,7 +168,7 @@ def file_reader_supplier(self, file: DataFileMeta, for_merge_read: bool, if (merge_output_fields is not None and not is_blob_file) else 
self.create_index_mapping() ) - partition_info = self._create_partition_info() + partition_info = None if for_merge_read else self._create_partition_info() system_fields = SpecialFields.find_system_fields(self.read_fields) table_schema_fields = ( SpecialFields.row_type_with_row_tracking(self.table.table_schema.fields) @@ -607,10 +607,6 @@ def _create_union_reader(self, need_merge_files: List[DataFileMeta]) -> RecordRe read_fields = [] for j, read_field_id in enumerate(read_field_index): - # In merge, _ROW_ID and _SEQUENCE_NUMBER are filled from merge metadata, not from file - if (SpecialFields.ROW_ID.name == all_read_fields[j].name or - SpecialFields.SEQUENCE_NUMBER.name == all_read_fields[j].name): - continue for field_id in field_ids: if read_field_id == field_id: if row_offsets[j] == -1: @@ -647,19 +643,15 @@ def _create_union_reader(self, need_merge_files: List[DataFileMeta]) -> RecordRe file_record_readers[i] = MergeAllBatchReader(suppliers, batch_size=batch_size) self.read_fields = table_fields - # Validate that all required fields are found (system fields are filled by merge reader) + # Validate that all required fields are found for i, field in enumerate(all_read_fields): if row_offsets[i] == -1: - if field.name in (SpecialFields.ROW_ID.name, SpecialFields.SEQUENCE_NUMBER.name): - continue if not field.type.nullable: raise ValueError(f"Field {field} is not null but can't find any file contains it.") - max_sequence_number = max(f.max_sequence_number for f in need_merge_files) output_schema = PyarrowFieldParser.from_paimon_schema(all_read_fields) return DataEvolutionMergeReader( - row_offsets, field_offsets, file_record_readers, schema=output_schema, - first_row_id=first_row_id, max_sequence_number=max_sequence_number + row_offsets, field_offsets, file_record_readers, schema=output_schema ) def _create_file_reader(self, file: DataFileMeta, read_fields: [str], From 5d6020334e4f89f2a08ca317421d2e300078310a Mon Sep 17 00:00:00 2001 From: xiaohongbo Date: Sat, 28 
Feb 2026 17:34:26 +0800 Subject: [PATCH 15/20] fix --- paimon-python/pypaimon/read/split_read.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paimon-python/pypaimon/read/split_read.py b/paimon-python/pypaimon/read/split_read.py index ce83bcea48d1..4c6c4e783816 100644 --- a/paimon-python/pypaimon/read/split_read.py +++ b/paimon-python/pypaimon/read/split_read.py @@ -168,7 +168,7 @@ def file_reader_supplier(self, file: DataFileMeta, for_merge_read: bool, if (merge_output_fields is not None and not is_blob_file) else self.create_index_mapping() ) - partition_info = None if for_merge_read else self._create_partition_info() + partition_info = self._create_partition_info() system_fields = SpecialFields.find_system_fields(self.read_fields) table_schema_fields = ( SpecialFields.row_type_with_row_tracking(self.table.table_schema.fields) From 9f0ab3bd93a996433894760db38591991cc86158 Mon Sep 17 00:00:00 2001 From: xiaohongbo Date: Sat, 28 Feb 2026 17:40:54 +0800 Subject: [PATCH 16/20] revert change in concat_batch_reader.py --- paimon-python/pypaimon/read/reader/concat_batch_reader.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/paimon-python/pypaimon/read/reader/concat_batch_reader.py b/paimon-python/pypaimon/read/reader/concat_batch_reader.py index ae5f8f6b9a8f..4318f883eb2e 100644 --- a/paimon-python/pypaimon/read/reader/concat_batch_reader.py +++ b/paimon-python/pypaimon/read/reader/concat_batch_reader.py @@ -206,13 +206,9 @@ def read_arrow_batch(self) -> Optional[RecordBatch]: columns = [] for i in range(len(self.row_offsets)): batch_index = self.row_offsets[i] - field_offset = self.field_offsets[i] + field_index = self.field_offsets[i] if batch_index >= 0 and batches[batch_index] is not None: - batch = batches[batch_index] - if 0 <= field_offset < batch.num_columns: - columns.append(batch.column(field_offset).slice(0, min_rows)) - else: - columns.append(pa.nulls(min_rows, type=self.schema.field(i).type)) + 
columns.append(batches[batch_index].column(field_index).slice(0, min_rows)) else: columns.append(pa.nulls(min_rows, type=self.schema.field(i).type)) From 34fcc38d367881594d472941d32b8d336739aba4 Mon Sep 17 00:00:00 2001 From: xiaohongbo Date: Sat, 28 Feb 2026 18:09:16 +0800 Subject: [PATCH 17/20] refactor --- .../read/reader/data_file_batch_reader.py | 52 +++++++++++-------- paimon-python/pypaimon/read/split_read.py | 19 +++++-- 2 files changed, 44 insertions(+), 27 deletions(-) diff --git a/paimon-python/pypaimon/read/reader/data_file_batch_reader.py b/paimon-python/pypaimon/read/reader/data_file_batch_reader.py index dbb2ff05c04d..f0f436b1cd4c 100644 --- a/paimon-python/pypaimon/read/reader/data_file_batch_reader.py +++ b/paimon-python/pypaimon/read/reader/data_file_batch_reader.py @@ -44,7 +44,7 @@ def __init__(self, format_reader: RecordBatchReader, index_mapping: List[int], p blob_as_descriptor: bool = False, blob_descriptor_fields: Optional[set] = None, file_io: Optional[FileIO] = None, - output_field_names: Optional[List[str]] = None): + output_schema_names: Optional[List[str]] = None): self.format_reader = format_reader self.index_mapping = index_mapping self.partition_info = partition_info @@ -57,7 +57,7 @@ def __init__(self, format_reader: RecordBatchReader, index_mapping: List[int], p self.blob_as_descriptor = blob_as_descriptor self.blob_descriptor_fields = blob_descriptor_fields or set() self.file_io = file_io - self.output_field_names = output_field_names + self.output_schema_names = output_schema_names self.blob_field_names = { field.name for field in fields @@ -78,6 +78,8 @@ def read_arrow_batch(self, start_idx=None, end_idx=None) -> Optional[RecordBatch return None if self.partition_info is None and self.index_mapping is None: + if self.output_schema_names is not None and record_batch.schema.names != self.output_schema_names: + record_batch = self._expand_batch_to_schema(record_batch, self.output_schema_names) if self.row_tracking_enabled and 
self.system_fields: record_batch = self._assign_row_tracking(record_batch) return record_batch @@ -112,9 +114,7 @@ def read_arrow_batch(self, start_idx=None, end_idx=None) -> Optional[RecordBatch else: null_array = pa.nulls(num_rows) mapped_arrays.append(null_array) - name = (self.output_field_names[i] if self.output_field_names and i < len(self.output_field_names) - else f"null_col_{i}") - mapped_names.append(name) + mapped_names.append(f"null_col_{i}") if self.system_primary_key: for i in range(len(self.system_primary_key)): @@ -159,9 +159,7 @@ def _convert_descriptor_stored_blob_columns(self, record_batch: RecordBatch) -> field_idx = record_batch.schema.get_field_index(field_name) values = record_batch.column(field_idx).to_pylist() - if field_name in self.descriptor_blob_fields: - converted = [self._blob_cell_to_data(v) for v in values] - elif self.blob_as_descriptor: + if self.blob_as_descriptor: converted = [self._normalize_blob_cell(v) for v in values] else: converted = [self._blob_cell_to_data(v) for v in values] @@ -208,29 +206,37 @@ def _deserialize_descriptor_or_none(raw: bytes): return None return BlobDescriptor.deserialize(raw) + def _expand_batch_to_schema(self, record_batch: RecordBatch, schema_names: List[str]) -> RecordBatch: + num_rows = record_batch.num_rows + batch_names = record_batch.schema.names + arrays = [] + out_fields = [] + for name in schema_names: + if name in batch_names: + idx = batch_names.index(name) + arrays.append(record_batch.column(idx)) + out_fields.append(record_batch.schema.field(idx)) + else: + pa_type = pa.null() + if name in self.schema_map: + pa_type = self.schema_map[name].type + arrays.append(pa.nulls(num_rows, type=pa_type)) + out_fields.append(pa.field(name, pa_type, nullable=True)) + return pa.RecordBatch.from_arrays(arrays, schema=pa.schema(out_fields)) + def _assign_row_tracking(self, record_batch: RecordBatch) -> RecordBatch: - """Assign row tracking meta fields (_ROW_ID and _SEQUENCE_NUMBER).""" + """Assign row 
tracking meta fields (_ROW_ID and _SEQUENCE_NUMBER). Call after batch is expanded to full schema.""" arrays = list(record_batch.columns) num_rows = record_batch.num_rows - schema_names = record_batch.schema.names - - def _idx(name: str) -> int: - if name not in self.system_fields.keys(): - return -1 - if name in schema_names: - return schema_names.index(name) - return -1 - # Handle _ROW_ID field if self.first_row_id is not None and SpecialFields.ROW_ID.name in self.system_fields.keys(): - idx = _idx(SpecialFields.ROW_ID.name) - if 0 <= idx < len(arrays): + idx = self.system_fields[SpecialFields.ROW_ID.name] + if idx < len(arrays): arrays[idx] = pa.array(range(self.first_row_id, self.first_row_id + num_rows), type=pa.int64()) - # Handle _SEQUENCE_NUMBER field if SpecialFields.SEQUENCE_NUMBER.name in self.system_fields.keys(): - idx = _idx(SpecialFields.SEQUENCE_NUMBER.name) - if 0 <= idx < len(arrays): + idx = self.system_fields[SpecialFields.SEQUENCE_NUMBER.name] + if idx < len(arrays): arrays[idx] = pa.repeat(self.max_sequence_number, num_rows) names = record_batch.schema.names diff --git a/paimon-python/pypaimon/read/split_read.py b/paimon-python/pypaimon/read/split_read.py index 4c6c4e783816..54eba6c87a70 100644 --- a/paimon-python/pypaimon/read/split_read.py +++ b/paimon-python/pypaimon/read/split_read.py @@ -115,7 +115,8 @@ def create_reader(self) -> RecordReader: def file_reader_supplier(self, file: DataFileMeta, for_merge_read: bool, read_fields: List[str], row_tracking_enabled: bool, - merge_output_fields: Optional[List[DataField]] = None) -> RecordBatchReader: + merge_output_fields: Optional[List[DataField]] = None, + output_schema_names: Optional[List[str]] = None) -> RecordBatchReader: (read_file_fields, read_arrow_predicate) = self._get_fields_and_predicate(file.schema_id, read_fields) # Use external_path if available, otherwise use file_path @@ -174,6 +175,13 @@ def file_reader_supplier(self, file: DataFileMeta, for_merge_read: bool, 
SpecialFields.row_type_with_row_tracking(self.table.table_schema.fields) if row_tracking_enabled else self.table.table_schema.fields ) + output_schema_names = output_schema_names if output_schema_names is not None else [f.name for f in self.read_fields] + if output_schema_names is not None and output_schema_names != [f.name for f in self.read_fields]: + system_fields = { + name: output_schema_names.index(name) + for name in (SpecialFields.ROW_ID.name, SpecialFields.SEQUENCE_NUMBER.name) + if name in output_schema_names + } if for_merge_read: return DataFileBatchReader( format_reader, @@ -187,7 +195,8 @@ def file_reader_supplier(self, file: DataFileMeta, for_merge_read: bool, system_fields, blob_as_descriptor=blob_as_descriptor, blob_descriptor_fields=blob_descriptor_fields, - file_io=self.table.file_io) + file_io=self.table.file_io, + output_schema_names=output_schema_names) else: return DataFileBatchReader( format_reader, @@ -201,7 +210,8 @@ def file_reader_supplier(self, file: DataFileMeta, for_merge_read: bool, system_fields, blob_as_descriptor=blob_as_descriptor, blob_descriptor_fields=blob_descriptor_fields, - file_io=self.table.file_io) + file_io=self.table.file_io, + output_schema_names=output_schema_names) def _get_fields_and_predicate(self, schema_id: int, read_fields): key = (schema_id, tuple(read_fields)) @@ -664,7 +674,8 @@ def create_record_reader(): for_merge_read=for_merge_read, read_fields=read_fields, row_tracking_enabled=True, - merge_output_fields=merge_output_fields) + merge_output_fields=merge_output_fields, + output_schema_names=read_fields if for_merge_read else None) if self.row_ranges is None: return create_record_reader() row_ranges = Range.and_(self.row_ranges, [file.row_id_range()]) From 47c293adf520b13cb86119cdc534c7091f64562c Mon Sep 17 00:00:00 2001 From: xiaohongbo Date: Sat, 28 Feb 2026 22:59:50 +0800 Subject: [PATCH 18/20] revert --- .../read/reader/data_file_batch_reader.py | 39 +++------- .../read/reader/format_pyarrow_reader.py 
| 22 +----- paimon-python/pypaimon/read/split_read.py | 76 ++++--------------- paimon-python/pypaimon/read/table_read.py | 3 + 4 files changed, 31 insertions(+), 109 deletions(-) diff --git a/paimon-python/pypaimon/read/reader/data_file_batch_reader.py b/paimon-python/pypaimon/read/reader/data_file_batch_reader.py index f0f436b1cd4c..7f2e1c61e1f4 100644 --- a/paimon-python/pypaimon/read/reader/data_file_batch_reader.py +++ b/paimon-python/pypaimon/read/reader/data_file_batch_reader.py @@ -43,8 +43,7 @@ def __init__(self, format_reader: RecordBatchReader, index_mapping: List[int], p system_fields: dict, blob_as_descriptor: bool = False, blob_descriptor_fields: Optional[set] = None, - file_io: Optional[FileIO] = None, - output_schema_names: Optional[List[str]] = None): + file_io: Optional[FileIO] = None): self.format_reader = format_reader self.index_mapping = index_mapping self.partition_info = partition_info @@ -57,7 +56,6 @@ def __init__(self, format_reader: RecordBatchReader, index_mapping: List[int], p self.blob_as_descriptor = blob_as_descriptor self.blob_descriptor_fields = blob_descriptor_fields or set() self.file_io = file_io - self.output_schema_names = output_schema_names self.blob_field_names = { field.name for field in fields @@ -78,8 +76,6 @@ def read_arrow_batch(self, start_idx=None, end_idx=None) -> Optional[RecordBatch return None if self.partition_info is None and self.index_mapping is None: - if self.output_schema_names is not None and record_batch.schema.names != self.output_schema_names: - record_batch = self._expand_batch_to_schema(record_batch, self.output_schema_names) if self.row_tracking_enabled and self.system_fields: record_batch = self._assign_row_tracking(record_batch) return record_batch @@ -206,38 +202,21 @@ def _deserialize_descriptor_or_none(raw: bytes): return None return BlobDescriptor.deserialize(raw) - def _expand_batch_to_schema(self, record_batch: RecordBatch, schema_names: List[str]) -> RecordBatch: - num_rows = 
record_batch.num_rows - batch_names = record_batch.schema.names - arrays = [] - out_fields = [] - for name in schema_names: - if name in batch_names: - idx = batch_names.index(name) - arrays.append(record_batch.column(idx)) - out_fields.append(record_batch.schema.field(idx)) - else: - pa_type = pa.null() - if name in self.schema_map: - pa_type = self.schema_map[name].type - arrays.append(pa.nulls(num_rows, type=pa_type)) - out_fields.append(pa.field(name, pa_type, nullable=True)) - return pa.RecordBatch.from_arrays(arrays, schema=pa.schema(out_fields)) - def _assign_row_tracking(self, record_batch: RecordBatch) -> RecordBatch: - """Assign row tracking meta fields (_ROW_ID and _SEQUENCE_NUMBER). Call after batch is expanded to full schema.""" + """Assign row tracking meta fields (_ROW_ID and _SEQUENCE_NUMBER).""" arrays = list(record_batch.columns) - num_rows = record_batch.num_rows - if self.first_row_id is not None and SpecialFields.ROW_ID.name in self.system_fields.keys(): + # Handle _ROW_ID field + if SpecialFields.ROW_ID.name in self.system_fields.keys(): idx = self.system_fields[SpecialFields.ROW_ID.name] - if idx < len(arrays): - arrays[idx] = pa.array(range(self.first_row_id, self.first_row_id + num_rows), type=pa.int64()) + # Create a new array that fills with computed row IDs + arrays[idx] = pa.array(range(self.first_row_id, self.first_row_id + record_batch.num_rows), type=pa.int64()) + # Handle _SEQUENCE_NUMBER field if SpecialFields.SEQUENCE_NUMBER.name in self.system_fields.keys(): idx = self.system_fields[SpecialFields.SEQUENCE_NUMBER.name] - if idx < len(arrays): - arrays[idx] = pa.repeat(self.max_sequence_number, num_rows) + # Create a new array that fills with max_sequence_number + arrays[idx] = pa.repeat(self.max_sequence_number, record_batch.num_rows) names = record_batch.schema.names table = None diff --git a/paimon-python/pypaimon/read/reader/format_pyarrow_reader.py b/paimon-python/pypaimon/read/reader/format_pyarrow_reader.py index 
bca143c3313f..dd5330227ded 100644 --- a/paimon-python/pypaimon/read/reader/format_pyarrow_reader.py +++ b/paimon-python/pypaimon/read/reader/format_pyarrow_reader.py @@ -24,7 +24,6 @@ from pypaimon.common.file_io import FileIO from pypaimon.read.reader.iface.record_batch_reader import RecordBatchReader -from pypaimon.table.special_fields import SpecialFields class FormatPyArrowReader(RecordBatchReader): @@ -34,12 +33,10 @@ class FormatPyArrowReader(RecordBatchReader): """ def __init__(self, file_io: FileIO, file_format: str, file_path: str, read_fields: List[str], - push_down_predicate: Any, batch_size: int = 1024, - output_schema: Optional[pa.Schema] = None): + push_down_predicate: Any, batch_size: int = 1024): file_path_for_pyarrow = file_io.to_filesystem_path(file_path) self.dataset = ds.dataset(file_path_for_pyarrow, format=file_format, filesystem=file_io.filesystem) self.read_fields = read_fields - self.output_schema = output_schema # Identify which fields exist in the file and which are missing file_schema_names = set(self.dataset.schema.names) @@ -60,17 +57,8 @@ def read_arrow_batch(self) -> Optional[RecordBatch]: if not self.missing_fields: return batch - def _type_for_missing(name: str) -> pa.DataType: - if self.output_schema is not None: - idx = self.output_schema.get_field_index(name) - if idx >= 0: - return self.output_schema.field(idx).type - return pa.null() - - missing_columns = [ - pa.nulls(batch.num_rows, type=_type_for_missing(name)) - for name in self.missing_fields - ] + # Create columns for missing fields with null values + missing_columns = [pa.nulls(batch.num_rows, type=pa.null()) for _ in self.missing_fields] # Reconstruct the batch with all fields in the correct order all_columns = [] @@ -84,10 +72,8 @@ def _type_for_missing(name: str) -> pa.DataType: else: # Get the column from missing fields column_idx = self.missing_fields.index(field_name) - col_type = _type_for_missing(field_name) all_columns.append(missing_columns[column_idx]) - 
nullable = True if col_type == pa.null() else not SpecialFields.is_system_field(field_name) - out_fields.append(pa.field(field_name, col_type, nullable=nullable)) + out_fields.append(pa.field(field_name, pa.null(), nullable=True)) # Create a new RecordBatch with all columns return pa.RecordBatch.from_arrays(all_columns, schema=pa.schema(out_fields)) diff --git a/paimon-python/pypaimon/read/split_read.py b/paimon-python/pypaimon/read/split_read.py index 54eba6c87a70..d76a71682b17 100644 --- a/paimon-python/pypaimon/read/split_read.py +++ b/paimon-python/pypaimon/read/split_read.py @@ -114,29 +114,13 @@ def create_reader(self) -> RecordReader: """Create a record reader for the given split.""" def file_reader_supplier(self, file: DataFileMeta, for_merge_read: bool, - read_fields: List[str], row_tracking_enabled: bool, - merge_output_fields: Optional[List[DataField]] = None, - output_schema_names: Optional[List[str]] = None) -> RecordBatchReader: + read_fields: List[str], row_tracking_enabled: bool) -> RecordBatchReader: (read_file_fields, read_arrow_predicate) = self._get_fields_and_predicate(file.schema_id, read_fields) # Use external_path if available, otherwise use file_path file_path = file.external_path if file.external_path else file.file_path _, extension = os.path.splitext(file_path) file_format = extension[1:] - is_blob_file = file_format == CoreOptions.FILE_FORMAT_BLOB - - # Only when used inside a merge (for_merge_read): each reader returns just its columns. - # Single-file read still expands to full schema so row tracking has enough columns. 
- if for_merge_read and merge_output_fields is not None and not is_blob_file: - columns_for_format = read_file_fields - output_schema_pa = None - elif merge_output_fields is not None and not is_blob_file: - partition_keys = set(self.table.partition_keys or []) - columns_for_format = [f.name for f in merge_output_fields if f.name not in partition_keys] - output_schema_pa = PyarrowFieldParser.from_paimon_schema(merge_output_fields) - else: - columns_for_format = read_file_fields - output_schema_pa = None batch_size = self.table.options.read_batch_size() @@ -153,35 +137,21 @@ def file_reader_supplier(self, file: DataFileMeta, for_merge_read: bool, format_reader = FormatLanceReader(self.table.file_io, file_path, read_file_fields, read_arrow_predicate, batch_size=batch_size) elif file_format == CoreOptions.FILE_FORMAT_PARQUET or file_format == CoreOptions.FILE_FORMAT_ORC: - format_reader = FormatPyArrowReader( - self.table.file_io, file_format, file_path, - columns_for_format, read_arrow_predicate, batch_size=batch_size, - output_schema=output_schema_pa - ) + format_reader = FormatPyArrowReader(self.table.file_io, file_format, file_path, + read_file_fields, read_arrow_predicate, batch_size=batch_size) else: raise ValueError(f"Unexpected file format: {file_format}") blob_as_descriptor = CoreOptions.blob_as_descriptor(self.table.options) blob_descriptor_fields = CoreOptions.blob_descriptor_fields(self.table.options) - index_mapping = ( - None - if (merge_output_fields is not None and not is_blob_file) - else self.create_index_mapping() - ) + index_mapping = self.create_index_mapping() partition_info = self._create_partition_info() system_fields = SpecialFields.find_system_fields(self.read_fields) table_schema_fields = ( SpecialFields.row_type_with_row_tracking(self.table.table_schema.fields) if row_tracking_enabled else self.table.table_schema.fields ) - output_schema_names = output_schema_names if output_schema_names is not None else [f.name for f in self.read_fields] - 
if output_schema_names is not None and output_schema_names != [f.name for f in self.read_fields]: - system_fields = { - name: output_schema_names.index(name) - for name in (SpecialFields.ROW_ID.name, SpecialFields.SEQUENCE_NUMBER.name) - if name in output_schema_names - } if for_merge_read: return DataFileBatchReader( format_reader, @@ -195,8 +165,7 @@ def file_reader_supplier(self, file: DataFileMeta, for_merge_read: bool, system_fields, blob_as_descriptor=blob_as_descriptor, blob_descriptor_fields=blob_descriptor_fields, - file_io=self.table.file_io, - output_schema_names=output_schema_names) + file_io=self.table.file_io) else: return DataFileBatchReader( format_reader, @@ -210,8 +179,7 @@ def file_reader_supplier(self, file: DataFileMeta, for_merge_read: bool, system_fields, blob_as_descriptor=blob_as_descriptor, blob_descriptor_fields=blob_descriptor_fields, - file_io=self.table.file_io, - output_schema_names=output_schema_names) + file_io=self.table.file_io) def _get_fields_and_predicate(self, schema_id: int, read_fields): key = (schema_id, tuple(read_fields)) @@ -515,9 +483,7 @@ def create_reader(self) -> RecordReader: if len(need_merge_files) == 1 or not self.read_fields: # No need to merge fields, just create a single file reader suppliers.append( - lambda f=need_merge_files[0], mof=self.read_fields: self._create_file_reader( - f, self._get_final_read_data_fields(), merge_output_fields=mof - ) + lambda f=need_merge_files[0]: self._create_file_reader(f, self._get_final_read_data_fields()) ) else: suppliers.append( @@ -632,23 +598,17 @@ def _create_union_reader(self, need_merge_files: List[DataFileMeta]) -> RecordRe table_fields = self.read_fields self.read_fields = read_fields # create reader based on read_fields batch_size = self.table.options.read_batch_size() - merge_output_fields = all_read_fields # Create reader for this bunch if len(bunch.files()) == 1: - suppliers = [ - lambda f=bunch.files()[0], rn=read_field_names, mof=merge_output_fields: ( - 
self._create_file_reader(f, rn, merge_output_fields=mof, for_merge_read=True) - ) - ] + suppliers = [lambda r=self._create_file_reader( + bunch.files()[0], read_field_names + ): r] file_record_readers[i] = MergeAllBatchReader(suppliers, batch_size=batch_size) else: # Create concatenated reader for multiple files suppliers = [ partial(self._create_file_reader, file=file, - read_fields=read_field_names, - merge_output_fields=merge_output_fields, - for_merge_read=True) - for file in bunch.files() + read_fields=read_field_names) for file in bunch.files() ] file_record_readers[i] = MergeAllBatchReader(suppliers, batch_size=batch_size) self.read_fields = table_fields @@ -660,22 +620,16 @@ def _create_union_reader(self, need_merge_files: List[DataFileMeta]) -> RecordRe raise ValueError(f"Field {field} is not null but can't find any file contains it.") output_schema = PyarrowFieldParser.from_paimon_schema(all_read_fields) - return DataEvolutionMergeReader( - row_offsets, field_offsets, file_record_readers, schema=output_schema - ) + return DataEvolutionMergeReader(row_offsets, field_offsets, file_record_readers, schema=output_schema) - def _create_file_reader(self, file: DataFileMeta, read_fields: [str], - merge_output_fields: Optional[List[DataField]] = None, - for_merge_read: bool = False) -> Optional[RecordReader]: + def _create_file_reader(self, file: DataFileMeta, read_fields: [str]) -> Optional[RecordReader]: """Create a file reader for a single file.""" def create_record_reader(): return self.file_reader_supplier( file=file, - for_merge_read=for_merge_read, + for_merge_read=False, read_fields=read_fields, - row_tracking_enabled=True, - merge_output_fields=merge_output_fields, - output_schema_names=read_fields if for_merge_read else None) + row_tracking_enabled=True) if self.row_ranges is None: return create_record_reader() row_ranges = Range.and_(self.row_ranges, [file.row_id_range()]) diff --git a/paimon-python/pypaimon/read/table_read.py 
b/paimon-python/pypaimon/read/table_read.py index 79984901a812..5206147f80a8 100644 --- a/paimon-python/pypaimon/read/table_read.py +++ b/paimon-python/pypaimon/read/table_read.py @@ -92,6 +92,9 @@ def to_arrow(self, splits: List[Split]) -> Optional[pyarrow.Table]: return self._convert_descriptor_stored_fields_for_read(table) def _convert_descriptor_stored_fields_for_read(self, table: pyarrow.Table) -> pyarrow.Table: + if CoreOptions.blob_as_descriptor(self.table.options): + return table + descriptor_fields = CoreOptions.blob_descriptor_fields(self.table.options) if not descriptor_fields: return table From 8da40bd828a8d7ea6a51ed43ad97c4521e2caf51 Mon Sep 17 00:00:00 2001 From: xiaohongbo Date: Sat, 28 Feb 2026 23:27:56 +0800 Subject: [PATCH 19/20] [python] Fix FormatPyArrowReader missing-column type issue --- .../read/reader/format_pyarrow_reader.py | 39 ++++++++++++++++--- paimon-python/pypaimon/read/split_read.py | 6 ++- 2 files changed, 37 insertions(+), 8 deletions(-) diff --git a/paimon-python/pypaimon/read/reader/format_pyarrow_reader.py b/paimon-python/pypaimon/read/reader/format_pyarrow_reader.py index dd5330227ded..046b7d5ffa69 100644 --- a/paimon-python/pypaimon/read/reader/format_pyarrow_reader.py +++ b/paimon-python/pypaimon/read/reader/format_pyarrow_reader.py @@ -24,6 +24,8 @@ from pypaimon.common.file_io import FileIO from pypaimon.read.reader.iface.record_batch_reader import RecordBatchReader +from pypaimon.schema.data_types import DataField, PyarrowFieldParser +from pypaimon.table.special_fields import SpecialFields class FormatPyArrowReader(RecordBatchReader): @@ -33,15 +35,17 @@ class FormatPyArrowReader(RecordBatchReader): """ def __init__(self, file_io: FileIO, file_format: str, file_path: str, read_fields: List[str], - push_down_predicate: Any, batch_size: int = 1024): + push_down_predicate: Any, batch_size: int = 1024, + read_schema_fields: Optional[List[DataField]] = None): file_path_for_pyarrow = file_io.to_filesystem_path(file_path) 
self.dataset = ds.dataset(file_path_for_pyarrow, format=file_format, filesystem=file_io.filesystem) self.read_fields = read_fields + self.read_schema_fields = read_schema_fields # Identify which fields exist in the file and which are missing file_schema_names = set(self.dataset.schema.names) - self.existing_fields = [field for field in read_fields if field in file_schema_names] - self.missing_fields = [field for field in read_fields if field not in file_schema_names] + self.existing_fields = [f for f in read_fields if f in file_schema_names] + self.missing_fields = [f for f in read_fields if f not in file_schema_names] # Only pass existing fields to PyArrow scanner to avoid errors self.reader = self.dataset.scanner( @@ -50,6 +54,16 @@ def __init__(self, file_io: FileIO, file_format: str, file_path: str, read_field batch_size=batch_size ).to_reader() + def _build_output_schema(self) -> Optional[pa.Schema]: + """Build PyArrow schema for type lookup when filling missing columns.""" + if self.read_schema_fields is None: + return None + name_to_field = {f.name: f for f in self.read_schema_fields} + ordered_fields = [name_to_field[n] for n in self.read_fields if n in name_to_field] + if not ordered_fields: + return None + return PyarrowFieldParser.from_paimon_schema(ordered_fields) + def read_arrow_batch(self) -> Optional[RecordBatch]: try: batch = self.reader.read_next_batch() @@ -57,8 +71,19 @@ def read_arrow_batch(self) -> Optional[RecordBatch]: if not self.missing_fields: return batch - # Create columns for missing fields with null values - missing_columns = [pa.nulls(batch.num_rows, type=pa.null()) for _ in self.missing_fields] + output_schema = self._build_output_schema() + + def _type_for_missing(name: str) -> pa.DataType: + if output_schema is not None: + idx = output_schema.get_field_index(name) + if idx >= 0: + return output_schema.field(idx).type + return pa.null() + + missing_columns = [ + pa.nulls(batch.num_rows, type=_type_for_missing(name)) + for name in 
self.missing_fields + ] # Reconstruct the batch with all fields in the correct order all_columns = [] @@ -72,8 +97,10 @@ def read_arrow_batch(self) -> Optional[RecordBatch]: else: # Get the column from missing fields column_idx = self.missing_fields.index(field_name) + col_type = _type_for_missing(field_name) all_columns.append(missing_columns[column_idx]) - out_fields.append(pa.field(field_name, pa.null(), nullable=True)) + nullable = not SpecialFields.is_system_field(field_name) + out_fields.append(pa.field(field_name, col_type, nullable=nullable)) # Create a new RecordBatch with all columns return pa.RecordBatch.from_arrays(all_columns, schema=pa.schema(out_fields)) diff --git a/paimon-python/pypaimon/read/split_read.py b/paimon-python/pypaimon/read/split_read.py index d76a71682b17..13214d433a43 100644 --- a/paimon-python/pypaimon/read/split_read.py +++ b/paimon-python/pypaimon/read/split_read.py @@ -137,8 +137,10 @@ def file_reader_supplier(self, file: DataFileMeta, for_merge_read: bool, format_reader = FormatLanceReader(self.table.file_io, file_path, read_file_fields, read_arrow_predicate, batch_size=batch_size) elif file_format == CoreOptions.FILE_FORMAT_PARQUET or file_format == CoreOptions.FILE_FORMAT_ORC: - format_reader = FormatPyArrowReader(self.table.file_io, file_format, file_path, - read_file_fields, read_arrow_predicate, batch_size=batch_size) + format_reader = FormatPyArrowReader( + self.table.file_io, file_format, file_path, + read_file_fields, read_arrow_predicate, batch_size=batch_size, + read_schema_fields=self.read_fields) else: raise ValueError(f"Unexpected file format: {file_format}") From c2a204e4fef4d3b74929df5da9fa5a14f35ac830 Mon Sep 17 00:00:00 2001 From: xiaohongbo Date: Sat, 28 Feb 2026 23:58:30 +0800 Subject: [PATCH 20/20] refactor to merge read_fields and read_schema_fields --- .../read/reader/format_pyarrow_reader.py | 34 +++++++------------ paimon-python/pypaimon/read/split_read.py | 5 +-- 2 files changed, 16 insertions(+), 23 
deletions(-) diff --git a/paimon-python/pypaimon/read/reader/format_pyarrow_reader.py b/paimon-python/pypaimon/read/reader/format_pyarrow_reader.py index 046b7d5ffa69..e9c9efd917c1 100644 --- a/paimon-python/pypaimon/read/reader/format_pyarrow_reader.py +++ b/paimon-python/pypaimon/read/reader/format_pyarrow_reader.py @@ -34,18 +34,18 @@ class FormatPyArrowReader(RecordBatchReader): and filters it based on the provided predicate and projection. """ - def __init__(self, file_io: FileIO, file_format: str, file_path: str, read_fields: List[str], - push_down_predicate: Any, batch_size: int = 1024, - read_schema_fields: Optional[List[DataField]] = None): + def __init__(self, file_io: FileIO, file_format: str, file_path: str, + read_fields: List[DataField], + push_down_predicate: Any, batch_size: int = 1024): file_path_for_pyarrow = file_io.to_filesystem_path(file_path) self.dataset = ds.dataset(file_path_for_pyarrow, format=file_format, filesystem=file_io.filesystem) self.read_fields = read_fields - self.read_schema_fields = read_schema_fields + self._read_field_names = [f.name for f in read_fields] # Identify which fields exist in the file and which are missing file_schema_names = set(self.dataset.schema.names) - self.existing_fields = [f for f in read_fields if f in file_schema_names] - self.missing_fields = [f for f in read_fields if f not in file_schema_names] + self.existing_fields = [f.name for f in read_fields if f.name in file_schema_names] + self.missing_fields = [f.name for f in read_fields if f.name not in file_schema_names] # Only pass existing fields to PyArrow scanner to avoid errors self.reader = self.dataset.scanner( @@ -54,15 +54,9 @@ def __init__(self, file_io: FileIO, file_format: str, file_path: str, read_field batch_size=batch_size ).to_reader() - def _build_output_schema(self) -> Optional[pa.Schema]: - """Build PyArrow schema for type lookup when filling missing columns.""" - if self.read_schema_fields is None: - return None - name_to_field = 
{f.name: f for f in self.read_schema_fields} - ordered_fields = [name_to_field[n] for n in self.read_fields if n in name_to_field] - if not ordered_fields: - return None - return PyarrowFieldParser.from_paimon_schema(ordered_fields) + self._output_schema = ( + PyarrowFieldParser.from_paimon_schema(read_fields) if read_fields else None + ) def read_arrow_batch(self) -> Optional[RecordBatch]: try: @@ -71,13 +65,11 @@ def read_arrow_batch(self) -> Optional[RecordBatch]: if not self.missing_fields: return batch - output_schema = self._build_output_schema() - def _type_for_missing(name: str) -> pa.DataType: - if output_schema is not None: - idx = output_schema.get_field_index(name) + if self._output_schema is not None: + idx = self._output_schema.get_field_index(name) if idx >= 0: - return output_schema.field(idx).type + return self._output_schema.field(idx).type return pa.null() missing_columns = [ @@ -88,7 +80,7 @@ def _type_for_missing(name: str) -> pa.DataType: # Reconstruct the batch with all fields in the correct order all_columns = [] out_fields = [] - for field_name in self.read_fields: + for field_name in self._read_field_names: if field_name in self.existing_fields: # Get the column from the existing batch column_idx = self.existing_fields.index(field_name) diff --git a/paimon-python/pypaimon/read/split_read.py b/paimon-python/pypaimon/read/split_read.py index 13214d433a43..60347236478b 100644 --- a/paimon-python/pypaimon/read/split_read.py +++ b/paimon-python/pypaimon/read/split_read.py @@ -137,10 +137,11 @@ def file_reader_supplier(self, file: DataFileMeta, for_merge_read: bool, format_reader = FormatLanceReader(self.table.file_io, file_path, read_file_fields, read_arrow_predicate, batch_size=batch_size) elif file_format == CoreOptions.FILE_FORMAT_PARQUET or file_format == CoreOptions.FILE_FORMAT_ORC: + name_to_field = {f.name: f for f in self.read_fields} + ordered_read_fields = [name_to_field[n] for n in read_file_fields if n in name_to_field] 
format_reader = FormatPyArrowReader( self.table.file_io, file_format, file_path, - read_file_fields, read_arrow_predicate, batch_size=batch_size, - read_schema_fields=self.read_fields) + ordered_read_fields, read_arrow_predicate, batch_size=batch_size) else: raise ValueError(f"Unexpected file format: {file_format}")