
Commit 7e5f55a

support data evolution read
1 parent f0d1eb4 commit 7e5f55a

7 files changed

Lines changed: 579 additions & 72 deletions


docs/content/pypaimon/data-evolution.md

Lines changed: 5 additions & 0 deletions
@@ -196,3 +196,8 @@ commit.close()
 - **Row order matters**: the batches you write must have the **same number of rows** as the batches you read, in the
   same order for that shard.
 - **Parallelism**: run multiple shards by calling `new_shard_updator(shard_idx, num_shards)` for each shard.
+
+## Read After Partial Shard Update
+
+- **Full table read**: rows from updated shards have the new column; rows from other shards have null for that column.
+- **Per-shard read** (`with_shard(shard_idx, num_shards)`): read only the shard(s) you need; the new column is populated where it was written and null elsewhere.
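
A minimal read-side sketch of these two behaviors, assuming `table` comes from a catalog and the standard pypaimon scan/read builders; placing `with_shard` on the scan is an assumption based on the bullet above:

    # Full table read after updating only shard 0 of 2: rows from shard 0
    # carry the new column, rows from the other shard are null for it.
    read_builder = table.new_read_builder()
    splits = read_builder.new_scan().plan().splits()
    full_result = read_builder.new_read().to_arrow(splits)

    # Per-shard read: restrict the scan to the shard that was updated.
    shard_splits = read_builder.new_scan().with_shard(0, 2).plan().splits()
    shard_result = read_builder.new_read().to_arrow(shard_splits)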

paimon-python/pypaimon/read/read_builder.py

Lines changed: 4 additions & 1 deletion
@@ -67,7 +67,8 @@ def new_read(self) -> TableRead:
         return TableRead(
             table=self.table,
             predicate=self._predicate,
-            read_type=self.read_type()
+            read_type=self.read_type(),
+            projection=self._projection,
         )

     def new_predicate_builder(self) -> PredicateBuilder:
@@ -77,6 +78,8 @@ def read_type(self) -> List[DataField]:
         table_fields = self.table.fields

         if not self._projection:
+            if self.table.options.row_tracking_enabled():
+                table_fields = SpecialFields.row_type_with_row_tracking(table_fields)
             return table_fields
         else:
             if self.table.options.row_tracking_enabled():
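
A short sketch of what this fallback means in practice, assuming a pypaimon table created with row tracking enabled; builder and field names outside this diff are assumptions from the public pypaimon API:

    # Hedged sketch: read_type() with and without a projection.
    # `table` is assumed to come from a catalog with row tracking enabled.
    read_builder = table.new_read_builder()

    # No projection: read_type() now appends the row-tracking system fields
    # (e.g. _ROW_ID, _SEQUENCE_NUMBER) after the table's data fields.
    print([f.name for f in read_builder.read_type()])

    # Explicit projection: only the requested fields, and the projection is
    # now forwarded to TableRead via the new `projection` constructor argument.
    projected_builder = table.new_read_builder().with_projection(['id'])
    print([f.name for f in projected_builder.read_type()])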

paimon-python/pypaimon/read/reader/concat_batch_reader.py

Lines changed: 73 additions & 3 deletions
@@ -139,14 +139,20 @@ class DataEvolutionMergeReader(RecordBatchReader):
     - The fourth field comes from batch1, and it is at offset 1 in batch1.
     - The fifth field comes from batch2, and it is at offset 1 in batch2.
     - The sixth field comes from batch1, and it is at offset 0 in batch1.
+
+    When row_offsets[i] == -1 (no file provides that field), output a column of nulls using the schema.
     """

     def __init__(
         self,
         row_offsets: List[int],
         field_offsets: List[int],
         readers: List[Optional[RecordBatchReader]],
-        schema: pa.Schema,
+        schema: Optional[pa.Schema] = None,
     ):
         if row_offsets is None:
             raise ValueError("Row offsets must not be null")
@@ -172,16 +178,80 @@ def read_arrow_batch(self) -> Optional[RecordBatch]:
                 # all readers are aligned, as long as one returns null, the others will also have no data
                 return None
             batches[i] = batch
-        # Assemble record batches from batches based on row_offsets and field_offsets
+        # All readers may be None (e.g. all bunches had empty read_fields_per_bunch)
+        if not any(b is not None for b in batches):
+            return None
+        num_rows = next(b.num_rows for b in batches if b is not None)
         columns = []
+        names = []  # output column names, kept aligned with `columns`
         for i in range(len(self.row_offsets)):
             batch_index = self.row_offsets[i]
             field_index = self.field_offsets[i]
-            if batches[batch_index] is not None:
-                column = batches[batch_index].column(field_index)
+            field_name = self.schema.field(i).name if self.schema else None
+            column = None
+            out_name = None
+
+            # Prefer resolving the column by name in its source batch, then
+            # fall back to the positional field offset.
+            if batch_index >= 0 and batches[batch_index] is not None:
+                src_batch = batches[batch_index]
+                if field_name is not None and field_name in src_batch.schema.names:
+                    column = src_batch.column(src_batch.schema.get_field_index(field_name))
+                    out_name = (
+                        self.schema.field(i).name
+                        if self.schema is not None and i < len(self.schema)
+                        else field_name
+                    )
+                elif field_index < src_batch.num_columns:
+                    column = src_batch.column(field_index)
+                    out_name = (
+                        self.schema.field(i).name
+                        if self.schema is not None and i < len(self.schema)
+                        else src_batch.schema.names[field_index]
+                    )
+
+            # Last resort: search every batch for a column with the same name.
+            if column is None and field_name is not None:
+                for b in batches:
+                    if b is not None and field_name in b.schema.names:
+                        column = b.column(b.schema.get_field_index(field_name))
+                        out_name = (
+                            self.schema.field(i).name
+                            if self.schema is not None and i < len(self.schema)
+                            else field_name
+                        )
+                        break
+
+            if column is not None and out_name is not None:
                 columns.append(column)
+                names.append(out_name)
+            elif self.schema is not None and i < len(self.schema):
+                # No file provides this field: emit a typed null column.
+                field = self.schema.field(i)
+                columns.append(pa.nulls(num_rows, type=field.type))
+                names.append(field.name)
+            else:
+                if batch_index >= 0 and batches[batch_index] is not None:
+                    src_batch = batches[batch_index]
+                    raise ValueError(
+                        f"Field index {field_index} out of bounds for batch with "
+                        f"{src_batch.num_columns} columns and no schema for null column"
+                    )
+                raise ValueError(
+                    f"Row offset {batch_index} for field index {i} is invalid and no schema provided for null column"
+                )
-        if columns:
-            return pa.RecordBatch.from_arrays(columns, schema=self.schema)
+        if columns:
+            if self.schema is not None:
+                schema_fields = []
+                for i, name in enumerate(names):
+                    if name in self.schema.names:
+                        field_idx = self.schema.get_field_index(name)
+                        schema_fields.append(self.schema.field(field_idx))
+                    else:
+                        schema_fields.append(pa.field(name, columns[i].type))
+                return pa.RecordBatch.from_arrays(columns, schema=pa.schema(schema_fields))
+            else:
+                return pa.RecordBatch.from_arrays(columns, names)
         return None

     def close(self) -> None:
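
To make the mapping concrete, here is a standalone pyarrow sketch of the same assembly rule; all names are illustrative, with new_col standing for an evolved column that no data file provides yet:

    import pyarrow as pa

    batch0 = pa.record_batch([pa.array([1, 2])], names=["id"])
    batch1 = pa.record_batch([pa.array(["a", "b"])], names=["v"])
    batches = [batch0, batch1]

    schema = pa.schema([("id", pa.int64()), ("v", pa.string()), ("new_col", pa.float64())])
    row_offsets = [0, 1, -1]      # source batch per output field; -1 = no source
    field_offsets = [0, 0, -1]    # column offset within that source batch

    columns = []
    for i, (b_idx, f_idx) in enumerate(zip(row_offsets, field_offsets)):
        if b_idx >= 0:
            columns.append(batches[b_idx].column(f_idx))
        else:
            # no file provides this field: emit a typed null column
            columns.append(pa.nulls(batch0.num_rows, type=schema.field(i).type))

    merged = pa.RecordBatch.from_arrays(columns, schema=schema)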

paimon-python/pypaimon/read/reader/data_file_batch_reader.py

Lines changed: 114 additions & 18 deletions
@@ -48,6 +48,8 @@ def __init__(self, format_reader: RecordBatchReader, index_mapping: List[int], p
         self.first_row_id = first_row_id
         self.max_sequence_number = max_sequence_number
         self.system_fields = system_fields
+        self.requested_field_names = [field.name for field in fields] if fields else None
+        self.fields = fields

     def read_arrow_batch(self, start_idx=None, end_idx=None) -> Optional[RecordBatch]:
         if isinstance(self.format_reader, FormatBlobReader):
@@ -57,11 +59,20 @@ def read_arrow_batch(self, start_idx=None, end_idx=None) -> Optional[RecordBatch
         if record_batch is None:
             return None

+        num_rows = record_batch.num_rows
         if self.partition_info is None and self.index_mapping is None:
             if self.row_tracking_enabled and self.system_fields:
                 record_batch = self._assign_row_tracking(record_batch)
             return record_batch

+        # Identity mapping with no projection: nothing to remap.
+        if (self.partition_info is None and self.index_mapping is not None
+                and not self.requested_field_names):
+            ncol = record_batch.num_columns
+            if len(self.index_mapping) == ncol and self.index_mapping == list(range(ncol)):
+                if self.row_tracking_enabled and self.system_fields:
+                    record_batch = self._assign_row_tracking(record_batch)
+                return record_batch
+
         inter_arrays = []
         inter_names = []
         num_rows = record_batch.num_rows
@@ -79,28 +90,101 @@ def read_arrow_batch(self, start_idx=None, end_idx=None) -> Optional[RecordBatch
                 inter_arrays.append(record_batch.column(real_index))
                 inter_names.append(record_batch.schema.field(real_index).name)
         else:
-            inter_arrays = record_batch.columns
-            inter_names = record_batch.schema.names
-
-        if self.index_mapping is not None:
+            inter_arrays = list(record_batch.columns)
+            inter_names = list(record_batch.schema.names)
+
+        # Reorder to the requested field order, padding missing fields with nulls.
+        if self.requested_field_names is not None:
+            if (len(inter_names) <= len(self.requested_field_names)
+                    and inter_names == self.requested_field_names[:len(inter_names)]):
+                ordered_arrays = list(inter_arrays)
+                ordered_names = list(inter_names)
+                for name in self.requested_field_names[len(inter_names):]:
+                    field = self.schema_map.get(name)
+                    ordered_arrays.append(
+                        pa.nulls(num_rows, type=field.type) if field is not None else pa.nulls(num_rows)
+                    )
+                    ordered_names.append(name)
+                inter_arrays = ordered_arrays
+                inter_names = ordered_names
+            else:
+                ordered_arrays = []
+                ordered_names = []
+                for name in self.requested_field_names:
+                    if name in inter_names:
+                        ordered_arrays.append(inter_arrays[inter_names.index(name)])
+                        ordered_names.append(name)
+                    else:
+                        field = self.schema_map.get(name)
+                        ordered_arrays.append(
+                            pa.nulls(num_rows, type=field.type) if field is not None else pa.nulls(num_rows)
+                        )
+                        ordered_names.append(name)
+                inter_arrays = ordered_arrays
+                inter_names = ordered_names
+
+        if self.index_mapping is not None and not (
+                self.requested_field_names is not None and inter_names == self.requested_field_names):
             mapped_arrays = []
             mapped_names = []
+            partition_names = set()
+            if self.partition_info:
+                for i in range(len(self.partition_info.partition_fields)):
+                    partition_names.add(self.partition_info.partition_fields[i].name)
+
+            non_partition_indices = [idx for idx, name in enumerate(inter_names) if name not in partition_names]
             for i, real_index in enumerate(self.index_mapping):
-                if 0 <= real_index < len(inter_arrays):
-                    mapped_arrays.append(inter_arrays[real_index])
-                    mapped_names.append(inter_names[real_index])
+                if 0 <= real_index < len(non_partition_indices):
+                    actual_index = non_partition_indices[real_index]
+                    mapped_arrays.append(inter_arrays[actual_index])
+                    mapped_names.append(inter_names[actual_index])
                 else:
                     null_array = pa.nulls(num_rows)
                     mapped_arrays.append(null_array)
                     mapped_names.append(f"null_col_{i}")

+            # Re-attach partition columns, which index_mapping does not cover.
+            if self.partition_info:
+                partition_names = set()
+                partition_arrays_map = {}
+                for i in range(len(inter_names)):
+                    field_name = inter_names[i]
+                    if field_name in partition_names or (self.partition_info and any(
+                            self.partition_info.partition_fields[j].name == field_name
+                            for j in range(len(self.partition_info.partition_fields))
+                    )):
+                        partition_names.add(field_name)
+                        partition_arrays_map[field_name] = inter_arrays[i]
+
+                if self.requested_field_names:
+                    final_arrays = []
+                    final_names = []
+                    mapped_name_to_array = {name: arr for name, arr in zip(mapped_names, mapped_arrays)}
+
+                    for name in self.requested_field_names:
+                        if name in mapped_name_to_array:
+                            final_arrays.append(mapped_name_to_array[name])
+                            final_names.append(name)
+                        elif name in partition_arrays_map:
+                            final_arrays.append(partition_arrays_map[name])
+                            final_names.append(name)
+
+                    inter_arrays = final_arrays
+                    inter_names = final_names
+                else:
+                    mapped_name_set = set(mapped_names)
+                    for name, arr in partition_arrays_map.items():
+                        if name not in mapped_name_set:
+                            mapped_arrays.append(arr)
+                            mapped_names.append(name)
+                    inter_arrays = mapped_arrays
+                    inter_names = mapped_names
+            else:
+                inter_arrays = mapped_arrays
+                inter_names = mapped_names
+
         if self.system_primary_key:
             for i in range(len(self.system_primary_key)):
-                if not mapped_names[i].startswith("_KEY_"):
-                    mapped_names[i] = f"_KEY_{mapped_names[i]}"
-
-        inter_arrays = mapped_arrays
-        inter_names = mapped_names
+                if i < len(inter_names) and not inter_names[i].startswith("_KEY_"):
+                    inter_names[i] = f"_KEY_{inter_names[i]}"

         # to contains 'not null' property
         final_fields = []
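
The remapping above is name-driven: requested fields come back in their requested order, and a field the file lacks becomes a typed null column when the schema map knows its type. A standalone pyarrow sketch of that rule, with illustrative names (schema_map plays the role of self.schema_map):

    import pyarrow as pa

    batch = pa.record_batch([pa.array([1, 2]), pa.array(["x", "y"])], names=["id", "v"])
    requested = ["v", "id", "added_later"]   # hypothetical requested order
    schema_map = {"added_later": pa.field("added_later", pa.float64())}

    arrays, names = [], []
    for name in requested:
        if name in batch.schema.names:
            arrays.append(batch.column(batch.schema.get_field_index(name)))
        else:
            field = schema_map.get(name)
            # missing field: typed nulls when the type is known, untyped otherwise
            arrays.append(pa.nulls(batch.num_rows, type=field.type) if field else pa.nulls(batch.num_rows))
        names.append(name)

    reordered = pa.RecordBatch.from_arrays(arrays, names=names)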
@@ -109,6 +193,9 @@ def read_arrow_batch(self, start_idx=None, end_idx=None) -> Optional[RecordBatch
             target_field = self.schema_map.get(name)
             if not target_field:
                 target_field = pa.field(name, array.type)
+            else:
+                if name in (SpecialFields.ROW_ID.name, SpecialFields.SEQUENCE_NUMBER.name):
+                    target_field = pa.field(name, target_field.type, nullable=False)
             final_fields.append(target_field)
         final_schema = pa.schema(final_fields)
         record_batch = pa.RecordBatch.from_arrays(inter_arrays, schema=final_schema)
@@ -122,20 +209,20 @@ def read_arrow_batch(self, start_idx=None, end_idx=None) -> Optional[RecordBatch
     def _assign_row_tracking(self, record_batch: RecordBatch) -> RecordBatch:
         """Assign row tracking meta fields (_ROW_ID and _SEQUENCE_NUMBER)."""
         arrays = list(record_batch.columns)
+        num_cols = len(arrays)

-        # Handle _ROW_ID field
         if SpecialFields.ROW_ID.name in self.system_fields.keys():
             idx = self.system_fields[SpecialFields.ROW_ID.name]
-            # Create a new array that fills with computed row IDs
-            arrays[idx] = pa.array(range(self.first_row_id, self.first_row_id + record_batch.num_rows), type=pa.int64())
+            if idx < num_cols:
+                arrays[idx] = pa.array(range(self.first_row_id, self.first_row_id + record_batch.num_rows), type=pa.int64())

-        # Handle _SEQUENCE_NUMBER field
         if SpecialFields.SEQUENCE_NUMBER.name in self.system_fields.keys():
             idx = self.system_fields[SpecialFields.SEQUENCE_NUMBER.name]
-            # Create a new array that fills with max_sequence_number
-            arrays[idx] = pa.repeat(self.max_sequence_number, record_batch.num_rows)
+            if idx < num_cols:
+                arrays[idx] = pa.repeat(self.max_sequence_number, record_batch.num_rows)

         names = record_batch.schema.names
-        table = None
-        for i, name in enumerate(names):
-            field = pa.field(
@@ -147,6 +234,15 @@ def _assign_row_tracking(self, record_batch: RecordBatch) -> RecordBatch:
-            else:
-                table = table.append_column(field, arrays[i])
-        return table.to_batches()[0]
+        fields = []
+        for i, name in enumerate(names):
+            input_field = record_batch.schema.field(name)
+            fields.append(pa.field(name, arrays[i].type, nullable=input_field.nullable))
+        if fields:
+            return pa.RecordBatch.from_arrays(arrays, schema=pa.schema(fields))
+        return pa.RecordBatch.from_arrays(arrays, names=names)

     def close(self) -> None:
         self.format_reader.close()
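
For the resolved _assign_row_tracking, a standalone pyarrow sketch of the fill it performs (system field names match the diff; the surrounding reader state is simulated):

    import pyarrow as pa

    # Simulated reader state: column indexes of the system fields and their values.
    system_fields = {"_ROW_ID": 1, "_SEQUENCE_NUMBER": 2}
    first_row_id, max_sequence_number = 100, 7

    batch = pa.record_batch(
        [pa.array(["a", "b", "c"]), pa.nulls(3, pa.int64()), pa.nulls(3, pa.int64())],
        names=["v", "_ROW_ID", "_SEQUENCE_NUMBER"],
    )

    arrays = list(batch.columns)
    # consecutive row ids: first_row_id, first_row_id + 1, ...
    arrays[system_fields["_ROW_ID"]] = pa.array(
        range(first_row_id, first_row_id + batch.num_rows), type=pa.int64()
    )
    # constant sequence number for every row in the batch
    arrays[system_fields["_SEQUENCE_NUMBER"]] = pa.repeat(max_sequence_number, batch.num_rows)

    tracked = pa.RecordBatch.from_arrays(arrays, names=batch.schema.names)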
