From c794e99ad42689bce887a861eed449ab0e460215 Mon Sep 17 00:00:00 2001 From: Vinay Mehta <14790730+vimeh@users.noreply.github.com> Date: Wed, 25 Feb 2026 13:22:35 -0800 Subject: [PATCH] fix(arrow-pg): emit NULL field for null struct values instead of skipping encoding `encode_struct` returned `Ok(())` for null struct values without calling `encoder.encode_field()`. This skipped writing a NULL indicator to the pgwire DataRow, corrupting the column count and breaking client connections. The fix calls `encoder.encode_field(&None::<&[i8]>, ...)` for null structs, consistent with how List and Dictionary types already handle nulls in `encode_value`. Adds a regression test that verifies `encode_field` is called exactly once when encoding a null struct row. --- arrow-pg/src/encoder.rs | 53 ++++++++++++++++++++++++++++++++++ arrow-pg/src/struct_encoder.rs | 2 +- 2 files changed, 54 insertions(+), 1 deletion(-) diff --git a/arrow-pg/src/encoder.rs b/arrow-pg/src/encoder.rs index 971834f..536c16c 100644 --- a/arrow-pg/src/encoder.rs +++ b/arrow-pg/src/encoder.rs @@ -535,6 +535,7 @@ pub fn encode_value( #[cfg(test)] mod tests { + use arrow::buffer::NullBuffer; use bytes::BytesMut; use pgwire::{api::results::FieldFormat, types::format::FormatOptions}; use postgres_types::Type; @@ -589,6 +590,58 @@ mod tests { assert!(encoder.encoded_value == val); } + #[test] + fn encode_struct_null_emits_field() { + // Regression test: encode_struct must call encoder.encode_field for + // NULL struct values so a NULL indicator is written to the DataRow. + // Previously it returned Ok(()) without encoding, corrupting the + // column count. + + #[derive(Default)] + struct CountingEncoder { + call_count: usize, + } + + impl Encoder for CountingEncoder { + type Item = (); + + fn encode_field(&mut self, _value: &T, _pg_field: &FieldInfo) -> PgWireResult<()> + where + T: ToSql + ToSqlText + Sized, + { + self.call_count += 1; + Ok(()) + } + + fn take_row(&mut self) -> Self::Item {} + } + + let fields = vec![ + Arc::new(Field::new("a", DataType::Utf8, true)), + Arc::new(Field::new("b", DataType::Utf8, true)), + ]; + let a = Arc::new(StringArray::from(vec![Some("hello"), Some("x")])) as Arc; + let b = Arc::new(StringArray::from(vec![Some("world"), Some("y")])) as Arc; + + // Row 0: non-null struct, Row 1: null struct + let null_buffer = NullBuffer::from(vec![true, false]); + let struct_arr: Arc = Arc::new( + StructArray::try_new(fields.clone().into(), vec![a, b], Some(null_buffer)).unwrap(), + ); + + let arrow_field = Field::new("s", DataType::Struct(fields.into()), true); + let pg_field = FieldInfo::new("s".to_string(), None, None, Type::TEXT, FieldFormat::Text); + + // Encode the NULL row (index 1). + let mut encoder = CountingEncoder::default(); + let result = encode_value(&mut encoder, &struct_arr, 1, &arrow_field, &pg_field); + assert!(result.is_ok()); + assert_eq!( + encoder.call_count, 1, + "encode_field must be called exactly once for a NULL struct to emit a NULL indicator" + ); + } + #[test] fn test_get_time32_second_value() { let array = Time32SecondArray::from_iter_values([3723_i32]); diff --git a/arrow-pg/src/struct_encoder.rs b/arrow-pg/src/struct_encoder.rs index 6af1f4e..8f8f956 100644 --- a/arrow-pg/src/struct_encoder.rs +++ b/arrow-pg/src/struct_encoder.rs @@ -122,7 +122,7 @@ pub(crate) fn encode_struct( ) -> PgWireResult<()> { let arr = arr.as_any().downcast_ref::().unwrap(); if arr.is_null(idx) { - return Ok(()); + return encoder.encode_field(&None::<&[i8]>, parent_pg_field_info); } let fields = arrow_fields