diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc index 653f28f64bd..f4a4e2408d5 100644 --- a/cpp/src/parquet/column_writer.cc +++ b/cpp/src/parquet/column_writer.cc @@ -1809,7 +1809,8 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, auto add_levels = [](std::vector& level_histogram, const int16_t* levels, int64_t num_levels, int16_t max_level) { - if (max_level == 0) { + ARROW_DCHECK(levels != nullptr || num_levels == 0 || max_level == 0); + if (max_level == 0 || levels == nullptr) { return; } ARROW_DCHECK_EQ(static_cast(max_level) + 1, level_histogram.size()); diff --git a/cpp/src/parquet/size_statistics_test.cc b/cpp/src/parquet/size_statistics_test.cc index 8c36d6b680d..2c07b08b559 100644 --- a/cpp/src/parquet/size_statistics_test.cc +++ b/cpp/src/parquet/size_statistics_test.cc @@ -60,6 +60,43 @@ TEST(SizeStatistics, UpdateLevelHistogram) { UpdateLevelHistogram(std::vector{}, histogram); EXPECT_THAT(histogram, ::testing::ElementsAre(3, 3, 2)); } + { + // Empty span should be a no-op. + std::vector histogram(2, 0); + UpdateLevelHistogram(std::span{}, histogram); + EXPECT_THAT(histogram, ::testing::ElementsAre(0, 0)); + } +} + +// Regression test for GH-49928: WriteBatch(0, nullptr, ...) on a nullable column +// must not crash or DCHECK-fail, even though max_definition_level > 0. 
+TEST(SizeStatistics, NullLevelsInColumnWriter) {
+  auto node = schema::Int32("a", Repetition::OPTIONAL);  // nullable column
+  auto schema_node = schema::GroupNode::Make("schema", Repetition::REQUIRED, {node});
+
+  auto props = WriterProperties::Builder()
+                   .enable_write_page_index()
+                   ->enable_statistics()
+                   ->set_size_statistics_level(SizeStatisticsLevel::PageAndColumnChunk)
+                   ->build();
+
+  auto sink = CreateOutputStream();
+  auto writer = ParquetFileWriter::Open(sink, std::dynamic_pointer_cast<schema::GroupNode>(schema_node), props);
+  auto rg = writer->AppendRowGroup();
+  auto col = static_cast<Int32Writer*>(rg->NextColumn());
+
+  // Empty write: num_values=0 with nullptr levels — must not crash.
+  col->WriteBatch(/*num_values=*/0, /*def_levels=*/nullptr,
+                  /*rep_levels=*/nullptr, /*values=*/nullptr);
+
+  // Follow up with a real write (one value, def level 1) so the file is valid.
+  std::vector<int32_t> values = {42};
+  std::vector<int16_t> def_levels = {1};
+  col->WriteBatch(1, def_levels.data(), nullptr, values.data());
+
+  col->Close();
+  rg->Close();
+  writer->Close();
 }
 
 TEST(SizeStatistics, ThriftSerDe) {