Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion cpp/src/parquet/column_writer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1809,7 +1809,8 @@ class TypedColumnWriterImpl : public ColumnWriterImpl,

auto add_levels = [](std::vector<int64_t>& level_histogram, const int16_t* levels,
int64_t num_levels, int16_t max_level) {
if (max_level == 0) {
ARROW_DCHECK(levels != nullptr || num_levels == 0 || max_level == 0);
if (max_level == 0 || levels == nullptr) {
return;
}
ARROW_DCHECK_EQ(static_cast<size_t>(max_level) + 1, level_histogram.size());
Expand Down
37 changes: 37 additions & 0 deletions cpp/src/parquet/size_statistics_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,43 @@ TEST(SizeStatistics, UpdateLevelHistogram) {
UpdateLevelHistogram(std::vector<int16_t>{}, histogram);
EXPECT_THAT(histogram, ::testing::ElementsAre(3, 3, 2));
}
{
// Empty span should be a no-op.
std::vector<int64_t> histogram(2, 0);
UpdateLevelHistogram(std::span<const int16_t>{}, histogram);
EXPECT_THAT(histogram, ::testing::ElementsAre(0, 0));
}
Comment thread
Diveyam-Mishra marked this conversation as resolved.
}

// Regression test for GH-49928.
//
// Calling WriteBatch(0, nullptr, ...) on a nullable column (i.e. one whose
// max_definition_level is > 0) must neither crash nor trip a DCHECK.
TEST(SizeStatistics, NullLevelsInColumnWriter) {
  // Schema: a REQUIRED root group holding a single OPTIONAL int32 field "a".
  auto optional_field = schema::Int32("a", Repetition::OPTIONAL);
  auto root = schema::GroupNode::Make("schema", Repetition::REQUIRED, {optional_field});

  // Enable the page index, statistics, and size statistics at
  // page + column-chunk granularity on the writer.
  WriterProperties::Builder properties_builder;
  properties_builder.enable_write_page_index();
  properties_builder.enable_statistics();
  properties_builder.set_size_statistics_level(SizeStatisticsLevel::PageAndColumnChunk);
  auto writer_properties = properties_builder.build();

  auto output = CreateOutputStream();
  auto file_writer = ParquetFileWriter::Open(
      output, std::dynamic_pointer_cast<schema::GroupNode>(root), writer_properties);
  auto row_group = file_writer->AppendRowGroup();
  auto int32_writer = static_cast<Int32Writer*>(row_group->NextColumn());

  // Step 1: an empty batch where every buffer pointer is null. This is the
  // call that must not crash.
  int32_writer->WriteBatch(/*num_values=*/0, /*def_levels=*/nullptr,
                           /*rep_levels=*/nullptr, /*values=*/nullptr);

  // Step 2: one real, non-null value so that the resulting file is valid.
  std::vector<int32_t> one_value{42};
  std::vector<int16_t> one_def_level{1};
  int32_writer->WriteBatch(1, one_def_level.data(), nullptr, one_value.data());

  // Tear down from the innermost writer outwards.
  int32_writer->Close();
  row_group->Close();
  file_writer->Close();
}

TEST(SizeStatistics, ThriftSerDe) {
Expand Down