Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
5ec03a9
Fix Iceberg read optimization returning NULLs for stats-less manifests
May 20, 2026
d3c0eb5
Merge branch 'antalya-26.3' into fix/iceberg-empty-stats-26.3
il9ue Jun 22, 2026
056eb1a
Rework Iceberg empty-stats tests per review feedback
Jun 22, 2026
17ef6e7
cast like insert select
arthurpassos May 11, 2026
c88f69e
tmp
arthurpassos May 22, 2026
67489f2
fix build and tests
arthurpassos Jun 8, 2026
245c0bb
yet another impl
arthurpassos Jun 15, 2026
8d2b086
fix tests and background setting
arthurpassos Jun 15, 2026
4100ecd
fix tests
arthurpassos Jun 15, 2026
914de72
some more test fixes
arthurpassos Jun 15, 2026
6f822c8
add docs
arthurpassos Jun 15, 2026
f521a95
improve docs with a warning
arthurpassos Jun 16, 2026
41c64ed
hmm... maybe incomplete, gotta continue it tomorrow
arthurpassos Jun 17, 2026
3b177ed
add tests covering corner tricky case
arthurpassos Jun 17, 2026
4a75d52
rmv task ttl
arthurpassos Jun 16, 2026
a676814
cleanup
arthurpassos Jun 16, 2026
16dac3c
missing piece
arthurpassos Jun 16, 2026
978f9e1
Skip store_data.py pre-hook in MasterCI to avoid Finish Workflow temp…
CarlosFelipeOR Jun 18, 2026
0322774
Add 'PRs in Release' table to report
strtgbb Mar 27, 2026
c175440
highlight labels for unverified PRs
strtgbb Mar 27, 2026
a7113d0
report: fix _find_rebase_baseline
strtgbb Mar 31, 2026
458420e
ensure extra git history is fetched in get_prs_in_release_dataframe
strtgbb Apr 1, 2026
27215e6
no longer consider labels no-verification-needed and cicd
strtgbb Apr 1, 2026
7618b0b
simplify verified check
strtgbb Apr 1, 2026
8d7c262
swap columns
strtgbb May 18, 2026
6e748a7
add tabbed navigation to report
strtgbb Jun 11, 2026
a4f0276
support # in url with tabs, improve tab button visibility
strtgbb Jun 15, 2026
46c7956
switch styling from pill to tab
strtgbb Jun 16, 2026
83f75c4
Escape PR title and labels in PRs in Release table to prevent HTML in…
CarlosFelipeOR Jun 23, 2026
3e92ac5
Fix Grype Scan: pin boto3==1.43.33 to avoid botocore runtime ImportEr…
CarlosFelipeOR Jun 23, 2026
9b79223
fix regression not respecting arm skip flag
strtgbb Jun 26, 2026
46c8d32
cache vended catalog credentials
zvonand Jun 18, 2026
c10599b
add test
zvonand Jun 18, 2026
6b15b03
Address review (#1895)
il9ue Jun 30, 2026
2bc7944
Merge branch 'antalya-26.3' into fix/iceberg-empty-stats-26.3
il9ue Jun 30, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 31 additions & 28 deletions src/Storages/ObjectStorage/StorageObjectStorageSource.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -905,40 +905,43 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade
}
}
}
for (const auto & column : requested_columns_list)
if (file_meta_data.has_value() && !file_meta_data.value()->columns_info.empty())
{
const auto & column_name = column.first;
for (const auto & column : requested_columns_list)
{
const auto & column_name = column.first;

if (file_meta_data.value()->columns_info.contains(column_name))
continue;
if (file_meta_data.value()->columns_info.contains(column_name))
continue;

if (!column.second.second.type->isNullable())
continue;
if (!column.second.second.type->isNullable())
continue;

/// With View over Iceberg table we have something like 'materialize(time)' as column_name
/// Simple cheap check
if (column_name.starts_with("materialize(") && column_name.ends_with(")"))
continue;
/// With View over Iceberg table we have something like 'materialize(time)' as column_name
/// Simple cheap check
if (column_name.starts_with("materialize(") && column_name.ends_with(")"))
continue;

/// Skip columns produced by prewhere or row-level filter expressions —
/// they are computed at read time, not stored in the file.
if (format_filter_info
&& ((format_filter_info->prewhere_info && column_name == format_filter_info->prewhere_info->prewhere_column_name)
|| (format_filter_info->row_level_filter && column_name == format_filter_info->row_level_filter->column_name)))
continue;
/// Skip columns produced by prewhere or row-level filter expressions —
/// they are computed at read time, not stored in the file.
if (format_filter_info
&& ((format_filter_info->prewhere_info && column_name == format_filter_info->prewhere_info->prewhere_column_name)
|| (format_filter_info->row_level_filter && column_name == format_filter_info->row_level_filter->column_name)))
continue;

/// Column is nullable and absent in file
constant_columns_with_values[column.second.first] =
ConstColumnWithValue{
column.second.second,
Field()
};
constant_columns.insert(column_name);

LOG_DEBUG(log, "In file {} constant column '{}' type '{}' with value 'NULL'",
object_info->getPath(),
column_name,
column.second.second.type);
/// Column is nullable and absent in file
constant_columns_with_values[column.second.first] =
ConstColumnWithValue{
column.second.second,
Field()
};
constant_columns.insert(column_name);

LOG_DEBUG(log, "In file {} constant column '{}' type '{}' with value 'NULL'",
object_info->getPath(),
column_name,
column.second.second.type);
}
}
}

Expand Down
49 changes: 49 additions & 0 deletions tests/integration/helpers/iceberg_avro_schemas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import json

# Avro record schema, serialized to a JSON string, for an Iceberg v2 manifest
# list entry. Restricted to the fields ClickHouse reads; each field is declared
# as a plain (non-union) primitive type, in the order listed below.
MANIFEST_LIST_SCHEMA_STR = json.dumps(
    dict(
        type="record",
        name="manifest_file",
        fields=[
            dict(name=field_name, type=avro_type)
            for field_name, avro_type in (
                ("manifest_path", "string"),
                ("manifest_length", "long"),
                ("partition_spec_id", "int"),
                ("content", "int"),
                ("sequence_number", "long"),
                ("min_sequence_number", "long"),
                ("added_snapshot_id", "long"),
                ("added_files_count", "int"),
                ("existing_files_count", "int"),
                ("deleted_files_count", "int"),
                ("added_rows_count", "long"),
                ("existing_rows_count", "long"),
                ("deleted_rows_count", "long"),
            )
        ],
    )
)

# Avro record schema, serialized to a JSON string, for a manifest entry that
# carries no optional statistics fields (stats-less -> empty columns_info).
# The nested "data_file" record declares only content, path, format, an empty
# partition record, record count and file size.
MANIFEST_ENTRY_NO_STATS_SCHEMA_STR = json.dumps(
    dict(
        type="record",
        name="manifest_entry",
        fields=[
            dict(name="status", type="int"),
            # These three fields share the same nullable-long union type.
            *(
                dict(name=optional_long, type=["null", "long"])
                for optional_long in (
                    "snapshot_id",
                    "sequence_number",
                    "file_sequence_number",
                )
            ),
            dict(
                name="data_file",
                type=dict(
                    type="record",
                    name="r2",
                    fields=[
                        dict(name="content", type="int"),
                        dict(name="file_path", type="string"),
                        dict(name="file_format", type="string"),
                        # Partition struct with no fields.
                        dict(
                            name="partition",
                            type=dict(type="record", name="r102", fields=[]),
                        ),
                        dict(name="record_count", type="long"),
                        dict(name="file_size_in_bytes", type="long"),
                    ],
                ),
            ),
        ],
    )
)
Loading
Loading