Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
62 commits
Select commit Hold shift + click to select a range
665b970
feat!: Support compression codecs for JSON metadata and Avro
emkornfield Nov 12, 2025
091d3bc
fmt
emkornfield Nov 13, 2025
41a8c1c
fix clippy
emkornfield Nov 13, 2025
51e781e
clippy again
emkornfield Nov 13, 2025
253bf59
wip
emkornfield Nov 16, 2025
8bdb52d
address comments
emkornfield Nov 16, 2025
9d27116
no clone needed
emkornfield Nov 16, 2025
d1ee0b2
test compression works
emkornfield Nov 16, 2025
393622f
comments
emkornfield Nov 17, 2025
46fdb8e
update tests
emkornfield Nov 17, 2025
ec96917
address comments
emkornfield Nov 19, 2025
46efa9e
Remove Avro compression changes, keep only JSON metadata compression
emkornfield Nov 19, 2025
e96fe1a
address some comments
emkornfield Nov 20, 2025
53d0997
don't reference deprecated method
emkornfield Nov 20, 2025
4f033e8
don't call deprecated method
emkornfield Nov 20, 2025
2db8b89
with_next_version
emkornfield Nov 20, 2025
8b63161
refactor
emkornfield Nov 20, 2025
9308050
fix imports
emkornfield Nov 20, 2025
9bd46d6
fix compile
emkornfield Nov 20, 2025
f3276ce
fix clipp
emkornfield Nov 21, 2025
27cbc0b
remove ;
emkornfield Nov 21, 2025
bdc6bb5
Update crates/iceberg/src/spec/table_properties.rs
emkornfield Nov 21, 2025
05fdd81
address comment on .gz
emkornfield Nov 21, 2025
5d6404c
Merge branch 'feat/metadata-compression' of github.com-personal:emkor…
emkornfield Nov 21, 2025
207c0bf
add tests
emkornfield Nov 21, 2025
e83c449
fixes
emkornfield Nov 21, 2025
d07e628
Merge branch 'main' into feat/metadata-compression
Xuanwo Dec 5, 2025
eda6e6d
address some comments
emkornfield Dec 11, 2025
95be088
imports
emkornfield Dec 11, 2025
54e59b6
fmt
emkornfield Dec 11, 2025
7446167
Merge branch 'feat/metadata-compression' of github.com-personal:emkor…
emkornfield Dec 11, 2025
208ef58
fmt
emkornfield Dec 16, 2025
422bb87
don't hard-code in tests
emkornfield Dec 16, 2025
025f669
fmt
emkornfield Dec 16, 2025
d7f22f4
fix clippy
emkornfield Dec 16, 2025
34e9a34
clippy
emkornfield Dec 16, 2025
dfac0a4
more clippy
emkornfield Dec 16, 2025
1dd132a
Merge remote-tracking branch 'databricks/main' into feat/metadata-com…
emkornfield Jan 9, 2026
3b4b2ee
make constant
emkornfield Jan 20, 2026
eaf1570
wip
emkornfield Jan 20, 2026
889cd03
one more comment
emkornfield Jan 20, 2026
268d830
fmt
emkornfield Jan 20, 2026
2fd720f
Merge remote-tracking branch 'databricks/main' into feat/metadata-com…
emkornfield Jan 20, 2026
664e541
replace more magic constants
emkornfield Jan 21, 2026
1bfa1b8
Merge remote-tracking branch 'databricks/main' into feat/metadata-com…
emkornfield Jan 29, 2026
86854a8
wip
emkornfield Jan 29, 2026
4e0caed
fix
emkornfield Jan 29, 2026
0b936e9
cleanup
emkornfield Jan 29, 2026
e17b626
wip
emkornfield Jan 29, 2026
f321a03
fmt
emkornfield Jan 30, 2026
4b102e1
Clippy
emkornfield Jan 30, 2026
fa7f1a9
wip
emkornfield Feb 5, 2026
f2595df
use CompressionCodec on metadata location
emkornfield Feb 5, 2026
24ccd8f
change write_to
emkornfield Feb 5, 2026
0bf562b
Merge remote-tracking branch 'databricks/main' into feat/metadata-com…
emkornfield Feb 5, 2026
ab8a4b5
fix nits
emkornfield Feb 5, 2026
91df534
fix s3tables compilation
emkornfield Feb 5, 2026
19209ce
address comments
emkornfield Feb 10, 2026
3588283
fix compilation
emkornfield Feb 10, 2026
bd54626
fmt
emkornfield Feb 10, 2026
e1d8d0b
fix
emkornfield Feb 10, 2026
8aa2df6
Merge branch 'main' into feat/metadata-compression
emkornfield Feb 12, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions crates/catalog/glue/src/catalog.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

use std::collections::HashMap;
use std::fmt::Debug;
use std::str::FromStr;

use anyhow::anyhow;
use async_trait::async_trait;
Expand Down Expand Up @@ -526,14 +527,14 @@ impl Catalog for GlueCatalog {
let metadata = TableMetadataBuilder::from_table_creation(creation)?
.build()?
.metadata;
let metadata_location =
MetadataLocation::new_with_table_location(location.clone()).to_string();
let metadata_location = MetadataLocation::new_with_metadata(location.clone(), &metadata);

metadata.write_to(&self.file_io, &metadata_location).await?;

let metadata_location_str = metadata_location.to_string();
let glue_table = convert_to_glue_table(
&table_name,
metadata_location.clone(),
metadata_location_str.clone(),
&metadata,
metadata.properties(),
None,
Expand All @@ -551,7 +552,7 @@ impl Catalog for GlueCatalog {

Table::builder()
.file_io(self.file_io())
.metadata_location(metadata_location)
.metadata_location(metadata_location_str)
.metadata(metadata)
.identifier(TableIdent::new(NamespaceIdent::new(db_name), table_name))
.build()
Expand Down Expand Up @@ -789,12 +790,13 @@ impl Catalog for GlueCatalog {
let current_metadata_location = current_table.metadata_location_result()?.to_string();

let staged_table = commit.apply(current_table)?;
let staged_metadata_location = staged_table.metadata_location_result()?;
let staged_metadata_location_str = staged_table.metadata_location_result()?;
let staged_metadata_location = MetadataLocation::from_str(staged_metadata_location_str)?;

// Write new metadata
staged_table
.metadata()
.write_to(staged_table.file_io(), staged_metadata_location)
.write_to(staged_table.file_io(), &staged_metadata_location)
.await?;

// Persist staged table to Glue with optimistic locking
Expand Down
13 changes: 9 additions & 4 deletions crates/catalog/glue/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -306,8 +306,6 @@ mod tests {
fn test_convert_to_glue_table() -> Result<()> {
let table_name = "my_table".to_string();
let location = "s3a://warehouse/hive".to_string();
let metadata_location = MetadataLocation::new_with_table_location(location).to_string();
let properties = HashMap::new();
let schema = Schema::builder()
.with_schema_id(1)
.with_fields(vec![
Expand All @@ -316,6 +314,8 @@ mod tests {
.build()?;

let metadata = create_metadata(schema)?;
let metadata_location =
MetadataLocation::new_with_metadata(location, &metadata).to_string();

let parameters = HashMap::from([
(ICEBERG_FIELD_ID.to_string(), "1".to_string()),
Expand All @@ -336,8 +336,13 @@ mod tests {
.location(metadata.location())
.build();

let result =
convert_to_glue_table(&table_name, metadata_location, &metadata, &properties, None)?;
let result = convert_to_glue_table(
&table_name,
metadata_location,
&metadata,
metadata.properties(),
None,
)?;

assert_eq!(result.name(), &table_name);
assert_eq!(result.description(), None);
Expand Down
8 changes: 4 additions & 4 deletions crates/catalog/hms/src/catalog.rs
Original file line number Diff line number Diff line change
Expand Up @@ -442,17 +442,17 @@ impl Catalog for HmsCatalog {
.build()?
.metadata;

let metadata_location =
MetadataLocation::new_with_table_location(location.clone()).to_string();
let metadata_location = MetadataLocation::new_with_metadata(location.clone(), &metadata);

metadata.write_to(&self.file_io, &metadata_location).await?;

let metadata_location_str = metadata_location.to_string();
let hive_table = convert_to_hive_table(
db_name.clone(),
metadata.current_schema(),
table_name.clone(),
location,
metadata_location.clone(),
metadata_location_str.clone(),
metadata.properties(),
)?;

Expand All @@ -464,7 +464,7 @@ impl Catalog for HmsCatalog {

Table::builder()
.file_io(self.file_io())
.metadata_location(metadata_location)
.metadata_location(metadata_location_str)
.metadata(metadata)
.identifier(TableIdent::new(NamespaceIdent::new(db_name), table_name))
.build()
Expand Down
18 changes: 14 additions & 4 deletions crates/catalog/hms/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -311,8 +311,8 @@ fn get_current_time() -> Result<i32> {

#[cfg(test)]
mod tests {
use iceberg::spec::{NestedField, PrimitiveType, Type};
use iceberg::{MetadataLocation, Namespace, NamespaceIdent};
use iceberg::spec::{NestedField, PrimitiveType, TableMetadataBuilder, Type};
use iceberg::{MetadataLocation, Namespace, NamespaceIdent, TableCreation};

use super::*;

Expand Down Expand Up @@ -343,8 +343,6 @@ mod tests {
let db_name = "my_db".to_string();
let table_name = "my_table".to_string();
let location = "s3a://warehouse/hms".to_string();
let metadata_location =
MetadataLocation::new_with_table_location(location.clone()).to_string();
let properties = HashMap::new();
let schema = Schema::builder()
.with_schema_id(1)
Expand All @@ -354,6 +352,18 @@ mod tests {
])
.build()?;

let table_creation = TableCreation::builder()
.name(table_name.clone())
.location(location.clone())
.schema(schema.clone())
.properties(properties.clone())
.build();
let metadata = TableMetadataBuilder::from_table_creation(table_creation)?
.build()?
.metadata;
let metadata_location =
MetadataLocation::new_with_metadata(location.clone(), &metadata).to_string();

let result = convert_to_hive_table(
db_name.clone(),
&schema,
Expand Down
17 changes: 10 additions & 7 deletions crates/catalog/s3tables/src/catalog.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

use std::collections::HashMap;
use std::future::Future;
use std::str::FromStr;

use async_trait::async_trait;
use aws_sdk_s3tables::operation::create_table::CreateTableOutput;
Expand Down Expand Up @@ -476,25 +477,26 @@ impl Catalog for S3TablesCatalog {
let metadata = TableMetadataBuilder::from_table_creation(creation)?
.build()?
.metadata;
let metadata_location =
MetadataLocation::new_with_table_location(table_location).to_string();

let metadata_location = MetadataLocation::new_with_metadata(table_location, &metadata);
metadata.write_to(&self.file_io, &metadata_location).await?;

// update metadata location
let metadata_location_str = metadata_location.to_string();
self.s3tables_client
.update_table_metadata_location()
.table_bucket_arn(self.config.table_bucket_arn.clone())
.namespace(namespace.to_url_string())
.name(table_ident.name())
.metadata_location(metadata_location.clone())
.metadata_location(metadata_location_str.clone())
.version_token(create_resp.version_token())
.send()
.await
.map_err(from_aws_sdk_error)?;

let table = Table::builder()
.identifier(table_ident)
.metadata_location(metadata_location)
.metadata_location(metadata_location_str)
.metadata(metadata)
.file_io(self.file_io.clone())
.build()?;
Expand Down Expand Up @@ -605,11 +607,12 @@ impl Catalog for S3TablesCatalog {
self.load_table_with_version_token(&table_ident).await?;

let staged_table = commit.apply(current_table)?;
let staged_metadata_location = staged_table.metadata_location_result()?;
let staged_metadata_location_str = staged_table.metadata_location_result()?;
let staged_metadata_location = MetadataLocation::from_str(staged_metadata_location_str)?;

staged_table
.metadata()
.write_to(staged_table.file_io(), staged_metadata_location)
.write_to(staged_table.file_io(), &staged_metadata_location)
.await?;

let builder = self
Expand All @@ -619,7 +622,7 @@ impl Catalog for S3TablesCatalog {
.namespace(table_namespace.to_url_string())
.name(table_ident.name())
.version_token(version_token)
.metadata_location(staged_metadata_location);
.metadata_location(staged_metadata_location_str);

let _ = builder.send().await.map_err(|e| {
let error = e.into_service_error();
Expand Down
13 changes: 8 additions & 5 deletions crates/catalog/sql/src/catalog.rs
Original file line number Diff line number Diff line change
Expand Up @@ -829,21 +829,22 @@ impl Catalog for SqlCatalog {
.build()?
.metadata;
let tbl_metadata_location =
MetadataLocation::new_with_table_location(location.clone()).to_string();
MetadataLocation::new_with_metadata(location.clone(), &tbl_metadata);

tbl_metadata
.write_to(&self.fileio, &tbl_metadata_location)
.await?;

let tbl_metadata_location_str = tbl_metadata_location.to_string();
self.execute(&format!(
"INSERT INTO {CATALOG_TABLE_NAME}
({CATALOG_FIELD_CATALOG_NAME}, {CATALOG_FIELD_TABLE_NAMESPACE}, {CATALOG_FIELD_TABLE_NAME}, {CATALOG_FIELD_METADATA_LOCATION_PROP}, {CATALOG_FIELD_RECORD_TYPE})
VALUES (?, ?, ?, ?, ?)
"), vec![Some(&self.name), Some(&namespace.join(".")), Some(&tbl_name.clone()), Some(&tbl_metadata_location), Some(CATALOG_FIELD_TABLE_RECORD_TYPE)], None).await?;
"), vec![Some(&self.name), Some(&namespace.join(".")), Some(&tbl_name.clone()), Some(&tbl_metadata_location_str), Some(CATALOG_FIELD_TABLE_RECORD_TYPE)], None).await?;

Ok(Table::builder()
.file_io(self.fileio.clone())
.metadata_location(tbl_metadata_location)
.metadata_location(tbl_metadata_location_str)
.identifier(tbl_ident)
.metadata(tbl_metadata)
.build()?)
Expand Down Expand Up @@ -927,13 +928,15 @@ impl Catalog for SqlCatalog {
let current_metadata_location = current_table.metadata_location_result()?.to_string();

let staged_table = commit.apply(current_table)?;
let staged_metadata_location = staged_table.metadata_location_result()?;
let staged_metadata_location_str = staged_table.metadata_location_result()?;
let staged_metadata_location = MetadataLocation::from_str(staged_metadata_location_str)?;

staged_table
.metadata()
.write_to(staged_table.file_io(), &staged_metadata_location)
.await?;

let staged_metadata_location_str = staged_metadata_location.to_string();
let update_result = self
.execute(
&format!(
Expand All @@ -949,7 +952,7 @@ impl Catalog for SqlCatalog {
AND {CATALOG_FIELD_METADATA_LOCATION_PROP} = ?"
),
vec![
Some(staged_metadata_location),
Some(&staged_metadata_location_str),
Some(current_metadata_location.as_str()),
Some(&self.name),
Some(table_ident.name()),
Expand Down
14 changes: 7 additions & 7 deletions crates/iceberg/src/catalog/memory/catalog.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
//! This module contains memory catalog implementation.

use std::collections::HashMap;
use std::str::FromStr;

use async_trait::async_trait;
use futures::lock::{Mutex, MutexGuard};
Expand Down Expand Up @@ -279,15 +280,15 @@ impl Catalog for MemoryCatalog {
let metadata = TableMetadataBuilder::from_table_creation(table_creation)?
.build()?
.metadata;
let metadata_location = MetadataLocation::new_with_table_location(location).to_string();
let metadata_location = MetadataLocation::new_with_metadata(location, &metadata);

metadata.write_to(&self.file_io, &metadata_location).await?;

root_namespace_state.insert_new_table(&table_ident, metadata_location.clone())?;
root_namespace_state.insert_new_table(&table_ident, metadata_location.to_string())?;

Table::builder()
.file_io(self.file_io.clone())
.metadata_location(metadata_location)
.metadata_location(metadata_location.to_string())
.metadata(metadata)
.identifier(table_ident)
.build()
Expand Down Expand Up @@ -365,12 +366,11 @@ impl Catalog for MemoryCatalog {
let staged_table = commit.apply(current_table)?;

// Write table metadata to the new location
let metadata_location =
MetadataLocation::from_str(staged_table.metadata_location_result()?)?;
staged_table
.metadata()
.write_to(
staged_table.file_io(),
staged_table.metadata_location_result()?,
)
.write_to(staged_table.file_io(), &metadata_location)
.await?;

// Flip the pointer to reference the new metadata file.
Expand Down
Loading
Loading