From 7a29b96921a8eebee2a68fd3cc17ff306d7dc08d Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Mon, 2 Mar 2026 12:44:03 +0000 Subject: [PATCH 01/11] Update to DataFusion 53 Signed-off-by: Adam Gutglick --- Cargo.lock | 948 ++++++++++++++------- Cargo.toml | 42 +- vortex-bench/src/random_access/take.rs | 3 +- vortex-datafusion/src/persistent/format.rs | 54 +- vortex-datafusion/src/persistent/opener.rs | 8 +- vortex-io/src/object_store/read_at.rs | 1 + vortex-io/src/object_store/write.rs | 1 + 7 files changed, 720 insertions(+), 337 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 127d058d719..d9f6d459e66 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -234,19 +234,40 @@ version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e4754a624e5ae42081f464514be454b39711daae0458906dacde5f4c632f33a8" dependencies = [ - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-csv", - "arrow-data", - "arrow-ipc", - "arrow-json", - "arrow-ord", - "arrow-row", - "arrow-schema", - "arrow-select", - "arrow-string", + "arrow-arith 57.3.0", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-cast 57.3.0", + "arrow-csv 57.3.0", + "arrow-data 57.3.0", + "arrow-ipc 57.3.0", + "arrow-json 57.3.0", + "arrow-ord 57.3.0", + "arrow-row 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", + "arrow-string 57.3.0", +] + +[[package]] +name = "arrow" +version = "58.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "602268ce9f569f282cedb9a9f6bac569b680af47b9b077d515900c03c5d190da" +dependencies = [ + "arrow-arith 58.0.0", + "arrow-array 58.0.0", + "arrow-buffer 58.0.0", + "arrow-cast 58.0.0", + "arrow-csv 58.0.0", + "arrow-data 58.0.0", + "arrow-ipc 58.0.0", + "arrow-json 58.0.0", + "arrow-ord 58.0.0", + "arrow-row 58.0.0", + "arrow-schema 58.0.0", + "arrow-select 58.0.0", + "arrow-string 58.0.0", ] [[package]] @@ -255,10 +276,24 @@ version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f7b3141e0ec5145a22d8694ea8b6d6f69305971c4fa1c1a13ef0195aef2d678b" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", + "chrono", + "num-traits", +] + +[[package]] +name = "arrow-arith" +version = "58.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd53c6bf277dea91f136ae8e3a5d7041b44b5e489e244e637d00ae302051f56f" +dependencies = [ + "arrow-array 58.0.0", + "arrow-buffer 58.0.0", + "arrow-data 58.0.0", + "arrow-schema 58.0.0", "chrono", "num-traits", ] @@ -270,9 +305,28 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4c8955af33b25f3b175ee10af580577280b4bd01f7e823d94c7cdef7cf8c9aef" dependencies = [ "ahash 0.8.12", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-buffer 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", + "chrono", + "chrono-tz", + "half", + "hashbrown 0.16.1", + "num-complex", + "num-integer", + "num-traits", +] + +[[package]] +name = "arrow-array" +version = "58.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e53796e07a6525edaf7dc28b540d477a934aff14af97967ad1d5550878969b9e" +dependencies = [ + "ahash 0.8.12", + "arrow-buffer 58.0.0", + "arrow-data 58.0.0", + "arrow-schema 58.0.0", "chrono", "chrono-tz", "half", @@ -294,18 +348,52 @@ dependencies = [ "num-traits", ] +[[package]] +name = "arrow-buffer" +version = "58.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2c1a85bb2e94ee10b76531d8bc3ce9b7b4c0d508cabfb17d477f63f2617bd20" +dependencies = [ + "bytes", + "half", + "num-bigint", + "num-traits", +] + [[package]] name = "arrow-cast" version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "646bbb821e86fd57189c10b4fcdaa941deaf4181924917b0daa92735baa6ada5" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-ord", - "arrow-schema", - "arrow-select", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-data 57.3.0", + "arrow-ord 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", + "atoi", + "base64", + "chrono", + "comfy-table", + "half", + "lexical-core", + "num-traits", + "ryu", +] + +[[package]] +name = "arrow-cast" +version = "58.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89fb245db6b0e234ed8e15b644edb8664673fefe630575e94e62cd9d489a8a26" +dependencies = [ + "arrow-array 58.0.0", + "arrow-buffer 58.0.0", + "arrow-data 58.0.0", + "arrow-ord 58.0.0", + "arrow-schema 58.0.0", + "arrow-select 58.0.0", "atoi", "base64", "chrono", @@ -322,9 +410,24 @@ version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8da746f4180004e3ce7b83c977daf6394d768332349d3d913998b10a120b790a" dependencies = [ - "arrow-array", - "arrow-cast", - "arrow-schema", + "arrow-array 57.3.0", + "arrow-cast 57.3.0", + "arrow-schema 57.3.0", + "chrono", + "csv", + "csv-core", + "regex", +] + +[[package]] +name = "arrow-csv" +version = "58.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d374882fb465a194462527c0c15a93aa19a554cf690a6b77a26b2a02539937a7" +dependencies = [ + "arrow-array 58.0.0", + "arrow-cast 58.0.0", + "arrow-schema 58.0.0", "chrono", "csv", "csv-core", @@ -337,8 +440,21 @@ version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fdd994a9d28e6365aa78e15da3f3950c0fdcea6b963a12fa1c391afb637b304" dependencies = [ - "arrow-buffer", - "arrow-schema", + "arrow-buffer 57.3.0", + "arrow-schema 57.3.0", + "half", + "num-integer", + "num-traits", +] + +[[package]] +name = "arrow-data" +version = "58.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "189d210bc4244c715fa3ed9e6e22864673cccb73d5da28c2723fb2e527329b33" +dependencies = [ + "arrow-buffer 58.0.0", + "arrow-schema 58.0.0", "half", "num-integer", "num-traits", @@ -350,11 +466,27 @@ version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "abf7df950701ab528bf7c0cf7eeadc0445d03ef5d6ffc151eaae6b38a58feff1" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", + "flatbuffers", + "lz4_flex 0.12.0", + "zstd", +] + +[[package]] +name = "arrow-ipc" +version = "58.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7968c2e5210c41f4909b2ef76f6e05e172b99021c2def5edf3cc48fdd39d1d6c" +dependencies = [ + "arrow-array 58.0.0", + "arrow-buffer 58.0.0", + "arrow-data 58.0.0", + "arrow-schema 58.0.0", + "arrow-select 58.0.0", "flatbuffers", "lz4_flex 0.12.0", "zstd", @@ -366,11 +498,35 @@ version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ff8357658bedc49792b13e2e862b80df908171275f8e6e075c460da5ee4bf86" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-cast 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", + "chrono", + "half", + "indexmap", + "itoa", + "lexical-core", + "memchr", + "num-traits", + "ryu", + "serde_core", + "serde_json", + "simdutf8", +] + +[[package]] +name = "arrow-json" +version = "58.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92111dba5bf900f443488e01f00d8c4ddc2f47f5c50039d18120287b580baa22" +dependencies = [ + "arrow-array 58.0.0", + "arrow-buffer 58.0.0", + "arrow-cast 58.0.0", + "arrow-data 58.0.0", + "arrow-schema 58.0.0", "chrono", "half", "indexmap", @@ -390,11 +546,24 @@ version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f7d8f1870e03d4cbed632959498bcc84083b5a24bded52905ae1695bd29da45b" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", +] + +[[package]] +name = "arrow-ord" +version = "58.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "211136cb253577ee1a6665f741a13136d4e563f64f5093ffd6fb837af90b9495" +dependencies = [ + "arrow-array 58.0.0", + "arrow-buffer 58.0.0", + "arrow-data 58.0.0", + "arrow-schema 58.0.0", + "arrow-select 58.0.0", ] [[package]] @@ -403,10 +572,23 @@ version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "18228633bad92bff92a95746bbeb16e5fc318e8382b75619dec26db79e4de4c0" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", + "half", +] + +[[package]] +name = "arrow-row" +version = "58.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e0f20145f9f5ea3fe383e2ba7a7487bf19be36aa9dbf5dd6a1f92f657179663" +dependencies = [ + "arrow-array 58.0.0", + "arrow-buffer 58.0.0", + "arrow-data 58.0.0", + "arrow-schema 58.0.0", "half", ] @@ -421,6 +603,17 @@ dependencies = [ "serde_json", ] +[[package]] +name = "arrow-schema" +version = "58.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b47e0ca91cc438d2c7879fe95e0bca5329fff28649e30a88c6f760b1faeddcb" +dependencies = [ + "bitflags", + "serde_core", + "serde_json", +] + [[package]] name = "arrow-select" version = "57.3.0" @@ -428,10 +621,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "68bf3e3efbd1278f770d67e5dc410257300b161b93baedb3aae836144edcaf4b" dependencies = [ "ahash 0.8.12", - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", + "num-traits", +] + +[[package]] +name = "arrow-select" +version = "58.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "750a7d1dda177735f5e82a314485b6915c7cccdbb278262ac44090f4aba4a325" +dependencies = [ + "ahash 0.8.12", + "arrow-array 58.0.0", + "arrow-buffer 58.0.0", + "arrow-data 58.0.0", + "arrow-schema 58.0.0", "num-traits", ] @@ -441,11 +648,28 @@ version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "85e968097061b3c0e9fe3079cf2e703e487890700546b5b0647f60fca1b5a8d8" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", + "memchr", + "num-traits", + "regex", + "regex-syntax", +] + +[[package]] +name = "arrow-string" +version = "58.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1eab1208bc4fe55d768cdc9b9f3d9df5a794cdb3ee2586bf89f9b30dc31ad8c" +dependencies = [ + "arrow-array 58.0.0", + "arrow-buffer 58.0.0", + "arrow-data 58.0.0", + "arrow-schema 58.0.0", + "arrow-select 58.0.0", "memchr", "num-traits", "regex", @@ -1344,8 +1568,8 @@ name = "compress-bench" version = "0.1.0" dependencies = [ "anyhow", - "arrow-array", - "arrow-schema", + "arrow-array 58.0.0", + "arrow-schema 58.0.0", "async-trait", "bytes", "clap", @@ -1353,7 +1577,7 @@ dependencies = [ "indicatif", "itertools 0.14.0", "lance-bench", - "parquet", + "parquet 58.0.0", "regex", "tokio", "tracing", @@ -1814,8 +2038,8 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ba7cb113e9c0bedf9e9765926031e132fa05a1b09ba6e93a6d1a4d7044457b8" dependencies = [ - "arrow", - "arrow-schema", + "arrow 57.3.0", + "arrow-schema 57.3.0", "async-trait", "bytes", "chrono", @@ -1846,12 +2070,12 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store", + "object_store 0.12.5", "parking_lot", "rand 0.9.2", "regex", "rstest", - "sqlparser", + "sqlparser 0.59.0", "tempfile", "tokio", "url", @@ -1864,8 +2088,8 @@ version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "503f1f4a9060ae6e650d3dff5dc7a21266fea1302d890768d45b4b28586e830f" dependencies = [ - "arrow", - "arrow-schema", + "arrow 58.0.0", + "arrow-schema 58.0.0", "async-trait", "bytes", "bzip2", @@ -1901,12 +2125,12 @@ dependencies = [ "itertools 0.14.0", "liblzma", "log", - "object_store", + "object_store 0.13.1", "parking_lot", - "parquet", + "parquet 58.0.0", "rand 0.9.2", "regex", - "sqlparser", + "sqlparser 0.61.0", "tempfile", "tokio", "url", @@ -1926,7 +2150,7 @@ dependencies = [ "datafusion-physical-plan 52.2.0", "futures", "itertools 0.14.0", - "object_store", + "object_store 0.13.1", "opentelemetry", "opentelemetry-otlp", "opentelemetry_sdk", @@ -1946,7 +2170,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "66a3a799f914a59b1ea343906a0486f17061f39509af74e874a866428951130d" dependencies = [ - "arrow", + "arrow 57.3.0", "async-trait", "dashmap", "datafusion-common 51.0.0", @@ -1960,7 +2184,7 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store", + "object_store 0.12.5", "parking_lot", "tokio", ] @@ -1971,7 +2195,7 @@ version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "14417a3ee4ae3d092b56cd6c1d32e8ff3e2c9ec130ecb2276ec91c89fd599399" dependencies = [ - "arrow", + "arrow 58.0.0", "async-trait", "dashmap", "datafusion-common 52.2.0", @@ -1985,7 +2209,7 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store", + "object_store 0.13.1", "parking_lot", "tokio", ] @@ -1996,7 +2220,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6db1b113c80d7a0febcd901476a57aef378e717c54517a163ed51417d87621b0" dependencies = [ - "arrow", + "arrow 57.3.0", "async-trait", "datafusion-catalog 51.0.0", "datafusion-common 51.0.0", @@ -2010,7 +2234,7 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store", + "object_store 0.12.5", "tokio", ] @@ -2020,7 +2244,7 @@ version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d0eba824adb45a4b3ac6f0251d40df3f6a9382371cad136f4f14ac9ebc6bc10" dependencies = [ - "arrow", + "arrow 58.0.0", "async-trait", "datafusion-catalog 52.2.0", "datafusion-common 52.2.0", @@ -2034,7 +2258,7 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store", + "object_store 0.13.1", ] [[package]] @@ -2044,17 +2268,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c10f7659e96127d25e8366be7c8be4109595d6a2c3eac70421f380a7006a1b0" dependencies = [ "ahash 0.8.12", - "arrow", - "arrow-ipc", + "arrow 57.3.0", + "arrow-ipc 57.3.0", "chrono", "half", "hashbrown 0.14.5", "indexmap", "libc", "log", - "object_store", + "object_store 0.12.5", "paste", - "sqlparser", + "sqlparser 0.59.0", "tokio", "web-time", ] @@ -2067,19 +2291,20 @@ checksum = "0039deefbd00c56adf5168b7ca58568fb058e4ba4c5a03b09f8be371b4e434b6" dependencies = [ "ahash 0.8.12", "apache-avro", - "arrow", - "arrow-ipc", + "arrow 58.0.0", + "arrow-ipc 58.0.0", "chrono", "half", "hashbrown 0.16.1", "indexmap", + "itertools 0.14.0", "libc", "log", - "object_store", - "parquet", + "object_store 0.13.1", + "parquet 58.0.0", "paste", "recursive", - "sqlparser", + "sqlparser 0.61.0", "tokio", "web-time", ] @@ -2112,7 +2337,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fde13794244bc7581cd82f6fff217068ed79cdc344cafe4ab2c3a1c3510b38d6" dependencies = [ - "arrow", + "arrow 57.3.0", "async-trait", "bytes", "chrono", @@ -2129,7 +2354,7 @@ dependencies = [ "glob", "itertools 0.14.0", "log", - "object_store", + "object_store 0.12.5", "rand 0.9.2", "tokio", "url", @@ -2141,7 +2366,7 @@ version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "802068957f620302ecf05f84ff4019601aeafd36f5f3f1334984af2e34265129" dependencies = [ - "arrow", + "arrow 58.0.0", "async-compression", "async-trait", "bytes", @@ -2162,7 +2387,7 @@ dependencies = [ "itertools 0.14.0", "liblzma", "log", - "object_store", + "object_store 0.13.1", "rand 0.9.2", "tokio", "tokio-util", @@ -2176,8 +2401,8 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "804fa9b4ecf3157982021770617200ef7c1b2979d57bec9044748314775a9aea" dependencies = [ - "arrow", - "arrow-ipc", + "arrow 57.3.0", + "arrow-ipc 57.3.0", "async-trait", "bytes", "datafusion-common 51.0.0", @@ -2190,7 +2415,7 @@ dependencies = [ "datafusion-session 51.0.0", "futures", "itertools 0.14.0", - "object_store", + "object_store 0.12.5", "tokio", ] @@ -2200,8 +2425,8 @@ version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90fc387d5067c62d494a6647d29c5ad4fcdd5a6e50ab4ea1d2568caa2d66f2cc" dependencies = [ - "arrow", - "arrow-ipc", + "arrow 58.0.0", + "arrow-ipc 58.0.0", "async-trait", "bytes", "datafusion-common 52.2.0", @@ -2214,7 +2439,7 @@ dependencies = [ "datafusion-session 52.2.0", "futures", "itertools 0.14.0", - "object_store", + "object_store 0.13.1", "tokio", ] @@ -2225,7 +2450,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69ce35d9df5c672747f79df4b8f4967b39a3514c3af30b9a7b5426f83d4be814" dependencies = [ "apache-avro", - "arrow", + "arrow 58.0.0", "async-trait", "bytes", "datafusion-common 52.2.0", @@ -2235,7 +2460,7 @@ dependencies = [ "datafusion-session 52.2.0", "futures", "num-traits", - "object_store", + "object_store 0.13.1", ] [[package]] @@ -2244,7 +2469,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61a1641a40b259bab38131c5e6f48fac0717bedb7dc93690e604142a849e0568" dependencies = [ - "arrow", + "arrow 57.3.0", "async-trait", "bytes", "datafusion-common 51.0.0", @@ -2256,7 +2481,7 @@ dependencies = [ "datafusion-physical-plan 51.0.0", "datafusion-session 51.0.0", "futures", - "object_store", + "object_store 0.12.5", "regex", "tokio", ] @@ -2267,7 +2492,7 @@ version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "efd5e20579bb6c8bd4e6c620253972fb723822030c280dd6aa047f660d09eeba" dependencies = [ - "arrow", + "arrow 58.0.0", "async-trait", "bytes", "datafusion-common 52.2.0", @@ -2279,7 +2504,7 @@ dependencies = [ "datafusion-physical-plan 52.2.0", "datafusion-session 52.2.0", "futures", - "object_store", + "object_store 0.13.1", "regex", "tokio", ] @@ -2290,7 +2515,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adeacdb00c1d37271176f8fb6a1d8ce096baba16ea7a4b2671840c5c9c64fe85" dependencies = [ - "arrow", + "arrow 57.3.0", "async-trait", "bytes", "datafusion-common 51.0.0", @@ -2302,7 +2527,7 @@ dependencies = [ "datafusion-physical-plan 51.0.0", "datafusion-session 51.0.0", "futures", - "object_store", + "object_store 0.12.5", "tokio", ] @@ -2312,7 +2537,7 @@ version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c0788b0d48fcef31880a02013ea3cc18e5a4e0eacc3b0abdd2cd0597b99dc96e" dependencies = [ - "arrow", + "arrow 58.0.0", "async-trait", "bytes", "datafusion-common 52.2.0", @@ -2324,8 +2549,10 @@ dependencies = [ "datafusion-physical-plan 52.2.0", "datafusion-session 52.2.0", "futures", - "object_store", + "object_store 0.13.1", + "serde_json", "tokio", + "tokio-stream", ] [[package]] @@ -2334,7 +2561,7 @@ version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "66639b70f1f363f5f0950733170100e588f1acfacac90c1894e231194aa35957" dependencies = [ - "arrow", + "arrow 58.0.0", "async-trait", "bytes", "datafusion-common 52.2.0", @@ -2352,9 +2579,9 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store", + "object_store 0.13.1", "parking_lot", - "parquet", + "parquet 58.0.0", "tokio", ] @@ -2376,14 +2603,14 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "63695643190679037bc946ad46a263b62016931547bf119859c511f7ff2f5178" dependencies = [ - "arrow", + "arrow 57.3.0", "async-trait", "dashmap", "datafusion-common 51.0.0", "datafusion-expr 51.0.0", "futures", "log", - "object_store", + "object_store 0.12.5", "parking_lot", "rand 0.9.2", "tempfile", @@ -2396,7 +2623,8 @@ version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e456f60e5d38db45335e84617006d90af14a8c8c5b8e959add708b2daaa0e2c" dependencies = [ - "arrow", + "arrow 58.0.0", + "arrow-buffer 58.0.0", "async-trait", "chrono", "dashmap", @@ -2404,7 +2632,7 @@ dependencies = [ "datafusion-expr 52.2.0", "futures", "log", - "object_store", + "object_store 0.13.1", "parking_lot", "rand 0.9.2", "tempfile", @@ -2417,7 +2645,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f9a4787cbf5feb1ab351f789063398f67654a6df75c4d37d7f637dc96f951a91" dependencies = [ - "arrow", + "arrow 57.3.0", "async-trait", "chrono", "datafusion-common 51.0.0", @@ -2430,7 +2658,7 @@ dependencies = [ "itertools 0.14.0", "paste", "serde_json", - "sqlparser", + "sqlparser 0.59.0", ] [[package]] @@ -2439,7 +2667,7 @@ version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6507c719804265a58043134580c1c20767e7c23ba450724393f03ec982769ad9" dependencies = [ - "arrow", + "arrow 58.0.0", "async-trait", "chrono", "datafusion-common 52.2.0", @@ -2453,7 +2681,7 @@ dependencies = [ "paste", "recursive", "serde_json", - "sqlparser", + "sqlparser 0.61.0", ] [[package]] @@ -2462,7 +2690,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5ce2fb1b8c15c9ac45b0863c30b268c69dc9ee7a1ee13ecf5d067738338173dc" dependencies = [ - "arrow", + "arrow 57.3.0", "datafusion-common 51.0.0", "indexmap", "itertools 0.14.0", @@ -2488,8 +2716,8 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "794a9db7f7b96b3346fc007ff25e994f09b8f0511b4cf7dff651fadfe3ebb28f" dependencies = [ - "arrow", - "arrow-buffer", + "arrow 57.3.0", + "arrow-buffer 57.3.0", "base64", "blake2", "blake3", @@ -2518,8 +2746,8 @@ version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "189256495dc9cbbb8e20dbcf161f60422e628d201a78df8207e44bd4baefadb6" dependencies = [ - "arrow", - "arrow-buffer", + "arrow 58.0.0", + "arrow-buffer 58.0.0", "base64", "blake2", "blake3", @@ -2535,6 +2763,7 @@ dependencies = [ "itertools 0.14.0", "log", "md-5", + "memchr", "num-traits", "rand 0.9.2", "regex", @@ -2550,7 +2779,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1c25210520a9dcf9c2b2cbbce31ebd4131ef5af7fc60ee92b266dc7d159cb305" dependencies = [ "ahash 0.8.12", - "arrow", + "arrow 57.3.0", "datafusion-common 51.0.0", "datafusion-doc 51.0.0", "datafusion-execution 51.0.0", @@ -2582,6 +2811,7 @@ dependencies = [ "datafusion-physical-expr-common 52.2.0", "half", "log", + "num-traits", "paste", ] @@ -2592,7 +2822,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "62f4a66f3b87300bb70f4124b55434d2ae3fe80455f3574701d0348da040b55d" dependencies = [ "ahash 0.8.12", - "arrow", + "arrow 57.3.0", "datafusion-common 51.0.0", "datafusion-expr-common 51.0.0", "datafusion-physical-expr-common 51.0.0", @@ -2617,8 +2847,8 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae5c06eed03918dc7fe7a9f082a284050f0e9ecf95d72f57712d1496da03b8c4" dependencies = [ - "arrow", - "arrow-ord", + "arrow 57.3.0", + "arrow-ord 57.3.0", "datafusion-common 51.0.0", "datafusion-doc 51.0.0", "datafusion-execution 51.0.0", @@ -2653,6 +2883,7 @@ dependencies = [ "datafusion-macros 52.2.0", "datafusion-physical-expr-common 52.2.0", "itertools 0.14.0", + "itoa", "log", "paste", ] @@ -2663,7 +2894,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "db4fed1d71738fbe22e2712d71396db04c25de4111f1ec252b8f4c6d3b25d7f5" dependencies = [ - "arrow", + "arrow 57.3.0", "async-trait", "datafusion-catalog 51.0.0", "datafusion-common 51.0.0", @@ -2679,7 +2910,7 @@ version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3b17dac25dfda2d2a90ff0ad1c054a11fb1523766226bec6e9bd8c410daee2ae" dependencies = [ - "arrow", + "arrow 58.0.0", "async-trait", "datafusion-catalog 52.2.0", "datafusion-common 52.2.0", @@ -2695,7 +2926,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d92206aa5ae21892f1552b4d61758a862a70956e6fd7a95cb85db1de74bc6d1" dependencies = [ - "arrow", + "arrow 57.3.0", "datafusion-common 51.0.0", "datafusion-doc 51.0.0", "datafusion-expr 51.0.0", @@ -2773,7 +3004,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f35f9ec5d08b87fd1893a30c2929f2559c2f9806ca072d8fefca5009dc0f06a" dependencies = [ - "arrow", + "arrow 57.3.0", "chrono", "datafusion-common 51.0.0", "datafusion-expr 51.0.0", @@ -2792,7 +3023,7 @@ version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "80ccf60767c09302b2e0fc3afebb3761a6d508d07316fab8c5e93312728a21bb" dependencies = [ - "arrow", + "arrow 58.0.0", "chrono", "datafusion-common 52.2.0", "datafusion-expr 52.2.0", @@ -2813,7 +3044,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c30cc8012e9eedcb48bbe112c6eff4ae5ed19cf3003cb0f505662e88b7014c5d" dependencies = [ "ahash 0.8.12", - "arrow", + "arrow 57.3.0", "datafusion-common 51.0.0", "datafusion-expr 51.0.0", "datafusion-expr-common 51.0.0", @@ -2858,7 +3089,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f9ff2dbd476221b1f67337699eff432781c4e6e1713d2aefdaa517dfbf79768" dependencies = [ - "arrow", + "arrow 57.3.0", "datafusion-common 51.0.0", "datafusion-expr 51.0.0", "datafusion-functions 51.0.0", @@ -2889,7 +3120,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90da43e1ec550b172f34c87ec68161986ced70fd05c8d2a2add66eef9c276f03" dependencies = [ "ahash 0.8.12", - "arrow", + "arrow 57.3.0", "datafusion-common 51.0.0", "datafusion-expr-common 51.0.0", "hashbrown 0.14.5", @@ -2903,7 +3134,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42237efe621f92adc22d111b531fdbc2cc38ca9b5e02327535628fb103ae2157" dependencies = [ "ahash 0.8.12", - "arrow", + "arrow 58.0.0", "chrono", "datafusion-common 52.2.0", "datafusion-expr-common 52.2.0", @@ -2919,7 +3150,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ce9804f799acd7daef3be7aaffe77c0033768ed8fdbf5fb82fc4c5f2e6bc14e6" dependencies = [ - "arrow", + "arrow 57.3.0", "datafusion-common 51.0.0", "datafusion-execution 51.0.0", "datafusion-expr 51.0.0", @@ -2957,9 +3188,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0acf0ad6b6924c6b1aa7d213b181e012e2d3ec0a64ff5b10ee6282ab0f8532ac" dependencies = [ "ahash 0.8.12", - "arrow", - "arrow-ord", - "arrow-schema", + "arrow 57.3.0", + "arrow-ord 57.3.0", + "arrow-schema 57.3.0", "async-trait", "chrono", "datafusion-common 51.0.0", @@ -2988,9 +3219,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7cbe61b12daf81a9f20ba03bd3541165d51f86e004ef37426b11881330eed261" dependencies = [ "ahash 0.8.12", - "arrow", - "arrow-ord", - "arrow-schema", + "arrow 58.0.0", + "arrow-ord 58.0.0", + "arrow-schema 58.0.0", "async-trait", "datafusion-common 52.2.0", "datafusion-common-runtime 52.2.0", @@ -3007,6 +3238,7 @@ dependencies = [ "indexmap", "itertools 0.14.0", "log", + "num-traits", "parking_lot", "pin-project-lite", "tokio", @@ -3018,7 +3250,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac2c2498a1f134a9e11a9f5ed202a2a7d7e9774bd9249295593053ea3be999db" dependencies = [ - "arrow", + "arrow 57.3.0", "datafusion-common 51.0.0", "datafusion-datasource 51.0.0", "datafusion-expr-common 51.0.0", @@ -3080,7 +3312,7 @@ version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "15d28510abfc85709578fcf9065325d43ee3303012c0ccec2dce351bdc577d00" dependencies = [ - "arrow", + "arrow 58.0.0", "bigdecimal", "chrono", "crc32fast", @@ -3093,7 +3325,9 @@ dependencies = [ "log", "percent-encoding", "rand 0.9.2", + "serde_json", "sha1", + "sha2", "url", ] @@ -3103,7 +3337,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fc195fe60634b2c6ccfd131b487de46dc30eccae8a3c35a13f136e7f440414f" dependencies = [ - "arrow", + "arrow 57.3.0", "bigdecimal", "chrono", "datafusion-common 51.0.0", @@ -3111,7 +3345,7 @@ dependencies = [ "indexmap", "log", "regex", - "sqlparser", + "sqlparser 0.59.0", ] [[package]] @@ -3120,7 +3354,7 @@ version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5272d256dab5347bb39d2040589f45d8c6b715b27edcb5fffe88cc8b9c3909cb" dependencies = [ - "arrow", + "arrow 58.0.0", "bigdecimal", "chrono", "datafusion-common 52.2.0", @@ -3129,7 +3363,7 @@ dependencies = [ "log", "recursive", "regex", - "sqlparser", + "sqlparser 0.61.0", ] [[package]] @@ -3138,7 +3372,7 @@ version = "52.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccb859e97759dcbff66b484bdf4f251f9a76784d3dd7883c124de57510b1e1c2" dependencies = [ - "arrow", + "arrow 58.0.0", "async-trait", "bigdecimal", "clap", @@ -3150,9 +3384,9 @@ dependencies = [ "indicatif", "itertools 0.14.0", "log", - "object_store", - "sqllogictest", - "sqlparser", + "object_store 0.13.1", + "sqllogictest 0.29.1", + "sqlparser 0.61.0", "tempfile", "thiserror 2.0.18", "tokio", @@ -3170,7 +3404,7 @@ dependencies = [ "datafusion 52.2.0", "half", "itertools 0.14.0", - "object_store", + "object_store 0.13.1", "pbjson-types", "prost 0.14.3", "substrait", @@ -3694,7 +3928,7 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f9e5c0b1c67a38cb92b41535d44623483beb9511592ae23a3bf42ddec758690" dependencies = [ - "arrow-array", + "arrow-array 57.3.0", "rand 0.9.2", ] @@ -3906,9 +4140,9 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc1cc4106ac0a0a512c398961ce95d8150475c84a84e17c4511c3643fa120a17" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-schema", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-schema 57.3.0", "geo-traits", "geoarrow-schema", "num-traits", @@ -3922,8 +4156,8 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fa84300361ce57fb875bcaa6e32b95b0aff5c6b1af692b936bdd58ff343f4394" dependencies = [ - "arrow-array", - "arrow-buffer", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", "geo", "geo-traits", "geoarrow-array", @@ -3936,7 +4170,7 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e97be4e9f523f92bd6a0e0458323f4b783d073d011664decd8dbf05651704f34" dependencies = [ - "arrow-schema", + "arrow-schema 57.3.0", "geo-traits", "serde", "serde_json", @@ -3949,9 +4183,9 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773cfa1fb0d7f7661b76b3fde00f3ffd8e0ff7b3635096f0ff6294fe5ca62a2b" dependencies = [ - "arrow-arith", - "arrow-array", - "arrow-schema", + "arrow-arith 57.3.0", + "arrow-array 57.3.0", + "arrow-schema 57.3.0", "datafusion 51.0.0", "geo", "geo-traits", @@ -4860,15 +5094,15 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b7f07b905df393a5554eba19055c620f9ea25a3e40a013bda4bd8dc4ca66f01" dependencies = [ - "arrow", - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-ipc", - "arrow-ord", - "arrow-row", - "arrow-schema", - "arrow-select", + "arrow 57.3.0", + "arrow-arith 57.3.0", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-ipc 57.3.0", + "arrow-ord 57.3.0", + "arrow-row 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", "async-recursion", "async-trait", "async_cell", @@ -4900,7 +5134,7 @@ dependencies = [ "lance-table", "log", "moka", - "object_store", + "object_store 0.12.5", "permutation", "pin-project", "prost 0.14.3", @@ -4925,13 +5159,13 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "100e076cb81c8f0c24cd2881c706fc53e037c7d6e81eb320e929e265d157effb" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-ord", - "arrow-schema", - "arrow-select", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-cast 57.3.0", + "arrow-data 57.3.0", + "arrow-ord 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", "bytes", "getrandom 0.2.17", "half", @@ -4945,13 +5179,13 @@ name = "lance-bench" version = "0.1.0" dependencies = [ "anyhow", - "arrow-cast", + "arrow-cast 57.3.0", "async-trait", "clap", "futures", "lance", "lance-encoding", - "parquet", + "parquet 57.3.0", "tempfile", "tokio", "tracing", @@ -4975,9 +5209,9 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6fa01d1cf490ccfd3b8eaeee2781415d0419e6be8366040e57e43677abf2644e" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-schema", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-schema 57.3.0", "async-trait", "byteorder", "bytes", @@ -4993,7 +5227,7 @@ dependencies = [ "mock_instant", "moka", "num_cpus", - "object_store", + "object_store 0.12.5", "pin-project", "prost 0.14.3", "rand 0.9.2", @@ -5014,12 +5248,12 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ef89a39e3284eef76f79e63f23de8881a0583ad6feb20ed39f47eadd847a2b88" dependencies = [ - "arrow", - "arrow-array", - "arrow-buffer", - "arrow-ord", - "arrow-schema", - "arrow-select", + "arrow 57.3.0", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-ord 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", "async-trait", "chrono", "datafusion 51.0.0", @@ -5046,10 +5280,10 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc2a60eef5c47e65d91e2ffa8e7e1629c52e7190c8b88a371a1a60601dc49371" dependencies = [ - "arrow", - "arrow-array", - "arrow-cast", - "arrow-schema", + "arrow 57.3.0", + "arrow-array 57.3.0", + "arrow-cast 57.3.0", + "arrow-schema 57.3.0", "chrono", "futures", "half", @@ -5066,13 +5300,13 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95ce4a6631308aa681b2671af8f2a845ff781f8d4e755a2a7ccd012379467094" dependencies = [ - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-arith 57.3.0", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-cast 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", "bytemuck", "byteorder", "bytes", @@ -5105,12 +5339,12 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2d4d82357cbfaa1a18494226c15b1cb3c8ed0b6c84b91146323c82047ede419" dependencies = [ - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-arith 57.3.0", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", "async-recursion", "async-trait", "byteorder", @@ -5124,7 +5358,7 @@ dependencies = [ "lance-io", "log", "num-traits", - "object_store", + "object_store 0.12.5", "prost 0.14.3", "prost-build", "prost-types", @@ -5155,12 +5389,12 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "20e9c5aa7024a63af9ae89ee8c0f23c8421b7896742e5cd4a271a60f9956cb80" dependencies = [ - "arrow", - "arrow-arith", - "arrow-array", - "arrow-ord", - "arrow-schema", - "arrow-select", + "arrow 57.3.0", + "arrow-arith 57.3.0", + "arrow-array 57.3.0", + "arrow-ord 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", "async-channel", "async-recursion", "async-trait", @@ -5197,7 +5431,7 @@ dependencies = [ "log", "ndarray", "num-traits", - "object_store", + "object_store 0.12.5", "prost 0.14.3", "prost-build", "prost-types", @@ -5224,14 +5458,14 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c7d2af0b17fb374a8181bcf1a10bce5703ae3ee4373c1587ce4bba23e15e45c8" dependencies = [ - "arrow", - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow 57.3.0", + "arrow-arith 57.3.0", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-cast 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", "async-recursion", "async-trait", "byteorder", @@ -5243,7 +5477,7 @@ dependencies = [ "lance-core", "lance-namespace", "log", - "object_store", + "object_store 0.12.5", "path_abs", "pin-project", "prost 0.14.3", @@ -5262,9 +5496,9 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5125aa62696e75a7475807564b4921f252d8815be606b84bc00e6def0f5c24bb" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-schema", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-schema 57.3.0", "cc", "deepsize", "half", @@ -5280,7 +5514,7 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "70545c2676ce954dfd801da5c6a631a70bba967826cd3a8f31b47d1f04bbfed3" dependencies = [ - "arrow", + "arrow 57.3.0", "async-trait", "bytes", "lance-core", @@ -5307,11 +5541,11 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b06ad37bd90045de8ef533df170c6098e6ff6ecb427aade47d7db8e2c86f2678" dependencies = [ - "arrow", - "arrow-array", - "arrow-buffer", - "arrow-ipc", - "arrow-schema", + "arrow 57.3.0", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-ipc 57.3.0", + "arrow-schema 57.3.0", "async-trait", "byteorder", "bytes", @@ -5323,7 +5557,7 @@ dependencies = [ "lance-file", "lance-io", "log", - "object_store", + "object_store 0.12.5", "prost 0.14.3", "prost-build", "prost-types", @@ -6202,6 +6436,44 @@ dependencies = [ "web-time", ] +[[package]] +name = "object_store" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2858065e55c148d294a9f3aae3b0fa9458edadb41a108397094566f4e3c0dfb" +dependencies = [ + "async-trait", + "base64", + "bytes", + "chrono", + "form_urlencoded", + "futures", + "http", + "http-body-util", + "httparse", + "humantime", + "hyper", + "itertools 0.14.0", + "md-5", + "parking_lot", + "percent-encoding", + "quick-xml", + "rand 0.9.2", + "reqwest", + "ring", + "rustls-pki-types", + "serde", + "serde_json", + "serde_urlencoded", + "thiserror 2.0.18", + "tokio", + "tracing", + "url", + "walkdir", + "wasm-bindgen-futures", + "web-time", +] + [[package]] name = "once_cell" version = "1.21.3" @@ -6459,13 +6731,46 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ee96b29972a257b855ff2341b37e61af5f12d6af1158b6dcdb5b31ea07bb3cb" dependencies = [ "ahash 0.8.12", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-ipc", - "arrow-schema", - "arrow-select", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-cast 57.3.0", + "arrow-data 57.3.0", + "arrow-ipc 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", + "base64", + "brotli", + "bytes", + "chrono", + "flate2", + "half", + "hashbrown 0.16.1", + "lz4_flex 0.12.0", + "num-bigint", + "num-integer", + "num-traits", + "paste", + "seq-macro", + "simdutf8", + "snap", + "thrift", + "twox-hash", + "zstd", +] + +[[package]] +name = "parquet" +version = "58.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f491d0ef1b510194426ee67ddc18a9b747ef3c42050c19322a2cd2e1666c29b" +dependencies = [ + "ahash 0.8.12", + "arrow-array 58.0.0", + "arrow-buffer 58.0.0", + "arrow-data 58.0.0", + "arrow-ipc 58.0.0", + "arrow-schema 58.0.0", + "arrow-select 58.0.0", "base64", "brotli", "bytes", @@ -6478,7 +6783,7 @@ dependencies = [ "num-bigint", "num-integer", "num-traits", - "object_store", + "object_store 0.13.1", "paste", "seq-macro", "simdutf8", @@ -7050,7 +7355,7 @@ dependencies = [ "http", "humantime", "itertools 0.14.0", - "object_store", + "object_store 0.12.5", "percent-encoding", "pyo3", "pyo3-async-runtimes", @@ -8381,15 +8686,50 @@ dependencies = [ "tracing", ] +[[package]] +name = "sqllogictest" +version = "0.29.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d03b2262a244037b0b510edbd25a8e6c9fb8d73ee0237fc6cc95a54c16f94a82" +dependencies = [ + "async-trait", + "educe", + "fs-err", + "futures", + "glob", + "humantime", + "itertools 0.13.0", + "libtest-mimic", + "md-5", + "owo-colors", + "rand 0.8.5", + "regex", + "similar", + "subst", + "tempfile", + "thiserror 2.0.18", + "tracing", +] + [[package]] name = "sqlparser" version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4591acadbcf52f0af60eafbb2c003232b2b4cd8de5f0e9437cb8b1b59046cc0f" +dependencies = [ + "log", + "sqlparser_derive 0.3.0", +] + +[[package]] +name = "sqlparser" +version = "0.61.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbf5ea8d4d7c808e1af1cbabebca9a2abe603bcefc22294c5b95018d53200cb7" dependencies = [ "log", "recursive", - "sqlparser_derive", + "sqlparser_derive 0.5.0", ] [[package]] @@ -8403,6 +8743,17 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "sqlparser_derive" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6dd45d8fc1c79299bfbb7190e42ccbbdf6a5f52e4a6ad98d92357ea965bd289" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "stable_deref_trait" version = "1.2.1" @@ -9130,6 +9481,7 @@ dependencies = [ "futures-core", "pin-project-lite", "tokio", + "tokio-util", ] [[package]] @@ -9281,16 +9633,14 @@ checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tpchgen" version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d651db770ccf53b89dd769ed47899c0c089452e3b725c3c48fbc6a2be579638" +source = "git+https://github.com/AdamGS/tpchgen-rs.git?branch=adamg%2Fbump-arrow-match-df#803355855df0ed62fc30ee2c125fe46fe1dbab47" [[package]] name = "tpchgen-arrow" version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "180f3759dffbf26d47021d2a84245a00f20945384bcf22e63c32652b04916e5a" +source = "git+https://github.com/AdamGS/tpchgen-rs.git?branch=adamg%2Fbump-arrow-match-df#803355855df0ed62fc30ee2c125fe46fe1dbab47" dependencies = [ - "arrow", + "arrow 58.0.0", "tpchgen", ] @@ -9618,11 +9968,11 @@ name = "vortex" version = "0.1.0" dependencies = [ "anyhow", - "arrow-array", + "arrow-array 58.0.0", "codspeed-divan-compat", "fastlanes", "mimalloc", - "parquet", + "parquet 58.0.0", "rand 0.9.2", "serde_json", "tokio", @@ -9684,15 +10034,15 @@ version = "0.1.0" dependencies = [ "arbitrary", "arcref", - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-ord", - "arrow-schema", - "arrow-select", - "arrow-string", + "arrow-arith 58.0.0", + "arrow-array 58.0.0", + "arrow-buffer 58.0.0", + "arrow-cast 58.0.0", + "arrow-data 58.0.0", + "arrow-ord 58.0.0", + "arrow-schema 58.0.0", + "arrow-select 58.0.0", + "arrow-string 58.0.0", "async-lock", "bytes", "cfg-if", @@ -9745,9 +10095,9 @@ name = "vortex-bench" version = "0.1.0" dependencies = [ "anyhow", - "arrow-array", - "arrow-schema", - "arrow-select", + "arrow-array 58.0.0", + "arrow-schema 58.0.0", + "arrow-select 58.0.0", "async-trait", "bytes", "bzip2", @@ -9762,7 +10112,7 @@ dependencies = [ "noodles-bgzf", "noodles-vcf", "parking_lot", - "parquet", + "parquet 58.0.0", "rand 0.9.2", "regex", "reqwest", @@ -9821,7 +10171,7 @@ dependencies = [ name = "vortex-buffer" version = "0.1.0" dependencies = [ - "arrow-buffer", + "arrow-buffer 58.0.0", "bitvec", "bytes", "codspeed-divan-compat", @@ -9871,7 +10221,7 @@ dependencies = [ "fastlanes", "futures", "kanal", - "object_store", + "object_store 0.13.1", "parking_lot", "prost 0.14.3", "rstest", @@ -9899,8 +10249,8 @@ name = "vortex-cxx" version = "0.1.0" dependencies = [ "anyhow", - "arrow-array", - "arrow-schema", + "arrow-array 58.0.0", + "arrow-schema 58.0.0", "async-fs", "cxx", "futures", @@ -9914,7 +10264,7 @@ name = "vortex-datafusion" version = "0.1.0" dependencies = [ "anyhow", - "arrow-schema", + "arrow-schema 58.0.0", "async-trait", "datafusion 52.2.0", "datafusion-catalog 52.2.0", @@ -9932,7 +10282,7 @@ dependencies = [ "futures", "insta", "itertools 0.14.0", - "object_store", + "object_store 0.13.1", "rstest", "tempfile", "tokio", @@ -9988,7 +10338,7 @@ dependencies = [ "jiff", "kanal", "num-traits", - "object_store", + "object_store 0.13.1", "once_cell", "parking_lot", "paste", @@ -10010,10 +10360,10 @@ dependencies = [ name = "vortex-error" version = "0.1.0" dependencies = [ - "arrow-schema", + "arrow-schema 58.0.0", "flatbuffers", "jiff", - "object_store", + "object_store 0.13.1", "prost 0.14.3", "serial_test", "temp-env", @@ -10051,7 +10401,7 @@ dependencies = [ "futures", "itertools 0.14.0", "mimalloc", - "object_store", + "object_store 0.13.1", "paste", "prost 0.14.3", "tempfile", @@ -10073,7 +10423,7 @@ dependencies = [ "itertools 0.14.0", "kanal", "moka", - "object_store", + "object_store 0.13.1", "oneshot", "parking_lot", "pin-project-lite", @@ -10170,7 +10520,7 @@ dependencies = [ "handle", "itertools 0.14.0", "kanal", - "object_store", + "object_store 0.13.1", "oneshot", "parking_lot", "pin-project-lite", @@ -10208,12 +10558,12 @@ dependencies = [ name = "vortex-jni" version = "0.1.0" dependencies = [ - "arrow-array", - "arrow-ipc", - "arrow-schema", + "arrow-array 58.0.0", + "arrow-ipc 58.0.0", + "arrow-schema 58.0.0", "futures", "jni", - "object_store", + "object_store 0.13.1", "parking_lot", "prost 0.14.3", "thiserror 2.0.18", @@ -10265,7 +10615,7 @@ dependencies = [ name = "vortex-mask" version = "0.1.0" dependencies = [ - "arrow-buffer", + "arrow-buffer 58.0.0", "itertools 0.14.0", "rstest", "serde", @@ -10320,13 +10670,13 @@ dependencies = [ name = "vortex-python" version = "0.1.0" dependencies = [ - "arrow-array", - "arrow-data", - "arrow-schema", + "arrow-array 58.0.0", + "arrow-data 58.0.0", + "arrow-schema 58.0.0", "bytes", "itertools 0.14.0", "log", - "object_store", + "object_store 0.13.1", "parking_lot", "pyo3", "pyo3-bytes", @@ -10343,8 +10693,8 @@ name = "vortex-runend" version = "0.1.0" dependencies = [ "arbitrary", - "arrow-array", - "arrow-schema", + "arrow-array 58.0.0", + "arrow-schema 58.0.0", "codspeed-divan-compat", "itertools 0.14.0", "num-traits", @@ -10362,8 +10712,8 @@ dependencies = [ name = "vortex-scan" version = "0.1.0" dependencies = [ - "arrow-array", - "arrow-schema", + "arrow-array 58.0.0", + "arrow-schema 58.0.0", "async-trait", "bit-vec", "futures", @@ -10469,8 +10819,8 @@ dependencies = [ name = "vortex-test-e2e-cuda" version = "0.1.0" dependencies = [ - "arrow-array", - "arrow-schema", + "arrow-array 58.0.0", + "arrow-schema 58.0.0", "futures", "vortex", "vortex-cuda", @@ -10481,8 +10831,8 @@ name = "vortex-tui" version = "0.1.0" dependencies = [ "anyhow", - "arrow-array", - "arrow-schema", + "arrow-array 58.0.0", + "arrow-schema 58.0.0", "clap", "console_error_panic_hook", "crossterm", @@ -10495,7 +10845,7 @@ dependencies = [ "indicatif", "itertools 0.14.0", "js-sys", - "parquet", + "parquet 58.0.0", "ratatui", "ratzilla", "serde", diff --git a/Cargo.toml b/Cargo.toml index 0da5ee805ba..204b4fdaff3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -86,16 +86,16 @@ arbitrary = "1.3.2" arc-swap = "1.8" arcref = "0.2.0" arrayref = "0.3.7" -arrow-arith = "57.1" -arrow-array = "57.1" -arrow-buffer = "57.1" -arrow-cast = "57.1" -arrow-data = "57.1" -arrow-ipc = "57.1" -arrow-ord = "57.1" -arrow-schema = "57.1" -arrow-select = "57.1" -arrow-string = "57.1" +arrow-arith = "58" +arrow-array = "58" +arrow-buffer = "58" +arrow-cast = "58" +arrow-data = "58" +arrow-ipc = "58" +arrow-ord = "58" +arrow-schema = "58" +arrow-select = "58" +arrow-string = "58" async-fs = "2.2.0" async-lock = "3.4" async-stream = "0.3.6" @@ -171,14 +171,14 @@ noodles-bgzf = "0.44.0" noodles-vcf = "0.82.0" num-traits = "0.2.19" num_enum = { version = "0.7.3", default-features = false } -object_store = { version = "0.12.4", default-features = false } +object_store = { version = "0.13.1", default-features = false } once_cell = "1.21" oneshot = "0.1.13" opentelemetry = "0.31.0" opentelemetry-otlp = "0.31.0" opentelemetry_sdk = "0.31.0" parking_lot = { version = "0.12.3", features = ["nightly"] } -parquet = "57.1" +parquet = "58" paste = "1.0.15" pco = "1.0.1" pin-project-lite = "0.2.15" @@ -368,3 +368,21 @@ lto = false [profile.bench_assert] debug-assertions = true inherits = "bench" + +[patch.crates-io] +datafusion = { git = "https://github.com/apache/datafusion", branch = "branch-53" } +datafusion-catalog = { git = "https://github.com/apache/datafusion", branch = "branch-53" } +datafusion-common = { git = "https://github.com/apache/datafusion", branch = "branch-53" } +datafusion-common-runtime = { git = "https://github.com/apache/datafusion", branch = "branch-53" } +datafusion-datasource = { git = "https://github.com/apache/datafusion", branch = "branch-53" } +datafusion-execution = { git = "https://github.com/apache/datafusion", branch = "branch-53" } +datafusion-expr = { git = "https://github.com/apache/datafusion", branch = "branch-53" } +datafusion-functions = { git = "https://github.com/apache/datafusion", branch = "branch-53" } +datafusion-physical-expr = { git = "https://github.com/apache/datafusion", branch = "branch-53" } +datafusion-physical-expr-adapter = { git = "https://github.com/apache/datafusion", branch = "branch-53" } +datafusion-physical-expr-common = { git = "https://github.com/apache/datafusion", branch = "branch-53" } +datafusion-physical-plan = { git = "https://github.com/apache/datafusion", branch = "branch-53" } +datafusion-pruning = { git = "https://github.com/apache/datafusion", branch = "branch-53" } +datafusion-sqllogictest = { git = "https://github.com/apache/datafusion", branch = "branch-53" } +tpchgen = { git = "https://github.com/AdamGS/tpchgen-rs.git", branch = "adamg/bump-arrow-match-df" } +tpchgen-arrow = { git = "https://github.com/AdamGS/tpchgen-rs.git", branch = "adamg/bump-arrow-match-df" } diff --git a/vortex-bench/src/random_access/take.rs b/vortex-bench/src/random_access/take.rs index 674aa6349aa..b23d6622876 100644 --- a/vortex-bench/src/random_access/take.rs +++ b/vortex-bench/src/random_access/take.rs @@ -14,6 +14,7 @@ use itertools::Itertools; use parquet::arrow::ParquetRecordBatchStreamBuilder; use parquet::arrow::arrow_reader::ArrowReaderMetadata; use parquet::arrow::arrow_reader::ArrowReaderOptions; +use parquet::file::metadata::PageIndexPolicy; use stream::StreamExt; use tokio::fs::File; use vortex::array::Canonical; @@ -100,7 +101,7 @@ impl ParquetRandomAccessor { /// Open a Parquet file, parse the footer, and return a ready-to-use accessor. pub async fn open(path: PathBuf, name: impl Into) -> anyhow::Result { let mut file = File::open(&path).await?; - let options = ArrowReaderOptions::new().with_page_index(true); + let options = ArrowReaderOptions::new().with_page_index_policy(PageIndexPolicy::Required); let arrow_metadata = ArrowReaderMetadata::load_async(&mut file, options).await?; let row_group_offsets = once(0) diff --git a/vortex-datafusion/src/persistent/format.rs b/vortex-datafusion/src/persistent/format.rs index fb3d5f9db11..0be2086c885 100644 --- a/vortex-datafusion/src/persistent/format.rs +++ b/vortex-datafusion/src/persistent/format.rs @@ -32,6 +32,7 @@ use datafusion_datasource::file_scan_config::FileScanConfigBuilder; use datafusion_datasource::file_sink_config::FileSinkConfig; use datafusion_datasource::sink::DataSinkExec; use datafusion_datasource::source::DataSourceExec; +use datafusion_execution::cache::cache_manager::CachedFileMetadataEntry; use datafusion_expr::dml::InsertOp; use datafusion_physical_expr::LexRequirement; use datafusion_physical_plan::ExecutionPlan; @@ -251,16 +252,19 @@ impl FileFormat for VortexFormat { let cache = file_metadata_cache.clone(); SpawnedTask::spawn(async move { - // Check if we have cached metadata for this file - if let Some(cached) = cache.get(&object) - && let Some(cached_vortex) = - cached.as_any().downcast_ref::() + // Check if we have entry metadata for this file + if let Some(entry) = cache.get(&object.location) + && entry.is_valid_for(&object) + && let Some(cached_vortex) = entry + .file_metadata + .as_any() + .downcast_ref::() { let inferred_schema = cached_vortex.footer().dtype().to_arrow_schema()?; return VortexResult::Ok((object.location, inferred_schema)); } - // Not cached or invalid - open the file + // Not entry or invalid - open the file let reader = Arc::new(ObjectStoreReadAt::new( store, object.location.clone(), @@ -276,7 +280,8 @@ impl FileFormat for VortexFormat { // Cache the metadata let cached_metadata = Arc::new(CachedVortexMetadata::new(&vxf)); - cache.put(&object, cached_metadata); + let entry = CachedFileMetadataEntry::new(object.clone(), cached_metadata); + cache.put(&object.location, entry); let inferred_schema = vxf.dtype().to_arrow_schema()?; VortexResult::Ok((object.location, inferred_schema)) @@ -310,24 +315,28 @@ impl FileFormat for VortexFormat { let file_metadata_cache = state.runtime_env().cache_manager.get_file_metadata_cache(); SpawnedTask::spawn(async move { - // Try to get cached metadata first - let cached_metadata = file_metadata_cache.get(&object).and_then(|cached| { - cached - .as_any() - .downcast_ref::() - .map(|m| { - ( - m.footer().dtype().clone(), - m.footer().statistics().cloned(), - m.footer().row_count(), - ) - }) - }); + // Try to get entry metadata first + let cached_metadata = file_metadata_cache + .get(&object.location) + .filter(|entry| entry.is_valid_for(&object)) + .and_then(|entry| { + entry + .file_metadata + .as_any() + .downcast_ref::() + .map(|m| { + ( + m.footer().dtype().clone(), + m.footer().statistics().cloned(), + m.footer().row_count(), + ) + }) + }); let (dtype, file_stats, row_count) = match cached_metadata { Some(metadata) => metadata, None => { - // Not cached - open the file + // Not entry - open the file let reader = Arc::new(ObjectStoreReadAt::new( store, object.location.clone(), @@ -348,8 +357,9 @@ impl FileFormat for VortexFormat { })?; // Cache the metadata - let cached = Arc::new(CachedVortexMetadata::new(&vxf)); - file_metadata_cache.put(&object, cached); + let file_metadata = Arc::new(CachedVortexMetadata::new(&vxf)); + let entry = CachedFileMetadataEntry::new(object.clone(), file_metadata); + file_metadata_cache.put(&object.location, entry); ( vxf.dtype().clone(), diff --git a/vortex-datafusion/src/persistent/opener.rs b/vortex-datafusion/src/persistent/opener.rs index 5986a06da49..3dee97e9d59 100644 --- a/vortex-datafusion/src/persistent/opener.rs +++ b/vortex-datafusion/src/persistent/opener.rs @@ -187,8 +187,10 @@ impl FileOpener for VortexOpener { .with_labels(labels); if let Some(file_metadata_cache) = file_metadata_cache - && let Some(file_metadata) = file_metadata_cache.get(&file.object_meta) - && let Some(vortex_metadata) = file_metadata + && let Some(entry) = file_metadata_cache.get(&file.path()) + && entry.is_valid_for(&file.object_meta) + && let Some(vortex_metadata) = entry + .file_metadata .as_any() .downcast_ref::() { @@ -212,7 +214,7 @@ impl FileOpener for VortexOpener { let expr_adapter = expr_adapter_factory.create( Arc::clone(&unified_file_schema), Arc::clone(&this_file_schema), - ); + )?; let simplifier = PhysicalExprSimplifier::new(&this_file_schema); diff --git a/vortex-io/src/object_store/read_at.rs b/vortex-io/src/object_store/read_at.rs index 82ef57ca6f6..a4957199565 100644 --- a/vortex-io/src/object_store/read_at.rs +++ b/vortex-io/src/object_store/read_at.rs @@ -11,6 +11,7 @@ use object_store::GetOptions; use object_store::GetRange; use object_store::GetResultPayload; use object_store::ObjectStore; +use object_store::ObjectStoreExt; use object_store::path::Path as ObjectPath; use vortex_array::buffer::BufferHandle; use vortex_buffer::Alignment; diff --git a/vortex-io/src/object_store/write.rs b/vortex-io/src/object_store/write.rs index 20bfb7815c4..988fd1473ac 100644 --- a/vortex-io/src/object_store/write.rs +++ b/vortex-io/src/object_store/write.rs @@ -9,6 +9,7 @@ use futures::TryStreamExt; use futures::stream::FuturesUnordered; use object_store::MultipartUpload; use object_store::ObjectStore; +use object_store::ObjectStoreExt; use object_store::PutPayload; use object_store::PutResult; use object_store::path::Path; From 9dde362dea7a66e1a5b399d0137da747ae2d7217 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Mon, 2 Mar 2026 13:49:39 +0000 Subject: [PATCH 02/11] some fixes Signed-off-by: Adam Gutglick --- Cargo.lock | 35 ++++++----------------------------- vortex-jni/src/file.rs | 1 + vortex-python/Cargo.toml | 2 +- 3 files changed, 8 insertions(+), 30 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d9f6d459e66..aae58b0d4a3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -942,7 +942,7 @@ dependencies = [ "bitflags", "cexpr", "clang-sys", - "itertools 0.11.0", + "itertools 0.13.0", "log", "prettyplease", "proc-macro2", @@ -6405,28 +6405,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fbfbfff40aeccab00ec8a910b57ca8ecf4319b335c542f2edcd19dd25a1e2a00" dependencies = [ "async-trait", - "base64", "bytes", "chrono", - "form_urlencoded", "futures", "http", - "http-body-util", - "httparse", "humantime", - "hyper", "itertools 0.14.0", - "md-5", "parking_lot", "percent-encoding", - "quick-xml", - "rand 0.9.2", - "reqwest", - "ring", - "rustls-pemfile", - "serde", - "serde_json", - "serde_urlencoded", "thiserror 2.0.18", "tokio", "tracing", @@ -7154,7 +7140,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7" dependencies = [ "heck", - "itertools 0.11.0", + "itertools 0.14.0", "log", "multimap", "petgraph", @@ -7186,7 +7172,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" dependencies = [ "anyhow", - "itertools 0.11.0", + "itertools 0.14.0", "proc-macro2", "quote", "syn 2.0.117", @@ -7344,9 +7330,9 @@ dependencies = [ [[package]] name = "pyo3-object_store" -version = "0.7.0" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ef5552f108a4d65b78c924b27513471a9ba425341ada4be5ea0ca53806ae316" +checksum = "a8b80a3a9af26abe307d2c01c13da487166c5c8ac5ac301a4d8e3c270e58ab50" dependencies = [ "async-trait", "bytes", @@ -7355,7 +7341,7 @@ dependencies = [ "http", "humantime", "itertools 0.14.0", - "object_store 0.12.5", + "object_store 0.13.1", "percent-encoding", "pyo3", "pyo3-async-runtimes", @@ -8100,15 +8086,6 @@ dependencies = [ "security-framework", ] -[[package]] -name = "rustls-pemfile" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50" -dependencies = [ - "rustls-pki-types", -] - [[package]] name = "rustls-pki-types" version = "1.14.0" diff --git a/vortex-jni/src/file.rs b/vortex-jni/src/file.rs index 2ab70aa5e85..a492a25a6a0 100644 --- a/vortex-jni/src/file.rs +++ b/vortex-jni/src/file.rs @@ -15,6 +15,7 @@ use jni::objects::ReleaseMode; use jni::sys::jlong; use jni::sys::jobject; use object_store::ObjectStore; +use object_store::ObjectStoreExt; use object_store::path::Path; use prost::Message; use url::Url; diff --git a/vortex-python/Cargo.toml b/vortex-python/Cargo.toml index 391dc9b4b47..0dd36e2703c 100644 --- a/vortex-python/Cargo.toml +++ b/vortex-python/Cargo.toml @@ -47,7 +47,7 @@ parking_lot = { workspace = true } pyo3 = { workspace = true, features = ["abi3", "abi3-py311"] } pyo3-bytes = { workspace = true } pyo3-log = { workspace = true } -pyo3-object_store = { version = "0.7" } +pyo3-object_store = { version = "0.8" } tokio = { workspace = true, features = ["fs", "rt-multi-thread"] } url = { workspace = true } vortex = { workspace = true, features = ["object_store", "tokio"] } From ea00a07052cdec33f9f2972e1cb462037f98e70b Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Mon, 2 Mar 2026 14:15:03 +0000 Subject: [PATCH 03/11] more fix Signed-off-by: Adam Gutglick --- Cargo.lock | 27 +-------------------------- vortex-cuda/src/pooled_read_at.rs | 1 + vortex-sqllogictest/Cargo.toml | 2 +- 3 files changed, 3 insertions(+), 27 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index aae58b0d4a3..3041e10a050 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3385,7 +3385,7 @@ dependencies = [ "itertools 0.14.0", "log", "object_store 0.13.1", - "sqllogictest 0.29.1", + "sqllogictest", "sqlparser 0.61.0", "tempfile", "thiserror 2.0.18", @@ -8638,31 +8638,6 @@ dependencies = [ "smallvec", ] -[[package]] -name = "sqllogictest" -version = "0.28.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3566426f72a13e393aa34ca3d542c5b0eb86da4c0db137ee9b5cfccc6179e52d" -dependencies = [ - "async-trait", - "educe", - "fs-err", - "futures", - "glob", - "humantime", - "itertools 0.13.0", - "libtest-mimic", - "md-5", - "owo-colors", - "rand 0.8.5", - "regex", - "similar", - "subst", - "tempfile", - "thiserror 2.0.18", - "tracing", -] - [[package]] name = "sqllogictest" version = "0.29.1" diff --git a/vortex-cuda/src/pooled_read_at.rs b/vortex-cuda/src/pooled_read_at.rs index cb051728cca..1b5ec4fb31a 100644 --- a/vortex-cuda/src/pooled_read_at.rs +++ b/vortex-cuda/src/pooled_read_at.rs @@ -13,6 +13,7 @@ use object_store::GetOptions; use object_store::GetRange; use object_store::GetResultPayload; use object_store::ObjectStore; +use object_store::ObjectStoreExt; use object_store::path::Path as ObjectPath; use vortex::array::buffer::BufferHandle; use vortex::buffer::Alignment; diff --git a/vortex-sqllogictest/Cargo.toml b/vortex-sqllogictest/Cargo.toml index f5f4dd30b0a..757abb49689 100644 --- a/vortex-sqllogictest/Cargo.toml +++ b/vortex-sqllogictest/Cargo.toml @@ -23,7 +23,7 @@ datafusion-sqllogictest = { workspace = true } futures.workspace = true indicatif.workspace = true rstest = { workspace = true } -sqllogictest = "0.28" +sqllogictest = "0.29.1" thiserror = { workspace = true } tokio = { workspace = true, features = ["full"] } vortex = { workspace = true, features = ["tokio"] } From 8a32bd98291b3bf99648b7b19722a9d9f4e166e9 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Fri, 6 Mar 2026 19:51:40 +0000 Subject: [PATCH 04/11] fix clippy --- vortex-datafusion/src/persistent/opener.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vortex-datafusion/src/persistent/opener.rs b/vortex-datafusion/src/persistent/opener.rs index 3dee97e9d59..1643bf28a43 100644 --- a/vortex-datafusion/src/persistent/opener.rs +++ b/vortex-datafusion/src/persistent/opener.rs @@ -187,7 +187,7 @@ impl FileOpener for VortexOpener { .with_labels(labels); if let Some(file_metadata_cache) = file_metadata_cache - && let Some(entry) = file_metadata_cache.get(&file.path()) + && let Some(entry) = file_metadata_cache.get(file.path()) && entry.is_valid_for(&file.object_meta) && let Some(vortex_metadata) = entry .file_metadata From 63826bf7078995e5668193d88246c951d09f664a Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Mon, 9 Mar 2026 10:59:43 +0000 Subject: [PATCH 05/11] morselize Signed-off-by: Adam Gutglick --- Cargo.lock | 175 +++++++++------------ Cargo.toml | 28 ++-- vortex-datafusion/src/persistent/opener.rs | 43 +++++ vortex-datafusion/src/persistent/source.rs | 20 +++ vortex-datafusion/src/v2/source.rs | 8 + 5 files changed, 155 insertions(+), 119 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3041e10a050..d0bb6195ff9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2084,9 +2084,8 @@ dependencies = [ [[package]] name = "datafusion" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "503f1f4a9060ae6e650d3dff5dc7a21266fea1302d890768d45b4b28586e830f" +version = "52.1.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" dependencies = [ "arrow 58.0.0", "arrow-schema 58.0.0", @@ -2191,9 +2190,8 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14417a3ee4ae3d092b56cd6c1d32e8ff3e2c9ec130ecb2276ec91c89fd599399" +version = "52.1.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" dependencies = [ "arrow 58.0.0", "async-trait", @@ -2240,9 +2238,8 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d0eba824adb45a4b3ac6f0251d40df3f6a9382371cad136f4f14ac9ebc6bc10" +version = "52.1.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" dependencies = [ "arrow 58.0.0", "async-trait", @@ -2285,9 +2282,8 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0039deefbd00c56adf5168b7ca58568fb058e4ba4c5a03b09f8be371b4e434b6" +version = "52.1.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" dependencies = [ "ahash 0.8.12", "apache-avro", @@ -2322,9 +2318,8 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ec7e3e60b813048331f8fb9673583173e5d2dd8fef862834ee871fc98b57ca7" +version = "52.1.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" dependencies = [ "futures", "log", @@ -2362,9 +2357,8 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "802068957f620302ecf05f84ff4019601aeafd36f5f3f1334984af2e34265129" +version = "52.1.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" dependencies = [ "arrow 58.0.0", "async-compression", @@ -2421,9 +2415,8 @@ dependencies = [ [[package]] name = "datafusion-datasource-arrow" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90fc387d5067c62d494a6647d29c5ad4fcdd5a6e50ab4ea1d2568caa2d66f2cc" +version = "52.1.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" dependencies = [ "arrow 58.0.0", "arrow-ipc 58.0.0", @@ -2445,9 +2438,8 @@ dependencies = [ [[package]] name = "datafusion-datasource-avro" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69ce35d9df5c672747f79df4b8f4967b39a3514c3af30b9a7b5426f83d4be814" +version = "52.1.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" dependencies = [ "apache-avro", "arrow 58.0.0", @@ -2488,9 +2480,8 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efd5e20579bb6c8bd4e6c620253972fb723822030c280dd6aa047f660d09eeba" +version = "52.1.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" dependencies = [ "arrow 58.0.0", "async-trait", @@ -2533,9 +2524,8 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0788b0d48fcef31880a02013ea3cc18e5a4e0eacc3b0abdd2cd0597b99dc96e" +version = "52.1.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" dependencies = [ "arrow 58.0.0", "async-trait", @@ -2557,9 +2547,8 @@ dependencies = [ [[package]] name = "datafusion-datasource-parquet" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66639b70f1f363f5f0950733170100e588f1acfacac90c1894e231194aa35957" +version = "52.1.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" dependencies = [ "arrow 58.0.0", "async-trait", @@ -2593,9 +2582,8 @@ checksum = "2b99e13947667b36ad713549237362afb054b2d8f8cc447751e23ec61202db07" [[package]] name = "datafusion-doc" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e44b41f3e8267c6cf3eec982d63f34db9f1dd5f30abfd2e1f124f0871708952e" +version = "52.1.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" [[package]] name = "datafusion-execution" @@ -2619,9 +2607,8 @@ dependencies = [ [[package]] name = "datafusion-execution" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e456f60e5d38db45335e84617006d90af14a8c8c5b8e959add708b2daaa0e2c" +version = "52.1.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" dependencies = [ "arrow 58.0.0", "arrow-buffer 58.0.0", @@ -2663,9 +2650,8 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6507c719804265a58043134580c1c20767e7c23ba450724393f03ec982769ad9" +version = "52.1.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" dependencies = [ "arrow 58.0.0", "async-trait", @@ -2699,9 +2685,8 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a413caa9c5885072b539337aed68488f0291653e8edd7d676c92df2480f6cab0" +version = "52.1.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" dependencies = [ "arrow", "datafusion-common 52.2.0", @@ -2742,9 +2727,8 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "189256495dc9cbbb8e20dbcf161f60422e628d201a78df8207e44bd4baefadb6" +version = "52.1.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" dependencies = [ "arrow 58.0.0", "arrow-buffer 58.0.0", @@ -2795,9 +2779,8 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12e73dfee4cd67c4a507ffff4c5a711d39983adf544adbc09c09bf06f789f413" +version = "52.1.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" dependencies = [ "ahash 0.8.12", "arrow", @@ -2830,9 +2813,8 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87727bd9e65f4f9ac6d608c9810b7da9eaa3b18b26a4a4b76520592d49020acf" +version = "52.1.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" dependencies = [ "ahash 0.8.12", "arrow", @@ -2866,9 +2848,8 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e5ef761359224b7c2b5a1bfad6296ac63225f8583d08ad18af9ba1a89ac3887" +version = "52.1.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" dependencies = [ "arrow", "arrow-ord", @@ -2906,9 +2887,8 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b17dac25dfda2d2a90ff0ad1c054a11fb1523766226bec6e9bd8c410daee2ae" +version = "52.1.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" dependencies = [ "arrow 58.0.0", "async-trait", @@ -2940,9 +2920,8 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c594a29ddb22cbdbce500e4d99b5b2392c5cecb4c1086298b41d1ffec14dbb77" +version = "52.1.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" dependencies = [ "arrow", "datafusion-common 52.2.0", @@ -2968,9 +2947,8 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9aa1b15ed81c7543f62264a30dd49dec4b1b0b698053b968f53be32dfba4f729" +version = "52.1.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" dependencies = [ "datafusion-common 52.2.0", "datafusion-physical-expr-common 52.2.0", @@ -2989,9 +2967,8 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c00c31c4795597aa25b74cab5174ac07a53051f27ce1e011ecaffa9eaeecef81" +version = "52.1.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" dependencies = [ "datafusion-doc 52.2.0", "quote", @@ -3019,9 +2996,8 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80ccf60767c09302b2e0fc3afebb3761a6d508d07316fab8c5e93312728a21bb" +version = "52.1.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" dependencies = [ "arrow 58.0.0", "chrono", @@ -3061,9 +3037,8 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c64b7f277556944e4edd3558da01d9e9ff9f5416f1c0aa7fee088e57bd141a7e" +version = "52.1.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" dependencies = [ "ahash 0.8.12", "arrow", @@ -3100,9 +3075,8 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-adapter" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7abaee372ea2d19c016ee9ef8629c4415257d291cdd152bc7f0b75f28af1b63" +version = "52.1.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" dependencies = [ "arrow", "datafusion-common 52.2.0", @@ -3129,9 +3103,8 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42237efe621f92adc22d111b531fdbc2cc38ca9b5e02327535628fb103ae2157" +version = "52.1.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" dependencies = [ "ahash 0.8.12", "arrow 58.0.0", @@ -3164,9 +3137,8 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd093498bd1319c6e5c76e9dfa905e78486f01b34579ce97f2e3a49f84c37fac" +version = "52.1.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" dependencies = [ "arrow", "datafusion-common 52.2.0", @@ -3214,9 +3186,8 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cbe61b12daf81a9f20ba03bd3541165d51f86e004ef37426b11881330eed261" +version = "52.1.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" dependencies = [ "ahash 0.8.12", "arrow 58.0.0", @@ -3263,9 +3234,8 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0124331116db7f79df92ebfd2c3b11a8f90240f253555c9bb084f10b6fecf1dd" +version = "52.1.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" dependencies = [ "arrow", "datafusion-common 52.2.0", @@ -3294,9 +3264,8 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1673e3c58ba618a6ea0568672f00664087b8982c581e9afd5aa6c3c79c9b431f" +version = "52.1.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" dependencies = [ "async-trait", "datafusion-common 52.2.0", @@ -3308,9 +3277,8 @@ dependencies = [ [[package]] name = "datafusion-spark" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15d28510abfc85709578fcf9065325d43ee3303012c0ccec2dce351bdc577d00" +version = "52.1.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" dependencies = [ "arrow 58.0.0", "bigdecimal", @@ -3350,9 +3318,8 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5272d256dab5347bb39d2040589f45d8c6b715b27edcb5fffe88cc8b9c3909cb" +version = "52.1.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" dependencies = [ "arrow 58.0.0", "bigdecimal", @@ -3368,9 +3335,8 @@ dependencies = [ [[package]] name = "datafusion-sqllogictest" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccb859e97759dcbff66b484bdf4f251f9a76784d3dd7883c124de57510b1e1c2" +version = "52.1.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" dependencies = [ "arrow 58.0.0", "async-trait", @@ -3394,9 +3360,8 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "199790fd96e852997b30da4ff11109378c944841757d93875ea85fc69587ec91" +version = "52.1.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" dependencies = [ "async-recursion", "async-trait", diff --git a/Cargo.toml b/Cargo.toml index 204b4fdaff3..280e88c849e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -370,19 +370,19 @@ debug-assertions = true inherits = "bench" [patch.crates-io] -datafusion = { git = "https://github.com/apache/datafusion", branch = "branch-53" } -datafusion-catalog = { git = "https://github.com/apache/datafusion", branch = "branch-53" } -datafusion-common = { git = "https://github.com/apache/datafusion", branch = "branch-53" } -datafusion-common-runtime = { git = "https://github.com/apache/datafusion", branch = "branch-53" } -datafusion-datasource = { git = "https://github.com/apache/datafusion", branch = "branch-53" } -datafusion-execution = { git = "https://github.com/apache/datafusion", branch = "branch-53" } -datafusion-expr = { git = "https://github.com/apache/datafusion", branch = "branch-53" } -datafusion-functions = { git = "https://github.com/apache/datafusion", branch = "branch-53" } -datafusion-physical-expr = { git = "https://github.com/apache/datafusion", branch = "branch-53" } -datafusion-physical-expr-adapter = { git = "https://github.com/apache/datafusion", branch = "branch-53" } -datafusion-physical-expr-common = { git = "https://github.com/apache/datafusion", branch = "branch-53" } -datafusion-physical-plan = { git = "https://github.com/apache/datafusion", branch = "branch-53" } -datafusion-pruning = { git = "https://github.com/apache/datafusion", branch = "branch-53" } -datafusion-sqllogictest = { git = "https://github.com/apache/datafusion", branch = "branch-53" } +datafusion = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "ed061e794b95dbf932bcc06f93ba9bc1252e1db5" } +datafusion-catalog = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "ed061e794b95dbf932bcc06f93ba9bc1252e1db5" } +datafusion-common = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "ed061e794b95dbf932bcc06f93ba9bc1252e1db5" } +datafusion-common-runtime = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "ed061e794b95dbf932bcc06f93ba9bc1252e1db5" } +datafusion-datasource = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "ed061e794b95dbf932bcc06f93ba9bc1252e1db5" } +datafusion-execution = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "ed061e794b95dbf932bcc06f93ba9bc1252e1db5" } +datafusion-expr = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "ed061e794b95dbf932bcc06f93ba9bc1252e1db5" } +datafusion-functions = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "ed061e794b95dbf932bcc06f93ba9bc1252e1db5" } +datafusion-physical-expr = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "ed061e794b95dbf932bcc06f93ba9bc1252e1db5" } +datafusion-physical-expr-adapter = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "ed061e794b95dbf932bcc06f93ba9bc1252e1db5" } +datafusion-physical-expr-common = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "ed061e794b95dbf932bcc06f93ba9bc1252e1db5" } +datafusion-physical-plan = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "ed061e794b95dbf932bcc06f93ba9bc1252e1db5" } +datafusion-pruning = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "ed061e794b95dbf932bcc06f93ba9bc1252e1db5" } +datafusion-sqllogictest = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "ed061e794b95dbf932bcc06f93ba9bc1252e1db5" } tpchgen = { git = "https://github.com/AdamGS/tpchgen-rs.git", branch = "adamg/bump-arrow-match-df" } tpchgen-arrow = { git = "https://github.com/AdamGS/tpchgen-rs.git", branch = "adamg/bump-arrow-match-df" } diff --git a/vortex-datafusion/src/persistent/opener.rs b/vortex-datafusion/src/persistent/opener.rs index 1643bf28a43..093093b416a 100644 --- a/vortex-datafusion/src/persistent/opener.rs +++ b/vortex-datafusion/src/persistent/opener.rs @@ -1,6 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use std::future::ready; use std::ops::Range; use std::sync::Arc; use std::sync::Weak; @@ -29,6 +30,7 @@ use datafusion_pruning::FilePruner; use futures::FutureExt; use futures::StreamExt; use futures::TryStreamExt; +use futures::future::BoxFuture; use futures::stream; use object_store::path::Path; use tracing::Instrument; @@ -98,7 +100,43 @@ pub(crate) struct VortexOpener { pub scan_concurrency: Option, } +struct VortexMorsel {} + impl FileOpener for VortexOpener { + fn is_leaf_morsel(&self, file: &PartitionedFile) -> bool { + file.extensions + .as_ref() + .is_some_and(|e| e.is::()) + } + + fn morselize( + &self, + partitioned_file: PartitionedFile, + ) -> BoxFuture<'static, DFResult>> { + if partitioned_file + .extensions + .as_ref() + .map(|e| e.is::()) + .unwrap_or(false) + { + return Box::pin(ready(Ok(vec![partitioned_file]))); + } + + if let Some(pred) = predicate.as_ref() { + let logical_file_schema = Arc::clone(table_schema.file_schema()); + if let Some(mut file_pruner) = FilePruner::try_new( + Arc::clone(pred), + &logical_file_schema, + &partitioned_file, + predicate_creation_errors.clone(), + ) && file_pruner.should_prune()? + { + // file_metrics.files_ranges_pruned_statistics.add_pruned(1); + return Ok(vec![]); + } + } + todo!() + } fn open(&self, file: PartitionedFile) -> DFResult { let session = self.session.clone(); let metrics_registry = self.metrics_registry.clone(); @@ -106,6 +144,11 @@ impl FileOpener for VortexOpener { Label::new(PATH_LABEL, file.path().to_string()), Label::new(PARTITION_LABEL, self.partition.to_string()), ]; + let is_morsel = file + .extensions + .as_ref() + .map(|e| e.is::()) + .unwrap_or(false); let mut projection = self.projection.clone(); let mut filter = self.filter.clone(); diff --git a/vortex-datafusion/src/persistent/source.rs b/vortex-datafusion/src/persistent/source.rs index bac7fe2b39a..61a2746e260 100644 --- a/vortex-datafusion/src/persistent/source.rs +++ b/vortex-datafusion/src/persistent/source.rs @@ -8,6 +8,8 @@ use std::sync::Weak; use datafusion_common::Result as DFResult; use datafusion_common::config::ConfigOptions; +use datafusion_common::tree_node::TreeNodeRecursion; +use datafusion_common::tree_node::TreeNodeRecursion; use datafusion_datasource::TableSchema; use datafusion_datasource::file::FileSource; use datafusion_datasource::file_scan_config::FileScanConfig; @@ -326,4 +328,22 @@ impl FileSource for VortexSource { fn table_schema(&self) -> &TableSchema { &self.table_schema } + + fn apply_expressions( + &self, + f: &mut dyn FnMut(&dyn PhysicalExpr) -> DFResult, + ) -> DFResult { + // Visit predicate (filter) expression if present + let mut tnr = TreeNodeRecursion::Continue; + if let Some(predicate) = &self.full_predicate { + tnr = tnr.visit_sibling(|| f(predicate.as_ref()))?; + } + + // Visit projection expressions + for proj_expr in &self.projection { + tnr = tnr.visit_sibling(|| f(proj_expr.expr.as_ref()))?; + } + + Ok(tnr) + } } diff --git a/vortex-datafusion/src/v2/source.rs b/vortex-datafusion/src/v2/source.rs index 53dd9939a16..135a4182c00 100644 --- a/vortex-datafusion/src/v2/source.rs +++ b/vortex-datafusion/src/v2/source.rs @@ -21,6 +21,7 @@ use datafusion_common::DataFusionError; use datafusion_common::Result as DFResult; use datafusion_common::Statistics; use datafusion_common::stats::Precision as DFPrecision; +use datafusion_common::tree_node::TreeNodeRecursion; use datafusion_datasource::source::DataSource; use datafusion_execution::SendableRecordBatchStream; use datafusion_execution::TaskContext; @@ -540,6 +541,13 @@ impl DataSource for VortexDataSource { .with_updated_node(Arc::new(this) as _), ) } + + fn apply_expressions( + &self, + _f: &mut dyn FnMut(&dyn PhysicalExpr) -> DFResult, + ) -> DFResult { + return Ok(TreeNodeRecursion::Continue); + } } /// Convert a Vortex [`Option`] to a DataFusion [`Precision`](DFPrecision). From 8276c6060693b9584c4d0c9c70b984cc56ac428d Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Mon, 9 Mar 2026 12:11:03 +0000 Subject: [PATCH 06/11] Try use DF file-based morsels Signed-off-by: Adam Gutglick --- vortex-datafusion/src/persistent/format.rs | 1 + vortex-datafusion/src/persistent/opener.rs | 81 +++++++++++++++++----- vortex-datafusion/src/persistent/source.rs | 1 - 3 files changed, 64 insertions(+), 19 deletions(-) diff --git a/vortex-datafusion/src/persistent/format.rs b/vortex-datafusion/src/persistent/format.rs index 0be2086c885..6cb1d58ec11 100644 --- a/vortex-datafusion/src/persistent/format.rs +++ b/vortex-datafusion/src/persistent/format.rs @@ -500,6 +500,7 @@ impl FileFormat for VortexFormat { let conf = FileScanConfigBuilder::from(file_scan_config) .with_source(Arc::new(source)) + .with_morsel_driven(true) .build(); Ok(DataSourceExec::from_data_source(conf)) diff --git a/vortex-datafusion/src/persistent/opener.rs b/vortex-datafusion/src/persistent/opener.rs index 093093b416a..ad6d921b7a0 100644 --- a/vortex-datafusion/src/persistent/opener.rs +++ b/vortex-datafusion/src/persistent/opener.rs @@ -100,7 +100,12 @@ pub(crate) struct VortexOpener { pub scan_concurrency: Option, } -struct VortexMorsel {} +/// The number of rows per morsel when splitting files for morsel-driven execution. +const MORSEL_ROW_COUNT: u64 = 2048; + +struct VortexMorsel { + row_range: Range, +} impl FileOpener for VortexOpener { fn is_leaf_morsel(&self, file: &PartitionedFile) -> bool { @@ -122,20 +127,57 @@ impl FileOpener for VortexOpener { return Box::pin(ready(Ok(vec![partitioned_file]))); } - if let Some(pred) = predicate.as_ref() { - let logical_file_schema = Arc::clone(table_schema.file_schema()); - if let Some(mut file_pruner) = FilePruner::try_new( - Arc::clone(pred), - &logical_file_schema, - &partitioned_file, - predicate_creation_errors.clone(), - ) && file_pruner.should_prune()? - { - // file_metrics.files_ranges_pruned_statistics.add_pruned(1); - return Ok(vec![]); + let file_pruning_predicate = self.file_pruning_predicate.clone(); + let table_schema = self.table_schema.clone(); + let session = self.session.clone(); + let vortex_reader_factory = self.vortex_reader_factory.clone(); + let metrics_registry = self.metrics_registry.clone(); + + Box::pin(async move { + // File-level pruning + if let Some(pred) = file_pruning_predicate.as_ref() { + let logical_file_schema = Arc::clone(table_schema.file_schema()); + if let Some(mut file_pruner) = FilePruner::try_new( + Arc::clone(pred), + &logical_file_schema, + &partitioned_file, + Count::default(), + ) && file_pruner.should_prune()? + { + return Ok(vec![]); + } } - } - todo!() + + // Open the file to get the row count + let reader = vortex_reader_factory + .create_reader(partitioned_file.path().as_ref(), &session) + .map_err(|e| DataFusionError::External(Box::new(e)))?; + + let vxf = session + .open_options() + .with_file_size(partitioned_file.object_meta.size) + .with_metrics_registry(metrics_registry) + .open_read(reader) + .await + .map_err(|e| DataFusionError::External(Box::new(e)))?; + + let row_count = vxf.row_count(); + + // Split into fixed-size morsels + let mut morsels = Vec::new(); + let mut start = 0u64; + while start < row_count { + let end = (start + MORSEL_ROW_COUNT).min(row_count); + let mut f = partitioned_file.clone(); + f.extensions = Some(Arc::new(VortexMorsel { + row_range: start..end, + })); + morsels.push(f); + start = end; + } + + Ok(morsels) + }) } fn open(&self, file: PartitionedFile) -> DFResult { let session = self.session.clone(); @@ -144,11 +186,11 @@ impl FileOpener for VortexOpener { Label::new(PATH_LABEL, file.path().to_string()), Label::new(PARTITION_LABEL, self.partition.to_string()), ]; - let is_morsel = file + let morsel_row_range = file .extensions .as_ref() - .map(|e| e.is::()) - .unwrap_or(false); + .and_then(|e| e.downcast_ref::()) + .map(|m| m.row_range.clone()); let mut projection = self.projection.clone(); let mut filter = self.filter.clone(); @@ -343,7 +385,10 @@ impl FileOpener for VortexOpener { let mut scan_builder = ScanBuilder::new(session.clone(), layout_reader); - if let Some(extensions) = file.extensions + if let Some(row_range) = morsel_row_range { + // Morsel: restrict the scan to the morsel's row range. + scan_builder = scan_builder.with_row_range(row_range); + } else if let Some(extensions) = file.extensions && let Some(vortex_plan) = extensions.downcast_ref::() { scan_builder = vortex_plan.apply_to_builder(scan_builder); diff --git a/vortex-datafusion/src/persistent/source.rs b/vortex-datafusion/src/persistent/source.rs index 61a2746e260..e6abf23b574 100644 --- a/vortex-datafusion/src/persistent/source.rs +++ b/vortex-datafusion/src/persistent/source.rs @@ -9,7 +9,6 @@ use std::sync::Weak; use datafusion_common::Result as DFResult; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::TreeNodeRecursion; -use datafusion_common::tree_node::TreeNodeRecursion; use datafusion_datasource::TableSchema; use datafusion_datasource::file::FileSource; use datafusion_datasource::file_scan_config::FileScanConfig; From 9c523604283e75698f65d089a661ff62f82703ca Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Mon, 9 Mar 2026 12:33:23 +0000 Subject: [PATCH 07/11] Try this thing out Signed-off-by: Adam Gutglick --- Cargo.lock | 171 +++++++++++++------------ Cargo.toml | 28 ++-- vortex-datafusion/src/convert/exprs.rs | 3 +- 3 files changed, 104 insertions(+), 98 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d0bb6195ff9..4496923ad8f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -942,7 +942,7 @@ dependencies = [ "bitflags", "cexpr", "clang-sys", - "itertools 0.13.0", + "itertools 0.10.5", "log", "prettyplease", "proc-macro2", @@ -2084,8 +2084,8 @@ dependencies = [ [[package]] name = "datafusion" -version = "52.1.0" -source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" +version = "52.2.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=df6c035e68e9508029c9ba5b0979dad428573e63#df6c035e68e9508029c9ba5b0979dad428573e63" dependencies = [ "arrow 58.0.0", "arrow-schema 58.0.0", @@ -2190,8 +2190,8 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "52.1.0" -source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" +version = "52.2.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=df6c035e68e9508029c9ba5b0979dad428573e63#df6c035e68e9508029c9ba5b0979dad428573e63" dependencies = [ "arrow 58.0.0", "async-trait", @@ -2238,8 +2238,8 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "52.1.0" -source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" +version = "52.2.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=df6c035e68e9508029c9ba5b0979dad428573e63#df6c035e68e9508029c9ba5b0979dad428573e63" dependencies = [ "arrow 58.0.0", "async-trait", @@ -2282,8 +2282,8 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "52.1.0" -source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" +version = "52.2.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=df6c035e68e9508029c9ba5b0979dad428573e63#df6c035e68e9508029c9ba5b0979dad428573e63" dependencies = [ "ahash 0.8.12", "apache-avro", @@ -2318,8 +2318,8 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "52.1.0" -source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" +version = "52.2.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=df6c035e68e9508029c9ba5b0979dad428573e63#df6c035e68e9508029c9ba5b0979dad428573e63" dependencies = [ "futures", "log", @@ -2357,8 +2357,8 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "52.1.0" -source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" +version = "52.2.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=df6c035e68e9508029c9ba5b0979dad428573e63#df6c035e68e9508029c9ba5b0979dad428573e63" dependencies = [ "arrow 58.0.0", "async-compression", @@ -2415,8 +2415,8 @@ dependencies = [ [[package]] name = "datafusion-datasource-arrow" -version = "52.1.0" -source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" +version = "52.2.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=df6c035e68e9508029c9ba5b0979dad428573e63#df6c035e68e9508029c9ba5b0979dad428573e63" dependencies = [ "arrow 58.0.0", "arrow-ipc 58.0.0", @@ -2438,8 +2438,8 @@ dependencies = [ [[package]] name = "datafusion-datasource-avro" -version = "52.1.0" -source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" +version = "52.2.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=df6c035e68e9508029c9ba5b0979dad428573e63#df6c035e68e9508029c9ba5b0979dad428573e63" dependencies = [ "apache-avro", "arrow 58.0.0", @@ -2480,8 +2480,8 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "52.1.0" -source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" +version = "52.2.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=df6c035e68e9508029c9ba5b0979dad428573e63#df6c035e68e9508029c9ba5b0979dad428573e63" dependencies = [ "arrow 58.0.0", "async-trait", @@ -2524,8 +2524,8 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "52.1.0" -source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" +version = "52.2.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=df6c035e68e9508029c9ba5b0979dad428573e63#df6c035e68e9508029c9ba5b0979dad428573e63" dependencies = [ "arrow 58.0.0", "async-trait", @@ -2547,8 +2547,8 @@ dependencies = [ [[package]] name = "datafusion-datasource-parquet" -version = "52.1.0" -source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" +version = "52.2.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=df6c035e68e9508029c9ba5b0979dad428573e63#df6c035e68e9508029c9ba5b0979dad428573e63" dependencies = [ "arrow 58.0.0", "async-trait", @@ -2582,8 +2582,8 @@ checksum = "2b99e13947667b36ad713549237362afb054b2d8f8cc447751e23ec61202db07" [[package]] name = "datafusion-doc" -version = "52.1.0" -source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" +version = "52.2.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=df6c035e68e9508029c9ba5b0979dad428573e63#df6c035e68e9508029c9ba5b0979dad428573e63" [[package]] name = "datafusion-execution" @@ -2607,8 +2607,8 @@ dependencies = [ [[package]] name = "datafusion-execution" -version = "52.1.0" -source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" +version = "52.2.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=df6c035e68e9508029c9ba5b0979dad428573e63#df6c035e68e9508029c9ba5b0979dad428573e63" dependencies = [ "arrow 58.0.0", "arrow-buffer 58.0.0", @@ -2617,6 +2617,7 @@ dependencies = [ "dashmap", "datafusion-common 52.2.0", "datafusion-expr 52.2.0", + "datafusion-physical-expr-common 52.2.0", "futures", "log", "object_store 0.13.1", @@ -2650,8 +2651,8 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "52.1.0" -source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" +version = "52.2.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=df6c035e68e9508029c9ba5b0979dad428573e63#df6c035e68e9508029c9ba5b0979dad428573e63" dependencies = [ "arrow 58.0.0", "async-trait", @@ -2685,10 +2686,10 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "52.1.0" -source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" +version = "52.2.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=df6c035e68e9508029c9ba5b0979dad428573e63#df6c035e68e9508029c9ba5b0979dad428573e63" dependencies = [ - "arrow", + "arrow 58.0.0", "datafusion-common 52.2.0", "indexmap", "itertools 0.14.0", @@ -2727,8 +2728,8 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "52.1.0" -source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" +version = "52.2.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=df6c035e68e9508029c9ba5b0979dad428573e63#df6c035e68e9508029c9ba5b0979dad428573e63" dependencies = [ "arrow 58.0.0", "arrow-buffer 58.0.0", @@ -2779,11 +2780,11 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "52.1.0" -source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" +version = "52.2.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=df6c035e68e9508029c9ba5b0979dad428573e63#df6c035e68e9508029c9ba5b0979dad428573e63" dependencies = [ "ahash 0.8.12", - "arrow", + "arrow 58.0.0", "datafusion-common 52.2.0", "datafusion-doc 52.2.0", "datafusion-execution 52.2.0", @@ -2813,11 +2814,11 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "52.1.0" -source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" +version = "52.2.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=df6c035e68e9508029c9ba5b0979dad428573e63#df6c035e68e9508029c9ba5b0979dad428573e63" dependencies = [ "ahash 0.8.12", - "arrow", + "arrow 58.0.0", "datafusion-common 52.2.0", "datafusion-expr-common 52.2.0", "datafusion-physical-expr-common 52.2.0", @@ -2848,11 +2849,11 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "52.1.0" -source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" +version = "52.2.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=df6c035e68e9508029c9ba5b0979dad428573e63#df6c035e68e9508029c9ba5b0979dad428573e63" dependencies = [ - "arrow", - "arrow-ord", + "arrow 58.0.0", + "arrow-ord 58.0.0", "datafusion-common 52.2.0", "datafusion-doc 52.2.0", "datafusion-execution 52.2.0", @@ -2863,6 +2864,7 @@ dependencies = [ "datafusion-functions-aggregate-common 52.2.0", "datafusion-macros 52.2.0", "datafusion-physical-expr-common 52.2.0", + "hashbrown 0.16.1", "itertools 0.14.0", "itoa", "log", @@ -2887,8 +2889,8 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "52.1.0" -source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" +version = "52.2.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=df6c035e68e9508029c9ba5b0979dad428573e63#df6c035e68e9508029c9ba5b0979dad428573e63" dependencies = [ "arrow 58.0.0", "async-trait", @@ -2920,10 +2922,10 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "52.1.0" -source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" +version = "52.2.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=df6c035e68e9508029c9ba5b0979dad428573e63#df6c035e68e9508029c9ba5b0979dad428573e63" dependencies = [ - "arrow", + "arrow 58.0.0", "datafusion-common 52.2.0", "datafusion-doc 52.2.0", "datafusion-expr 52.2.0", @@ -2947,8 +2949,8 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "52.1.0" -source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" +version = "52.2.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=df6c035e68e9508029c9ba5b0979dad428573e63#df6c035e68e9508029c9ba5b0979dad428573e63" dependencies = [ "datafusion-common 52.2.0", "datafusion-physical-expr-common 52.2.0", @@ -2967,8 +2969,8 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "52.1.0" -source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" +version = "52.2.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=df6c035e68e9508029c9ba5b0979dad428573e63#df6c035e68e9508029c9ba5b0979dad428573e63" dependencies = [ "datafusion-doc 52.2.0", "quote", @@ -2996,8 +2998,8 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "52.1.0" -source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" +version = "52.2.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=df6c035e68e9508029c9ba5b0979dad428573e63#df6c035e68e9508029c9ba5b0979dad428573e63" dependencies = [ "arrow 58.0.0", "chrono", @@ -3037,11 +3039,11 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "52.1.0" -source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" +version = "52.2.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=df6c035e68e9508029c9ba5b0979dad428573e63#df6c035e68e9508029c9ba5b0979dad428573e63" dependencies = [ "ahash 0.8.12", - "arrow", + "arrow 58.0.0", "datafusion-common 52.2.0", "datafusion-expr 52.2.0", "datafusion-expr-common 52.2.0", @@ -3075,10 +3077,10 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-adapter" -version = "52.1.0" -source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" +version = "52.2.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=df6c035e68e9508029c9ba5b0979dad428573e63#df6c035e68e9508029c9ba5b0979dad428573e63" dependencies = [ - "arrow", + "arrow 58.0.0", "datafusion-common 52.2.0", "datafusion-expr 52.2.0", "datafusion-functions 52.2.0", @@ -3103,8 +3105,8 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "52.1.0" -source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" +version = "52.2.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=df6c035e68e9508029c9ba5b0979dad428573e63#df6c035e68e9508029c9ba5b0979dad428573e63" dependencies = [ "ahash 0.8.12", "arrow 58.0.0", @@ -3137,10 +3139,10 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "52.1.0" -source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" +version = "52.2.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=df6c035e68e9508029c9ba5b0979dad428573e63#df6c035e68e9508029c9ba5b0979dad428573e63" dependencies = [ - "arrow", + "arrow 58.0.0", "datafusion-common 52.2.0", "datafusion-execution 52.2.0", "datafusion-expr 52.2.0", @@ -3186,8 +3188,8 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "52.1.0" -source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" +version = "52.2.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=df6c035e68e9508029c9ba5b0979dad428573e63#df6c035e68e9508029c9ba5b0979dad428573e63" dependencies = [ "ahash 0.8.12", "arrow 58.0.0", @@ -3234,10 +3236,10 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "52.1.0" -source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" +version = "52.2.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=df6c035e68e9508029c9ba5b0979dad428573e63#df6c035e68e9508029c9ba5b0979dad428573e63" dependencies = [ - "arrow", + "arrow 58.0.0", "datafusion-common 52.2.0", "datafusion-datasource 52.2.0", "datafusion-expr-common 52.2.0", @@ -3264,8 +3266,8 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "52.1.0" -source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" +version = "52.2.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=df6c035e68e9508029c9ba5b0979dad428573e63#df6c035e68e9508029c9ba5b0979dad428573e63" dependencies = [ "async-trait", "datafusion-common 52.2.0", @@ -3277,18 +3279,20 @@ dependencies = [ [[package]] name = "datafusion-spark" -version = "52.1.0" -source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" +version = "52.2.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=df6c035e68e9508029c9ba5b0979dad428573e63#df6c035e68e9508029c9ba5b0979dad428573e63" dependencies = [ "arrow 58.0.0", "bigdecimal", "chrono", "crc32fast", + "datafusion 52.2.0", "datafusion-catalog 52.2.0", "datafusion-common 52.2.0", "datafusion-execution 52.2.0", "datafusion-expr 52.2.0", "datafusion-functions 52.2.0", + "datafusion-functions-aggregate 52.2.0", "datafusion-functions-nested 52.2.0", "log", "percent-encoding", @@ -3318,14 +3322,15 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "52.1.0" -source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" +version = "52.2.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=df6c035e68e9508029c9ba5b0979dad428573e63#df6c035e68e9508029c9ba5b0979dad428573e63" dependencies = [ "arrow 58.0.0", "bigdecimal", "chrono", "datafusion-common 52.2.0", "datafusion-expr 52.2.0", + "datafusion-functions-nested 52.2.0", "indexmap", "log", "recursive", @@ -3335,8 +3340,8 @@ dependencies = [ [[package]] name = "datafusion-sqllogictest" -version = "52.1.0" -source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" +version = "52.2.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=df6c035e68e9508029c9ba5b0979dad428573e63#df6c035e68e9508029c9ba5b0979dad428573e63" dependencies = [ "arrow 58.0.0", "async-trait", @@ -3360,8 +3365,8 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "52.1.0" -source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=ed061e794b95dbf932bcc06f93ba9bc1252e1db5#ed061e794b95dbf932bcc06f93ba9bc1252e1db5" +version = "52.2.0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=df6c035e68e9508029c9ba5b0979dad428573e63#df6c035e68e9508029c9ba5b0979dad428573e63" dependencies = [ "async-recursion", "async-trait", @@ -7105,7 +7110,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7" dependencies = [ "heck", - "itertools 0.14.0", + "itertools 0.10.5", "log", "multimap", "petgraph", @@ -7137,7 +7142,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" dependencies = [ "anyhow", - "itertools 0.14.0", + "itertools 0.10.5", "proc-macro2", "quote", "syn 2.0.117", diff --git a/Cargo.toml b/Cargo.toml index 280e88c849e..e8c8af538ff 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -370,19 +370,19 @@ debug-assertions = true inherits = "bench" [patch.crates-io] -datafusion = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "ed061e794b95dbf932bcc06f93ba9bc1252e1db5" } -datafusion-catalog = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "ed061e794b95dbf932bcc06f93ba9bc1252e1db5" } -datafusion-common = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "ed061e794b95dbf932bcc06f93ba9bc1252e1db5" } -datafusion-common-runtime = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "ed061e794b95dbf932bcc06f93ba9bc1252e1db5" } -datafusion-datasource = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "ed061e794b95dbf932bcc06f93ba9bc1252e1db5" } -datafusion-execution = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "ed061e794b95dbf932bcc06f93ba9bc1252e1db5" } -datafusion-expr = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "ed061e794b95dbf932bcc06f93ba9bc1252e1db5" } -datafusion-functions = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "ed061e794b95dbf932bcc06f93ba9bc1252e1db5" } -datafusion-physical-expr = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "ed061e794b95dbf932bcc06f93ba9bc1252e1db5" } -datafusion-physical-expr-adapter = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "ed061e794b95dbf932bcc06f93ba9bc1252e1db5" } -datafusion-physical-expr-common = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "ed061e794b95dbf932bcc06f93ba9bc1252e1db5" } -datafusion-physical-plan = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "ed061e794b95dbf932bcc06f93ba9bc1252e1db5" } -datafusion-pruning = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "ed061e794b95dbf932bcc06f93ba9bc1252e1db5" } -datafusion-sqllogictest = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "ed061e794b95dbf932bcc06f93ba9bc1252e1db5" } +datafusion = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "df6c035e68e9508029c9ba5b0979dad428573e63" } +datafusion-catalog = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "df6c035e68e9508029c9ba5b0979dad428573e63" } +datafusion-common = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "df6c035e68e9508029c9ba5b0979dad428573e63" } +datafusion-common-runtime = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "df6c035e68e9508029c9ba5b0979dad428573e63" } +datafusion-datasource = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "df6c035e68e9508029c9ba5b0979dad428573e63" } +datafusion-execution = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "df6c035e68e9508029c9ba5b0979dad428573e63" } +datafusion-expr = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "df6c035e68e9508029c9ba5b0979dad428573e63" } +datafusion-functions = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "df6c035e68e9508029c9ba5b0979dad428573e63" } +datafusion-physical-expr = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "df6c035e68e9508029c9ba5b0979dad428573e63" } +datafusion-physical-expr-adapter = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "df6c035e68e9508029c9ba5b0979dad428573e63" } +datafusion-physical-expr-common = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "df6c035e68e9508029c9ba5b0979dad428573e63" } +datafusion-physical-plan = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "df6c035e68e9508029c9ba5b0979dad428573e63" } +datafusion-pruning = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "df6c035e68e9508029c9ba5b0979dad428573e63" } +datafusion-sqllogictest = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "df6c035e68e9508029c9ba5b0979dad428573e63" } tpchgen = { git = "https://github.com/AdamGS/tpchgen-rs.git", branch = "adamg/bump-arrow-match-df" } tpchgen-arrow = { git = "https://github.com/AdamGS/tpchgen-rs.git", branch = "adamg/bump-arrow-match-df" } diff --git a/vortex-datafusion/src/convert/exprs.rs b/vortex-datafusion/src/convert/exprs.rs index 455a5c65a71..79fec6e1e3e 100644 --- a/vortex-datafusion/src/convert/exprs.rs +++ b/vortex-datafusion/src/convert/exprs.rs @@ -395,7 +395,8 @@ fn try_operator_from_df(value: &DFOperator) -> DFResult { | DFOperator::AtQuestion | DFOperator::Question | DFOperator::QuestionAnd - | DFOperator::QuestionPipe => { + | DFOperator::QuestionPipe + | DFOperator::Colon => { tracing::debug!(operator = %value, "Can't pushdown binary_operator operator"); Err(exec_datafusion_err!( "Unsupported datafusion operator {value}" From bbe50c2664cf591fc34f885fdc959d2ec27ab22c Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Mon, 9 Mar 2026 12:56:32 +0000 Subject: [PATCH 08/11] Better morsel size Signed-off-by: Adam Gutglick --- vortex-datafusion/src/persistent/opener.rs | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/vortex-datafusion/src/persistent/opener.rs b/vortex-datafusion/src/persistent/opener.rs index ad6d921b7a0..6cbdc682541 100644 --- a/vortex-datafusion/src/persistent/opener.rs +++ b/vortex-datafusion/src/persistent/opener.rs @@ -38,6 +38,7 @@ use vortex::array::ArrayRef; use vortex::array::VortexSessionExecute; use vortex::array::arrow::ArrowArrayExecutor; use vortex::error::VortexError; +use vortex::file::Footer; use vortex::file::OpenOptionsSessionExt; use vortex::io::InstrumentedReadAt; use vortex::layout::LayoutReader; @@ -101,10 +102,12 @@ pub(crate) struct VortexOpener { } /// The number of rows per morsel when splitting files for morsel-driven execution. -const MORSEL_ROW_COUNT: u64 = 2048; +const MORSEL_ROW_COUNT: u64 = 32 * 2048; struct VortexMorsel { row_range: Range, + /// Cached footer from morselize() so open() doesn't re-read it. + footer: Footer, } impl FileOpener for VortexOpener { @@ -162,6 +165,7 @@ impl FileOpener for VortexOpener { .map_err(|e| DataFusionError::External(Box::new(e)))?; let row_count = vxf.row_count(); + let footer = vxf.footer().clone(); // Split into fixed-size morsels let mut morsels = Vec::new(); @@ -171,6 +175,7 @@ impl FileOpener for VortexOpener { let mut f = partitioned_file.clone(); f.extensions = Some(Arc::new(VortexMorsel { row_range: start..end, + footer: footer.clone(), })); morsels.push(f); start = end; @@ -186,11 +191,11 @@ impl FileOpener for VortexOpener { Label::new(PATH_LABEL, file.path().to_string()), Label::new(PARTITION_LABEL, self.partition.to_string()), ]; - let morsel_row_range = file + let morsel = file .extensions .as_ref() .and_then(|e| e.downcast_ref::()) - .map(|m| m.row_range.clone()); + .map(|m| (m.row_range.clone(), m.footer.clone())); let mut projection = self.projection.clone(); let mut filter = self.filter.clone(); @@ -271,7 +276,11 @@ impl FileOpener for VortexOpener { .with_metrics_registry(metrics_registry.clone()) .with_labels(labels); - if let Some(file_metadata_cache) = file_metadata_cache + // Use cached footer: prefer the morsel's footer (from morselize()), + // fall back to the file metadata cache. + if let Some((_, ref footer)) = morsel { + open_opts = open_opts.with_footer(footer.clone()); + } else if let Some(file_metadata_cache) = file_metadata_cache && let Some(entry) = file_metadata_cache.get(file.path()) && entry.is_valid_for(&file.object_meta) && let Some(vortex_metadata) = entry @@ -385,7 +394,7 @@ impl FileOpener for VortexOpener { let mut scan_builder = ScanBuilder::new(session.clone(), layout_reader); - if let Some(row_range) = morsel_row_range { + if let Some((row_range, _)) = morsel { // Morsel: restrict the scan to the morsel's row range. scan_builder = scan_builder.with_row_range(row_range); } else if let Some(extensions) = file.extensions From 0b1ced13ef4eb2a1d0ec838ee28e7e821561b77c Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Mon, 9 Mar 2026 13:33:39 +0000 Subject: [PATCH 09/11] fix thing Signed-off-by: Adam Gutglick --- vortex-datafusion/src/persistent/opener.rs | 155 ++++++++++++--------- 1 file changed, 88 insertions(+), 67 deletions(-) diff --git a/vortex-datafusion/src/persistent/opener.rs b/vortex-datafusion/src/persistent/opener.rs index 6cbdc682541..946be7119b1 100644 --- a/vortex-datafusion/src/persistent/opener.rs +++ b/vortex-datafusion/src/persistent/opener.rs @@ -38,7 +38,6 @@ use vortex::array::ArrayRef; use vortex::array::VortexSessionExecute; use vortex::array::arrow::ArrowArrayExecutor; use vortex::error::VortexError; -use vortex::file::Footer; use vortex::file::OpenOptionsSessionExt; use vortex::io::InstrumentedReadAt; use vortex::layout::LayoutReader; @@ -106,8 +105,8 @@ const MORSEL_ROW_COUNT: u64 = 32 * 2048; struct VortexMorsel { row_range: Range, - /// Cached footer from morselize() so open() doesn't re-read it. - footer: Footer, + /// Cached layout reader, shared across all morsels from the same file. + layout_reader: Arc, } impl FileOpener for VortexOpener { @@ -165,7 +164,9 @@ impl FileOpener for VortexOpener { .map_err(|e| DataFusionError::External(Box::new(e)))?; let row_count = vxf.row_count(); - let footer = vxf.footer().clone(); + let layout_reader = vxf + .layout_reader() + .map_err(|e| DataFusionError::External(Box::new(e)))?; // Split into fixed-size morsels let mut morsels = Vec::new(); @@ -175,7 +176,7 @@ impl FileOpener for VortexOpener { let mut f = partitioned_file.clone(); f.extensions = Some(Arc::new(VortexMorsel { row_range: start..end, - footer: footer.clone(), + layout_reader: layout_reader.clone(), })); morsels.push(f); start = end; @@ -195,17 +196,24 @@ impl FileOpener for VortexOpener { .extensions .as_ref() .and_then(|e| e.downcast_ref::()) - .map(|m| (m.row_range.clone(), m.footer.clone())); + .map(|m| (m.row_range.clone(), m.layout_reader.clone())); let mut projection = self.projection.clone(); let mut filter = self.filter.clone(); - let reader = self - .vortex_reader_factory - .create_reader(file.path().as_ref(), &session)?; - - let reader = - InstrumentedReadAt::new_with_labels(reader, metrics_registry.as_ref(), labels.clone()); + // Only create I/O reader if not a morsel (morsels have a cached layout reader). + let reader = if morsel.is_none() { + let r = self + .vortex_reader_factory + .create_reader(file.path().as_ref(), &session)?; + Some(InstrumentedReadAt::new_with_labels( + r, + metrics_registry.as_ref(), + labels.clone(), + )) + } else { + None + }; let file_pruning_predicate = self.file_pruning_predicate.clone(); let expr_adapter_factory = self.expr_adapter_factory.clone(); @@ -270,36 +278,77 @@ impl FileOpener for VortexOpener { return Ok(stream::empty().boxed()); } - let mut open_opts = session - .open_options() - .with_file_size(file.object_meta.size) - .with_metrics_registry(metrics_registry.clone()) - .with_labels(labels); - - // Use cached footer: prefer the morsel's footer (from morselize()), - // fall back to the file metadata cache. - if let Some((_, ref footer)) = morsel { - open_opts = open_opts.with_footer(footer.clone()); - } else if let Some(file_metadata_cache) = file_metadata_cache - && let Some(entry) = file_metadata_cache.get(file.path()) - && entry.is_valid_for(&file.object_meta) - && let Some(vortex_metadata) = entry - .file_metadata - .as_any() - .downcast_ref::() + // Get layout reader and file dtype - either from morsel cache or by opening the file. + let (scan_layout_reader, file_dtype, file_row_count) = if let Some((_, ref morsel_lr)) = + morsel { - open_opts = open_opts.with_footer(vortex_metadata.footer().clone()); - } + ( + morsel_lr.clone(), + morsel_lr.dtype().clone(), + morsel_lr.row_count(), + ) + } else { + let mut open_opts = session + .open_options() + .with_file_size(file.object_meta.size) + .with_metrics_registry(metrics_registry.clone()) + .with_labels(labels); + + if let Some(file_metadata_cache) = file_metadata_cache + && let Some(entry) = file_metadata_cache.get(file.path()) + && entry.is_valid_for(&file.object_meta) + && let Some(vortex_metadata) = entry + .file_metadata + .as_any() + .downcast_ref::() + { + open_opts = open_opts.with_footer(vortex_metadata.footer().clone()); + } - let vxf = open_opts - .open_read(reader) - .await - .map_err(|e| exec_datafusion_err!("Failed to open Vortex file {e}"))?; + let vxf = open_opts + .open_read(reader.expect("reader must exist for non-morsel path")) + .await + .map_err(|e| exec_datafusion_err!("Failed to open Vortex file {e}"))?; + + let dtype = vxf.dtype().clone(); + let rc = vxf.row_count(); + + // We share layout readers across partitions so we only read each layout once. + let lr = match layout_reader.entry(file.object_meta.location.clone()) { + Entry::Occupied(mut occupied_entry) => { + if let Some(reader) = occupied_entry.get().upgrade() { + tracing::trace!("reusing layout reader for {}", occupied_entry.key()); + reader + } else { + tracing::trace!("creating layout reader for {}", occupied_entry.key()); + let reader = vxf.layout_reader().map_err(|e| { + DataFusionError::Execution(format!( + "Failed to create layout reader: {e}" + )) + })?; + occupied_entry.insert(Arc::downgrade(&reader)); + reader + } + } + Entry::Vacant(vacant_entry) => { + tracing::trace!("creating layout reader for {}", vacant_entry.key()); + let reader = vxf.layout_reader().map_err(|e| { + DataFusionError::Execution(format!( + "Failed to create layout reader: {e}" + )) + })?; + vacant_entry.insert(Arc::downgrade(&reader)); + reader + } + }; + + (lr, dtype, rc) + }; // This is the expected arrow types of the actual columns in the file, which might have different types // from the unified logical schema or miss let this_file_schema = Arc::new(calculate_physical_schema( - vxf.dtype(), + &file_dtype, &unified_file_schema, )?); @@ -341,7 +390,7 @@ impl FileOpener for VortexOpener { // The schema of the stream returned from the vortex scan. // We use a reference schema for types that don't roundtrip (Dictionary, Utf8, etc.). - let scan_dtype = scan_projection.return_dtype(vxf.dtype()).map_err(|_e| { + let scan_dtype = scan_projection.return_dtype(&file_dtype).map_err(|_e| { exec_datafusion_err!("Couldn't get the dtype for the underlying Vortex scan") })?; @@ -364,35 +413,7 @@ impl FileOpener for VortexOpener { .try_map_exprs(|expr| reassign_expr_columns(expr, &stream_schema))?; let projector = leftover_projection.make_projector(&stream_schema)?; - // We share our layout readers with others partitions in the scan, so we can only need to read each layout in each file once. - let layout_reader = match layout_reader.entry(file.object_meta.location.clone()) { - Entry::Occupied(mut occupied_entry) => { - if let Some(reader) = occupied_entry.get().upgrade() { - tracing::trace!("reusing layout reader for {}", occupied_entry.key()); - reader - } else { - tracing::trace!("creating layout reader for {}", occupied_entry.key()); - let reader = vxf.layout_reader().map_err(|e| { - DataFusionError::Execution(format!( - "Failed to create layout reader: {e}" - )) - })?; - occupied_entry.insert(Arc::downgrade(&reader)); - reader - } - } - Entry::Vacant(vacant_entry) => { - tracing::trace!("creating layout reader for {}", vacant_entry.key()); - let reader = vxf.layout_reader().map_err(|e| { - DataFusionError::Execution(format!("Failed to create layout reader: {e}")) - })?; - vacant_entry.insert(Arc::downgrade(&reader)); - - reader - } - }; - - let mut scan_builder = ScanBuilder::new(session.clone(), layout_reader); + let mut scan_builder = ScanBuilder::new(session.clone(), scan_layout_reader); if let Some((row_range, _)) = morsel { // Morsel: restrict the scan to the morsel's row range. @@ -407,7 +428,7 @@ impl FileOpener for VortexOpener { scan_builder = apply_byte_range( file_range, file.object_meta.size, - vxf.row_count(), + file_row_count, scan_builder, ); } From 7d0504e818e060ed2d7071d8c217c6084bc18e9d Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Mon, 9 Mar 2026 17:20:52 +0000 Subject: [PATCH 10/11] trying something out Signed-off-by: Adam Gutglick --- vortex-datafusion/src/persistent/opener.rs | 108 ++++++++++++++++++++- 1 file changed, 104 insertions(+), 4 deletions(-) diff --git a/vortex-datafusion/src/persistent/opener.rs b/vortex-datafusion/src/persistent/opener.rs index 946be7119b1..b06ef54be80 100644 --- a/vortex-datafusion/src/persistent/opener.rs +++ b/vortex-datafusion/src/persistent/opener.rs @@ -10,6 +10,7 @@ use arrow_schema::Schema; use datafusion_common::DataFusionError; use datafusion_common::Result as DFResult; use datafusion_common::ScalarValue; +use datafusion_common::Statistics; use datafusion_common::exec_datafusion_err; use datafusion_datasource::FileRange; use datafusion_datasource::PartitionedFile; @@ -21,6 +22,7 @@ use datafusion_physical_expr::PhysicalExprRef; use datafusion_physical_expr::projection::ProjectionExprs; use datafusion_physical_expr::simplifier::PhysicalExprSimplifier; use datafusion_physical_expr::split_conjunction; +use datafusion_physical_expr::utils::collect_columns; use datafusion_physical_expr::utils::reassign_expr_columns; use datafusion_physical_expr_adapter::PhysicalExprAdapterFactory; use datafusion_physical_expr_adapter::replace_columns_with_literals; @@ -37,6 +39,8 @@ use tracing::Instrument; use vortex::array::ArrayRef; use vortex::array::VortexSessionExecute; use vortex::array::arrow::ArrowArrayExecutor; +use vortex::dtype::DType; +use vortex::dtype::DecimalType; use vortex::error::VortexError; use vortex::file::OpenOptionsSessionExt; use vortex::io::InstrumentedReadAt; @@ -47,6 +51,7 @@ use vortex::scan::ScanBuilder; use vortex::session::VortexSession; use vortex_utils::aliases::dash_map::DashMap; use vortex_utils::aliases::dash_map::Entry; +use vortex_utils::aliases::hash_set::HashSet; use crate::VortexAccessPlan; use crate::convert::exprs::ExpressionConvertor; @@ -100,8 +105,85 @@ pub(crate) struct VortexOpener { pub scan_concurrency: Option, } -/// The number of rows per morsel when splitting files for morsel-driven execution. -const MORSEL_ROW_COUNT: u64 = 32 * 2048; +/// Target byte budget per morsel (16 MB). +const TARGET_MORSEL_BYTES: u64 = 16 * 1024 * 1024; +/// Minimum rows per morsel to avoid excessive overhead. +const MIN_MORSEL_ROWS: u64 = 2048; +/// Maximum rows per morsel to bound memory usage. +const MAX_MORSEL_ROWS: u64 = 1_000_000; + +/// Estimate the average byte width of a DType for morsel sizing fallback. +fn estimate_dtype_byte_width(dtype: &DType) -> u64 { + match dtype { + DType::Null => 0, + DType::Bool(_) => 1, + DType::Primitive(ptype, _) => ptype.byte_width() as u64, + DType::Decimal(dec, _) => DecimalType::smallest_decimal_value_type(dec).byte_width() as u64, + DType::Utf8(_) | DType::Binary(_) => 64, + DType::List(inner, _) => 64 + estimate_dtype_byte_width(inner), + DType::FixedSizeList(inner, size, _) => estimate_dtype_byte_width(inner) * (*size as u64), + DType::Struct(fields, _) => fields.fields().map(|f| estimate_dtype_byte_width(&f)).sum(), + DType::Extension(ext) => estimate_dtype_byte_width(ext.storage_dtype()), + } +} + +/// Compute the morsel row count using file statistics if available, falling back to DType estimation. +fn compute_morsel_row_count( + statistics: Option<&Statistics>, + file_dtype: &DType, + row_count: u64, + touched_col_indices: &HashSet, +) -> u64 { + // Try to use actual per-column byte_size statistics + if let Some(stats) = statistics { + let touched_bytes: usize = touched_col_indices + .iter() + .filter_map(|&i| stats.column_statistics.get(i)) + .filter_map(|cs| cs.byte_size.get_value().copied()) + .sum(); + + if touched_bytes > 0 { + let num_rows = stats + .num_rows + .get_value() + .map(|&n| n as u64) + .unwrap_or(row_count); + if num_rows > 0 { + let bytes_per_row = touched_bytes as u64 / num_rows; + if bytes_per_row > 0 { + return (TARGET_MORSEL_BYTES / bytes_per_row) + .clamp(MIN_MORSEL_ROWS, MAX_MORSEL_ROWS); + } + } + } + } + + // Fallback: estimate from DType + let struct_fields = match file_dtype.as_struct_fields_opt() { + Some(fields) => fields, + None => return MIN_MORSEL_ROWS, + }; + + let touched_bytes: u64 = if touched_col_indices.is_empty() { + struct_fields + .fields() + .map(|f| estimate_dtype_byte_width(&f)) + .sum() + } else { + struct_fields + .fields() + .enumerate() + .filter(|(i, _)| touched_col_indices.contains(i)) + .map(|(_, f)| estimate_dtype_byte_width(&f)) + .sum() + }; + + if touched_bytes == 0 { + return MAX_MORSEL_ROWS; + } + + (TARGET_MORSEL_BYTES / touched_bytes).clamp(MIN_MORSEL_ROWS, MAX_MORSEL_ROWS) +} struct VortexMorsel { row_range: Range, @@ -135,6 +217,17 @@ impl FileOpener for VortexOpener { let vortex_reader_factory = self.vortex_reader_factory.clone(); let metrics_registry = self.metrics_registry.clone(); + // Gather the set of column indices touched by projection and filter + let touched_col_indices: HashSet = { + let mut cols: HashSet = self.projection.column_indices().into_iter().collect(); + if let Some(filter) = &self.filter { + for col in collect_columns(filter) { + cols.insert(col.index()); + } + } + cols + }; + Box::pin(async move { // File-level pruning if let Some(pred) = file_pruning_predicate.as_ref() { @@ -168,11 +261,18 @@ impl FileOpener for VortexOpener { .layout_reader() .map_err(|e| DataFusionError::External(Box::new(e)))?; - // Split into fixed-size morsels + let morsel_row_count = compute_morsel_row_count( + partitioned_file.statistics.as_deref(), + layout_reader.dtype(), + row_count, + &touched_col_indices, + ); + + // Split into adaptive-size morsels let mut morsels = Vec::new(); let mut start = 0u64; while start < row_count { - let end = (start + MORSEL_ROW_COUNT).min(row_count); + let end = (start + morsel_row_count).min(row_count); let mut f = partitioned_file.clone(); f.extensions = Some(Arc::new(VortexMorsel { row_range: start..end, From 2e1f73c4695734704b56401cb0b4fb1cac3338bf Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Mon, 9 Mar 2026 17:30:26 +0000 Subject: [PATCH 11/11] some potential sizing Signed-off-by: Adam Gutglick --- vortex-datafusion/src/persistent/opener.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vortex-datafusion/src/persistent/opener.rs b/vortex-datafusion/src/persistent/opener.rs index b06ef54be80..ec182dca05b 100644 --- a/vortex-datafusion/src/persistent/opener.rs +++ b/vortex-datafusion/src/persistent/opener.rs @@ -106,11 +106,11 @@ pub(crate) struct VortexOpener { } /// Target byte budget per morsel (16 MB). -const TARGET_MORSEL_BYTES: u64 = 16 * 1024 * 1024; +const TARGET_MORSEL_BYTES: u64 = 8 * 1024 * 1024; /// Minimum rows per morsel to avoid excessive overhead. const MIN_MORSEL_ROWS: u64 = 2048; /// Maximum rows per morsel to bound memory usage. -const MAX_MORSEL_ROWS: u64 = 1_000_000; +const MAX_MORSEL_ROWS: u64 = 128 * 1024; /// Estimate the average byte width of a DType for morsel sizing fallback. fn estimate_dtype_byte_width(dtype: &DType) -> u64 {