diff --git a/Cargo.lock b/Cargo.lock index 733d82da3c2..98adca42483 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -234,19 +234,40 @@ version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e4754a624e5ae42081f464514be454b39711daae0458906dacde5f4c632f33a8" dependencies = [ - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-csv", - "arrow-data", - "arrow-ipc", - "arrow-json", - "arrow-ord", - "arrow-row", - "arrow-schema", - "arrow-select", - "arrow-string", + "arrow-arith 57.3.0", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-cast 57.3.0", + "arrow-csv 57.3.0", + "arrow-data 57.3.0", + "arrow-ipc 57.3.0", + "arrow-json 57.3.0", + "arrow-ord 57.3.0", + "arrow-row 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", + "arrow-string 57.3.0", +] + +[[package]] +name = "arrow" +version = "58.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "602268ce9f569f282cedb9a9f6bac569b680af47b9b077d515900c03c5d190da" +dependencies = [ + "arrow-arith 58.0.0", + "arrow-array 58.0.0", + "arrow-buffer 58.0.0", + "arrow-cast 58.0.0", + "arrow-csv 58.0.0", + "arrow-data 58.0.0", + "arrow-ipc 58.0.0", + "arrow-json 58.0.0", + "arrow-ord 58.0.0", + "arrow-row 58.0.0", + "arrow-schema 58.0.0", + "arrow-select 58.0.0", + "arrow-string 58.0.0", ] [[package]] @@ -255,10 +276,24 @@ version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f7b3141e0ec5145a22d8694ea8b6d6f69305971c4fa1c1a13ef0195aef2d678b" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", + "chrono", + "num-traits", +] + +[[package]] +name = "arrow-arith" +version = "58.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd53c6bf277dea91f136ae8e3a5d7041b44b5e489e244e637d00ae302051f56f" +dependencies = [ + "arrow-array 58.0.0", + "arrow-buffer 58.0.0", + "arrow-data 58.0.0", + "arrow-schema 58.0.0", "chrono", "num-traits", ] @@ -270,9 +305,28 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4c8955af33b25f3b175ee10af580577280b4bd01f7e823d94c7cdef7cf8c9aef" dependencies = [ "ahash 0.8.12", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-buffer 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", + "chrono", + "chrono-tz", + "half", + "hashbrown 0.16.1", + "num-complex", + "num-integer", + "num-traits", +] + +[[package]] +name = "arrow-array" +version = "58.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e53796e07a6525edaf7dc28b540d477a934aff14af97967ad1d5550878969b9e" +dependencies = [ + "ahash 0.8.12", + "arrow-buffer 58.0.0", + "arrow-data 58.0.0", + "arrow-schema 58.0.0", "chrono", "chrono-tz", "half", @@ -294,18 +348,52 @@ dependencies = [ "num-traits", ] +[[package]] +name = "arrow-buffer" +version = "58.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2c1a85bb2e94ee10b76531d8bc3ce9b7b4c0d508cabfb17d477f63f2617bd20" +dependencies = [ + "bytes", + "half", + "num-bigint", + "num-traits", +] + [[package]] name = "arrow-cast" version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "646bbb821e86fd57189c10b4fcdaa941deaf4181924917b0daa92735baa6ada5" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-ord", - "arrow-schema", - "arrow-select", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-data 57.3.0", + "arrow-ord 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", + "atoi", + "base64", + "chrono", + "comfy-table", + "half", + "lexical-core", + "num-traits", + "ryu", +] + +[[package]] +name = "arrow-cast" +version = "58.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89fb245db6b0e234ed8e15b644edb8664673fefe630575e94e62cd9d489a8a26" +dependencies = [ + "arrow-array 58.0.0", + "arrow-buffer 58.0.0", + "arrow-data 58.0.0", + "arrow-ord 58.0.0", + "arrow-schema 58.0.0", + "arrow-select 58.0.0", "atoi", "base64", "chrono", @@ -322,9 +410,24 @@ version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8da746f4180004e3ce7b83c977daf6394d768332349d3d913998b10a120b790a" dependencies = [ - "arrow-array", - "arrow-cast", - "arrow-schema", + "arrow-array 57.3.0", + "arrow-cast 57.3.0", + "arrow-schema 57.3.0", + "chrono", + "csv", + "csv-core", + "regex", +] + +[[package]] +name = "arrow-csv" +version = "58.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d374882fb465a194462527c0c15a93aa19a554cf690a6b77a26b2a02539937a7" +dependencies = [ + "arrow-array 58.0.0", + "arrow-cast 58.0.0", + "arrow-schema 58.0.0", "chrono", "csv", "csv-core", @@ -337,8 +440,21 @@ version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fdd994a9d28e6365aa78e15da3f3950c0fdcea6b963a12fa1c391afb637b304" dependencies = [ - "arrow-buffer", - "arrow-schema", + "arrow-buffer 57.3.0", + "arrow-schema 57.3.0", + "half", + "num-integer", + "num-traits", +] + +[[package]] +name = "arrow-data" +version = "58.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "189d210bc4244c715fa3ed9e6e22864673cccb73d5da28c2723fb2e527329b33" +dependencies = [ + "arrow-buffer 58.0.0", + "arrow-schema 58.0.0", "half", "num-integer", "num-traits", @@ -350,11 +466,27 @@ version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "abf7df950701ab528bf7c0cf7eeadc0445d03ef5d6ffc151eaae6b38a58feff1" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", + "flatbuffers", + "lz4_flex 0.12.0", + "zstd", +] + +[[package]] +name = "arrow-ipc" +version = "58.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7968c2e5210c41f4909b2ef76f6e05e172b99021c2def5edf3cc48fdd39d1d6c" +dependencies = [ + "arrow-array 58.0.0", + "arrow-buffer 58.0.0", + "arrow-data 58.0.0", + "arrow-schema 58.0.0", + "arrow-select 58.0.0", "flatbuffers", "lz4_flex 0.12.0", "zstd", @@ -366,11 +498,35 @@ version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ff8357658bedc49792b13e2e862b80df908171275f8e6e075c460da5ee4bf86" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-cast 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", + "chrono", + "half", + "indexmap", + "itoa", + "lexical-core", + "memchr", + "num-traits", + "ryu", + "serde_core", + "serde_json", + "simdutf8", +] + +[[package]] +name = "arrow-json" +version = "58.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92111dba5bf900f443488e01f00d8c4ddc2f47f5c50039d18120287b580baa22" +dependencies = [ + "arrow-array 58.0.0", + "arrow-buffer 58.0.0", + "arrow-cast 58.0.0", + "arrow-data 58.0.0", + "arrow-schema 58.0.0", "chrono", "half", "indexmap", @@ -390,11 +546,24 @@ version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f7d8f1870e03d4cbed632959498bcc84083b5a24bded52905ae1695bd29da45b" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", +] + +[[package]] +name = "arrow-ord" +version = "58.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "211136cb253577ee1a6665f741a13136d4e563f64f5093ffd6fb837af90b9495" +dependencies = [ + "arrow-array 58.0.0", + "arrow-buffer 58.0.0", + "arrow-data 58.0.0", + "arrow-schema 58.0.0", + "arrow-select 58.0.0", ] [[package]] @@ -403,10 +572,23 @@ version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "18228633bad92bff92a95746bbeb16e5fc318e8382b75619dec26db79e4de4c0" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", + "half", +] + +[[package]] +name = "arrow-row" +version = "58.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e0f20145f9f5ea3fe383e2ba7a7487bf19be36aa9dbf5dd6a1f92f657179663" +dependencies = [ + "arrow-array 58.0.0", + "arrow-buffer 58.0.0", + "arrow-data 58.0.0", + "arrow-schema 58.0.0", "half", ] @@ -421,6 +603,17 @@ dependencies = [ "serde_json", ] +[[package]] +name = "arrow-schema" +version = "58.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b47e0ca91cc438d2c7879fe95e0bca5329fff28649e30a88c6f760b1faeddcb" +dependencies = [ + "bitflags", + "serde_core", + "serde_json", +] + [[package]] name = "arrow-select" version = "57.3.0" @@ -428,10 +621,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "68bf3e3efbd1278f770d67e5dc410257300b161b93baedb3aae836144edcaf4b" dependencies = [ "ahash 0.8.12", - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", + "num-traits", +] + +[[package]] +name = "arrow-select" +version = "58.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "750a7d1dda177735f5e82a314485b6915c7cccdbb278262ac44090f4aba4a325" +dependencies = [ + "ahash 0.8.12", + "arrow-array 58.0.0", + "arrow-buffer 58.0.0", + "arrow-data 58.0.0", + "arrow-schema 58.0.0", "num-traits", ] @@ -441,11 +648,28 @@ version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "85e968097061b3c0e9fe3079cf2e703e487890700546b5b0647f60fca1b5a8d8" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", + "memchr", + "num-traits", + "regex", + "regex-syntax", +] + +[[package]] +name = "arrow-string" +version = "58.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1eab1208bc4fe55d768cdc9b9f3d9df5a794cdb3ee2586bf89f9b30dc31ad8c" +dependencies = [ + "arrow-array 58.0.0", + "arrow-buffer 58.0.0", + "arrow-data 58.0.0", + "arrow-schema 58.0.0", + "arrow-select 58.0.0", "memchr", "num-traits", "regex", @@ -1344,8 +1568,8 @@ name = "compress-bench" version = "0.1.0" dependencies = [ "anyhow", - "arrow-array", - "arrow-schema", + "arrow-array 58.0.0", + "arrow-schema 58.0.0", "async-trait", "bytes", "clap", @@ -1353,7 +1577,7 @@ dependencies = [ "indicatif", "itertools 0.14.0", "lance-bench", - "parquet", + "parquet 58.0.0", "regex", "tokio", "tracing", @@ -1814,8 +2038,8 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ba7cb113e9c0bedf9e9765926031e132fa05a1b09ba6e93a6d1a4d7044457b8" dependencies = [ - "arrow", - "arrow-schema", + "arrow 57.3.0", + "arrow-schema 57.3.0", "async-trait", "bytes", "chrono", @@ -1846,12 +2070,12 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store", + "object_store 0.12.5", "parking_lot", "rand 0.9.2", "regex", "rstest", - "sqlparser", + "sqlparser 0.59.0", "tempfile", "tokio", "url", @@ -1861,11 +2085,10 @@ dependencies = [ [[package]] name = "datafusion" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "503f1f4a9060ae6e650d3dff5dc7a21266fea1302d890768d45b4b28586e830f" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=c629ef9eeaad70b6836657163e6aa1f072090b58#c629ef9eeaad70b6836657163e6aa1f072090b58" dependencies = [ - "arrow", - "arrow-schema", + "arrow 58.0.0", + "arrow-schema 58.0.0", "async-trait", "bytes", "bzip2", @@ -1901,12 +2124,12 @@ dependencies = [ "itertools 0.14.0", "liblzma", "log", - "object_store", + "object_store 0.13.1", "parking_lot", - "parquet", + "parquet 58.0.0", "rand 0.9.2", "regex", - "sqlparser", + "sqlparser 0.61.0", "tempfile", "tokio", "url", @@ -1926,7 +2149,7 @@ dependencies = [ "datafusion-physical-plan 52.2.0", "futures", "itertools 0.14.0", - "object_store", + "object_store 0.13.1", "opentelemetry", "opentelemetry-otlp", "opentelemetry_sdk", @@ -1946,7 +2169,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "66a3a799f914a59b1ea343906a0486f17061f39509af74e874a866428951130d" dependencies = [ - "arrow", + "arrow 57.3.0", "async-trait", "dashmap", "datafusion-common 51.0.0", @@ -1960,7 +2183,7 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store", + "object_store 0.12.5", "parking_lot", "tokio", ] @@ -1968,10 +2191,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14417a3ee4ae3d092b56cd6c1d32e8ff3e2c9ec130ecb2276ec91c89fd599399" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=c629ef9eeaad70b6836657163e6aa1f072090b58#c629ef9eeaad70b6836657163e6aa1f072090b58" dependencies = [ - "arrow", + "arrow 58.0.0", "async-trait", "dashmap", "datafusion-common 52.2.0", @@ -1985,7 +2207,7 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store", + "object_store 0.13.1", "parking_lot", "tokio", ] @@ -1996,7 +2218,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6db1b113c80d7a0febcd901476a57aef378e717c54517a163ed51417d87621b0" dependencies = [ - "arrow", + "arrow 57.3.0", "async-trait", "datafusion-catalog 51.0.0", "datafusion-common 51.0.0", @@ -2010,17 +2232,16 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store", + "object_store 0.12.5", "tokio", ] [[package]] name = "datafusion-catalog-listing" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d0eba824adb45a4b3ac6f0251d40df3f6a9382371cad136f4f14ac9ebc6bc10" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=c629ef9eeaad70b6836657163e6aa1f072090b58#c629ef9eeaad70b6836657163e6aa1f072090b58" dependencies = [ - "arrow", + "arrow 58.0.0", "async-trait", "datafusion-catalog 52.2.0", "datafusion-common 52.2.0", @@ -2034,7 +2255,7 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store", + "object_store 0.13.1", ] [[package]] @@ -2044,17 +2265,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c10f7659e96127d25e8366be7c8be4109595d6a2c3eac70421f380a7006a1b0" dependencies = [ "ahash 0.8.12", - "arrow", - "arrow-ipc", + "arrow 57.3.0", + "arrow-ipc 57.3.0", "chrono", "half", "hashbrown 0.14.5", "indexmap", "libc", "log", - "object_store", + "object_store 0.12.5", "paste", - "sqlparser", + "sqlparser 0.59.0", "tokio", "web-time", ] @@ -2062,24 +2283,24 @@ dependencies = [ [[package]] name = "datafusion-common" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0039deefbd00c56adf5168b7ca58568fb058e4ba4c5a03b09f8be371b4e434b6" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=c629ef9eeaad70b6836657163e6aa1f072090b58#c629ef9eeaad70b6836657163e6aa1f072090b58" dependencies = [ "ahash 0.8.12", "apache-avro", - "arrow", - "arrow-ipc", + "arrow 58.0.0", + "arrow-ipc 58.0.0", "chrono", "half", "hashbrown 0.16.1", "indexmap", + "itertools 0.14.0", "libc", "log", - "object_store", - "parquet", + "object_store 0.13.1", + "parquet 58.0.0", "paste", "recursive", - "sqlparser", + "sqlparser 0.61.0", "tokio", "web-time", ] @@ -2098,8 +2319,7 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ec7e3e60b813048331f8fb9673583173e5d2dd8fef862834ee871fc98b57ca7" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=c629ef9eeaad70b6836657163e6aa1f072090b58#c629ef9eeaad70b6836657163e6aa1f072090b58" dependencies = [ "futures", "log", @@ -2112,7 +2332,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fde13794244bc7581cd82f6fff217068ed79cdc344cafe4ab2c3a1c3510b38d6" dependencies = [ - "arrow", + "arrow 57.3.0", "async-trait", "bytes", "chrono", @@ -2129,7 +2349,7 @@ dependencies = [ "glob", "itertools 0.14.0", "log", - "object_store", + "object_store 0.12.5", "rand 0.9.2", "tokio", "url", @@ -2138,10 +2358,9 @@ dependencies = [ [[package]] name = "datafusion-datasource" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "802068957f620302ecf05f84ff4019601aeafd36f5f3f1334984af2e34265129" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=c629ef9eeaad70b6836657163e6aa1f072090b58#c629ef9eeaad70b6836657163e6aa1f072090b58" dependencies = [ - "arrow", + "arrow 58.0.0", "async-compression", "async-trait", "bytes", @@ -2162,7 +2381,7 @@ dependencies = [ "itertools 0.14.0", "liblzma", "log", - "object_store", + "object_store 0.13.1", "rand 0.9.2", "tokio", "tokio-util", @@ -2176,8 +2395,8 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "804fa9b4ecf3157982021770617200ef7c1b2979d57bec9044748314775a9aea" dependencies = [ - "arrow", - "arrow-ipc", + "arrow 57.3.0", + "arrow-ipc 57.3.0", "async-trait", "bytes", "datafusion-common 51.0.0", @@ -2190,18 +2409,17 @@ dependencies = [ "datafusion-session 51.0.0", "futures", "itertools 0.14.0", - "object_store", + "object_store 0.12.5", "tokio", ] [[package]] name = "datafusion-datasource-arrow" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90fc387d5067c62d494a6647d29c5ad4fcdd5a6e50ab4ea1d2568caa2d66f2cc" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=c629ef9eeaad70b6836657163e6aa1f072090b58#c629ef9eeaad70b6836657163e6aa1f072090b58" dependencies = [ - "arrow", - "arrow-ipc", + "arrow 58.0.0", + "arrow-ipc 58.0.0", "async-trait", "bytes", "datafusion-common 52.2.0", @@ -2214,18 +2432,17 @@ dependencies = [ "datafusion-session 52.2.0", "futures", "itertools 0.14.0", - "object_store", + "object_store 0.13.1", "tokio", ] [[package]] name = "datafusion-datasource-avro" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69ce35d9df5c672747f79df4b8f4967b39a3514c3af30b9a7b5426f83d4be814" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=c629ef9eeaad70b6836657163e6aa1f072090b58#c629ef9eeaad70b6836657163e6aa1f072090b58" dependencies = [ "apache-avro", - "arrow", + "arrow 58.0.0", "async-trait", "bytes", "datafusion-common 52.2.0", @@ -2235,7 +2452,7 @@ dependencies = [ "datafusion-session 52.2.0", "futures", "num-traits", - "object_store", + "object_store 0.13.1", ] [[package]] @@ -2244,7 +2461,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61a1641a40b259bab38131c5e6f48fac0717bedb7dc93690e604142a849e0568" dependencies = [ - "arrow", + "arrow 57.3.0", "async-trait", "bytes", "datafusion-common 51.0.0", @@ -2256,7 +2473,7 @@ dependencies = [ "datafusion-physical-plan 51.0.0", "datafusion-session 51.0.0", "futures", - "object_store", + "object_store 0.12.5", "regex", "tokio", ] @@ -2264,10 +2481,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efd5e20579bb6c8bd4e6c620253972fb723822030c280dd6aa047f660d09eeba" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=c629ef9eeaad70b6836657163e6aa1f072090b58#c629ef9eeaad70b6836657163e6aa1f072090b58" dependencies = [ - "arrow", + "arrow 58.0.0", "async-trait", "bytes", "datafusion-common 52.2.0", @@ -2279,7 +2495,7 @@ dependencies = [ "datafusion-physical-plan 52.2.0", "datafusion-session 52.2.0", "futures", - "object_store", + "object_store 0.13.1", "regex", "tokio", ] @@ -2290,7 +2506,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adeacdb00c1d37271176f8fb6a1d8ce096baba16ea7a4b2671840c5c9c64fe85" dependencies = [ - "arrow", + "arrow 57.3.0", "async-trait", "bytes", "datafusion-common 51.0.0", @@ -2302,17 +2518,16 @@ dependencies = [ "datafusion-physical-plan 51.0.0", "datafusion-session 51.0.0", "futures", - "object_store", + "object_store 0.12.5", "tokio", ] [[package]] name = "datafusion-datasource-json" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0788b0d48fcef31880a02013ea3cc18e5a4e0eacc3b0abdd2cd0597b99dc96e" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=c629ef9eeaad70b6836657163e6aa1f072090b58#c629ef9eeaad70b6836657163e6aa1f072090b58" dependencies = [ - "arrow", + "arrow 58.0.0", "async-trait", "bytes", "datafusion-common 52.2.0", @@ -2324,17 +2539,18 @@ dependencies = [ "datafusion-physical-plan 52.2.0", "datafusion-session 52.2.0", "futures", - "object_store", + "object_store 0.13.1", + "serde_json", "tokio", + "tokio-stream", ] [[package]] name = "datafusion-datasource-parquet" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66639b70f1f363f5f0950733170100e588f1acfacac90c1894e231194aa35957" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=c629ef9eeaad70b6836657163e6aa1f072090b58#c629ef9eeaad70b6836657163e6aa1f072090b58" dependencies = [ - "arrow", + "arrow 58.0.0", "async-trait", "bytes", "datafusion-common 52.2.0", @@ -2352,9 +2568,9 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store", + "object_store 0.13.1", "parking_lot", - "parquet", + "parquet 58.0.0", "tokio", ] @@ -2367,8 +2583,7 @@ checksum = "2b99e13947667b36ad713549237362afb054b2d8f8cc447751e23ec61202db07" [[package]] name = "datafusion-doc" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e44b41f3e8267c6cf3eec982d63f34db9f1dd5f30abfd2e1f124f0871708952e" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=c629ef9eeaad70b6836657163e6aa1f072090b58#c629ef9eeaad70b6836657163e6aa1f072090b58" [[package]] name = "datafusion-execution" @@ -2376,14 +2591,14 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "63695643190679037bc946ad46a263b62016931547bf119859c511f7ff2f5178" dependencies = [ - "arrow", + "arrow 57.3.0", "async-trait", "dashmap", "datafusion-common 51.0.0", "datafusion-expr 51.0.0", "futures", "log", - "object_store", + "object_store 0.12.5", "parking_lot", "rand 0.9.2", "tempfile", @@ -2393,18 +2608,19 @@ dependencies = [ [[package]] name = "datafusion-execution" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e456f60e5d38db45335e84617006d90af14a8c8c5b8e959add708b2daaa0e2c" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=c629ef9eeaad70b6836657163e6aa1f072090b58#c629ef9eeaad70b6836657163e6aa1f072090b58" dependencies = [ - "arrow", + "arrow 58.0.0", + "arrow-buffer 58.0.0", "async-trait", "chrono", "dashmap", "datafusion-common 52.2.0", "datafusion-expr 52.2.0", + "datafusion-physical-expr-common 52.2.0", "futures", "log", - "object_store", + "object_store 0.13.1", "parking_lot", "rand 0.9.2", "tempfile", @@ -2417,7 +2633,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f9a4787cbf5feb1ab351f789063398f67654a6df75c4d37d7f637dc96f951a91" dependencies = [ - "arrow", + "arrow 57.3.0", "async-trait", "chrono", "datafusion-common 51.0.0", @@ -2430,16 +2646,15 @@ dependencies = [ "itertools 0.14.0", "paste", "serde_json", - "sqlparser", + "sqlparser 0.59.0", ] [[package]] name = "datafusion-expr" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6507c719804265a58043134580c1c20767e7c23ba450724393f03ec982769ad9" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=c629ef9eeaad70b6836657163e6aa1f072090b58#c629ef9eeaad70b6836657163e6aa1f072090b58" dependencies = [ - "arrow", + "arrow 58.0.0", "async-trait", "chrono", "datafusion-common 52.2.0", @@ -2453,7 +2668,7 @@ dependencies = [ "paste", "recursive", "serde_json", - "sqlparser", + "sqlparser 0.61.0", ] [[package]] @@ -2462,7 +2677,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5ce2fb1b8c15c9ac45b0863c30b268c69dc9ee7a1ee13ecf5d067738338173dc" dependencies = [ - "arrow", + "arrow 57.3.0", "datafusion-common 51.0.0", "indexmap", "itertools 0.14.0", @@ -2472,10 +2687,9 @@ dependencies = [ [[package]] name = "datafusion-expr-common" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a413caa9c5885072b539337aed68488f0291653e8edd7d676c92df2480f6cab0" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=c629ef9eeaad70b6836657163e6aa1f072090b58#c629ef9eeaad70b6836657163e6aa1f072090b58" dependencies = [ - "arrow", + "arrow 58.0.0", "datafusion-common 52.2.0", "indexmap", "itertools 0.14.0", @@ -2488,8 +2702,8 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "794a9db7f7b96b3346fc007ff25e994f09b8f0511b4cf7dff651fadfe3ebb28f" dependencies = [ - "arrow", - "arrow-buffer", + "arrow 57.3.0", + "arrow-buffer 57.3.0", "base64", "blake2", "blake3", @@ -2515,11 +2729,10 @@ dependencies = [ [[package]] name = "datafusion-functions" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "189256495dc9cbbb8e20dbcf161f60422e628d201a78df8207e44bd4baefadb6" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=c629ef9eeaad70b6836657163e6aa1f072090b58#c629ef9eeaad70b6836657163e6aa1f072090b58" dependencies = [ - "arrow", - "arrow-buffer", + "arrow 58.0.0", + "arrow-buffer 58.0.0", "base64", "blake2", "blake3", @@ -2535,6 +2748,7 @@ dependencies = [ "itertools 0.14.0", "log", "md-5", + "memchr", "num-traits", "rand 0.9.2", "regex", @@ -2550,7 +2764,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1c25210520a9dcf9c2b2cbbce31ebd4131ef5af7fc60ee92b266dc7d159cb305" dependencies = [ "ahash 0.8.12", - "arrow", + "arrow 57.3.0", "datafusion-common 51.0.0", "datafusion-doc 51.0.0", "datafusion-execution 51.0.0", @@ -2567,11 +2781,10 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12e73dfee4cd67c4a507ffff4c5a711d39983adf544adbc09c09bf06f789f413" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=c629ef9eeaad70b6836657163e6aa1f072090b58#c629ef9eeaad70b6836657163e6aa1f072090b58" dependencies = [ "ahash 0.8.12", - "arrow", + "arrow 58.0.0", "datafusion-common 52.2.0", "datafusion-doc 52.2.0", "datafusion-execution 52.2.0", @@ -2582,6 +2795,7 @@ dependencies = [ "datafusion-physical-expr-common 52.2.0", "half", "log", + "num-traits", "paste", ] @@ -2592,7 +2806,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "62f4a66f3b87300bb70f4124b55434d2ae3fe80455f3574701d0348da040b55d" dependencies = [ "ahash 0.8.12", - "arrow", + "arrow 57.3.0", "datafusion-common 51.0.0", "datafusion-expr-common 51.0.0", "datafusion-physical-expr-common 51.0.0", @@ -2601,11 +2815,10 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87727bd9e65f4f9ac6d608c9810b7da9eaa3b18b26a4a4b76520592d49020acf" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=c629ef9eeaad70b6836657163e6aa1f072090b58#c629ef9eeaad70b6836657163e6aa1f072090b58" dependencies = [ "ahash 0.8.12", - "arrow", + "arrow 58.0.0", "datafusion-common 52.2.0", "datafusion-expr-common 52.2.0", "datafusion-physical-expr-common 52.2.0", @@ -2617,8 +2830,8 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae5c06eed03918dc7fe7a9f082a284050f0e9ecf95d72f57712d1496da03b8c4" dependencies = [ - "arrow", - "arrow-ord", + "arrow 57.3.0", + "arrow-ord 57.3.0", "datafusion-common 51.0.0", "datafusion-doc 51.0.0", "datafusion-execution 51.0.0", @@ -2637,11 +2850,10 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e5ef761359224b7c2b5a1bfad6296ac63225f8583d08ad18af9ba1a89ac3887" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=c629ef9eeaad70b6836657163e6aa1f072090b58#c629ef9eeaad70b6836657163e6aa1f072090b58" dependencies = [ - "arrow", - "arrow-ord", + "arrow 58.0.0", + "arrow-ord 58.0.0", "datafusion-common 52.2.0", "datafusion-doc 52.2.0", "datafusion-execution 52.2.0", @@ -2652,7 +2864,9 @@ dependencies = [ "datafusion-functions-aggregate-common 52.2.0", "datafusion-macros 52.2.0", "datafusion-physical-expr-common 52.2.0", + "hashbrown 0.16.1", "itertools 0.14.0", + "itoa", "log", "paste", ] @@ -2663,7 +2877,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "db4fed1d71738fbe22e2712d71396db04c25de4111f1ec252b8f4c6d3b25d7f5" dependencies = [ - "arrow", + "arrow 57.3.0", "async-trait", "datafusion-catalog 51.0.0", "datafusion-common 51.0.0", @@ -2676,10 +2890,9 @@ dependencies = [ [[package]] name = "datafusion-functions-table" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b17dac25dfda2d2a90ff0ad1c054a11fb1523766226bec6e9bd8c410daee2ae" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=c629ef9eeaad70b6836657163e6aa1f072090b58#c629ef9eeaad70b6836657163e6aa1f072090b58" dependencies = [ - "arrow", + "arrow 58.0.0", "async-trait", "datafusion-catalog 52.2.0", "datafusion-common 52.2.0", @@ -2695,7 +2908,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d92206aa5ae21892f1552b4d61758a862a70956e6fd7a95cb85db1de74bc6d1" dependencies = [ - "arrow", + "arrow 57.3.0", "datafusion-common 51.0.0", "datafusion-doc 51.0.0", "datafusion-expr 51.0.0", @@ -2710,10 +2923,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c594a29ddb22cbdbce500e4d99b5b2392c5cecb4c1086298b41d1ffec14dbb77" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=c629ef9eeaad70b6836657163e6aa1f072090b58#c629ef9eeaad70b6836657163e6aa1f072090b58" dependencies = [ - "arrow", + "arrow 58.0.0", "datafusion-common 52.2.0", "datafusion-doc 52.2.0", "datafusion-expr 52.2.0", @@ -2738,8 +2950,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9aa1b15ed81c7543f62264a30dd49dec4b1b0b698053b968f53be32dfba4f729" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=c629ef9eeaad70b6836657163e6aa1f072090b58#c629ef9eeaad70b6836657163e6aa1f072090b58" dependencies = [ "datafusion-common 52.2.0", "datafusion-physical-expr-common 52.2.0", @@ -2759,8 +2970,7 @@ dependencies = [ [[package]] name = "datafusion-macros" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c00c31c4795597aa25b74cab5174ac07a53051f27ce1e011ecaffa9eaeecef81" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=c629ef9eeaad70b6836657163e6aa1f072090b58#c629ef9eeaad70b6836657163e6aa1f072090b58" dependencies = [ "datafusion-doc 52.2.0", "quote", @@ -2773,7 +2983,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f35f9ec5d08b87fd1893a30c2929f2559c2f9806ca072d8fefca5009dc0f06a" dependencies = [ - "arrow", + "arrow 57.3.0", "chrono", "datafusion-common 51.0.0", "datafusion-expr 51.0.0", @@ -2789,10 +2999,9 @@ dependencies = [ [[package]] name = "datafusion-optimizer" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80ccf60767c09302b2e0fc3afebb3761a6d508d07316fab8c5e93312728a21bb" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=c629ef9eeaad70b6836657163e6aa1f072090b58#c629ef9eeaad70b6836657163e6aa1f072090b58" dependencies = [ - "arrow", + "arrow 58.0.0", "chrono", "datafusion-common 52.2.0", "datafusion-expr 52.2.0", @@ -2813,7 +3022,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c30cc8012e9eedcb48bbe112c6eff4ae5ed19cf3003cb0f505662e88b7014c5d" dependencies = [ "ahash 0.8.12", - "arrow", + "arrow 57.3.0", "datafusion-common 51.0.0", "datafusion-expr 51.0.0", "datafusion-expr-common 51.0.0", @@ -2831,11 +3040,10 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c64b7f277556944e4edd3558da01d9e9ff9f5416f1c0aa7fee088e57bd141a7e" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=c629ef9eeaad70b6836657163e6aa1f072090b58#c629ef9eeaad70b6836657163e6aa1f072090b58" dependencies = [ "ahash 0.8.12", - "arrow", + "arrow 58.0.0", "datafusion-common 52.2.0", "datafusion-expr 52.2.0", "datafusion-expr-common 52.2.0", @@ -2858,7 +3066,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f9ff2dbd476221b1f67337699eff432781c4e6e1713d2aefdaa517dfbf79768" dependencies = [ - "arrow", + "arrow 57.3.0", "datafusion-common 51.0.0", "datafusion-expr 51.0.0", "datafusion-functions 51.0.0", @@ -2870,10 +3078,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-adapter" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7abaee372ea2d19c016ee9ef8629c4415257d291cdd152bc7f0b75f28af1b63" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=c629ef9eeaad70b6836657163e6aa1f072090b58#c629ef9eeaad70b6836657163e6aa1f072090b58" dependencies = [ - "arrow", + "arrow 58.0.0", "datafusion-common 52.2.0", "datafusion-expr 52.2.0", "datafusion-functions 52.2.0", @@ -2889,7 +3096,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90da43e1ec550b172f34c87ec68161986ced70fd05c8d2a2add66eef9c276f03" dependencies = [ "ahash 0.8.12", - "arrow", + "arrow 57.3.0", "datafusion-common 51.0.0", "datafusion-expr-common 51.0.0", "hashbrown 0.14.5", @@ -2899,11 +3106,10 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42237efe621f92adc22d111b531fdbc2cc38ca9b5e02327535628fb103ae2157" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=c629ef9eeaad70b6836657163e6aa1f072090b58#c629ef9eeaad70b6836657163e6aa1f072090b58" dependencies = [ "ahash 0.8.12", - "arrow", + "arrow 58.0.0", "chrono", "datafusion-common 52.2.0", "datafusion-expr-common 52.2.0", @@ -2919,7 +3125,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ce9804f799acd7daef3be7aaffe77c0033768ed8fdbf5fb82fc4c5f2e6bc14e6" dependencies = [ - "arrow", + "arrow 57.3.0", "datafusion-common 51.0.0", "datafusion-execution 51.0.0", "datafusion-expr 51.0.0", @@ -2934,10 +3140,9 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd093498bd1319c6e5c76e9dfa905e78486f01b34579ce97f2e3a49f84c37fac" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=c629ef9eeaad70b6836657163e6aa1f072090b58#c629ef9eeaad70b6836657163e6aa1f072090b58" dependencies = [ - "arrow", + "arrow 58.0.0", "datafusion-common 52.2.0", "datafusion-execution 52.2.0", "datafusion-expr 52.2.0", @@ -2957,9 +3162,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0acf0ad6b6924c6b1aa7d213b181e012e2d3ec0a64ff5b10ee6282ab0f8532ac" dependencies = [ "ahash 0.8.12", - "arrow", - "arrow-ord", - "arrow-schema", + "arrow 57.3.0", + "arrow-ord 57.3.0", + "arrow-schema 57.3.0", "async-trait", "chrono", "datafusion-common 51.0.0", @@ -2984,13 +3189,12 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cbe61b12daf81a9f20ba03bd3541165d51f86e004ef37426b11881330eed261" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=c629ef9eeaad70b6836657163e6aa1f072090b58#c629ef9eeaad70b6836657163e6aa1f072090b58" dependencies = [ "ahash 0.8.12", - "arrow", - "arrow-ord", - "arrow-schema", + "arrow 58.0.0", + "arrow-ord 58.0.0", + "arrow-schema 58.0.0", "async-trait", "datafusion-common 52.2.0", "datafusion-common-runtime 52.2.0", @@ -3007,6 +3211,7 @@ dependencies = [ "indexmap", "itertools 0.14.0", "log", + "num-traits", "parking_lot", "pin-project-lite", "tokio", @@ -3018,7 +3223,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac2c2498a1f134a9e11a9f5ed202a2a7d7e9774bd9249295593053ea3be999db" dependencies = [ - "arrow", + "arrow 57.3.0", "datafusion-common 51.0.0", "datafusion-datasource 51.0.0", "datafusion-expr-common 51.0.0", @@ -3032,10 +3237,9 @@ dependencies = [ [[package]] name = "datafusion-pruning" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0124331116db7f79df92ebfd2c3b11a8f90240f253555c9bb084f10b6fecf1dd" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=c629ef9eeaad70b6836657163e6aa1f072090b58#c629ef9eeaad70b6836657163e6aa1f072090b58" dependencies = [ - "arrow", + "arrow 58.0.0", "datafusion-common 52.2.0", "datafusion-datasource 52.2.0", "datafusion-expr-common 52.2.0", @@ -3063,8 +3267,7 @@ dependencies = [ [[package]] name = "datafusion-session" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1673e3c58ba618a6ea0568672f00664087b8982c581e9afd5aa6c3c79c9b431f" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=c629ef9eeaad70b6836657163e6aa1f072090b58#c629ef9eeaad70b6836657163e6aa1f072090b58" dependencies = [ "async-trait", "datafusion-common 52.2.0", @@ -3077,23 +3280,26 @@ dependencies = [ [[package]] name = "datafusion-spark" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15d28510abfc85709578fcf9065325d43ee3303012c0ccec2dce351bdc577d00" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=c629ef9eeaad70b6836657163e6aa1f072090b58#c629ef9eeaad70b6836657163e6aa1f072090b58" dependencies = [ - "arrow", + "arrow 58.0.0", "bigdecimal", "chrono", "crc32fast", + "datafusion 52.2.0", "datafusion-catalog 52.2.0", "datafusion-common 52.2.0", "datafusion-execution 52.2.0", "datafusion-expr 52.2.0", "datafusion-functions 52.2.0", + "datafusion-functions-aggregate 52.2.0", "datafusion-functions-nested 52.2.0", "log", "percent-encoding", "rand 0.9.2", + "serde_json", "sha1", + "sha2", "url", ] @@ -3103,7 +3309,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fc195fe60634b2c6ccfd131b487de46dc30eccae8a3c35a13f136e7f440414f" dependencies = [ - "arrow", + "arrow 57.3.0", "bigdecimal", "chrono", "datafusion-common 51.0.0", @@ -3111,34 +3317,33 @@ dependencies = [ "indexmap", "log", "regex", - "sqlparser", + "sqlparser 0.59.0", ] [[package]] name = "datafusion-sql" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5272d256dab5347bb39d2040589f45d8c6b715b27edcb5fffe88cc8b9c3909cb" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=c629ef9eeaad70b6836657163e6aa1f072090b58#c629ef9eeaad70b6836657163e6aa1f072090b58" dependencies = [ - "arrow", + "arrow 58.0.0", "bigdecimal", "chrono", "datafusion-common 52.2.0", "datafusion-expr 52.2.0", + "datafusion-functions-nested 52.2.0", "indexmap", "log", "recursive", "regex", - "sqlparser", + "sqlparser 0.61.0", ] [[package]] name = "datafusion-sqllogictest" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccb859e97759dcbff66b484bdf4f251f9a76784d3dd7883c124de57510b1e1c2" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=c629ef9eeaad70b6836657163e6aa1f072090b58#c629ef9eeaad70b6836657163e6aa1f072090b58" dependencies = [ - "arrow", + "arrow 58.0.0", "async-trait", "bigdecimal", "clap", @@ -3150,9 +3355,9 @@ dependencies = [ "indicatif", "itertools 0.14.0", "log", - "object_store", + "object_store 0.13.1", "sqllogictest", - "sqlparser", + "sqlparser 0.61.0", "tempfile", "thiserror 2.0.18", "tokio", @@ -3161,8 +3366,7 @@ dependencies = [ [[package]] name = "datafusion-substrait" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "199790fd96e852997b30da4ff11109378c944841757d93875ea85fc69587ec91" +source = "git+https://github.com/Dandandan/arrow-datafusion.git?rev=c629ef9eeaad70b6836657163e6aa1f072090b58#c629ef9eeaad70b6836657163e6aa1f072090b58" dependencies = [ "async-recursion", "async-trait", @@ -3170,7 +3374,7 @@ dependencies = [ "datafusion 52.2.0", "half", "itertools 0.14.0", - "object_store", + "object_store 0.13.1", "pbjson-types", "prost 0.14.3", "substrait", @@ -3694,7 +3898,7 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f9e5c0b1c67a38cb92b41535d44623483beb9511592ae23a3bf42ddec758690" dependencies = [ - "arrow-array", + "arrow-array 57.3.0", "rand 0.9.2", ] @@ -3906,9 +4110,9 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc1cc4106ac0a0a512c398961ce95d8150475c84a84e17c4511c3643fa120a17" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-schema", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-schema 57.3.0", "geo-traits", "geoarrow-schema", "num-traits", @@ -3922,8 +4126,8 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fa84300361ce57fb875bcaa6e32b95b0aff5c6b1af692b936bdd58ff343f4394" dependencies = [ - "arrow-array", - "arrow-buffer", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", "geo", "geo-traits", "geoarrow-array", @@ -3936,7 +4140,7 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e97be4e9f523f92bd6a0e0458323f4b783d073d011664decd8dbf05651704f34" dependencies = [ - "arrow-schema", + "arrow-schema 57.3.0", "geo-traits", "serde", "serde_json", @@ -3949,9 +4153,9 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773cfa1fb0d7f7661b76b3fde00f3ffd8e0ff7b3635096f0ff6294fe5ca62a2b" dependencies = [ - "arrow-arith", - "arrow-array", - "arrow-schema", + "arrow-arith 57.3.0", + "arrow-array 57.3.0", + "arrow-schema 57.3.0", "datafusion 51.0.0", "geo", "geo-traits", @@ -4860,15 +5064,15 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b7f07b905df393a5554eba19055c620f9ea25a3e40a013bda4bd8dc4ca66f01" dependencies = [ - "arrow", - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-ipc", - "arrow-ord", - "arrow-row", - "arrow-schema", - "arrow-select", + "arrow 57.3.0", + "arrow-arith 57.3.0", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-ipc 57.3.0", + "arrow-ord 57.3.0", + "arrow-row 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", "async-recursion", "async-trait", "async_cell", @@ -4900,7 +5104,7 @@ dependencies = [ "lance-table", "log", "moka", - "object_store", + "object_store 0.12.5", "permutation", "pin-project", "prost 0.14.3", @@ -4925,13 +5129,13 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "100e076cb81c8f0c24cd2881c706fc53e037c7d6e81eb320e929e265d157effb" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-ord", - "arrow-schema", - "arrow-select", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-cast 57.3.0", + "arrow-data 57.3.0", + "arrow-ord 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", "bytes", "getrandom 0.2.17", "half", @@ -4945,13 +5149,13 @@ name = "lance-bench" version = "0.1.0" dependencies = [ "anyhow", - "arrow-cast", + "arrow-cast 57.3.0", "async-trait", "clap", "futures", "lance", "lance-encoding", - "parquet", + "parquet 57.3.0", "tempfile", "tokio", "tracing", @@ -4975,9 +5179,9 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6fa01d1cf490ccfd3b8eaeee2781415d0419e6be8366040e57e43677abf2644e" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-schema", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-schema 57.3.0", "async-trait", "byteorder", "bytes", @@ -4993,7 +5197,7 @@ dependencies = [ "mock_instant", "moka", "num_cpus", - "object_store", + "object_store 0.12.5", "pin-project", "prost 0.14.3", "rand 0.9.2", @@ -5014,12 +5218,12 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ef89a39e3284eef76f79e63f23de8881a0583ad6feb20ed39f47eadd847a2b88" dependencies = [ - "arrow", - "arrow-array", - "arrow-buffer", - "arrow-ord", - "arrow-schema", - "arrow-select", + "arrow 57.3.0", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-ord 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", "async-trait", "chrono", "datafusion 51.0.0", @@ -5046,10 +5250,10 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc2a60eef5c47e65d91e2ffa8e7e1629c52e7190c8b88a371a1a60601dc49371" dependencies = [ - "arrow", - "arrow-array", - "arrow-cast", - "arrow-schema", + "arrow 57.3.0", + "arrow-array 57.3.0", + "arrow-cast 57.3.0", + "arrow-schema 57.3.0", "chrono", "futures", "half", @@ -5066,13 +5270,13 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95ce4a6631308aa681b2671af8f2a845ff781f8d4e755a2a7ccd012379467094" dependencies = [ - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-arith 57.3.0", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-cast 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", "bytemuck", "byteorder", "bytes", @@ -5105,12 +5309,12 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2d4d82357cbfaa1a18494226c15b1cb3c8ed0b6c84b91146323c82047ede419" dependencies = [ - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-arith 57.3.0", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", "async-recursion", "async-trait", "byteorder", @@ -5124,7 +5328,7 @@ dependencies = [ "lance-io", "log", "num-traits", - "object_store", + "object_store 0.12.5", "prost 0.14.3", "prost-build", "prost-types", @@ -5155,12 +5359,12 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "20e9c5aa7024a63af9ae89ee8c0f23c8421b7896742e5cd4a271a60f9956cb80" dependencies = [ - "arrow", - "arrow-arith", - "arrow-array", - "arrow-ord", - "arrow-schema", - "arrow-select", + "arrow 57.3.0", + "arrow-arith 57.3.0", + "arrow-array 57.3.0", + "arrow-ord 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", "async-channel", "async-recursion", "async-trait", @@ -5197,7 +5401,7 @@ dependencies = [ "log", "ndarray", "num-traits", - "object_store", + "object_store 0.12.5", "prost 0.14.3", "prost-build", "prost-types", @@ -5224,14 +5428,14 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c7d2af0b17fb374a8181bcf1a10bce5703ae3ee4373c1587ce4bba23e15e45c8" dependencies = [ - "arrow", - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow 57.3.0", + "arrow-arith 57.3.0", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-cast 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", "async-recursion", "async-trait", "byteorder", @@ -5243,7 +5447,7 @@ dependencies = [ "lance-core", "lance-namespace", "log", - "object_store", + "object_store 0.12.5", "path_abs", "pin-project", "prost 0.14.3", @@ -5262,9 +5466,9 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5125aa62696e75a7475807564b4921f252d8815be606b84bc00e6def0f5c24bb" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-schema", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-schema 57.3.0", "cc", "deepsize", "half", @@ -5280,7 +5484,7 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "70545c2676ce954dfd801da5c6a631a70bba967826cd3a8f31b47d1f04bbfed3" dependencies = [ - "arrow", + "arrow 57.3.0", "async-trait", "bytes", "lance-core", @@ -5307,11 +5511,11 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b06ad37bd90045de8ef533df170c6098e6ff6ecb427aade47d7db8e2c86f2678" dependencies = [ - "arrow", - "arrow-array", - "arrow-buffer", - "arrow-ipc", - "arrow-schema", + "arrow 57.3.0", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-ipc 57.3.0", + "arrow-schema 57.3.0", "async-trait", "byteorder", "bytes", @@ -5323,7 +5527,7 @@ dependencies = [ "lance-file", "lance-io", "log", - "object_store", + "object_store 0.12.5", "prost 0.14.3", "prost-build", "prost-types", @@ -6169,6 +6373,30 @@ name = "object_store" version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fbfbfff40aeccab00ec8a910b57ca8ecf4319b335c542f2edcd19dd25a1e2a00" +dependencies = [ + "async-trait", + "bytes", + "chrono", + "futures", + "http", + "humantime", + "itertools 0.14.0", + "parking_lot", + "percent-encoding", + "thiserror 2.0.18", + "tokio", + "tracing", + "url", + "walkdir", + "wasm-bindgen-futures", + "web-time", +] + +[[package]] +name = "object_store" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2858065e55c148d294a9f3aae3b0fa9458edadb41a108397094566f4e3c0dfb" dependencies = [ "async-trait", "base64", @@ -6189,7 +6417,7 @@ dependencies = [ "rand 0.9.2", "reqwest", "ring", - "rustls-pemfile", + "rustls-pki-types", "serde", "serde_json", "serde_urlencoded", @@ -6459,13 +6687,46 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ee96b29972a257b855ff2341b37e61af5f12d6af1158b6dcdb5b31ea07bb3cb" dependencies = [ "ahash 0.8.12", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-ipc", - "arrow-schema", - "arrow-select", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-cast 57.3.0", + "arrow-data 57.3.0", + "arrow-ipc 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", + "base64", + "brotli", + "bytes", + "chrono", + "flate2", + "half", + "hashbrown 0.16.1", + "lz4_flex 0.12.0", + "num-bigint", + "num-integer", + "num-traits", + "paste", + "seq-macro", + "simdutf8", + "snap", + "thrift", + "twox-hash", + "zstd", +] + +[[package]] +name = "parquet" +version = "58.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f491d0ef1b510194426ee67ddc18a9b747ef3c42050c19322a2cd2e1666c29b" +dependencies = [ + "ahash 0.8.12", + "arrow-array 58.0.0", + "arrow-buffer 58.0.0", + "arrow-data 58.0.0", + "arrow-ipc 58.0.0", + "arrow-schema 58.0.0", + "arrow-select 58.0.0", "base64", "brotli", "bytes", @@ -6478,7 +6739,7 @@ dependencies = [ "num-bigint", "num-integer", "num-traits", - "object_store", + "object_store 0.13.1", "paste", "seq-macro", "simdutf8", @@ -7039,9 +7300,9 @@ dependencies = [ [[package]] name = "pyo3-object_store" -version = "0.7.0" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ef5552f108a4d65b78c924b27513471a9ba425341ada4be5ea0ca53806ae316" +checksum = "a8b80a3a9af26abe307d2c01c13da487166c5c8ac5ac301a4d8e3c270e58ab50" dependencies = [ "async-trait", "bytes", @@ -7050,7 +7311,7 @@ dependencies = [ "http", "humantime", "itertools 0.14.0", - "object_store", + "object_store 0.13.1", "percent-encoding", "pyo3", "pyo3-async-runtimes", @@ -7795,15 +8056,6 @@ dependencies = [ "security-framework", ] -[[package]] -name = "rustls-pemfile" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50" -dependencies = [ - "rustls-pki-types", -] - [[package]] name = "rustls-pki-types" version = "1.14.0" @@ -8358,9 +8610,9 @@ dependencies = [ [[package]] name = "sqllogictest" -version = "0.28.4" +version = "0.29.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3566426f72a13e393aa34ca3d542c5b0eb86da4c0db137ee9b5cfccc6179e52d" +checksum = "d03b2262a244037b0b510edbd25a8e6c9fb8d73ee0237fc6cc95a54c16f94a82" dependencies = [ "async-trait", "educe", @@ -8386,10 +8638,20 @@ name = "sqlparser" version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4591acadbcf52f0af60eafbb2c003232b2b4cd8de5f0e9437cb8b1b59046cc0f" +dependencies = [ + "log", + "sqlparser_derive 0.3.0", +] + +[[package]] +name = "sqlparser" +version = "0.61.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbf5ea8d4d7c808e1af1cbabebca9a2abe603bcefc22294c5b95018d53200cb7" dependencies = [ "log", "recursive", - "sqlparser_derive", + "sqlparser_derive 0.5.0", ] [[package]] @@ -8403,6 +8665,17 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "sqlparser_derive" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6dd45d8fc1c79299bfbb7190e42ccbbdf6a5f52e4a6ad98d92357ea965bd289" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "stable_deref_trait" version = "1.2.1" @@ -9130,6 +9403,7 @@ dependencies = [ "futures-core", "pin-project-lite", "tokio", + "tokio-util", ] [[package]] @@ -9281,16 +9555,14 @@ checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tpchgen" version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d651db770ccf53b89dd769ed47899c0c089452e3b725c3c48fbc6a2be579638" +source = "git+https://github.com/AdamGS/tpchgen-rs.git?branch=adamg%2Fbump-arrow-match-df#803355855df0ed62fc30ee2c125fe46fe1dbab47" [[package]] name = "tpchgen-arrow" version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "180f3759dffbf26d47021d2a84245a00f20945384bcf22e63c32652b04916e5a" +source = "git+https://github.com/AdamGS/tpchgen-rs.git?branch=adamg%2Fbump-arrow-match-df#803355855df0ed62fc30ee2c125fe46fe1dbab47" dependencies = [ - "arrow", + "arrow 58.0.0", "tpchgen", ] @@ -9618,11 +9890,11 @@ name = "vortex" version = "0.1.0" dependencies = [ "anyhow", - "arrow-array", + "arrow-array 58.0.0", "codspeed-divan-compat", "fastlanes", "mimalloc", - "parquet", + "parquet 58.0.0", "rand 0.9.2", "serde_json", "tokio", @@ -9684,15 +9956,15 @@ version = "0.1.0" dependencies = [ "arbitrary", "arcref", - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-ord", - "arrow-schema", - "arrow-select", - "arrow-string", + "arrow-arith 58.0.0", + "arrow-array 58.0.0", + "arrow-buffer 58.0.0", + "arrow-cast 58.0.0", + "arrow-data 58.0.0", + "arrow-ord 58.0.0", + "arrow-schema 58.0.0", + "arrow-select 58.0.0", + "arrow-string 58.0.0", "async-lock", "bytes", "cfg-if", @@ -9746,9 +10018,9 @@ name = "vortex-bench" version = "0.1.0" dependencies = [ "anyhow", - "arrow-array", - "arrow-schema", - "arrow-select", + "arrow-array 58.0.0", + "arrow-schema 58.0.0", + "arrow-select 58.0.0", "async-trait", "bytes", "bzip2", @@ -9763,7 +10035,7 @@ dependencies = [ "noodles-bgzf", "noodles-vcf", "parking_lot", - "parquet", + "parquet 58.0.0", "rand 0.9.2", "regex", "reqwest", @@ -9822,7 +10094,7 @@ dependencies = [ name = "vortex-buffer" version = "0.1.0" dependencies = [ - "arrow-buffer", + "arrow-buffer 58.0.0", "bitvec", "bytes", "codspeed-divan-compat", @@ -9872,7 +10144,7 @@ dependencies = [ "fastlanes", "futures", "kanal", - "object_store", + "object_store 0.13.1", "parking_lot", "prost 0.14.3", "rstest", @@ -9900,8 +10172,8 @@ name = "vortex-cxx" version = "0.1.0" dependencies = [ "anyhow", - "arrow-array", - "arrow-schema", + "arrow-array 58.0.0", + "arrow-schema 58.0.0", "async-fs", "cxx", "futures", @@ -9915,7 +10187,7 @@ name = "vortex-datafusion" version = "0.1.0" dependencies = [ "anyhow", - "arrow-schema", + "arrow-schema 58.0.0", "async-trait", "datafusion 52.2.0", "datafusion-catalog 52.2.0", @@ -9933,7 +10205,7 @@ dependencies = [ "futures", "insta", "itertools 0.14.0", - "object_store", + "object_store 0.13.1", "rstest", "tempfile", "tokio", @@ -9989,7 +10261,7 @@ dependencies = [ "jiff", "kanal", "num-traits", - "object_store", + "object_store 0.13.1", "once_cell", "parking_lot", "paste", @@ -10011,10 +10283,10 @@ dependencies = [ name = "vortex-error" version = "0.1.0" dependencies = [ - "arrow-schema", + "arrow-schema 58.0.0", "flatbuffers", "jiff", - "object_store", + "object_store 0.13.1", "prost 0.14.3", "serial_test", "temp-env", @@ -10052,7 +10324,7 @@ dependencies = [ "futures", "itertools 0.14.0", "mimalloc", - "object_store", + "object_store 0.13.1", "paste", "prost 0.14.3", "tempfile", @@ -10074,7 +10346,7 @@ dependencies = [ "itertools 0.14.0", "kanal", "moka", - "object_store", + "object_store 0.13.1", "oneshot", "parking_lot", "pin-project-lite", @@ -10171,7 +10443,7 @@ dependencies = [ "handle", "itertools 0.14.0", "kanal", - "object_store", + "object_store 0.13.1", "oneshot", "parking_lot", "pin-project-lite", @@ -10209,12 +10481,12 @@ dependencies = [ name = "vortex-jni" version = "0.1.0" dependencies = [ - "arrow-array", - "arrow-ipc", - "arrow-schema", + "arrow-array 58.0.0", + "arrow-ipc 58.0.0", + "arrow-schema 58.0.0", "futures", "jni", - "object_store", + "object_store 0.13.1", "parking_lot", "prost 0.14.3", "thiserror 2.0.18", @@ -10266,7 +10538,7 @@ dependencies = [ name = "vortex-mask" version = "0.1.0" dependencies = [ - "arrow-buffer", + "arrow-buffer 58.0.0", "itertools 0.14.0", "rstest", "serde", @@ -10321,13 +10593,13 @@ dependencies = [ name = "vortex-python" version = "0.1.0" dependencies = [ - "arrow-array", - "arrow-data", - "arrow-schema", + "arrow-array 58.0.0", + "arrow-data 58.0.0", + "arrow-schema 58.0.0", "bytes", "itertools 0.14.0", "log", - "object_store", + "object_store 0.13.1", "parking_lot", "pyo3", "pyo3-bytes", @@ -10344,8 +10616,8 @@ name = "vortex-runend" version = "0.1.0" dependencies = [ "arbitrary", - "arrow-array", - "arrow-schema", + "arrow-array 58.0.0", + "arrow-schema 58.0.0", "codspeed-divan-compat", "itertools 0.14.0", "num-traits", @@ -10363,8 +10635,8 @@ dependencies = [ name = "vortex-scan" version = "0.1.0" dependencies = [ - "arrow-array", - "arrow-schema", + "arrow-array 58.0.0", + "arrow-schema 58.0.0", "async-trait", "bit-vec", "futures", @@ -10470,8 +10742,8 @@ dependencies = [ name = "vortex-test-e2e-cuda" version = "0.1.0" dependencies = [ - "arrow-array", - "arrow-schema", + "arrow-array 58.0.0", + "arrow-schema 58.0.0", "futures", "vortex", "vortex-cuda", @@ -10482,8 +10754,8 @@ name = "vortex-tui" version = "0.1.0" dependencies = [ "anyhow", - "arrow-array", - "arrow-schema", + "arrow-array 58.0.0", + "arrow-schema 58.0.0", "clap", "console_error_panic_hook", "crossterm", @@ -10496,7 +10768,7 @@ dependencies = [ "indicatif", "itertools 0.14.0", "js-sys", - "parquet", + "parquet 58.0.0", "ratatui", "ratzilla", "serde", diff --git a/Cargo.toml b/Cargo.toml index 0da5ee805ba..7bf314c84eb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -86,16 +86,16 @@ arbitrary = "1.3.2" arc-swap = "1.8" arcref = "0.2.0" arrayref = "0.3.7" -arrow-arith = "57.1" -arrow-array = "57.1" -arrow-buffer = "57.1" -arrow-cast = "57.1" -arrow-data = "57.1" -arrow-ipc = "57.1" -arrow-ord = "57.1" -arrow-schema = "57.1" -arrow-select = "57.1" -arrow-string = "57.1" +arrow-arith = "58" +arrow-array = "58" +arrow-buffer = "58" +arrow-cast = "58" +arrow-data = "58" +arrow-ipc = "58" +arrow-ord = "58" +arrow-schema = "58" +arrow-select = "58" +arrow-string = "58" async-fs = "2.2.0" async-lock = "3.4" async-stream = "0.3.6" @@ -171,14 +171,14 @@ noodles-bgzf = "0.44.0" noodles-vcf = "0.82.0" num-traits = "0.2.19" num_enum = { version = "0.7.3", default-features = false } -object_store = { version = "0.12.4", default-features = false } +object_store = { version = "0.13.1", default-features = false } once_cell = "1.21" oneshot = "0.1.13" opentelemetry = "0.31.0" opentelemetry-otlp = "0.31.0" opentelemetry_sdk = "0.31.0" parking_lot = { version = "0.12.3", features = ["nightly"] } -parquet = "57.1" +parquet = "58" paste = "1.0.15" pco = "1.0.1" pin-project-lite = "0.2.15" @@ -368,3 +368,21 @@ lto = false [profile.bench_assert] debug-assertions = true inherits = "bench" + +[patch.crates-io] +datafusion = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "c629ef9eeaad70b6836657163e6aa1f072090b58" } +datafusion-catalog = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "c629ef9eeaad70b6836657163e6aa1f072090b58" } +datafusion-common = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "c629ef9eeaad70b6836657163e6aa1f072090b58" } +datafusion-common-runtime = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "c629ef9eeaad70b6836657163e6aa1f072090b58" } +datafusion-datasource = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "c629ef9eeaad70b6836657163e6aa1f072090b58" } +datafusion-execution = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "c629ef9eeaad70b6836657163e6aa1f072090b58" } +datafusion-expr = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "c629ef9eeaad70b6836657163e6aa1f072090b58" } +datafusion-functions = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "c629ef9eeaad70b6836657163e6aa1f072090b58" } +datafusion-physical-expr = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "c629ef9eeaad70b6836657163e6aa1f072090b58" } +datafusion-physical-expr-adapter = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "c629ef9eeaad70b6836657163e6aa1f072090b58" } +datafusion-physical-expr-common = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "c629ef9eeaad70b6836657163e6aa1f072090b58" } +datafusion-physical-plan = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "c629ef9eeaad70b6836657163e6aa1f072090b58" } +datafusion-pruning = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "c629ef9eeaad70b6836657163e6aa1f072090b58" } +datafusion-sqllogictest = { git = "https://github.com/Dandandan/arrow-datafusion.git", rev = "c629ef9eeaad70b6836657163e6aa1f072090b58" } +tpchgen = { git = "https://github.com/AdamGS/tpchgen-rs.git", branch = "adamg/bump-arrow-match-df" } +tpchgen-arrow = { git = "https://github.com/AdamGS/tpchgen-rs.git", branch = "adamg/bump-arrow-match-df" } diff --git a/vortex-bench/src/random_access/take.rs b/vortex-bench/src/random_access/take.rs index 674aa6349aa..b23d6622876 100644 --- a/vortex-bench/src/random_access/take.rs +++ b/vortex-bench/src/random_access/take.rs @@ -14,6 +14,7 @@ use itertools::Itertools; use parquet::arrow::ParquetRecordBatchStreamBuilder; use parquet::arrow::arrow_reader::ArrowReaderMetadata; use parquet::arrow::arrow_reader::ArrowReaderOptions; +use parquet::file::metadata::PageIndexPolicy; use stream::StreamExt; use tokio::fs::File; use vortex::array::Canonical; @@ -100,7 +101,7 @@ impl ParquetRandomAccessor { /// Open a Parquet file, parse the footer, and return a ready-to-use accessor. pub async fn open(path: PathBuf, name: impl Into) -> anyhow::Result { let mut file = File::open(&path).await?; - let options = ArrowReaderOptions::new().with_page_index(true); + let options = ArrowReaderOptions::new().with_page_index_policy(PageIndexPolicy::Required); let arrow_metadata = ArrowReaderMetadata::load_async(&mut file, options).await?; let row_group_offsets = once(0) diff --git a/vortex-cuda/src/pooled_read_at.rs b/vortex-cuda/src/pooled_read_at.rs index cb051728cca..1b5ec4fb31a 100644 --- a/vortex-cuda/src/pooled_read_at.rs +++ b/vortex-cuda/src/pooled_read_at.rs @@ -13,6 +13,7 @@ use object_store::GetOptions; use object_store::GetRange; use object_store::GetResultPayload; use object_store::ObjectStore; +use object_store::ObjectStoreExt; use object_store::path::Path as ObjectPath; use vortex::array::buffer::BufferHandle; use vortex::buffer::Alignment; diff --git a/vortex-datafusion/src/convert/exprs.rs b/vortex-datafusion/src/convert/exprs.rs index 455a5c65a71..79fec6e1e3e 100644 --- a/vortex-datafusion/src/convert/exprs.rs +++ b/vortex-datafusion/src/convert/exprs.rs @@ -395,7 +395,8 @@ fn try_operator_from_df(value: &DFOperator) -> DFResult { | DFOperator::AtQuestion | DFOperator::Question | DFOperator::QuestionAnd - | DFOperator::QuestionPipe => { + | DFOperator::QuestionPipe + | DFOperator::Colon => { tracing::debug!(operator = %value, "Can't pushdown binary_operator operator"); Err(exec_datafusion_err!( "Unsupported datafusion operator {value}" diff --git a/vortex-datafusion/src/persistent/format.rs b/vortex-datafusion/src/persistent/format.rs index fb3d5f9db11..6cb1d58ec11 100644 --- a/vortex-datafusion/src/persistent/format.rs +++ b/vortex-datafusion/src/persistent/format.rs @@ -32,6 +32,7 @@ use datafusion_datasource::file_scan_config::FileScanConfigBuilder; use datafusion_datasource::file_sink_config::FileSinkConfig; use datafusion_datasource::sink::DataSinkExec; use datafusion_datasource::source::DataSourceExec; +use datafusion_execution::cache::cache_manager::CachedFileMetadataEntry; use datafusion_expr::dml::InsertOp; use datafusion_physical_expr::LexRequirement; use datafusion_physical_plan::ExecutionPlan; @@ -251,16 +252,19 @@ impl FileFormat for VortexFormat { let cache = file_metadata_cache.clone(); SpawnedTask::spawn(async move { - // Check if we have cached metadata for this file - if let Some(cached) = cache.get(&object) - && let Some(cached_vortex) = - cached.as_any().downcast_ref::() + // Check if we have entry metadata for this file + if let Some(entry) = cache.get(&object.location) + && entry.is_valid_for(&object) + && let Some(cached_vortex) = entry + .file_metadata + .as_any() + .downcast_ref::() { let inferred_schema = cached_vortex.footer().dtype().to_arrow_schema()?; return VortexResult::Ok((object.location, inferred_schema)); } - // Not cached or invalid - open the file + // Not entry or invalid - open the file let reader = Arc::new(ObjectStoreReadAt::new( store, object.location.clone(), @@ -276,7 +280,8 @@ impl FileFormat for VortexFormat { // Cache the metadata let cached_metadata = Arc::new(CachedVortexMetadata::new(&vxf)); - cache.put(&object, cached_metadata); + let entry = CachedFileMetadataEntry::new(object.clone(), cached_metadata); + cache.put(&object.location, entry); let inferred_schema = vxf.dtype().to_arrow_schema()?; VortexResult::Ok((object.location, inferred_schema)) @@ -310,24 +315,28 @@ impl FileFormat for VortexFormat { let file_metadata_cache = state.runtime_env().cache_manager.get_file_metadata_cache(); SpawnedTask::spawn(async move { - // Try to get cached metadata first - let cached_metadata = file_metadata_cache.get(&object).and_then(|cached| { - cached - .as_any() - .downcast_ref::() - .map(|m| { - ( - m.footer().dtype().clone(), - m.footer().statistics().cloned(), - m.footer().row_count(), - ) - }) - }); + // Try to get entry metadata first + let cached_metadata = file_metadata_cache + .get(&object.location) + .filter(|entry| entry.is_valid_for(&object)) + .and_then(|entry| { + entry + .file_metadata + .as_any() + .downcast_ref::() + .map(|m| { + ( + m.footer().dtype().clone(), + m.footer().statistics().cloned(), + m.footer().row_count(), + ) + }) + }); let (dtype, file_stats, row_count) = match cached_metadata { Some(metadata) => metadata, None => { - // Not cached - open the file + // Not entry - open the file let reader = Arc::new(ObjectStoreReadAt::new( store, object.location.clone(), @@ -348,8 +357,9 @@ impl FileFormat for VortexFormat { })?; // Cache the metadata - let cached = Arc::new(CachedVortexMetadata::new(&vxf)); - file_metadata_cache.put(&object, cached); + let file_metadata = Arc::new(CachedVortexMetadata::new(&vxf)); + let entry = CachedFileMetadataEntry::new(object.clone(), file_metadata); + file_metadata_cache.put(&object.location, entry); ( vxf.dtype().clone(), @@ -490,6 +500,7 @@ impl FileFormat for VortexFormat { let conf = FileScanConfigBuilder::from(file_scan_config) .with_source(Arc::new(source)) + .with_morsel_driven(true) .build(); Ok(DataSourceExec::from_data_source(conf)) diff --git a/vortex-datafusion/src/persistent/opener.rs b/vortex-datafusion/src/persistent/opener.rs index 5986a06da49..545d60e1b30 100644 --- a/vortex-datafusion/src/persistent/opener.rs +++ b/vortex-datafusion/src/persistent/opener.rs @@ -1,6 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use std::future::ready; use std::ops::Range; use std::sync::Arc; use std::sync::Weak; @@ -9,6 +10,7 @@ use arrow_schema::Schema; use datafusion_common::DataFusionError; use datafusion_common::Result as DFResult; use datafusion_common::ScalarValue; +use datafusion_common::Statistics; use datafusion_common::exec_datafusion_err; use datafusion_datasource::FileRange; use datafusion_datasource::PartitionedFile; @@ -18,8 +20,10 @@ use datafusion_datasource::file_stream::FileOpener; use datafusion_execution::cache::cache_manager::FileMetadataCache; use datafusion_physical_expr::PhysicalExprRef; use datafusion_physical_expr::projection::ProjectionExprs; +use datafusion_physical_expr::projection::Projector; use datafusion_physical_expr::simplifier::PhysicalExprSimplifier; use datafusion_physical_expr::split_conjunction; +use datafusion_physical_expr::utils::collect_columns; use datafusion_physical_expr::utils::reassign_expr_columns; use datafusion_physical_expr_adapter::PhysicalExprAdapterFactory; use datafusion_physical_expr_adapter::replace_columns_with_literals; @@ -29,13 +33,17 @@ use datafusion_pruning::FilePruner; use futures::FutureExt; use futures::StreamExt; use futures::TryStreamExt; +use futures::future::BoxFuture; use futures::stream; use object_store::path::Path; use tracing::Instrument; use vortex::array::ArrayRef; use vortex::array::VortexSessionExecute; use vortex::array::arrow::ArrowArrayExecutor; +use vortex::dtype::DType; +use vortex::dtype::DecimalType; use vortex::error::VortexError; +use vortex::error::VortexExpect; use vortex::file::OpenOptionsSessionExt; use vortex::io::InstrumentedReadAt; use vortex::layout::LayoutReader; @@ -45,6 +53,7 @@ use vortex::scan::ScanBuilder; use vortex::session::VortexSession; use vortex_utils::aliases::dash_map::DashMap; use vortex_utils::aliases::dash_map::Entry; +use vortex_utils::aliases::hash_set::HashSet; use crate::VortexAccessPlan; use crate::convert::exprs::ExpressionConvertor; @@ -98,7 +107,424 @@ pub(crate) struct VortexOpener { pub scan_concurrency: Option, } +/// Target byte budget per morsel (16 MB). +const TARGET_MORSEL_BYTES: u64 = 16 * 1024 * 1024; +/// Minimum rows per morsel to avoid excessive overhead. +const MIN_MORSEL_ROWS: u64 = 32 * 1024; +/// Maximum rows per morsel to bound memory usage. +const MAX_MORSEL_ROWS: u64 = 256 * 1024; +/// Minimum projected bytes per morsel. Small row ranges are coalesced until this threshold. +const MIN_BYTES_PER_MORSEL: u64 = 4 * 1024 * 1024; // 4 MB + +/// Estimate the average byte width of a DType for morsel sizing fallback. +fn estimate_dtype_byte_width(dtype: &DType) -> u64 { + match dtype { + DType::Null => 0, + DType::Bool(_) => 1, + DType::Primitive(ptype, _) => ptype.byte_width() as u64, + DType::Decimal(dec, _) => DecimalType::smallest_decimal_value_type(dec).byte_width() as u64, + DType::Utf8(_) | DType::Binary(_) => 64, + DType::List(inner, _) => 64 + estimate_dtype_byte_width(inner), + DType::FixedSizeList(inner, size, _) => estimate_dtype_byte_width(inner) * (*size as u64), + DType::Struct(fields, _) => fields.fields().map(|f| estimate_dtype_byte_width(&f)).sum(), + DType::Extension(ext) => estimate_dtype_byte_width(ext.storage_dtype()), + } +} + +/// Estimate the average bytes per row for the touched columns, using statistics if available, +/// falling back to DType estimation. +fn estimate_bytes_per_row( + statistics: Option<&Statistics>, + file_dtype: &DType, + row_count: u64, + touched_col_indices: &HashSet, +) -> u64 { + // Try to use actual per-column byte_size statistics + if let Some(stats) = statistics { + let touched_bytes: usize = touched_col_indices + .iter() + .filter_map(|&i| stats.column_statistics.get(i)) + .filter_map(|cs| cs.byte_size.get_value().copied()) + .sum(); + + if touched_bytes > 0 { + let num_rows = stats + .num_rows + .get_value() + .map(|&n| n as u64) + .unwrap_or(row_count); + if num_rows > 0 { + let bpr = touched_bytes as u64 / num_rows; + if bpr > 0 { + return bpr; + } + } + } + } + + // Fallback: estimate from DType + let struct_fields = match file_dtype.as_struct_fields_opt() { + Some(fields) => fields, + None => return 1, + }; + + let touched_bytes: u64 = if touched_col_indices.is_empty() { + struct_fields + .fields() + .map(|f| estimate_dtype_byte_width(&f)) + .sum() + } else { + struct_fields + .fields() + .enumerate() + .filter(|(i, _)| touched_col_indices.contains(i)) + .map(|(_, f)| estimate_dtype_byte_width(&f)) + .sum() + }; + + touched_bytes.max(1) +} + +struct VortexMorsel { + row_range: Range, + /// Cached scan preparation shared across all morsels from the same file. + prepared_scan: Arc, + /// Estimated projected bytes per row for the touched columns. + bytes_per_row: u64, + /// Whether this morsel is a sub-morsel produced by split_morsel. + is_sub_morsel: bool, +} + +struct PreparedFileScan { + layout_reader: Arc, + file_row_count: u64, + scan_projection: vortex::expr::Expression, + pushed_filter: Option, + stream_schema: Arc, + projector: Projector, +} + +fn assert_extensions_can_be_replaced_with_morsel(file: &PartitionedFile) { + assert!( + file.extensions + .as_ref() + .is_none_or(|extensions| extensions.is::()), + "expected PartitionedFile.extensions to be empty or VortexMorsel before replacing it" + ); +} + +fn prepare_file_scan( + unified_file_schema: &Schema, + projection: ProjectionExprs, + filter: Option, + expr_adapter_factory: &Arc, + expr_convertor: &Arc, + projection_pushdown: bool, + layout_reader: Arc, +) -> DFResult { + let file_row_count = layout_reader.row_count(); + let file_dtype = layout_reader.dtype().clone(); + let this_file_schema = Arc::new(calculate_physical_schema(&file_dtype, unified_file_schema)?); + let projected_physical_schema = projection.project_schema(unified_file_schema)?; + + let expr_adapter = expr_adapter_factory.create( + Arc::new(unified_file_schema.clone()), + Arc::clone(&this_file_schema), + )?; + + let simplifier = PhysicalExprSimplifier::new(&this_file_schema); + + let filter = filter + .map(|filter| simplifier.simplify(expr_adapter.rewrite(filter)?)) + .transpose()?; + let projection = projection.try_map_exprs(|p| simplifier.simplify(expr_adapter.rewrite(p)?))?; + + let ProcessedProjection { + scan_projection, + leftover_projection, + } = if projection_pushdown { + expr_convertor.split_projection( + projection.clone(), + &this_file_schema, + &projected_physical_schema, + )? + } else { + expr_convertor.no_pushdown_projection(projection.clone(), &this_file_schema)? + }; + + let scan_dtype = scan_projection.return_dtype(&file_dtype).map_err(|_e| { + exec_datafusion_err!("Couldn't get the dtype for the underlying Vortex scan") + })?; + + let scan_reference_schema = if projection_pushdown { + projected_physical_schema + } else { + let column_indices = projection.column_indices(); + let fields: Vec<_> = column_indices + .into_iter() + .map(|idx| this_file_schema.field(idx).clone()) + .collect(); + Schema::new(fields) + }; + let stream_schema = Arc::new(calculate_physical_schema( + &scan_dtype, + &scan_reference_schema, + )?); + + let leftover_projection = + leftover_projection.try_map_exprs(|expr| reassign_expr_columns(expr, &stream_schema))?; + let projector = leftover_projection.make_projector(&stream_schema)?; + + let pushed_filter = filter + .and_then(|f| { + let (pushed, unpushed): (Vec, Vec) = + split_conjunction(&f) + .into_iter() + .cloned() + .partition(|expr| expr_convertor.can_be_pushed_down(expr, &this_file_schema)); + + if !unpushed.is_empty() { + return Some(Err(exec_datafusion_err!( + r#"VortexSource accepted but failed to push {} filters. + This should never happen if you have a properly configured + PhysicalExprAdapterFactory configured on the source. + + Failed filters: + + {unpushed:#?} + "#, + unpushed.len() + ))); + } + + make_vortex_predicate(expr_convertor.as_ref(), &pushed).transpose() + }) + .transpose()?; + + Ok(PreparedFileScan { + layout_reader, + file_row_count, + scan_projection, + pushed_filter, + stream_schema, + projector, + }) +} + +fn replace_partition_column_literals( + table_schema: &TableSchema, + file: &PartitionedFile, + projection: ProjectionExprs, + filter: Option, +) -> DFResult<(ProjectionExprs, Option)> { + #[allow(clippy::disallowed_types)] + let literal_value_cols = table_schema + .table_partition_cols() + .iter() + .map(|f| f.name()) + .cloned() + .zip(file.partition_values.clone()) + .collect::>(); + + if literal_value_cols.is_empty() { + return Ok((projection, filter)); + } + + let projection = projection.try_map_exprs(|expr| { + replace_columns_with_literals(Arc::clone(&expr), &literal_value_cols) + })?; + let filter = filter + .map(|p| replace_columns_with_literals(p, &literal_value_cols)) + .transpose()?; + + Ok((projection, filter)) +} + impl FileOpener for VortexOpener { + fn is_leaf_morsel(&self, file: &PartitionedFile) -> bool { + file.extensions + .as_ref() + .is_some_and(|e| e.is::()) + } + + fn morselize( + &self, + partitioned_file: PartitionedFile, + ) -> BoxFuture<'static, DFResult>> { + if partitioned_file + .extensions + .as_ref() + .map(|e| e.is::()) + .unwrap_or(false) + { + return Box::pin(ready(Ok(vec![partitioned_file]))); + } + + let file_pruning_predicate = self.file_pruning_predicate.clone(); + let table_schema = self.table_schema.clone(); + let session = self.session.clone(); + let vortex_reader_factory = self.vortex_reader_factory.clone(); + let metrics_registry = self.metrics_registry.clone(); + let expr_adapter_factory = self.expr_adapter_factory.clone(); + let expr_convertor = self.expression_convertor.clone(); + let projection_pushdown = self.projection_pushdown; + let (projection, filter) = match replace_partition_column_literals( + &table_schema, + &partitioned_file, + self.projection.clone(), + self.filter.clone(), + ) { + Ok(values) => values, + Err(err) => return Box::pin(ready(Err(err))), + }; + + // Gather the set of column indices touched by projection and filter + let touched_col_indices: HashSet = { + let mut cols: HashSet = self.projection.column_indices().into_iter().collect(); + if let Some(filter) = &self.filter { + for col in collect_columns(filter) { + cols.insert(col.index()); + } + } + cols + }; + + Box::pin(async move { + // File-level pruning + if let Some(pred) = file_pruning_predicate.as_ref() { + let logical_file_schema = Arc::clone(table_schema.file_schema()); + if let Some(mut file_pruner) = FilePruner::try_new( + Arc::clone(pred), + &logical_file_schema, + &partitioned_file, + Count::default(), + ) && file_pruner.should_prune()? + { + return Ok(vec![]); + } + } + + // Open the file to get the row count + let reader = vortex_reader_factory + .create_reader(partitioned_file.path().as_ref(), &session) + .map_err(|e| DataFusionError::External(Box::new(e)))?; + + let vxf = session + .open_options() + .with_file_size(partitioned_file.object_meta.size) + .with_metrics_registry(metrics_registry) + .open_read(reader) + .await + .map_err(|e| DataFusionError::External(Box::new(e)))?; + + let row_count = vxf.row_count(); + let layout_reader = vxf + .layout_reader() + .map_err(|e| DataFusionError::External(Box::new(e)))?; + let prepared_scan = Arc::new(prepare_file_scan( + table_schema.file_schema(), + projection.clone(), + filter.clone(), + &expr_adapter_factory, + &expr_convertor, + projection_pushdown, + layout_reader.clone(), + )?); + + let bytes_per_row = estimate_bytes_per_row( + partitioned_file.statistics.as_deref(), + layout_reader.dtype(), + row_count, + &touched_col_indices, + ); + let morsel_row_count = + (TARGET_MORSEL_BYTES / bytes_per_row).clamp(MIN_MORSEL_ROWS, MAX_MORSEL_ROWS); + + // Split into chunks, then coalesce small consecutive chunks so each + // morsel meets MIN_BYTES_PER_MORSEL projected bytes. + let mut morsels = Vec::new(); + let mut coalesce_start = 0u64; + let mut start = 0u64; + while start < row_count { + let end = (start + morsel_row_count).min(row_count); + let coalesced_rows = end - coalesce_start; + let projected_bytes = coalesced_rows * bytes_per_row; + + if projected_bytes >= MIN_BYTES_PER_MORSEL || end == row_count { + let mut f = partitioned_file.clone(); + assert_extensions_can_be_replaced_with_morsel(&f); + f.extensions = Some(Arc::new(VortexMorsel { + row_range: coalesce_start..end, + prepared_scan: prepared_scan.clone(), + bytes_per_row, + is_sub_morsel: false, + })); + morsels.push(f); + coalesce_start = end; + } + start = end; + } + + Ok(morsels) + }) + } + + fn split_morsel( + &self, + file: PartitionedFile, + n: usize, + _min_rows_per_split: usize, + ) -> Vec { + let morsel = match file + .extensions + .as_ref() + .and_then(|e| e.downcast_ref::()) + { + Some(m) => m, + None => return vec![file], + }; + + // Don't re-split sub-morsels to prevent cascading splits. + if morsel.is_sub_morsel { + return vec![file]; + } + + let row_count = morsel.row_range.end - morsel.row_range.start; + let projected_bytes = row_count * morsel.bytes_per_row; + + // Compute number of splits: each sub-morsel should be at least MIN_BYTES_PER_MORSEL. + let n_splits = usize::try_from((projected_bytes / MIN_BYTES_PER_MORSEL).clamp(1, n as u64)) + .unwrap_or(n); + if n_splits <= 1 { + return vec![file]; + } + + let rows_per_split = row_count / n_splits as u64; + let remainder = row_count % n_splits as u64; + let prepared_scan = morsel.prepared_scan.clone(); + let bytes_per_row = morsel.bytes_per_row; + let base_start = morsel.row_range.start; + + let mut splits = Vec::with_capacity(n_splits); + let mut start = base_start; + for i in 0..n_splits { + let extra = if (i as u64) < remainder { 1 } else { 0 }; + let end = start + rows_per_split + extra; + let mut f = file.clone(); + assert_extensions_can_be_replaced_with_morsel(&f); + f.extensions = Some(Arc::new(VortexMorsel { + row_range: start..end, + prepared_scan: prepared_scan.clone(), + bytes_per_row, + is_sub_morsel: true, + })); + splits.push(f); + start = end; + } + + splits + } + fn open(&self, file: PartitionedFile) -> DFResult { let session = self.session.clone(); let metrics_registry = self.metrics_registry.clone(); @@ -106,16 +532,32 @@ impl FileOpener for VortexOpener { Label::new(PATH_LABEL, file.path().to_string()), Label::new(PARTITION_LABEL, self.partition.to_string()), ]; + let morsel = file + .extensions + .as_ref() + .and_then(|e| e.downcast_ref::()) + .map(|m| (m.row_range.clone(), m.prepared_scan.clone())); + + let (projection, filter) = replace_partition_column_literals( + &self.table_schema, + &file, + self.projection.clone(), + self.filter.clone(), + )?; - let mut projection = self.projection.clone(); - let mut filter = self.filter.clone(); - - let reader = self - .vortex_reader_factory - .create_reader(file.path().as_ref(), &session)?; - - let reader = - InstrumentedReadAt::new_with_labels(reader, metrics_registry.as_ref(), labels.clone()); + // Only create I/O reader if not a morsel (morsels have a cached layout reader). + let reader = if morsel.is_none() { + let r = self + .vortex_reader_factory + .create_reader(file.path().as_ref(), &session)?; + Some(InstrumentedReadAt::new_with_labels( + r, + metrics_registry.as_ref(), + labels.clone(), + )) + } else { + None + }; let file_pruning_predicate = self.file_pruning_predicate.clone(); let expr_adapter_factory = self.expr_adapter_factory.clone(); @@ -131,26 +573,6 @@ impl FileOpener for VortexOpener { let expr_convertor = self.expression_convertor.clone(); let projection_pushdown = self.projection_pushdown; - // Replace column access for partition columns with literals - #[allow(clippy::disallowed_types)] - let literal_value_cols = self - .table_schema - .table_partition_cols() - .iter() - .map(|f| f.name()) - .cloned() - .zip(file.partition_values.clone()) - .collect::>(); - - if !literal_value_cols.is_empty() { - projection = projection.try_map_exprs(|expr| { - replace_columns_with_literals(Arc::clone(&expr), &literal_value_cols) - })?; - filter = filter - .map(|p| replace_columns_with_literals(p, &literal_value_cols)) - .transpose()?; - } - Ok(async move { // Create FilePruner when we have a predicate and either dynamic expressions // or file statistics available. The pruner can eliminate files without @@ -180,125 +602,79 @@ impl FileOpener for VortexOpener { return Ok(stream::empty().boxed()); } - let mut open_opts = session - .open_options() - .with_file_size(file.object_meta.size) - .with_metrics_registry(metrics_registry.clone()) - .with_labels(labels); - - if let Some(file_metadata_cache) = file_metadata_cache - && let Some(file_metadata) = file_metadata_cache.get(&file.object_meta) - && let Some(vortex_metadata) = file_metadata - .as_any() - .downcast_ref::() - { - open_opts = open_opts.with_footer(vortex_metadata.footer().clone()); - } - - let vxf = open_opts - .open_read(reader) - .await - .map_err(|e| exec_datafusion_err!("Failed to open Vortex file {e}"))?; - - // This is the expected arrow types of the actual columns in the file, which might have different types - // from the unified logical schema or miss - let this_file_schema = Arc::new(calculate_physical_schema( - vxf.dtype(), - &unified_file_schema, - )?); - - let projected_physical_schema = projection.project_schema(&unified_file_schema)?; - - let expr_adapter = expr_adapter_factory.create( - Arc::clone(&unified_file_schema), - Arc::clone(&this_file_schema), - ); - - let simplifier = PhysicalExprSimplifier::new(&this_file_schema); - - // The adapter rewrites the expressions to the local file schema, allowing - // for schema evolution and divergence between the table's schema and individual files. - let filter = filter - .map(|filter| { - // Expression might now reference columns that don't exist in the file, so we can give it - // another simplification pass. - simplifier.simplify(expr_adapter.rewrite(filter)?) - }) - .transpose()?; - let projection = - projection.try_map_exprs(|p| simplifier.simplify(expr_adapter.rewrite(p)?))?; - - let ProcessedProjection { - scan_projection, - leftover_projection, - } = if projection_pushdown { - expr_convertor.split_projection( - projection.clone(), - &this_file_schema, - &projected_physical_schema, - )? + // Get the prepared scan state - either from the morsel cache or by opening the file. + let prepared_scan = if let Some((_, ref prepared_scan)) = morsel { + prepared_scan.clone() } else { - // When projection pushdown is disabled, read only the required columns - // and apply the full projection after the scan. - expr_convertor.no_pushdown_projection(projection.clone(), &this_file_schema)? - }; - - // The schema of the stream returned from the vortex scan. - // We use a reference schema for types that don't roundtrip (Dictionary, Utf8, etc.). - let scan_dtype = scan_projection.return_dtype(vxf.dtype()).map_err(|_e| { - exec_datafusion_err!("Couldn't get the dtype for the underlying Vortex scan") - })?; - - // When projection pushdown is enabled, the scan outputs the projected columns. - // When disabled, the scan outputs raw columns and the projection is applied after. - let scan_reference_schema = if projection_pushdown { - projected_physical_schema - } else { - // Build schema from the raw columns being read - let column_indices = projection.column_indices(); - let fields: Vec<_> = column_indices - .into_iter() - .map(|idx| this_file_schema.field(idx).clone()) - .collect(); - Schema::new(fields) - }; - let stream_schema = calculate_physical_schema(&scan_dtype, &scan_reference_schema)?; - - let leftover_projection = leftover_projection - .try_map_exprs(|expr| reassign_expr_columns(expr, &stream_schema))?; - let projector = leftover_projection.make_projector(&stream_schema)?; + let mut open_opts = session + .open_options() + .with_file_size(file.object_meta.size) + .with_metrics_registry(metrics_registry.clone()) + .with_labels(labels); + + if let Some(file_metadata_cache) = file_metadata_cache + && let Some(entry) = file_metadata_cache.get(file.path()) + && entry.is_valid_for(&file.object_meta) + && let Some(vortex_metadata) = entry + .file_metadata + .as_any() + .downcast_ref::() + { + open_opts = open_opts.with_footer(vortex_metadata.footer().clone()); + } - // We share our layout readers with others partitions in the scan, so we can only need to read each layout in each file once. - let layout_reader = match layout_reader.entry(file.object_meta.location.clone()) { - Entry::Occupied(mut occupied_entry) => { - if let Some(reader) = occupied_entry.get().upgrade() { - tracing::trace!("reusing layout reader for {}", occupied_entry.key()); - reader - } else { - tracing::trace!("creating layout reader for {}", occupied_entry.key()); + let vxf = open_opts + .open_read(reader.vortex_expect("reader must exist for non-morsel path")) + .await + .map_err(|e| exec_datafusion_err!("Failed to open Vortex file {e}"))?; + + // We share layout readers across partitions so we only read each layout once. + let lr = match layout_reader.entry(file.object_meta.location.clone()) { + Entry::Occupied(mut occupied_entry) => { + if let Some(reader) = occupied_entry.get().upgrade() { + tracing::trace!("reusing layout reader for {}", occupied_entry.key()); + reader + } else { + tracing::trace!("creating layout reader for {}", occupied_entry.key()); + let reader = vxf.layout_reader().map_err(|e| { + DataFusionError::Execution(format!( + "Failed to create layout reader: {e}" + )) + })?; + occupied_entry.insert(Arc::downgrade(&reader)); + reader + } + } + Entry::Vacant(vacant_entry) => { + tracing::trace!("creating layout reader for {}", vacant_entry.key()); let reader = vxf.layout_reader().map_err(|e| { DataFusionError::Execution(format!( "Failed to create layout reader: {e}" )) })?; - occupied_entry.insert(Arc::downgrade(&reader)); + vacant_entry.insert(Arc::downgrade(&reader)); reader } - } - Entry::Vacant(vacant_entry) => { - tracing::trace!("creating layout reader for {}", vacant_entry.key()); - let reader = vxf.layout_reader().map_err(|e| { - DataFusionError::Execution(format!("Failed to create layout reader: {e}")) - })?; - vacant_entry.insert(Arc::downgrade(&reader)); - - reader - } + }; + + Arc::new(prepare_file_scan( + &unified_file_schema, + projection.clone(), + filter.clone(), + &expr_adapter_factory, + &expr_convertor, + projection_pushdown, + lr, + )?) }; - let mut scan_builder = ScanBuilder::new(session.clone(), layout_reader); + let mut scan_builder = + ScanBuilder::new(session.clone(), prepared_scan.layout_reader.clone()); - if let Some(extensions) = file.extensions + if let Some((row_range, _)) = morsel { + // Morsel: restrict the scan to the morsel's row range. + scan_builder = scan_builder.with_row_range(row_range); + } else if let Some(extensions) = file.extensions && let Some(vortex_plan) = extensions.downcast_ref::() { scan_builder = vortex_plan.apply_to_builder(scan_builder); @@ -308,46 +684,13 @@ impl FileOpener for VortexOpener { scan_builder = apply_byte_range( file_range, file.object_meta.size, - vxf.row_count(), + prepared_scan.file_row_count, scan_builder, ); } - let filter = filter - .and_then(|f| { - // Verify that all filters we've accepted from DataFusion get pushed down. - // This will only fail if the user has not configured a suitable - // PhysicalExprAdapterFactory on the file source to handle rewriting the - // expression to handle missing/reordered columns in the Vortex file. - - let (pushed, unpushed): (Vec, Vec) = - split_conjunction(&f) - .into_iter() - .cloned() - .partition(|expr| { - expr_convertor.can_be_pushed_down(expr, &this_file_schema) - }); - - if !unpushed.is_empty() { - return Some(Err(exec_datafusion_err!( - r#"VortexSource accepted but failed to push {} filters. - This should never happen if you have a properly configured - PhysicalExprAdapterFactory configured on the source. - - Failed filters: - - {unpushed:#?} - "#, - unpushed.len() - ))); - } - - make_vortex_predicate(expr_convertor.as_ref(), &pushed).transpose() - }) - .transpose()?; - if let Some(limit) = limit - && filter.is_none() + && prepared_scan.pushed_filter.is_none() { scan_builder = scan_builder.with_limit(limit); } @@ -356,14 +699,16 @@ impl FileOpener for VortexOpener { scan_builder = scan_builder.with_concurrency(concurrency); } + let prepared_scan_for_execute = prepared_scan.clone(); + let prepared_scan_for_project = prepared_scan.clone(); let stream = scan_builder .with_metrics_registry(metrics_registry) - .with_projection(scan_projection) - .with_some_filter(filter) + .with_projection(prepared_scan.scan_projection.clone()) + .with_some_filter(prepared_scan.pushed_filter.clone()) .with_ordered(has_output_ordering) .map(move |chunk| { let mut ctx = session.create_execution_ctx(); - chunk.execute_record_batch(&stream_schema, &mut ctx) + chunk.execute_record_batch(&prepared_scan_for_execute.stream_schema, &mut ctx) }) .into_stream() .map_err(|e| exec_datafusion_err!("Failed to create Vortex stream: {e}"))? @@ -396,10 +741,15 @@ impl FileOpener for VortexOpener { }) .try_flatten() .map(move |batch| { - if projector.projection().as_ref().is_empty() { + if prepared_scan_for_project + .projector + .projection() + .as_ref() + .is_empty() + { batch } else { - batch.and_then(|b| projector.project_batch(&b)) + batch.and_then(|b| prepared_scan_for_project.projector.project_batch(&b)) } }) .boxed(); diff --git a/vortex-datafusion/src/persistent/source.rs b/vortex-datafusion/src/persistent/source.rs index bac7fe2b39a..e6abf23b574 100644 --- a/vortex-datafusion/src/persistent/source.rs +++ b/vortex-datafusion/src/persistent/source.rs @@ -8,6 +8,7 @@ use std::sync::Weak; use datafusion_common::Result as DFResult; use datafusion_common::config::ConfigOptions; +use datafusion_common::tree_node::TreeNodeRecursion; use datafusion_datasource::TableSchema; use datafusion_datasource::file::FileSource; use datafusion_datasource::file_scan_config::FileScanConfig; @@ -326,4 +327,22 @@ impl FileSource for VortexSource { fn table_schema(&self) -> &TableSchema { &self.table_schema } + + fn apply_expressions( + &self, + f: &mut dyn FnMut(&dyn PhysicalExpr) -> DFResult, + ) -> DFResult { + // Visit predicate (filter) expression if present + let mut tnr = TreeNodeRecursion::Continue; + if let Some(predicate) = &self.full_predicate { + tnr = tnr.visit_sibling(|| f(predicate.as_ref()))?; + } + + // Visit projection expressions + for proj_expr in &self.projection { + tnr = tnr.visit_sibling(|| f(proj_expr.expr.as_ref()))?; + } + + Ok(tnr) + } } diff --git a/vortex-datafusion/src/v2/source.rs b/vortex-datafusion/src/v2/source.rs index 53dd9939a16..7737b54cd26 100644 --- a/vortex-datafusion/src/v2/source.rs +++ b/vortex-datafusion/src/v2/source.rs @@ -21,6 +21,7 @@ use datafusion_common::DataFusionError; use datafusion_common::Result as DFResult; use datafusion_common::Statistics; use datafusion_common::stats::Precision as DFPrecision; +use datafusion_common::tree_node::TreeNodeRecursion; use datafusion_datasource::source::DataSource; use datafusion_execution::SendableRecordBatchStream; use datafusion_execution::TaskContext; @@ -540,6 +541,13 @@ impl DataSource for VortexDataSource { .with_updated_node(Arc::new(this) as _), ) } + + fn apply_expressions( + &self, + _f: &mut dyn FnMut(&dyn PhysicalExpr) -> DFResult, + ) -> DFResult { + Ok(TreeNodeRecursion::Continue) + } } /// Convert a Vortex [`Option`] to a DataFusion [`Precision`](DFPrecision). diff --git a/vortex-io/src/object_store/read_at.rs b/vortex-io/src/object_store/read_at.rs index 82ef57ca6f6..a4957199565 100644 --- a/vortex-io/src/object_store/read_at.rs +++ b/vortex-io/src/object_store/read_at.rs @@ -11,6 +11,7 @@ use object_store::GetOptions; use object_store::GetRange; use object_store::GetResultPayload; use object_store::ObjectStore; +use object_store::ObjectStoreExt; use object_store::path::Path as ObjectPath; use vortex_array::buffer::BufferHandle; use vortex_buffer::Alignment; diff --git a/vortex-io/src/object_store/write.rs b/vortex-io/src/object_store/write.rs index 20bfb7815c4..988fd1473ac 100644 --- a/vortex-io/src/object_store/write.rs +++ b/vortex-io/src/object_store/write.rs @@ -9,6 +9,7 @@ use futures::TryStreamExt; use futures::stream::FuturesUnordered; use object_store::MultipartUpload; use object_store::ObjectStore; +use object_store::ObjectStoreExt; use object_store::PutPayload; use object_store::PutResult; use object_store::path::Path; diff --git a/vortex-jni/src/file.rs b/vortex-jni/src/file.rs index 2ab70aa5e85..a492a25a6a0 100644 --- a/vortex-jni/src/file.rs +++ b/vortex-jni/src/file.rs @@ -15,6 +15,7 @@ use jni::objects::ReleaseMode; use jni::sys::jlong; use jni::sys::jobject; use object_store::ObjectStore; +use object_store::ObjectStoreExt; use object_store::path::Path; use prost::Message; use url::Url; diff --git a/vortex-layout/src/layouts/row_idx/mod.rs b/vortex-layout/src/layouts/row_idx/mod.rs index fd7513aad96..8ce4a2f5f4b 100644 --- a/vortex-layout/src/layouts/row_idx/mod.rs +++ b/vortex-layout/src/layouts/row_idx/mod.rs @@ -9,6 +9,7 @@ use std::fmt::Formatter; use std::ops::BitAnd; use std::ops::Range; use std::sync::Arc; +use std::sync::OnceLock; use Nullability::NonNullable; pub use expr::*; @@ -47,7 +48,7 @@ pub struct RowIdxLayoutReader { name: Arc, row_offset: u64, child: Arc, - partition_cache: DashMap, + partition_cache: DashMap>>, session: VortexSession, } @@ -66,45 +67,52 @@ impl RowIdxLayoutReader { let key = ExactExpr(expr.clone()); // Check cache first with read-only lock. - if let Some(partitioning) = self.partition_cache.get(&key) { + if let Some(entry) = self.partition_cache.get(&key) + && let Some(partitioning) = entry.value().get() + { return partitioning.clone(); } - self.partition_cache + let cell = self + .partition_cache .entry(key) - .or_insert_with(|| { - // Partition the expression into row idx and child expressions. - let mut partitioned = partition(expr.clone(), self.dtype(), |expr| { - if expr.is::() { - vec![Partition::RowIdx] - } else if is_root(expr) { - vec![Partition::Child] - } else { - vec![] - } - }) - .vortex_expect("We should not fail to partition expression over struct fields"); + .or_insert_with(|| Arc::new(OnceLock::new())) + .clone(); - // If there's only a single partition, we can directly return the expression. - if partitioned.partitions.len() == 1 { - return match &partitioned.partition_annotations[0] { - Partition::RowIdx => { - Partitioning::RowIdx(replace(expr.clone(), &row_idx(), root())) - } - Partition::Child => Partitioning::Child(expr.clone()), - }; + cell.get_or_init(|| self.compute_partitioning(expr)).clone() + } + + fn compute_partitioning(&self, expr: &Expression) -> Partitioning { + // Partition the expression into row idx and child expressions. + let mut partitioned = partition(expr.clone(), self.dtype(), |expr| { + if expr.is::() { + vec![Partition::RowIdx] + } else if is_root(expr) { + vec![Partition::Child] + } else { + vec![] + } + }) + .vortex_expect("We should not fail to partition expression over struct fields"); + + // If there's only a single partition, we can directly return the expression. + if partitioned.partitions.len() == 1 { + return match &partitioned.partition_annotations[0] { + Partition::RowIdx => { + Partitioning::RowIdx(replace(expr.clone(), &row_idx(), root())) } + Partition::Child => Partitioning::Child(expr.clone()), + }; + } - // Replace the row_idx expression with the root expression in the row_idx partition. - partitioned.partitions = partitioned - .partitions - .into_iter() - .map(|p| replace(p, &row_idx(), root())) - .collect(); + // Replace the row_idx expression with the root expression in the row_idx partition. + partitioned.partitions = partitioned + .partitions + .into_iter() + .map(|p| replace(p, &row_idx(), root())) + .collect(); - Partitioning::Partitioned(Arc::new(partitioned)) - }) - .clone() + Partitioning::Partitioned(Arc::new(partitioned)) } } diff --git a/vortex-layout/src/layouts/struct_/reader.rs b/vortex-layout/src/layouts/struct_/reader.rs index fc665731bdd..bb069df9723 100644 --- a/vortex-layout/src/layouts/struct_/reader.rs +++ b/vortex-layout/src/layouts/struct_/reader.rs @@ -4,6 +4,7 @@ use std::collections::BTreeSet; use std::ops::Range; use std::sync::Arc; +use std::sync::OnceLock; use futures::try_join; use itertools::Itertools; @@ -57,7 +58,7 @@ pub struct StructReader { expanded_root_expr: Expression, field_lookup: Option>, - partitioned_expr_cache: DashMap, + partitioned_expr_cache: DashMap>>, } impl StructReader { @@ -152,51 +153,65 @@ impl StructReader { /// Utility for partitioning an expression over the fields of a struct. fn partition_expr(&self, expr: Expression) -> Partitioned { - self.partitioned_expr_cache - .entry(ExactExpr(expr.clone())) - .or_insert_with(|| { - // First, we expand the root scope into the fields of the struct to ensure - // that partitioning works correctly. - let expr = replace(expr.clone(), &root(), self.expanded_root_expr.clone()); - let expr = expr - .optimize_recursive(self.dtype()) - .vortex_expect("We should not fail to simplify expression over struct fields"); - - // Partition the expression into expressions that can be evaluated over individual fields - let mut partitioned = partition( - expr.clone(), - self.dtype(), - make_free_field_annotator( - self.dtype() - .as_struct_fields_opt() - .vortex_expect("We know it's a struct DType"), - ), - ) - .vortex_expect("We should not fail to partition expression over struct fields"); - - if partitioned.partitions.len() == 1 { - // If there's only one partition, we step into the field scope of the original - // expression by replacing any `$.a` with `$`. - return Partitioned::Single( - partitioned.partition_names[0].clone(), - replace(expr, &col(partitioned.partition_names[0].clone()), root()), - ); - } + let key = ExactExpr(expr.clone()); + + if let Some(entry) = self.partitioned_expr_cache.get(&key) + && let Some(partitioning) = entry.value().get() + { + return partitioning.clone(); + } - // We now need to process the partitioned expressions to rewrite the root scope - // to be that of the field, rather than the struct. In other words, "stepping in" - // to the field scope. - partitioned.partitions = partitioned - .partitions - .iter() - .zip_eq(partitioned.partition_names.iter()) - .map(|(e, name)| replace(e.clone(), &col(name.clone()), root())) - .collect(); - - Partitioned::Multi(Arc::new(partitioned)) - }) + let cell = self + .partitioned_expr_cache + .entry(key) + .or_insert_with(|| Arc::new(OnceLock::new())) + .clone(); + + cell.get_or_init(|| self.compute_partitioned_expr(expr)) .clone() } + + fn compute_partitioned_expr(&self, expr: Expression) -> Partitioned { + // First, we expand the root scope into the fields of the struct to ensure + // that partitioning works correctly. + let expr = replace(expr, &root(), self.expanded_root_expr.clone()); + let expr = expr + .optimize_recursive(self.dtype()) + .vortex_expect("We should not fail to simplify expression over struct fields"); + + // Partition the expression into expressions that can be evaluated over individual fields + let mut partitioned = partition( + expr.clone(), + self.dtype(), + make_free_field_annotator( + self.dtype() + .as_struct_fields_opt() + .vortex_expect("We know it's a struct DType"), + ), + ) + .vortex_expect("We should not fail to partition expression over struct fields"); + + if partitioned.partitions.len() == 1 { + // If there's only one partition, we step into the field scope of the original + // expression by replacing any `$.a` with `$`. + return Partitioned::Single( + partitioned.partition_names[0].clone(), + replace(expr, &col(partitioned.partition_names[0].clone()), root()), + ); + } + + // We now need to process the partitioned expressions to rewrite the root scope + // to be that of the field, rather than the struct. In other words, "stepping in" + // to the field scope. + partitioned.partitions = partitioned + .partitions + .iter() + .zip_eq(partitioned.partition_names.iter()) + .map(|(e, name)| replace(e.clone(), &col(name.clone()), root())) + .collect(); + + Partitioned::Multi(Arc::new(partitioned)) + } } /// When partitioning an expression, in the case it only has a single partition we can avoid diff --git a/vortex-python/Cargo.toml b/vortex-python/Cargo.toml index 391dc9b4b47..0dd36e2703c 100644 --- a/vortex-python/Cargo.toml +++ b/vortex-python/Cargo.toml @@ -47,7 +47,7 @@ parking_lot = { workspace = true } pyo3 = { workspace = true, features = ["abi3", "abi3-py311"] } pyo3-bytes = { workspace = true } pyo3-log = { workspace = true } -pyo3-object_store = { version = "0.7" } +pyo3-object_store = { version = "0.8" } tokio = { workspace = true, features = ["fs", "rt-multi-thread"] } url = { workspace = true } vortex = { workspace = true, features = ["object_store", "tokio"] } diff --git a/vortex-sqllogictest/Cargo.toml b/vortex-sqllogictest/Cargo.toml index f5f4dd30b0a..757abb49689 100644 --- a/vortex-sqllogictest/Cargo.toml +++ b/vortex-sqllogictest/Cargo.toml @@ -23,7 +23,7 @@ datafusion-sqllogictest = { workspace = true } futures.workspace = true indicatif.workspace = true rstest = { workspace = true } -sqllogictest = "0.28" +sqllogictest = "0.29.1" thiserror = { workspace = true } tokio = { workspace = true, features = ["full"] } vortex = { workspace = true, features = ["tokio"] }