Skip to content

Commit 64ddf16

Browse files
committed
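1. Use the arrow-* sub-crates (arrow-array, arrow-schema, arrow-arith) instead of the umbrella arrow crate
2. Refine the tests: build the TimestampMicrosecond inputs from NaiveDateTime values with a non-zero time of day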
1 parent 5f4c457 commit 64ddf16

5 files changed

Lines changed: 111 additions & 44 deletions

File tree

crates/iceberg/Cargo.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,9 @@ keywords = ["iceberg"]
2828

2929
[dependencies]
3030
apache-avro = "0.15.0"
31-
arrow = { version = ">=46" }
31+
arrow-array = { version = ">=46" }
32+
arrow-schema = { version = ">=46" }
33+
arrow-arith = { version = ">=46" }
3234
serde = {version = "^1.0", features = ["rc"]}
3335
serde_bytes = "0.11.8"
3436
serde_json = "^1.0"

crates/iceberg/src/transform/identity.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
// under the License.
1717

1818
use crate::Result;
19-
use arrow::array::ArrayRef;
19+
use arrow_array::ArrayRef;
2020

2121
use super::TransformFunction;
2222

crates/iceberg/src/transform/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
//! Transform functions used to compute partition values.
1919
use crate::{spec::Transform, Result};
20-
use arrow::array::ArrayRef;
20+
use arrow_array::ArrayRef;
2121

2222
mod identity;
2323
mod temporal;

crates/iceberg/src/transform/temporal.rs

Lines changed: 105 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,14 @@
1717

1818
use super::TransformFunction;
1919
use crate::{Error, ErrorKind, Result};
20-
use arrow::array::{Array, TimestampMicrosecondArray};
21-
use arrow::compute::binary;
22-
use arrow::datatypes;
23-
use arrow::datatypes::DataType;
24-
use arrow::{
25-
array::{ArrayRef, Date32Array, Int32Array},
26-
compute::{month_dyn, year_dyn},
20+
use arrow_arith::{
21+
arity::binary,
22+
temporal::{month_dyn, year_dyn},
2723
};
24+
use arrow_array::{
25+
types::Date32Type, Array, ArrayRef, Date32Array, Int32Array, TimestampMicrosecondArray,
26+
};
27+
use arrow_schema::{DataType, TimeUnit};
2828
use chrono::Datelike;
2929
use std::sync::Arc;
3030

@@ -86,7 +86,7 @@ pub struct Day;
8686
impl TransformFunction for Day {
8787
fn transform(&self, input: ArrayRef) -> Result<ArrayRef> {
8888
let res: Int32Array = match input.data_type() {
89-
DataType::Timestamp(datatypes::TimeUnit::Microsecond, _) => input
89+
DataType::Timestamp(TimeUnit::Microsecond, _) => input
9090
.as_any()
9191
.downcast_ref::<TimestampMicrosecondArray>()
9292
.unwrap()
@@ -97,8 +97,7 @@ impl TransformFunction for Day {
9797
.downcast_ref::<Date32Array>()
9898
.unwrap()
9999
.unary(|v| -> i32 {
100-
datatypes::Date32Type::to_naive_date(v).num_days_from_ce()
101-
- DAY_SINCE_UNIX_EPOCH
100+
Date32Type::to_naive_date(v).num_days_from_ce() - DAY_SINCE_UNIX_EPOCH
102101
})
103102
}
104103
_ => {
@@ -121,7 +120,7 @@ pub struct Hour;
121120
impl TransformFunction for Hour {
122121
fn transform(&self, input: ArrayRef) -> Result<ArrayRef> {
123122
let res: Int32Array = match input.data_type() {
124-
DataType::Timestamp(datatypes::TimeUnit::Microsecond, _) => input
123+
DataType::Timestamp(TimeUnit::Microsecond, _) => input
125124
.as_any()
126125
.downcast_ref::<TimestampMicrosecondArray>()
127126
.unwrap()
@@ -142,26 +141,25 @@ impl TransformFunction for Hour {
142141

143142
#[cfg(test)]
144143
mod test {
145-
use arrow::array::{ArrayRef, Date32Array, Int32Array, TimestampMicrosecondArray};
146-
use chrono::NaiveDate;
144+
use arrow_array::{ArrayRef, Date32Array, Int32Array, TimestampMicrosecondArray};
145+
use chrono::{NaiveDate, NaiveDateTime};
147146
use std::sync::Arc;
148147

149148
use crate::transform::TransformFunction;
150149

151150
#[test]
152151
fn test_transform_years() {
153152
let year = super::Year;
153+
154+
// Test Date32
154155
let ori_date = vec![
155156
NaiveDate::from_ymd_opt(1970, 1, 1).unwrap(),
156157
NaiveDate::from_ymd_opt(2000, 1, 1).unwrap(),
157158
NaiveDate::from_ymd_opt(2030, 1, 1).unwrap(),
158159
NaiveDate::from_ymd_opt(2060, 1, 1).unwrap(),
159160
];
160-
161-
// Test Date32
162161
let date_array: ArrayRef = Arc::new(Date32Array::from(
163162
ori_date
164-
.clone()
165163
.into_iter()
166164
.map(|date| {
167165
date.signed_duration_since(NaiveDate::from_ymd_opt(1970, 1, 1).unwrap())
@@ -178,11 +176,28 @@ mod test {
178176
assert_eq!(res.value(3), 90);
179177

180178
// Test TimestampMicrosecond
179+
let ori_timestamp = vec![
180+
NaiveDateTime::parse_from_str("1970-01-01 12:30:42.123", "%Y-%m-%d %H:%M:%S.%f")
181+
.unwrap(),
182+
NaiveDateTime::parse_from_str("2000-01-01 19:30:42.123", "%Y-%m-%d %H:%M:%S.%f")
183+
.unwrap(),
184+
NaiveDateTime::parse_from_str("2030-01-01 10:30:42.123", "%Y-%m-%d %H:%M:%S.%f")
185+
.unwrap(),
186+
NaiveDateTime::parse_from_str("2060-01-01 11:30:42.123", "%Y-%m-%d %H:%M:%S.%f")
187+
.unwrap(),
188+
];
181189
let date_array: ArrayRef = Arc::new(TimestampMicrosecondArray::from(
182-
ori_date
190+
ori_timestamp
183191
.into_iter()
184-
.map(|date| {
185-
date.signed_duration_since(NaiveDate::from_ymd_opt(1970, 1, 1).unwrap())
192+
.map(|timestamp| {
193+
timestamp
194+
.signed_duration_since(
195+
NaiveDateTime::parse_from_str(
196+
"1970-01-01 00:00:00.0",
197+
"%Y-%m-%d %H:%M:%S.%f",
198+
)
199+
.unwrap(),
200+
)
186201
.num_microseconds()
187202
.unwrap()
188203
})
@@ -200,17 +215,16 @@ mod test {
200215
#[test]
201216
fn test_transform_months() {
202217
let month = super::Month;
218+
219+
// Test Date32
203220
let ori_date = vec![
204221
NaiveDate::from_ymd_opt(1970, 1, 1).unwrap(),
205222
NaiveDate::from_ymd_opt(2000, 4, 1).unwrap(),
206223
NaiveDate::from_ymd_opt(2030, 7, 1).unwrap(),
207224
NaiveDate::from_ymd_opt(2060, 10, 1).unwrap(),
208225
];
209-
210-
// Test Date32
211226
let date_array: ArrayRef = Arc::new(Date32Array::from(
212227
ori_date
213-
.clone()
214228
.into_iter()
215229
.map(|date| {
216230
date.signed_duration_since(NaiveDate::from_ymd_opt(1970, 1, 1).unwrap())
@@ -227,11 +241,28 @@ mod test {
227241
assert_eq!(res.value(3), 90 * 12 + 9);
228242

229243
// Test TimestampMicrosecond
244+
let ori_timestamp = vec![
245+
NaiveDateTime::parse_from_str("1970-01-01 12:30:42.123", "%Y-%m-%d %H:%M:%S.%f")
246+
.unwrap(),
247+
NaiveDateTime::parse_from_str("2000-04-01 19:30:42.123", "%Y-%m-%d %H:%M:%S.%f")
248+
.unwrap(),
249+
NaiveDateTime::parse_from_str("2030-07-01 10:30:42.123", "%Y-%m-%d %H:%M:%S.%f")
250+
.unwrap(),
251+
NaiveDateTime::parse_from_str("2060-10-01 11:30:42.123", "%Y-%m-%d %H:%M:%S.%f")
252+
.unwrap(),
253+
];
230254
let date_array: ArrayRef = Arc::new(TimestampMicrosecondArray::from(
231-
ori_date
255+
ori_timestamp
232256
.into_iter()
233-
.map(|date| {
234-
date.signed_duration_since(NaiveDate::from_ymd_opt(1970, 1, 1).unwrap())
257+
.map(|timestamp| {
258+
timestamp
259+
.signed_duration_since(
260+
NaiveDateTime::parse_from_str(
261+
"1970-01-01 00:00:00.0",
262+
"%Y-%m-%d %H:%M:%S.%f",
263+
)
264+
.unwrap(),
265+
)
235266
.num_microseconds()
236267
.unwrap()
237268
})
@@ -267,7 +298,6 @@ mod test {
267298
// Test Date32
268299
let date_array: ArrayRef = Arc::new(Date32Array::from(
269300
ori_date
270-
.clone()
271301
.into_iter()
272302
.map(|date| {
273303
date.signed_duration_since(NaiveDate::from_ymd_opt(1970, 1, 1).unwrap())
@@ -284,11 +314,28 @@ mod test {
284314
assert_eq!(res.value(3), expect_day[3]);
285315

286316
// Test TimestampMicrosecond
317+
let ori_timestamp = vec![
318+
NaiveDateTime::parse_from_str("1970-01-01 12:30:42.123", "%Y-%m-%d %H:%M:%S.%f")
319+
.unwrap(),
320+
NaiveDateTime::parse_from_str("2000-04-01 19:30:42.123", "%Y-%m-%d %H:%M:%S.%f")
321+
.unwrap(),
322+
NaiveDateTime::parse_from_str("2030-07-01 10:30:42.123", "%Y-%m-%d %H:%M:%S.%f")
323+
.unwrap(),
324+
NaiveDateTime::parse_from_str("2060-10-01 11:30:42.123", "%Y-%m-%d %H:%M:%S.%f")
325+
.unwrap(),
326+
];
287327
let date_array: ArrayRef = Arc::new(TimestampMicrosecondArray::from(
288-
ori_date
328+
ori_timestamp
289329
.into_iter()
290-
.map(|date| {
291-
date.signed_duration_since(NaiveDate::from_ymd_opt(1970, 1, 1).unwrap())
330+
.map(|timestamp| {
331+
timestamp
332+
.signed_duration_since(
333+
NaiveDateTime::parse_from_str(
334+
"1970-01-01 00:00:00.0",
335+
"%Y-%m-%d %H:%M:%S.%f",
336+
)
337+
.unwrap(),
338+
)
292339
.num_microseconds()
293340
.unwrap()
294341
})
@@ -306,27 +353,45 @@ mod test {
306353
#[test]
307354
fn test_transform_hours() {
308355
let hour = super::Hour;
309-
let ori_date = vec![
310-
NaiveDate::from_ymd_opt(1970, 1, 1).unwrap(),
311-
NaiveDate::from_ymd_opt(2000, 4, 1).unwrap(),
312-
NaiveDate::from_ymd_opt(2030, 7, 1).unwrap(),
313-
NaiveDate::from_ymd_opt(2060, 10, 1).unwrap(),
356+
let ori_timestamp = vec![
357+
NaiveDateTime::parse_from_str("1970-01-01 19:01:23.123", "%Y-%m-%d %H:%M:%S.%f")
358+
.unwrap(),
359+
NaiveDateTime::parse_from_str("2000-03-01 12:01:23.123", "%Y-%m-%d %H:%M:%S.%f")
360+
.unwrap(),
361+
NaiveDateTime::parse_from_str("2030-10-02 10:01:23.123", "%Y-%m-%d %H:%M:%S.%f")
362+
.unwrap(),
363+
NaiveDateTime::parse_from_str("2060-09-01 05:03:23.123", "%Y-%m-%d %H:%M:%S.%f")
364+
.unwrap(),
314365
];
315-
let expect_hour = ori_date
366+
let expect_hour = ori_timestamp
316367
.clone()
317368
.into_iter()
318-
.map(|data| {
319-
data.signed_duration_since(NaiveDate::from_ymd_opt(1970, 1, 1).unwrap())
369+
.map(|timestamp| {
370+
timestamp
371+
.signed_duration_since(
372+
NaiveDateTime::parse_from_str(
373+
"1970-01-01 00:00:0.0",
374+
"%Y-%m-%d %H:%M:%S.%f",
375+
)
376+
.unwrap(),
377+
)
320378
.num_hours() as i32
321379
})
322380
.collect::<Vec<i32>>();
323381

324382
// Test TimestampMicrosecond
325383
let date_array: ArrayRef = Arc::new(TimestampMicrosecondArray::from(
326-
ori_date
384+
ori_timestamp
327385
.into_iter()
328-
.map(|date| {
329-
date.signed_duration_since(NaiveDate::from_ymd_opt(1970, 1, 1).unwrap())
386+
.map(|timestamp| {
387+
timestamp
388+
.signed_duration_since(
389+
NaiveDateTime::parse_from_str(
390+
"1970-01-01 00:00:0.0",
391+
"%Y-%m-%d %H:%M:%S.%f",
392+
)
393+
.unwrap(),
394+
)
330395
.num_microseconds()
331396
.unwrap()
332397
})

crates/iceberg/src/transform/void.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
// under the License.
1717

1818
use crate::Result;
19-
use arrow::array::{new_null_array, ArrayRef};
19+
use arrow_array::{new_null_array, ArrayRef};
2020

2121
use super::TransformFunction;
2222

0 commit comments

Comments
 (0)