Skip to content

Commit d187bb9

Browse files
committed
Allow offsets to be run end encoded
1 parent c138eb9 commit d187bb9

1 file changed

Lines changed: 102 additions & 0 deletions

File tree

arrow-schema/src/extension/canonical/timestamp_with_offset.rs

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,10 @@ impl ExtensionType for TimestampWithOffset {
107107
key_type.is_dictionary_key_type()
108108
&& matches!(value_type.as_ref(), DataType::Int16)
109109
}
110+
DataType::RunEndEncoded(run_ends, values) => {
111+
run_ends.data_type().is_run_ends_type()
112+
&& matches!(values.data_type(), DataType::Int16)
113+
}
110114
_ => false,
111115
};
112116

@@ -137,6 +141,8 @@ impl ExtensionType for TimestampWithOffset {
137141

138142
#[cfg(test)]
139143
mod tests {
144+
use std::sync::Arc;
145+
140146
#[cfg(feature = "canonical_extension_types")]
141147
use crate::extension::CanonicalExtensionType;
142148
use crate::{
@@ -182,6 +188,29 @@ mod tests {
182188
)
183189
}
184190

191+
fn make_valid_field_run_end_encoded(time_unit: TimeUnit, run_ends_type: DataType) -> Field {
192+
assert!(run_ends_type.is_run_ends_type());
193+
Field::new(
194+
"",
195+
DataType::Struct(Fields::from_iter([
196+
Field::new(
197+
TIMESTAMP_FIELD_NAME,
198+
DataType::Timestamp(time_unit, Some("UTC".into())),
199+
false,
200+
),
201+
Field::new(
202+
OFFSET_FIELD_NAME,
203+
DataType::RunEndEncoded(
204+
Arc::new(Field::new("run_ends", run_ends_type, false)),
205+
Arc::new(Field::new("values", DataType::Int16, false)),
206+
),
207+
false,
208+
),
209+
])),
210+
false,
211+
)
212+
}
213+
185214
#[test]
186215
fn valid_primitive_offsets() -> Result<(), ArrowError> {
187216
let time_units = [
@@ -241,6 +270,33 @@ mod tests {
241270
Ok(())
242271
}
243272

273+
/// Every combination of time unit and Int16/Int32/Int64 run-ends type must be accepted
/// as storage for `TimestampWithOffset` when the offset column is run-end encoded.
#[test]
fn valid_run_end_encoded_offsets() -> Result<(), ArrowError> {
    for time_unit in [
        TimeUnit::Second,
        TimeUnit::Millisecond,
        TimeUnit::Microsecond,
        TimeUnit::Nanosecond,
    ] {
        for run_ends_type in [DataType::Int16, DataType::Int32, DataType::Int64] {
            let mut field = make_valid_field_run_end_encoded(time_unit, run_ends_type);

            // Attaching the extension type and reading it back must both succeed.
            field.try_with_extension_type(TimestampWithOffset)?;
            field.try_extension_type::<TimestampWithOffset>()?;

            #[cfg(feature = "canonical_extension_types")]
            assert_eq!(
                field.try_canonical_extension_type()?,
                CanonicalExtensionType::TimestampWithOffset(TimestampWithOffset)
            );
        }
    }

    Ok(())
}
299+
244300
#[test]
245301
#[should_panic(expected = "Field extension type name missing")]
246302
fn missing_name() {
@@ -335,6 +391,52 @@ mod tests {
335391
Field::new("", data_type, false).with_extension_type(TimestampWithOffset);
336392
}
337393

394+
/// A run-end encoded offset column whose `run_ends` child is `Boolean` must be rejected,
/// even though its `values` child is the expected `Int16`.
#[test]
#[should_panic(
    expected = "expected Struct(\"timestamp\": Timestamp(_, Some(\"UTC\")), \"offset_minutes\": Int16), found Struct"
)]
fn invalid_type_wrong_run_ends_run_end_encoded() {
    let timestamp = Field::new(
        TIMESTAMP_FIELD_NAME,
        DataType::Timestamp(TimeUnit::Second, Some("UTC".into())),
        false,
    );

    // Only the run-ends child deviates from a valid layout here.
    let run_ends = Arc::new(Field::new("run_ends", DataType::Boolean, false));
    let values = Arc::new(Field::new("values", DataType::Int16, false));
    let offset = Field::new(
        OFFSET_FIELD_NAME,
        DataType::RunEndEncoded(run_ends, values),
        false,
    );

    let data_type = DataType::Struct(Fields::from_iter([timestamp, offset]));
    Field::new("", data_type, false).with_extension_type(TimestampWithOffset);
}
416+
417+
/// A run-end encoded offset column whose `values` child is not `Int16` must be rejected.
///
/// The `run_ends` child is deliberately a valid run-ends type (`Int16`) so that the
/// values type is the only thing wrong with the storage type. With an invalid run-ends
/// type the `is_run_ends_type() && ...` check would short-circuit and this test would
/// pass without ever exercising the values-type check.
#[test]
#[should_panic(
    expected = "expected Struct(\"timestamp\": Timestamp(_, Some(\"UTC\")), \"offset_minutes\": Int16), found Struct"
)]
fn invalid_type_wrong_values_run_end_encoded() {
    let data_type = DataType::Struct(Fields::from_iter([
        Field::new(
            TIMESTAMP_FIELD_NAME,
            DataType::Timestamp(TimeUnit::Second, Some("UTC".into())),
            false,
        ),
        Field::new(
            OFFSET_FIELD_NAME,
            DataType::RunEndEncoded(
                // Valid run-ends type: isolates the failure to the values child below.
                Arc::new(Field::new("run_ends", DataType::Int16, false)),
                // Int32 is the invalid part: offsets must be Int16.
                Arc::new(Field::new("values", DataType::Int32, false)),
            ),
            false,
        ),
    ]));
    Field::new("", data_type, false).with_extension_type(TimestampWithOffset);
}
439+
338440
#[test]
339441
#[should_panic(
340442
expected = "expected Struct(\"timestamp\": Timestamp(_, Some(\"UTC\")), \"offset_minutes\": Int16), found Struct"

0 commit comments

Comments
 (0)