Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions datafusion/physical-expr-common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,11 @@ hashbrown = { workspace = true }
indexmap = { workspace = true }
itertools = { workspace = true }
parking_lot = { workspace = true }

[dev-dependencies]
criterion = { workspace = true }
rand = { workspace = true }

[[bench]]
harness = false
name = "compare_nested"
74 changes: 74 additions & 0 deletions datafusion/physical-expr-common/benches/compare_nested.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use arrow::array::{ArrayRef, Int32Array, Scalar, StringArray, StructArray};
use arrow::datatypes::{DataType, Field, Fields};
use criterion::{Criterion, criterion_group, criterion_main};
use datafusion_expr_common::operator::Operator;
use datafusion_physical_expr_common::datum::compare_op_for_nested;
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use std::hint::black_box;
use std::sync::Arc;

/// Creates a random `StructArray` of `num_rows` rows with two non-nullable
/// fields: `x` (`Int32`) and `y` (`Utf8`).
///
/// Every `y` value is a 12-character string of lowercase ASCII letters.
/// All values are drawn from `rng`, so a seeded generator yields the same
/// array on every run.
fn make_struct_array(num_rows: usize, rng: &mut StdRng) -> ArrayRef {
    // The integer column is drawn in full before any string is generated;
    // this ordering determines which random values land in which column.
    let x_column: Int32Array = std::iter::repeat_with(|| Some(rng.random::<i32>()))
        .take(num_rows)
        .collect();

    let y_column: StringArray = std::iter::repeat_with(|| {
        let mut text = String::with_capacity(12);
        for _ in 0..12 {
            let byte: u8 = rng.random_range(b'a'..=b'z');
            text.push(char::from(byte));
        }
        Some(text)
    })
    .take(num_rows)
    .collect();

    let schema = Fields::from(vec![
        Field::new("x", DataType::Int32, false),
        Field::new("y", DataType::Utf8, false),
    ]);
    let columns: Vec<ArrayRef> = vec![Arc::new(x_column), Arc::new(y_column)];

    // No struct-level null buffer is supplied.
    let array = StructArray::try_new(schema, columns, None).unwrap();
    Arc::new(array)
}

/// Registers two Criterion benchmarks for `compare_op_for_nested` with
/// `Operator::Eq` on struct data: one comparing an array against another
/// array, and one comparing an array against a single-row scalar.
fn criterion_benchmark(c: &mut Criterion) {
    let row_count = 8192;
    // Fixed seed so the generated inputs are the same on every run.
    let mut generator = StdRng::seed_from_u64(42);

    // All inputs share one RNG stream, so the order of these three calls
    // determines the contents of each input.
    let left = make_struct_array(row_count, &mut generator);
    let right_array = make_struct_array(row_count, &mut generator);
    let right_scalar = Scalar::new(make_struct_array(1, &mut generator));

    c.bench_function("compare_nested array_array", |bencher| {
        bencher.iter(|| {
            let mask = compare_op_for_nested(Operator::Eq, &left, &right_array).unwrap();
            black_box(mask)
        })
    });

    c.bench_function("compare_nested array_scalar", |bencher| {
        bencher.iter(|| {
            let mask = compare_op_for_nested(Operator::Eq, &left, &right_scalar).unwrap();
            black_box(mask)
        })
    });
}

criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);
8 changes: 4 additions & 4 deletions datafusion/physical-expr-common/src/datum.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

use arrow::array::BooleanArray;
use arrow::array::{ArrayRef, Datum, make_comparator};
use arrow::buffer::NullBuffer;
use arrow::buffer::{BooleanBuffer, NullBuffer};
use arrow::compute::kernels::cmp::{
distinct, eq, gt, gt_eq, lt, lt_eq, neq, not_distinct,
};
Expand Down Expand Up @@ -171,9 +171,9 @@ pub fn compare_op_for_nested(
};

let values = match (is_l_scalar, is_r_scalar) {
(false, false) => (0..len).map(|i| cmp_with_op(i, i)).collect(),
(true, false) => (0..len).map(|i| cmp_with_op(0, i)).collect(),
(false, true) => (0..len).map(|i| cmp_with_op(i, 0)).collect(),
(false, false) => BooleanBuffer::collect_bool(len, |i| cmp_with_op(i, i)),
(true, false) => BooleanBuffer::collect_bool(len, |i| cmp_with_op(0, i)),
(false, true) => BooleanBuffer::collect_bool(len, |i| cmp_with_op(i, 0)),
(true, true) => std::iter::once(cmp_with_op(0, 0)).collect(),
};

Expand Down
Loading