Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 21 additions & 4 deletions native/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions native/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
# under the License.

[workspace]
default-members = ["core", "spark-expr", "proto"]
members = ["core", "spark-expr", "proto", "hdfs", "fs-hdfs"]
default-members = ["core", "spark-expr", "proto", "jvm-bridge"]
members = ["core", "spark-expr", "proto", "jvm-bridge", "hdfs", "fs-hdfs"]
resolver = "2"

[workspace.package]
Expand All @@ -43,6 +43,7 @@ datafusion-datasource = { version = "52.2.0" }
datafusion-physical-expr-adapter = { version = "52.2.0" }
datafusion-spark = { version = "52.2.0" }
datafusion-comet-spark-expr = { path = "spark-expr" }
datafusion-comet-jvm-bridge = { path = "jvm-bridge" }
datafusion-comet-proto = { path = "proto" }
chrono = { version = "0.4", default-features = false, features = ["clock"] }
chrono-tz = { version = "0.10" }
Expand Down
6 changes: 2 additions & 4 deletions native/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,6 @@ tokio = { version = "1", features = ["rt-multi-thread"] }
async-trait = { workspace = true }
log = "0.4"
log4rs = "1.4.0"
thiserror = { workspace = true }
lazy_static = "1.4.0"
prost = "0.14.3"
jni = "0.21"
snap = "1.1"
Expand All @@ -64,10 +62,10 @@ datafusion-physical-expr-adapter = { workspace = true }
datafusion-datasource = { workspace = true }
datafusion-spark = { workspace = true }
once_cell = "1.18.0"
regex = { workspace = true }
crc32fast = "1.3.2"
simd-adler32 = "0.3.7"
datafusion-comet-spark-expr = { workspace = true }
datafusion-comet-jvm-bridge = { workspace = true }
datafusion-comet-proto = { workspace = true }
object_store = { workspace = true }
url = { workspace = true }
Expand Down Expand Up @@ -108,7 +106,7 @@ jemalloc = ["tikv-jemallocator", "tikv-jemalloc-ctl"]

# exclude optional packages from cargo machete verifications
[package.metadata.cargo-machete]
ignored = ["datafusion-comet-objectstore-hdfs", "hdfs-sys"]
ignored = ["hdfs-sys", "paste"]

[lib]
name = "comet"
Expand Down
2 changes: 1 addition & 1 deletion native/core/src/execution/expressions/subquery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

use crate::{
execution::utils::bytes_to_i128,
jvm_bridge::{jni_static_call, BinaryWrapper, JVMClasses, StringWrapper},
jvm_bridge::{BinaryWrapper, JVMClasses, StringWrapper},
};
use arrow::array::RecordBatch;
use arrow::datatypes::{DataType, Schema, TimeUnit};
Expand Down
2 changes: 1 addition & 1 deletion native/core/src/execution/jni_api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use crate::{
metrics::utils::update_comet_metric, planner::PhysicalPlanner, serde::to_arrow_datatype,
shuffle::spark_unsafe::row::process_sorted_row_partition, sort::RdxSort,
},
jvm_bridge::{jni_new_global_ref, JVMClasses},
jvm_bridge::JVMClasses,
};
use arrow::array::{Array, RecordBatch, UInt32Array};
use arrow::compute::{take, TakeOptions};
Expand Down
5 changes: 1 addition & 4 deletions native/core/src/execution/memory_pools/fair_pool.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,7 @@ use std::{

use jni::objects::GlobalRef;

use crate::{
errors::CometResult,
jvm_bridge::{jni_call, JVMClasses},
};
use crate::{errors::CometResult, jvm_bridge::JVMClasses};
use datafusion::common::resources_err;
use datafusion::execution::memory_pool::MemoryConsumer;
use datafusion::{
Expand Down
5 changes: 1 addition & 4 deletions native/core/src/execution/memory_pools/unified_pool.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,7 @@ use std::{
},
};

use crate::{
errors::CometResult,
jvm_bridge::{jni_call, JVMClasses},
};
use crate::{errors::CometResult, jvm_bridge::JVMClasses};
use datafusion::{
common::{resources_datafusion_err, DataFusionError},
execution::memory_pool::{MemoryPool, MemoryReservation},
Expand Down
2 changes: 1 addition & 1 deletion native/core/src/execution/metrics/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
// specific language governing permissions and limitations
// under the License.

use crate::errors::CometError;
use crate::execution::spark_plan::SparkPlan;
use crate::{errors::CometError, jvm_bridge::jni_call};
use datafusion::physical_plan::metrics::MetricValue;
use datafusion_comet_proto::spark_metric::NativeMetricNode;
use jni::{objects::JObject, JNIEnv};
Expand Down
32 changes: 1 addition & 31 deletions native/core/src/execution/operators/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,7 @@

//! Operators

use std::fmt::Debug;

use jni::objects::GlobalRef;
pub use crate::errors::ExecutionError;

pub use copy::*;
pub use iceberg_scan::*;
Expand All @@ -35,31 +33,3 @@ mod csv_scan;
pub mod projection;
mod scan;
pub use csv_scan::init_csv_datasource_exec;

/// Error returned during executing operators.
#[derive(thiserror::Error, Debug)]
pub enum ExecutionError {
/// Simple error
#[allow(dead_code)]
#[error("General execution error with reason: {0}.")]
GeneralError(String),

/// Error when deserializing an operator.
#[error("Fail to deserialize to native operator with reason: {0}.")]
DeserializeError(String),

/// Error when processing Arrow array.
#[error("Fail to process Arrow array with reason: {0}.")]
ArrowError(String),

/// DataFusion error
#[error("Error from DataFusion: {0}.")]
DataFusionError(String),

#[error("{class}: {msg}")]
JavaException {
class: String,
msg: String,
throwable: GlobalRef,
},
}
2 changes: 1 addition & 1 deletion native/core/src/execution/operators/scan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ use crate::{
execution::{
operators::ExecutionError, planner::TEST_EXEC_CONTEXT_ID, utils::SparkArrowConvert,
},
jvm_bridge::{jni_call, JVMClasses},
jvm_bridge::JVMClasses,
};
use arrow::array::{make_array, ArrayData, ArrayRef, RecordBatch, RecordBatchOptions};
use arrow::compute::{cast_with_options, take, CastOptions};
Expand Down
35 changes: 7 additions & 28 deletions native/core/src/execution/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,13 @@ pub mod operator_registry;

use crate::execution::operators::init_csv_datasource_exec;
use crate::execution::operators::IcebergScanExec;
use crate::{
errors::ExpressionError,
execution::{
expressions::subquery::Subquery,
operators::{ExecutionError, ExpandExec, ParquetWriterExec, ScanExec},
planner::expression_registry::ExpressionRegistry,
planner::operator_registry::OperatorRegistry,
serde::to_arrow_datatype,
shuffle::ShuffleWriterExec,
},
use crate::execution::{
expressions::subquery::Subquery,
operators::{ExecutionError, ExpandExec, ParquetWriterExec, ScanExec},
planner::expression_registry::ExpressionRegistry,
planner::operator_registry::OperatorRegistry,
serde::to_arrow_datatype,
shuffle::ShuffleWriterExec,
};
use arrow::compute::CastOptions;
use arrow::datatypes::{DataType, Field, FieldRef, Schema, TimeUnit, DECIMAL128_MAX_PRECISION};
Expand Down Expand Up @@ -2626,24 +2623,6 @@ impl PhysicalPlanner {
}
}

impl From<DataFusionError> for ExecutionError {
fn from(value: DataFusionError) -> Self {
ExecutionError::DataFusionError(value.message().to_string())
}
}

impl From<ExecutionError> for DataFusionError {
fn from(value: ExecutionError) -> Self {
DataFusionError::Execution(value.to_string())
}
}

impl From<ExpressionError> for DataFusionError {
fn from(value: ExpressionError) -> Self {
DataFusionError::Execution(value.to_string())
}
}

/// Collects the indices of the columns in the input schema that are used in the expression
/// and returns them as a pair of vectors, one for the left side and one for the right side.
fn expr_to_columns(
Expand Down
24 changes: 0 additions & 24 deletions native/core/src/execution/serde.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,30 +34,6 @@ use datafusion_comet_proto::{
use prost::Message;
use std::{io::Cursor, sync::Arc};

impl From<prost::DecodeError> for ExpressionError {
fn from(error: prost::DecodeError) -> ExpressionError {
ExpressionError::Deserialize(error.to_string())
}
}

impl From<prost::UnknownEnumValue> for ExpressionError {
fn from(error: prost::UnknownEnumValue) -> ExpressionError {
ExpressionError::Deserialize(error.to_string())
}
}

impl From<prost::DecodeError> for ExecutionError {
fn from(error: prost::DecodeError) -> ExecutionError {
ExecutionError::DeserializeError(error.to_string())
}
}

impl From<prost::UnknownEnumValue> for ExecutionError {
fn from(error: prost::UnknownEnumValue) -> ExecutionError {
ExecutionError::DeserializeError(error.to_string())
}
}

/// Deserialize bytes to protobuf type of expression
pub fn deserialize_expr(buf: &[u8]) -> Result<spark_expression::Expr, ExpressionError> {
match spark_expression::Expr::decode(&mut Cursor::new(buf)) {
Expand Down
20 changes: 0 additions & 20 deletions native/core/src/execution/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,32 +16,12 @@
// under the License.

/// Utils for array vector, etc.
use crate::errors::ExpressionError;
use crate::execution::operators::ExecutionError;
use arrow::{
array::ArrayData,
error::ArrowError,
ffi::{from_ffi, FFI_ArrowArray, FFI_ArrowSchema},
};

impl From<ArrowError> for ExecutionError {
fn from(error: ArrowError) -> ExecutionError {
ExecutionError::ArrowError(error.to_string())
}
}

impl From<ArrowError> for ExpressionError {
fn from(error: ArrowError) -> ExpressionError {
ExpressionError::ArrowError(error.to_string())
}
}

impl From<ExpressionError> for ArrowError {
fn from(error: ExpressionError) -> ArrowError {
ArrowError::ComputeError(error.to_string())
}
}

pub trait SparkArrowConvert {
/// Build Arrow Arrays from C data interface passed from Spark.
/// It accepts a tuple (ArrowArray address, ArrowSchema address).
Expand Down
16 changes: 9 additions & 7 deletions native/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,12 @@
#![deny(clippy::clone_on_ref_ptr)]
extern crate core;

#[macro_use]
extern crate datafusion_comet_jvm_bridge;

use jni::{
objects::{JClass, JString},
JNIEnv, JavaVM,
JNIEnv,
};
use log::info;
use log4rs::{
Expand All @@ -37,7 +40,6 @@ use log4rs::{
encode::pattern::PatternEncoder,
Config,
};
use once_cell::sync::OnceCell;

#[cfg(all(
not(target_env = "msvc"),
Expand All @@ -52,14 +54,16 @@ use tikv_jemallocator::Jemalloc;
))]
use mimalloc::MiMalloc;

// Re-export from jvm-bridge crate for internal use
pub use datafusion_comet_jvm_bridge::errors;
pub use datafusion_comet_jvm_bridge::jvm_bridge;
pub use datafusion_comet_jvm_bridge::JAVA_VM;

use errors::{try_unwrap_or_throw, CometError, CometResult};

#[macro_use]
mod errors;
#[macro_use]
pub mod common;
pub mod execution;
mod jvm_bridge;
pub mod parquet;

#[cfg(all(
Expand All @@ -77,8 +81,6 @@ static GLOBAL: Jemalloc = Jemalloc;
#[global_allocator]
static GLOBAL: MiMalloc = MiMalloc;

static JAVA_VM: OnceCell<JavaVM> = OnceCell::new();

#[no_mangle]
pub extern "system" fn Java_org_apache_comet_NativeBase_init(
e: JNIEnv,
Expand Down
2 changes: 1 addition & 1 deletion native/core/src/parquet/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ use crate::execution::planner::PhysicalPlanner;
use crate::execution::serde;
use crate::execution::spark_plan::SparkPlan;
use crate::execution::utils::SparkArrowConvert;
use crate::jvm_bridge::{jni_new_global_ref, JVMClasses};
use crate::jvm_bridge::JVMClasses;
use crate::parquet::data_type::AsBytes;
use crate::parquet::encryption_support::{CometEncryptionFactory, ENCRYPTION_FACTORY_ID};
use crate::parquet::parquet_exec::init_datasource_exec;
Expand Down
Loading
Loading