diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 2889259dd4820..7d32f2a88fd9c 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -2862,10 +2862,15 @@ impl ConfigField for ConfigFileEncryptionProperties { } #[cfg(feature = "parquet_encryption")] -impl From for FileEncryptionProperties { - fn from(val: ConfigFileEncryptionProperties) -> Self { +impl TryFrom for FileEncryptionProperties { + type Error = DataFusionError; + + fn try_from(val: ConfigFileEncryptionProperties) -> Result { let mut fep = FileEncryptionProperties::builder( - hex::decode(val.footer_key_as_hex).unwrap(), + hex::decode(val.footer_key_as_hex) + .map_err(|e| { + DataFusionError::Configuration(format!("Unable to decode hex footer key from ConfigFileEncryptionProperties: {e}")) + })?, ) .with_plaintext_footer(!val.encrypt_footer) .with_aad_prefix_storage(val.store_aad_prefix); @@ -2873,17 +2878,26 @@ impl From for FileEncryptionProperties { if !val.footer_key_metadata_as_hex.is_empty() { fep = fep.with_footer_key_metadata( hex::decode(&val.footer_key_metadata_as_hex) - .expect("Invalid footer key metadata"), + .map_err(|e| { + DataFusionError::Configuration(format!("Unable to decode hex footer key metadata from ConfigFileEncryptionProperties: {e}")) + })?, ); } for (column_name, encryption_props) in val.column_encryption_properties.iter() { let encryption_key = hex::decode(&encryption_props.column_key_as_hex) - .expect("Invalid column encryption key"); + .map_err(|e| { + DataFusionError::Configuration(format!("Unable to decode hex encryption key for column {column_name}: {e}")) + })?; let key_metadata = encryption_props .column_metadata_as_hex .as_ref() - .map(|x| hex::decode(x).expect("Invalid column metadata")); + .map(hex::decode) + .transpose() + .map_err(|e| { + DataFusionError::Configuration(format!("Unable to decode hex column metadata for column {column_name}: {e}")) + })?; + match key_metadata { Some(key_metadata) => { fep = fep.with_column_key_and_metadata( @@ -2899,11 +2913,18 @@ impl From for FileEncryptionProperties { } if !val.aad_prefix_as_hex.is_empty() { - let aad_prefix: Vec = - hex::decode(&val.aad_prefix_as_hex).expect("Invalid AAD prefix"); + let aad_prefix: Vec = hex::decode(&val.aad_prefix_as_hex).map_err(|e| { + DataFusionError::Configuration(format!( + "Unable to decode hex AAD prefix from ConfigFileEncryptionProperties: {e}" + )) + })?; fep = fep.with_aad_prefix(aad_prefix); } - Arc::unwrap_or_clone(fep.build().unwrap()) + Ok(Arc::unwrap_or_clone(fep.build().map_err(|e| { + DataFusionError::Configuration(format!( + "Could not build FileEncryptionProperties: {e}" + )) + })?)) } } @@ -3019,36 +3040,54 @@ impl ConfigField for ConfigFileDecryptionProperties { } #[cfg(feature = "parquet_encryption")] -impl From for FileDecryptionProperties { - fn from(val: ConfigFileDecryptionProperties) -> Self { +impl TryFrom for FileDecryptionProperties { + type Error = DataFusionError; + + fn try_from(val: ConfigFileDecryptionProperties) -> Result { let mut column_names: Vec<&str> = Vec::new(); let mut column_keys: Vec> = Vec::new(); for (col_name, decryption_properties) in val.column_decryption_properties.iter() { + let column_key = hex::decode(&decryption_properties.column_key_as_hex).map_err(|e| { + DataFusionError::Configuration(format! + ("Could not decode hex column key from ConfigFileDecryptionProperties for column name {col_name}: {e}.")) + })?; column_names.push(col_name.as_str()); - column_keys.push( - hex::decode(&decryption_properties.column_key_as_hex) - .expect("Invalid column decryption key"), - ); + column_keys.push(column_key); } - let mut fep = FileDecryptionProperties::builder( - hex::decode(val.footer_key_as_hex).expect("Invalid footer key"), - ) - .with_column_keys(column_names, column_keys) - .unwrap(); + let footer_key = hex::decode(val.footer_key_as_hex).map_err(|e| { + DataFusionError::Configuration(format!( + "Could not decode hex footer key from ConfigFileDecryptionProperties: {e}." + )) + })?; + + let mut fep = FileDecryptionProperties::builder(footer_key) + .with_column_keys(column_names, column_keys) + .map_err(|e| { + DataFusionError::Configuration(format!( + "Could not set column keys on FileDecryptionPropertiesBuilder: {e}." + )) + })?; if !val.footer_signature_verification { fep = fep.disable_footer_signature_verification(); } if !val.aad_prefix_as_hex.is_empty() { - let aad_prefix = - hex::decode(&val.aad_prefix_as_hex).expect("Invalid AAD prefix"); + let aad_prefix = hex::decode(&val.aad_prefix_as_hex).map_err(|e| { + DataFusionError::Configuration(format!( + "Could not decode hex AAD prefix from ConfigFileDecryptionProperties: {e}." + )) + })?; fep = fep.with_aad_prefix(aad_prefix); } - Arc::unwrap_or_clone(fep.build().unwrap()) + Ok(Arc::unwrap_or_clone(fep.build().map_err(|e| { + DataFusionError::Configuration(format!( + "Could not build FileDecryptionProperties: {e}." + )) + })?)) } } @@ -3586,13 +3625,13 @@ mod tests { let config_encrypt = ConfigFileEncryptionProperties::from(&file_encryption_properties); let encryption_properties_built = - Arc::new(FileEncryptionProperties::from(config_encrypt.clone())); + Arc::new(FileEncryptionProperties::try_from(config_encrypt.clone()).unwrap()); assert_eq!(file_encryption_properties, encryption_properties_built); let config_decrypt = ConfigFileDecryptionProperties::try_from(&decryption_properties).unwrap(); let decryption_properties_built = - Arc::new(FileDecryptionProperties::from(config_decrypt.clone())); + Arc::new(FileDecryptionProperties::try_from(config_decrypt.clone()).unwrap()); assert_eq!(decryption_properties, decryption_properties_built); /////////////////////////////////////////////////////////////////////////////////// @@ -3678,6 +3717,124 @@ mod tests { ); } + #[cfg(feature = "parquet_encryption")] + #[test] + fn parquet_encryption_invalid_hex_errors_encryption() { + use crate::config::ColumnEncryptionProperties; + use crate::config::ConfigFileEncryptionProperties; + use parquet::encryption::encrypt::FileEncryptionProperties; + use std::collections::HashMap; + + let valid_footer_key_as_hex = hex::encode(b"0123456789012345"); + + let mut enc = ConfigFileEncryptionProperties { + encrypt_footer: true, + footer_key_as_hex: valid_footer_key_as_hex.clone(), + footer_key_metadata_as_hex: String::new(), + column_encryption_properties: HashMap::new(), + aad_prefix_as_hex: String::new(), + store_aad_prefix: false, + }; + + // Encryption: invalid footer key hex + enc.footer_key_as_hex = "not_hex".to_string(); + let err = FileEncryptionProperties::try_from(enc.clone()) + .unwrap_err() + .to_string(); + assert!(err.contains("Unable to decode hex footer key")); + enc.footer_key_as_hex = valid_footer_key_as_hex.clone(); + + // Encryption: invalid footer key metadata hex + enc.footer_key_metadata_as_hex = "zz".to_string(); + let err = FileEncryptionProperties::try_from(enc.clone()) + .unwrap_err() + .to_string(); + assert!(err.contains("Unable to decode hex footer key metadata")); + enc.footer_key_metadata_as_hex = String::new(); + + // Encryption: invalid column key hex + enc.column_encryption_properties.insert( + "col1".to_string(), + ColumnEncryptionProperties { + column_key_as_hex: "bad".to_string(), + column_metadata_as_hex: None, + }, + ); + let err = FileEncryptionProperties::try_from(enc.clone()) + .unwrap_err() + .to_string(); + assert!(err.contains("Unable to decode hex encryption key for column col1")); + enc.column_encryption_properties.clear(); + + // Encryption: invalid column metadata hex + enc.column_encryption_properties.insert( + "col1".to_string(), + ColumnEncryptionProperties { + column_key_as_hex: hex::encode(b"1234567890123450"), + column_metadata_as_hex: Some("zz".to_string()), + }, + ); + let err = FileEncryptionProperties::try_from(enc.clone()) + .unwrap_err() + .to_string(); + assert!(err.contains("Unable to decode hex column metadata for column col1")); + enc.column_encryption_properties.clear(); + + // Encryption: invalid AAD prefix hex + enc.aad_prefix_as_hex = "zz".to_string(); + let err = FileEncryptionProperties::try_from(enc.clone()) + .unwrap_err() + .to_string(); + assert!(err.contains("Unable to decode hex AAD prefix")); + } + + #[cfg(feature = "parquet_encryption")] + #[test] + fn parquet_encryption_invalid_hex_errors_decryption() { + use crate::config::ColumnDecryptionProperties; + use crate::config::ConfigFileDecryptionProperties; + use parquet::encryption::decrypt::FileDecryptionProperties; + use std::collections::HashMap; + + let valid_footer_key_as_hex = hex::encode(b"0123456789012345"); + + let mut dec = ConfigFileDecryptionProperties { + footer_key_as_hex: valid_footer_key_as_hex.clone(), + column_decryption_properties: HashMap::new(), + aad_prefix_as_hex: String::new(), + footer_signature_verification: true, + }; + + // Decryption: invalid column key hex + dec.column_decryption_properties.insert( + "col1".to_string(), + ColumnDecryptionProperties { + column_key_as_hex: "bad".to_string(), + }, + ); + let err = FileDecryptionProperties::try_from(dec.clone()) + .unwrap_err() + .to_string(); + assert!(err.contains("Could not decode hex column key")); + assert!(err.contains("col1")); + dec.column_decryption_properties.clear(); + + // Decryption: invalid footer key hex + dec.footer_key_as_hex = "bad".to_string(); + let err = FileDecryptionProperties::try_from(dec.clone()) + .unwrap_err() + .to_string(); + assert!(err.contains("Could not decode hex footer key")); + dec.footer_key_as_hex = valid_footer_key_as_hex; + + // Decryption: invalid AAD prefix hex + dec.aad_prefix_as_hex = "zz".to_string(); + let err = FileDecryptionProperties::try_from(dec.clone()) + .unwrap_err() + .to_string(); + assert!(err.contains("Could not decode hex AAD prefix")); + } + #[cfg(feature = "parquet_encryption")] #[test] fn parquet_encryption_factory_config() { diff --git a/datafusion/datasource-parquet/src/file_format.rs b/datafusion/datasource-parquet/src/file_format.rs index d97077d609efd..5676ee940c7b8 100644 --- a/datafusion/datasource-parquet/src/file_format.rs +++ b/datafusion/datasource-parquet/src/file_format.rs @@ -308,7 +308,7 @@ async fn get_file_decryption_properties( file_path: &Path, ) -> Result>> { Ok(match &options.crypto.file_decryption { - Some(cfd) => Some(Arc::new(FileDecryptionProperties::from(cfd.clone()))), + Some(cfd) => Some(Arc::new(FileDecryptionProperties::try_from(cfd.clone())?)), None => match &options.crypto.factory_id { Some(factory_id) => { let factory = @@ -1284,7 +1284,7 @@ async fn set_writer_encryption_properties( if let Some(file_encryption_properties) = parquet_opts.crypto.file_encryption { // Encryption properties have been specified directly return Ok(builder.with_file_encryption_properties(Arc::new( - FileEncryptionProperties::from(file_encryption_properties), + FileEncryptionProperties::try_from(file_encryption_properties)?, ))); } else if let Some(encryption_factory_id) = &parquet_opts.crypto.factory_id.as_ref() { // Encryption properties will be generated by an encryption factory diff --git a/datafusion/datasource-parquet/src/source.rs b/datafusion/datasource-parquet/src/source.rs index a014c8b2726e7..787f882908e9b 100644 --- a/datafusion/datasource-parquet/src/source.rs +++ b/datafusion/datasource-parquet/src/source.rs @@ -543,7 +543,8 @@ impl FileSource for ParquetSource { .crypto .file_decryption .clone() - .map(FileDecryptionProperties::from) + .map(FileDecryptionProperties::try_from) + .transpose()? .map(Arc::new); let coerce_int96 = self diff --git a/docs/source/library-user-guide/upgrading/54.0.0.md b/docs/source/library-user-guide/upgrading/54.0.0.md index 34d1f7c61eaf1..e6885fbdfa947 100644 --- a/docs/source/library-user-guide/upgrading/54.0.0.md +++ b/docs/source/library-user-guide/upgrading/54.0.0.md @@ -447,6 +447,48 @@ let config_decryption_properties = ConfigFileDecryptionProperties::try_from(&dec See [#21602](https://github.com/apache/datafusion/issues/21602) and [PR #21603](https://github.com/apache/datafusion/pull/21603) for details. +### Conversion from `ConfigFileEncryptionProperties` / `ConfigFileDecryptionProperties` is now fallible + +Previously, `datafusion_common::config::ConfigFileEncryptionProperties` and +`datafusion_common::config::ConfigFileDecryptionProperties` implemented infallible +conversions into Parquet's encryption/decryption types (via `From` / `Into`). +These conversions may need to decode hex-encoded keys and other configuration values, which can fail. + +They now use `TryFrom` / `TryInto` and return a `Result`: + +- `impl TryFrom for parquet::encryption::encrypt::FileEncryptionProperties` +- `impl TryFrom for parquet::encryption::decrypt::FileDecryptionProperties` + +**Migration guide:** + +Replace `from()` / `into()` with `try_from()` / `try_into()` and handle the resulting `Result`. + +**Before:** + +```rust,ignore +let file_encryption_properties: FileEncryptionProperties = config_encryption_properties.into(); +// or +let file_decryption_properties = FileDecryptionProperties::from(config_decryption_properties); +``` + +( +where `config_encryption_properties` is a `ConfigFileEncryptionProperties` and +`config_decryption_properties` is a `ConfigFileDecryptionProperties` +) + +**After:** + +```rust,ignore +let file_encryption_properties: FileEncryptionProperties = + config_encryption_properties.try_into()?; +// or +let file_decryption_properties = + FileDecryptionProperties::try_from(config_decryption_properties)?; +``` + +See [#21974](https://github.com/apache/datafusion/issues/21974) and +[PR #21985](https://github.com/apache/datafusion/pull/21985) for details. + ### `approx_percentile_cont`, `approx_percentile_cont_with_weight`, `approx_median` now coerce to floats The type signatures of `approx_percentile_cont`, `approx_percentile_cont_with_weight`, and