diff --git a/bindgen-tests/tests/expectations/tests/clang-macro-fallback-pointer.rs b/bindgen-tests/tests/expectations/tests/clang-macro-fallback-pointer.rs new file mode 100644 index 0000000000..9d0188344f --- /dev/null +++ b/bindgen-tests/tests/expectations/tests/clang-macro-fallback-pointer.rs @@ -0,0 +1,18 @@ +#![allow(dead_code, non_snake_case, non_camel_case_types, non_upper_case_globals)] +pub const BEFORE_DECL: *mut later = 3u64 as usize as *mut later; +pub const CONST_PTR: *const later = 4u64 as usize as *const later; +pub const TYPEDEF_PTR: *mut later = 5u64 as usize as *mut later; +pub const MAP_FAILED: *mut ::std::os::raw::c_void = 18446744073709551615u64 as usize + as *mut ::std::os::raw::c_void; +pub const MAP_FAILED_ALIAS: *mut ::std::os::raw::c_void = 18446744073709551615u64 + as usize as *mut ::std::os::raw::c_void; +pub const MAP_FAILED_EQUALS_ITSELF: u32 = 1; +pub const REDEFINED_FROM_INT: u32 = 1; +pub const REDEFINED_ALIAS: *mut ::std::os::raw::c_void = 2u64 as usize + as *mut ::std::os::raw::c_void; +#[repr(C)] +#[derive(Debug)] +pub struct later { + _unused: [u8; 0], +} +pub type later_ptr = *mut later; diff --git a/bindgen-tests/tests/expectations/tests/libclang-9/clang-macro-fallback-pointer.rs b/bindgen-tests/tests/expectations/tests/libclang-9/clang-macro-fallback-pointer.rs new file mode 100644 index 0000000000..d96effbb22 --- /dev/null +++ b/bindgen-tests/tests/expectations/tests/libclang-9/clang-macro-fallback-pointer.rs @@ -0,0 +1,8 @@ +#![allow(dead_code, non_snake_case, non_camel_case_types, non_upper_case_globals)] +pub const REDEFINED_FROM_INT: u32 = 1; +#[repr(C)] +#[derive(Debug)] +pub struct later { + _unused: [u8; 0], +} +pub type later_ptr = *mut later; diff --git a/bindgen-tests/tests/headers/clang-macro-fallback-pointer.h b/bindgen-tests/tests/headers/clang-macro-fallback-pointer.h new file mode 100644 index 0000000000..6e3a207197 --- /dev/null +++ b/bindgen-tests/tests/headers/clang-macro-fallback-pointer.h @@ -0,0 +1,16 @@ +// bindgen-flags: --clang-macro-fallback + +#define BEFORE_DECL ((struct later *)3UL) +#define CONST_PTR ((const struct later *)4UL) + +struct later; + +typedef struct later *later_ptr; +#define TYPEDEF_PTR ((later_ptr)5UL) +#define MAP_FAILED ((void *)-1) +#define MAP_FAILED_ALIAS MAP_FAILED +#define MAP_FAILED_EQUALS_ITSELF (MAP_FAILED == MAP_FAILED) +#define REDEFINED_FROM_INT 1 +#undef REDEFINED_FROM_INT +#define REDEFINED_FROM_INT ((void *)2) +#define REDEFINED_ALIAS REDEFINED_FROM_INT diff --git a/bindgen/codegen/mod.rs b/bindgen/codegen/mod.rs index 7a998c8fac..235b8d2125 100644 --- a/bindgen/codegen/mod.rs +++ b/bindgen/codegen/mod.rs @@ -807,6 +807,7 @@ impl CodeGenerator for Var { } VarType::Float(f) => helpers::ast_ty::float_expr(f).ok(), VarType::Char(c) => Some(c.to_token_stream()), + VarType::Pointer(val) => Some(quote! { #val as usize as #ty }), }; if let Some(mut val) = const_expr { diff --git a/bindgen/ir/context.rs b/bindgen/ir/context.rs index b5b6b4a000..d478ad24a0 100644 --- a/bindgen/ir/context.rs +++ b/bindgen/ir/context.rs @@ -19,6 +19,7 @@ use super::module::{Module, ModuleKind}; use super::template::{TemplateInstantiation, TemplateParameters}; use super::traversal::{self, Edge, ItemTraversal}; use super::ty::{FloatKind, Type, TypeKind}; +use super::var::PendingPointerMacro; use crate::clang::{self, ABIKind, Cursor}; use crate::codegen::CodegenError; use crate::ir::item::ItemCanonicalName; @@ -350,13 +351,18 @@ pub(crate) struct BindgenContext { /// potentially break that assumption. currently_parsed_types: Vec, - /// A map with all the already parsed macro names. This is done to avoid - /// hard errors while parsing duplicated macros, as well to allow macro - /// expression parsing. + /// Parsed macro names whose current value cannot be provided to `cexpr`. + non_cexpr_macros: HashSet>, + + /// Values from parsed macros that `cexpr` can use while parsing derived + /// macro expressions. /// /// This needs to be an `std::HashMap` because the `cexpr` API requires it. parsed_macros: StdHashMap, cexpr::expr::EvalResult>, + /// Pointer-valued macros whose type is materialized after macro parsing. + pending_pointer_macros: Vec, + /// A map with all include locations. /// /// This is needed so that items are created in the order they are defined in. @@ -587,7 +593,9 @@ If you encounter an error missing from this list, please file an issue or a PR!" current_module: root_module_id, semantic_parents: Default::default(), currently_parsed_types: vec![], + non_cexpr_macros: Default::default(), parsed_macros: Default::default(), + pending_pointer_macros: vec![], replacements: Default::default(), collected_typerefs: false, in_codegen: false, @@ -2130,7 +2138,8 @@ If you encounter an error missing from this list, please file an issue or a PR!" /// Have we parsed the macro named `macro_name` already? pub(crate) fn parsed_macro(&self, macro_name: &[u8]) -> bool { - self.parsed_macros.contains_key(macro_name) + self.parsed_macros.contains_key(macro_name) || + self.non_cexpr_macros.contains(macro_name) } /// Get the currently parsed macros. @@ -2141,13 +2150,34 @@ If you encounter an error missing from this list, please file an issue or a PR!" &self.parsed_macros } - /// Mark the macro named `macro_name` as parsed. + /// Mark a macro as parsed and update any value usable by `cexpr`. pub(crate) fn note_parsed_macro( &mut self, id: Vec, - value: cexpr::expr::EvalResult, + value: Option, ) { - self.parsed_macros.insert(id, value); + if let Some(value) = value { + self.non_cexpr_macros.remove(&id); + self.parsed_macros.insert(id, value); + } else { + self.parsed_macros.remove(&id); + self.non_cexpr_macros.insert(id); + } + } + + /// Defer materialization of a pointer-valued macro until parsing finishes. + pub(crate) fn note_pending_pointer_macro( + &mut self, + pointer_macro: PendingPointerMacro, + ) { + self.pending_pointer_macros.push(pointer_macro); + } + + /// Take all pointer-valued macros waiting for final materialization. + pub(crate) fn take_pending_pointer_macros( + &mut self, + ) -> Vec { + mem::take(&mut self.pending_pointer_macros) } /// Are we in the codegen phase? diff --git a/bindgen/ir/var.rs b/bindgen/ir/var.rs index 9d72dcf06e..b6a09f67af 100644 --- a/bindgen/ir/var.rs +++ b/bindgen/ir/var.rs @@ -1,11 +1,13 @@ //! Intermediate representation of variables. use super::super::codegen::MacroTypeVariation; -use super::context::{BindgenContext, TypeId}; +use super::annotations::Annotations; +use super::context::{BindgenContext, ItemId, TypeId}; use super::dot::DotAttributes; use super::function::cursor_mangling; use super::int::IntKind; use super::item::Item; +use super::item_kind::ItemKind as IrItemKind; use super::ty::{FloatKind, TypeKind}; use crate::callbacks::{ItemInfo, ItemKind, MacroParsingBehavior}; use crate::clang; @@ -28,6 +30,27 @@ pub(crate) enum VarType { Char(u8), /// A string, not necessarily well-formed utf-8. String(Vec), + /// A pointer represented as an integer constant. + Pointer(u64), +} + +/// The value obtained when parsing a macro. +enum MacroValue { + /// A value parsed using `cexpr` or evaluated by Clang as an integer. + Expr(cexpr::expr::EvalResult), + /// A data pointer value recognized by Clang. + Pointer, +} + +/// A pointer-valued macro waiting to have its type materialized. +#[derive(Debug)] +pub(crate) struct PendingPointerMacro { + /// The item ID reserved at the macro's source position. + id: ItemId, + /// The macro name. + name: String, + /// The original macro cursor in the primary translation unit. + cursor: clang::Cursor, } /// A `Var` is our intermediate representation of a variable. @@ -207,10 +230,13 @@ impl ClangSubItemParser for Var { let previously_defined = ctx.parsed_macro(&id); - // NB: It's important to "note" the macro even if the result is - // not an integer, otherwise we might loose other kind of - // derived macros. - ctx.note_parsed_macro(id.clone(), value.clone()); + // Keep pointer macros unknown to `cexpr`: derived expressions + // must still be eligible for Clang fallback evaluation. + let cexpr_value = match &value { + MacroValue::Expr(value) => Some(value.clone()), + MacroValue::Pointer => None, + }; + ctx.note_parsed_macro(id.clone(), cexpr_value); if previously_defined { let name = String::from_utf8(id).unwrap(); @@ -223,6 +249,18 @@ impl ClangSubItemParser for Var { // enforce utf8 there, so we should have already panicked at // this point. let name = String::from_utf8(id).unwrap(); + let value = + match value { + MacroValue::Expr(value) => value, + MacroValue::Pointer => { + let id = ctx.next_item_id(); + ctx.note_pending_pointer_macro( + PendingPointerMacro { id, name, cursor }, + ); + return Err(ParseError::Continue); + } + }; + let (type_kind, val) = match value { EvalResult::Invalid => return Err(ParseError::Continue), EvalResult::Float(f) => { @@ -390,13 +428,18 @@ impl ClangSubItemParser for Var { fn parse_macro_clang_fallback( ctx: &mut BindgenContext, cursor: &clang::Cursor, -) -> Option<(Vec, cexpr::expr::EvalResult)> { +) -> Option<(Vec, MacroValue)> { + use clang_sys::{ + CXType_FunctionNoProto, CXType_FunctionProto, CXType_Pointer, + }; + if !ctx.options().clang_macro_fallback { return None; } let ftu = ctx.try_ensure_fallback_translation_unit()?; - let contents = format!("int main() {{ {}; }}", cursor.spelling()); + let name = cursor.spelling(); + let contents = format!("int main() {{ {name}; }}"); ftu.reparse(&contents).ok()?; // Children of root node of AST let root_children = ftu.translation_unit().cursor().collect_children(); @@ -413,18 +456,33 @@ fn parse_macro_clang_fallback( // First child in all_exprs is the expression utilizing the given macro to be evaluated // Should be ParenExpr let paren = paren_exprs.first()?; + let canonical_ty = paren.cur_type().canonical_type(); + + if canonical_ty.kind() != CXType_Pointer { + return Some(( + name.into_bytes(), + MacroValue::Expr(cexpr::expr::EvalResult::Int(Wrapping( + paren.evaluate()?.as_int()?, + ))), + )); + } + + let pointee = canonical_ty.pointee_type()?; + if matches!( + pointee.kind(), + CXType_FunctionNoProto | CXType_FunctionProto + ) { + return None; + } - Some(( - cursor.spelling().into_bytes(), - cexpr::expr::EvalResult::Int(Wrapping(paren.evaluate()?.as_int()?)), - )) + Some((name.into_bytes(), MacroValue::Pointer)) } /// Try and parse a macro using all the macros parsed until now. fn parse_macro( ctx: &mut BindgenContext, cursor: &clang::Cursor, -) -> Option<(Vec, cexpr::expr::EvalResult)> { +) -> Option<(Vec, MacroValue)> { use cexpr::expr; let mut cexpr_tokens = cursor.cexpr_tokens(); @@ -436,8 +494,102 @@ fn parse_macro( let parser = expr::IdentifierParser::new(ctx.parsed_macros()); match parser.macro_definition(&cexpr_tokens) { - Ok((_, (id, val))) => Some((id.into(), val)), - _ => parse_macro_clang_fallback(ctx, cursor), + Ok((_, (id, value))) => Some((id.into(), MacroValue::Expr(value))), + Err(_) => parse_macro_clang_fallback(ctx, cursor), + } +} + +/// Materialize all pointer macro types from one final fallback translation +/// unit, which remains valid throughout deferred type resolution. +pub(crate) fn finish_pending_pointer_macros(ctx: &mut BindgenContext) { + use clang_sys::{CXChildVisit_Break, CXChildVisit_Recurse, CXType_Pointer}; + + let pending = ctx.take_pending_pointer_macros(); + if pending.is_empty() { + return; + } + + let statements = pending + .iter() + .map(|pointer_macro| { + format!("{{ (unsigned long long)({}); }}", pointer_macro.name) + }) + .collect::>() + .join(" "); + let contents = format!("int main() {{ {statements} }}"); + let expressions = { + let Some(ftu) = ctx.try_ensure_fallback_translation_unit() else { + return; + }; + if ftu.reparse(&contents).is_err() { + return; + } + let root_children = ftu.translation_unit().cursor().collect_children(); + let Some(main_func) = root_children.last() else { + return; + }; + let all_stmts = main_func.collect_children(); + let Some(macro_stmt) = all_stmts.first() else { + return; + }; + macro_stmt + .collect_children() + .into_iter() + .map(|statement| { + let value_expression = *statement.collect_children().first()?; + let value = value_expression.evaluate()?.as_int()? as u64; + let mut pointer_expression = None; + value_expression.visit(|child| { + if child.cur_type().canonical_type().kind() == + CXType_Pointer + { + pointer_expression = Some(child); + CXChildVisit_Break + } else { + CXChildVisit_Recurse + } + }); + Some((pointer_expression?, value)) + }) + .collect::>() + }; + if expressions.len() != pending.len() { + return; + } + + for (pointer_macro, expression) in pending.into_iter().zip(expressions) { + let Some((expression, value)) = expression else { + continue; + }; + let Ok(ty) = Item::from_ty( + &expression.cur_type().canonical_type(), + expression, + None, + ctx, + ) else { + continue; + }; + let cursor = pointer_macro.cursor; + let var = Var::new( + pointer_macro.name, + None, + None, + ty, + Some(VarType::Pointer(value)), + true, + ); + ctx.add_item( + Item::new( + pointer_macro.id, + cursor.raw_comment(), + Annotations::new(&cursor), + ctx.root_module().into(), + IrItemKind::Var(var), + Some(cursor.location()), + ), + Some(cursor), + Some(cursor), + ); } } diff --git a/bindgen/lib.rs b/bindgen/lib.rs index 0305b5cd7b..5d9637cafc 100644 --- a/bindgen/lib.rs +++ b/bindgen/lib.rs @@ -1169,6 +1169,7 @@ fn parse(context: &mut BindgenContext) -> Result<(), BindgenError> { context.root_module(), "How did this happen?" ); + ir::var::finish_pending_pointer_macros(context); Ok(()) }