From 5b545dd327dda3dcdb64ab36ebcbde0bbcd49e24 Mon Sep 17 00:00:00 2001 From: Brandon Miller Date: Thu, 15 Jan 2026 11:00:05 -0500 Subject: [PATCH] Perform function lifting and inlining in arch plugins This change allows architecture plugins to override the LiftFunction callback to iterate a function's basic block list and lift entire functions at once. This is required for architectures such as TMS320 C6x, which have non-traditional "delay slots" in that branches, loads, and other instructions take multiple cycles to complete, and branch instructions can reside within the delay slots of other branches. --- architecture.cpp | 185 ++++++++ basicblock.cpp | 6 + binaryninjaapi.h | 102 ++++- binaryninjacore.h | 41 ++ binaryview.cpp | 10 + defaultabb/plugin.cpp | 12 - defaultabb.cpp => defaultarch.cpp | 471 +++++++++++++++++++++ {defaultabb => defaultarch}/CMakeLists.txt | 22 +- defaultarch/plugin.cpp | 16 + function.cpp | 7 +- lowlevelil.cpp | 12 + python/architecture.py | 139 ++++++ python/log.py | 7 +- rust/src/architecture.rs | 50 +++ 14 files changed, 1050 insertions(+), 30 deletions(-) delete mode 100644 defaultabb/plugin.cpp rename defaultabb.cpp => defaultarch.cpp (57%) rename {defaultabb => defaultarch}/CMakeLists.txt (59%) create mode 100644 defaultarch/plugin.cpp diff --git a/architecture.cpp b/architecture.cpp index 64d3907c70..1901438f0a 100644 --- a/architecture.cpp +++ b/architecture.cpp @@ -466,6 +466,169 @@ void BasicBlockAnalysisContext::Finalize() } +FunctionLifterContext::FunctionLifterContext(LowLevelILFunction* func, BNFunctionLifterContext* context) +{ + m_context = context; + m_view = func->GetFunction()->GetView(); + m_function = new LowLevelILFunction(BNNewLowLevelILFunctionReference(func->GetObject())); + m_platform = new Platform(BNNewPlatformReference(context->platform)); + m_logger = new Logger(BNNewLoggerReference(context->logger)); + m_blocks.reserve(context->basicBlockCount); + for (size_t i = 0; i < context->basicBlockCount; i++) + { + m_blocks.emplace_back(new BasicBlock(context->basicBlocks[i])); + } + + for (size_t i = 0; i < context->noReturnCallsCount; i++) + { + ArchAndAddr addr(new CoreArchitecture(context->noReturnCalls[i].arch), context->noReturnCalls[i].address); + m_noReturnCalls.insert(addr); + } + + for (size_t i = 0; i < context->contextualFunctionReturnCount; i++) + { + ArchAndAddr addr(new CoreArchitecture(context->contextualFunctionReturnLocations[i].arch), + context->contextualFunctionReturnLocations[i].address); + m_contextualReturns[addr] = context->contextualFunctionReturnValues[i]; + } + + for (size_t i = 0; i < context->inlinedRemappingEntryCount; i++) + { + ArchAndAddr key( + new CoreArchitecture(context->inlinedRemappingKeys[i].arch), context->inlinedRemappingKeys[i].address); + ArchAndAddr value( + new CoreArchitecture(context->inlinedRemappingValues[i].arch), context->inlinedRemappingValues[i].address); + m_inlinedRemapping[key] = value; + } + + for (size_t i = 0; i < context->indirectBranchesCount; i++) + { + ArchAndAddr src( + new CoreArchitecture(context->indirectBranches[i].sourceArch), context->indirectBranches[i].sourceAddr); + ArchAndAddr dest( + new CoreArchitecture(context->indirectBranches[i].destArch), context->indirectBranches[i].destAddr); + if (context->indirectBranches[i].autoDefined) + m_autoIndirectBranches[src].insert(dest); + else + m_userIndirectBranches[src].insert(dest); + } + + for (size_t i = 0; i < context->inlinedCallsCount; i++) + { + m_inlinedCalls.insert(context->inlinedCalls[i]); + } + + 
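+	// Note: containsInlinedFunctions is the only OUT field in BNFunctionLifterContext; the
+	// core owns that flag and reads it back once lifting finishes, so the wrapper stores the
+	// raw pointer and writes through it in SetContainsInlinedFunctions() below.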
m_containsInlinedFunctions = context->containsInlinedFunctions; +} + + +Ref& FunctionLifterContext::GetView() +{ + return m_view; +} + + +Ref& FunctionLifterContext::GetPlatform() +{ + return m_platform; +} + + +std::map& FunctionLifterContext::GetInlinedRemapping() +{ + return m_inlinedRemapping; +} + + +std::map>& FunctionLifterContext::GetUserIndirectBranches() +{ + return m_userIndirectBranches; +} + + +std::map>& FunctionLifterContext::GetAutoIndirectBranches() +{ + return m_autoIndirectBranches; +} + + +Ref& FunctionLifterContext::GetLogger() +{ + return m_logger; +} + + +vector>& FunctionLifterContext::GetBasicBlocks() +{ + return m_blocks; +} + + +std::set& FunctionLifterContext::GetNoReturnCalls() +{ + return m_noReturnCalls; +} + + +std::map& FunctionLifterContext::GetContextualReturns() +{ + return m_contextualReturns; +} + + +std::set& FunctionLifterContext::GetInlinedCalls() +{ + return m_inlinedCalls; +} + + +void FunctionLifterContext::SetContainsInlinedFunctions(bool value) +{ + *m_containsInlinedFunctions = value; +} + + +void FunctionLifterContext::PrepareBlockTranslation(LowLevelILFunction* function, Architecture* arch, uint64_t addr) +{ + BNPrepareBlockTranslation(function->GetObject(), arch->GetObject(), addr); +} + + +std::vector> FunctionLifterContext::PrepareToCopyForeignFunction(LowLevelILFunction* function) +{ + size_t blockCount = 0; + BNBasicBlock** bnBlocks = + BNPrepareToCopyForeignFunction(m_function->GetObject(), function->GetObject(), &blockCount); + std::vector> blocks; + blocks.reserve(blockCount); + for (size_t i = 0; i < blockCount; i++) + blocks.emplace_back(new BasicBlock(BNNewBasicBlockReference(bnBlocks[i]))); + + BNFreeBasicBlockList(bnBlocks, blockCount); + return blocks; +} + + +Ref FunctionLifterContext::GetForeignFunctionLiftedIL(Ref func) +{ + size_t inlinedCallsCount = m_inlinedCalls.size(); + uint64_t* inlinedCalls = nullptr; + if (inlinedCallsCount) + inlinedCalls = new uint64_t[inlinedCallsCount]; + + BNLowLevelILFunction* il = + BNGetForeignFunctionLiftedIL(func->GetObject(), m_logger->GetObject(), inlinedCallsCount, inlinedCalls); + + if (inlinedCalls) + delete[] inlinedCalls; + + if (!il) + return nullptr; + + return new LowLevelILFunction(il); +} + + Architecture::Architecture(BNArchitecture* arch) { m_object = arch; @@ -600,6 +763,15 @@ void Architecture::AnalyzeBasicBlocksCallback(void *ctxt, BNFunction* function, } +bool Architecture::LiftFunctionCallback(void* ctxt, BNLowLevelILFunction* function, BNFunctionLifterContext* context) +{ + CallbackRef arch(ctxt); + Ref func(new LowLevelILFunction(BNNewLowLevelILFunctionReference(function))); + FunctionLifterContext flc(func, context); + return arch->LiftFunction(func, flc); +} + + char* Architecture::GetRegisterNameCallback(void* ctxt, uint32_t reg) { CallbackRef arch(ctxt); @@ -1091,6 +1263,7 @@ void Architecture::Register(Architecture* arch) callbacks.freeInstructionText = FreeInstructionTextCallback; callbacks.getInstructionLowLevelIL = GetInstructionLowLevelILCallback; callbacks.analyzeBasicBlocks = AnalyzeBasicBlocksCallback; + callbacks.liftFunction = LiftFunctionCallback; callbacks.getRegisterName = GetRegisterNameCallback; callbacks.getFlagName = GetFlagNameCallback; callbacks.getFlagWriteTypeName = GetFlagWriteTypeNameCallback; @@ -1225,6 +1398,12 @@ void Architecture::AnalyzeBasicBlocks(Function* function, BasicBlockAnalysisCont } +bool Architecture::LiftFunction(LowLevelILFunction* function, FunctionLifterContext& context) +{ + return DefaultLiftFunction(function, context); 
+} + + string Architecture::GetRegisterName(uint32_t reg) { return fmt::format("r{}", reg); @@ -1792,6 +1971,12 @@ void CoreArchitecture::AnalyzeBasicBlocks(Function* function, BasicBlockAnalysis } +bool CoreArchitecture::LiftFunction(LowLevelILFunction* function, FunctionLifterContext& context) +{ + return BNArchitectureLiftFunction(m_object, function->GetObject(), context.m_context); +} + + string CoreArchitecture::GetRegisterName(uint32_t reg) { char* name = BNGetArchitectureRegisterName(m_object, reg); diff --git a/basicblock.cpp b/basicblock.cpp index a548c70818..8025a9fc20 100644 --- a/basicblock.cpp +++ b/basicblock.cpp @@ -454,6 +454,12 @@ void BasicBlock::SetUndeterminedOutgoingEdges(bool value) } +bool BasicBlock::HasInstructionData() const +{ + return BNBasicBlockHasInstructionData(m_object); +} + + const uint8_t* BasicBlock::GetInstructionData(uint64_t addr, size_t* len) const { return BNBasicBlockGetInstructionData(m_object, addr, len); diff --git a/binaryninjaapi.h b/binaryninjaapi.h index e28ebfb1a2..ed0ae3713f 100644 --- a/binaryninjaapi.h +++ b/binaryninjaapi.h @@ -6164,6 +6164,7 @@ namespace BinaryNinja { std::vector> GetRelocationRangesInRange(uint64_t addr, size_t size) const; bool RangeContainsRelocation(uint64_t addr, size_t size) const; std::vector> GetRelocationsAt(uint64_t addr) const; + Ref GetNextRelocation(uint64_t addr, uint64_t maxAddr = 0); /*! Provides a mechanism for receiving callbacks for various analysis events. @@ -9387,6 +9388,44 @@ namespace BinaryNinja { void Finalize(); }; + class FunctionLifterContext + { + Ref m_function; + Ref m_view; + Ref m_platform; + Ref m_logger; + std::vector> m_blocks; + std::set m_noReturnCalls; + std::map m_contextualReturns; + std::map m_inlinedRemapping; + std::map> m_userIndirectBranches; + std::map> m_autoIndirectBranches; + std::set m_inlinedCalls; + bool* m_containsInlinedFunctions; + + public: + BNFunctionLifterContext* m_context; + FunctionLifterContext(LowLevelILFunction* func, BNFunctionLifterContext* context); + Ref& GetView(); + Ref& GetPlatform(); + Ref& GetLogger(); + std::vector>& GetBasicBlocks(); + std::set& GetNoReturnCalls(); + std::map& GetContextualReturns(); + std::map& GetInlinedRemapping(); + std::map>& GetUserIndirectBranches(); + std::map>& GetAutoIndirectBranches(); + std::set& GetInlinedCalls(); + void SetContainsInlinedFunctions(bool value); + + void CheckForInlinedCall(BasicBlock* block, size_t instrCountBefore, size_t instrCountAfter, uint64_t prevAddr, + uint64_t addr, const uint8_t* opcode, size_t len, + std::optional> indirectSource); + void PrepareBlockTranslation(LowLevelILFunction* function, Architecture* arch, uint64_t addr); + std::vector> PrepareToCopyForeignFunction(LowLevelILFunction* function); + Ref GetForeignFunctionLiftedIL(Ref func); + }; + /*! The Architecture class is the base class for all CPU architectures. This provides disassembly, assembly, patching, and IL translation lifting for a given architecture. 
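A minimal sketch of how an architecture plugin opts into the new hook. The class name DelaySlotArchitecture and the pre-pass are illustrative only (they are not part of this patch); the grounded pieces are the LiftFunction signature, the FunctionLifterContext accessors declared above, and the DefaultLiftFunction fallback.

    class DelaySlotArchitecture : public Architecture
    {
    public:
        using Architecture::Architecture;

        // ... GetInstructionInfo/GetInstructionText and the other required overrides omitted ...

        bool LiftFunction(LowLevelILFunction* function, FunctionLifterContext& context) override
        {
            // Pre-pass over the whole function: with every block visible at once, branches whose
            // delay slots overlap other branches can be scheduled before any IL is emitted.
            for (auto& block : context.GetBasicBlocks())
            {
                (void)block;  // e.g. record branch/delay-slot layout for this block
            }

            // This sketch then defers to the stock lifter, which already handles relocations,
            // no-return calls, indirect branches, and call inlining.
            return DefaultLiftFunction(function, context);
        }
    };

Registration is unchanged (Architecture::Register(new DelaySlotArchitecture(...))); once registered, the core reaches the override through the new liftFunction callback slot.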
@@ -9415,6 +9454,7 @@ namespace BinaryNinja { static bool GetInstructionLowLevelILCallback( void* ctxt, const uint8_t* data, uint64_t addr, size_t* len, BNLowLevelILFunction* il); static void AnalyzeBasicBlocksCallback(void *ctxt, BNFunction* function, BNBasicBlockAnalysisContext* context); + static bool LiftFunctionCallback(void* ctxt, BNLowLevelILFunction* function, BNFunctionLifterContext* context); static char* GetRegisterNameCallback(void* ctxt, uint32_t reg); static char* GetFlagNameCallback(void* ctxt, uint32_t flag); static char* GetFlagWriteTypeNameCallback(void* ctxt, uint32_t flags); @@ -9496,6 +9536,16 @@ namespace BinaryNinja { */ static void DefaultAnalyzeBasicBlocks(Function* function, BasicBlockAnalysisContext& context); + static bool DefaultLiftFunctionCallback(BNLowLevelILFunction* function, BNFunctionLifterContext* context); + + /*! Default implementation of LiftFunction + + \param function Function to analyze + \param context Context for the analysis + \return Whether lifting was successful + */ + static bool DefaultLiftFunction(LowLevelILFunction* function, FunctionLifterContext& context); + /*! Get an Architecture by name \param name Name of the architecture @@ -9600,6 +9650,14 @@ namespace BinaryNinja { */ virtual void AnalyzeBasicBlocks(Function* function, BasicBlockAnalysisContext& context); + /*! Lift function instructions to IL + + \param function Function to analyze + \param context Context for the analysis + \return Whether lifting was successful + */ + virtual bool LiftFunction(LowLevelILFunction* function, FunctionLifterContext& context); + /*! Gets a register name from a register index. \param reg Register index @@ -9994,6 +10052,7 @@ namespace BinaryNinja { virtual bool GetInstructionLowLevelIL( const uint8_t* data, uint64_t addr, size_t& len, LowLevelILFunction& il) override; virtual void AnalyzeBasicBlocks(Function* function, BasicBlockAnalysisContext& context) override; + virtual bool LiftFunction(LowLevelILFunction* function, FunctionLifterContext& context) override; virtual std::string GetRegisterName(uint32_t reg) override; virtual std::string GetFlagName(uint32_t flag) override; virtual std::string GetFlagWriteTypeName(uint32_t flags) override; @@ -12152,6 +12211,12 @@ namespace BinaryNinja { */ void SetCanExit(bool value); + /*! Determine whether this basic block has instruction data + + \return Whether this basic block has instruction data + */ + bool HasInstructionData() const; + /*! List of dominators for this basic block \param post Whether to get post dominators (default: false) @@ -12327,6 +12392,39 @@ namespace BinaryNinja { Ref GetSourceBlock() const; }; + /*! + \ingroup basicblocks + */ + template + class FastBasicBlockMap + { + T* m_storage; + size_t m_blockCount; + + public: + FastBasicBlockMap(const std::vector>& blocks) + { + m_blockCount = blocks.size(); + m_storage = new T[m_blockCount + 1]; + } + + ~FastBasicBlockMap() { delete[] m_storage; } + + T& operator[](BasicBlock* block) + { + if (block) + return m_storage[block->GetIndex()]; + return m_storage[m_blockCount]; + } + + const T& operator[](BasicBlock* block) const + { + if (block) + return m_storage[block->GetIndex()]; + return m_storage[m_blockCount]; + } + }; + /*! \ingroup function */ @@ -12889,7 +12987,7 @@ namespace BinaryNinja { Ref GetCalleeForAnalysis(Ref platform, uint64_t addr, bool exact); - std::vector GetUnresolvedIndirectBranches(); + std::set GetUnresolvedIndirectBranches(); bool HasUnresolvedIndirectBranches(); /*! 
\brief Apply an automatic type adjustment to the call at `addr` in `arch`. @@ -13698,6 +13796,7 @@ namespace BinaryNinja { */ uint64_t GetCurrentAddress() const; void SetCurrentAddress(Architecture* arch, uint64_t addr); + void SetCurrentSourceBlock(BasicBlock* source); size_t GetInstructionStart(Architecture* arch, uint64_t addr); std::set GetInstructionsAt(Architecture* arch, uint64_t addr); @@ -13705,6 +13804,7 @@ namespace BinaryNinja { void ClearIndirectBranches(); void SetIndirectBranches(const std::vector& branches); + bool HasIndirectBranches() const; /*! Get a list of registers used in the LLIL function diff --git a/binaryninjacore.h b/binaryninjacore.h index 1e346bb19d..662100111b 100644 --- a/binaryninjacore.h +++ b/binaryninjacore.h @@ -2051,6 +2051,34 @@ extern "C" BNArchitectureAndAddress* inlinedUnresolvedIndirectBranches; } BNBasicBlockAnalysisContext; + typedef struct BNFunctionLifterContext { + // IN + BNPlatform* platform; + BNLogger* logger; + size_t basicBlockCount; + BNBasicBlock** basicBlocks; + + size_t inlinedRemappingEntryCount; + BNArchitectureAndAddress* inlinedRemappingKeys; + BNArchitectureAndAddress* inlinedRemappingValues; + + size_t indirectBranchesCount; + BNIndirectBranchInfo* indirectBranches; + + size_t noReturnCallsCount; + BNArchitectureAndAddress* noReturnCalls; + + size_t contextualFunctionReturnCount; + BNArchitectureAndAddress* contextualFunctionReturnLocations; + bool* contextualFunctionReturnValues; + + size_t inlinedCallsCount; + uint64_t* inlinedCalls; + + // OUT + bool* containsInlinedFunctions; + } BNFunctionLifterContext; + typedef struct BNCustomArchitecture { void* context; @@ -2070,6 +2098,7 @@ extern "C" bool (*getInstructionLowLevelIL)( void* ctxt, const uint8_t* data, uint64_t addr, size_t* len, BNLowLevelILFunction* il); void (*analyzeBasicBlocks)(void* ctxt, BNFunction* function, BNBasicBlockAnalysisContext* context); + bool (*liftFunction)(void *ctext, BNLowLevelILFunction* function, BNFunctionLifterContext* context); char* (*getRegisterName)(void* ctxt, uint32_t reg); char* (*getFlagName)(void* ctxt, uint32_t flag); char* (*getFlagWriteTypeName)(void* ctxt, uint32_t flags); @@ -4561,6 +4590,7 @@ extern "C" BINARYNINJACOREAPI BNRelocation** BNGetRelocationsAt(BNBinaryView* view, uint64_t addr, size_t* count); BINARYNINJACOREAPI void BNFreeRelocationList(BNRelocation** relocations, size_t count); BINARYNINJACOREAPI void BNFreeRelocationRanges(BNRange* ranges); + BINARYNINJACOREAPI BNRelocation* BNGetNextRelocation(BNBinaryView* view, uint64_t addr, uint64_t maxAddr); BINARYNINJACOREAPI void BNRegisterDataNotification(BNBinaryView* view, BNBinaryDataNotification* notify); BINARYNINJACOREAPI void BNUnregisterDataNotification(BNBinaryView* view, BNBinaryDataNotification* notify); @@ -4905,6 +4935,10 @@ extern "C" BINARYNINJACOREAPI void BNArchitectureDefaultAnalyzeBasicBlocks(BNFunction* function, BNBasicBlockAnalysisContext* context); BINARYNINJACOREAPI void BNArchitectureAnalyzeBasicBlocks(BNArchitecture* arch, BNFunction* function, BNBasicBlockAnalysisContext* context); + BINARYNINJACOREAPI bool BNArchitectureSetDefaultLiftFunctionCallback(void *callback); + BINARYNINJACOREAPI bool BNArchitectureDefaultLiftFunction(BNLowLevelILFunction* function, BNFunctionLifterContext* context); + BINARYNINJACOREAPI bool BNArchitectureLiftFunction(BNArchitecture* arch, BNLowLevelILFunction* function, + BNFunctionLifterContext* context); BINARYNINJACOREAPI void BNFreeInstructionTextLines(BNInstructionTextLine* lines, size_t count); 
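The new BNGetNextRelocation entry point, wrapped as BinaryView::GetNextRelocation in the C++ API, lets a lifter walk relocations one at a time instead of materializing a per-range list. A short sketch of the iteration pattern, mirroring how the default lifter bounds the search to a basic block; the WalkBlockRelocations helper name is illustrative:

    void WalkBlockRelocations(Ref<BinaryView> view, Ref<BasicBlock> block, Ref<Logger> logger)
    {
        // First relocation at or after the block start; subsequent calls pass the block end
        // as maxAddr so the core can stop searching early.
        Ref<Relocation> reloc = view->GetNextRelocation(block->GetStart());
        while (reloc && (reloc->GetAddress() < block->GetEnd()))
        {
            logger->LogDebug("relocation at %#" PRIx64 " targeting %#" PRIx64,
                reloc->GetAddress(), reloc->GetTarget());
            reloc = view->GetNextRelocation(reloc->GetAddress() + 1, block->GetEnd());
        }
    }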
BINARYNINJACOREAPI char* BNGetArchitectureRegisterName(BNArchitecture* arch, uint32_t reg); BINARYNINJACOREAPI char* BNGetArchitectureFlagName(BNArchitecture* arch, uint32_t flag); @@ -5080,6 +5114,9 @@ extern "C" BINARYNINJACOREAPI void BNRemoveUserTypeFieldReference(BNFunction* func, BNArchitecture* fromArch, uint64_t fromAddr, BNQualifiedName* name, uint64_t offset, size_t size); + BINARYNINJACOREAPI BNLowLevelILFunction* BNGetForeignFunctionLiftedIL( + const BNFunction* func, const BNLogger* logger, const size_t inlinedCallsCount, const uint64_t* inlinedCalls); + BINARYNINJACOREAPI BNBasicBlock* BNNewBasicBlockReference(BNBasicBlock* block); BINARYNINJACOREAPI void BNFreeBasicBlock(BNBasicBlock* block); BINARYNINJACOREAPI BNBasicBlock** BNGetFunctionBasicBlockList(BNFunction* func, size_t* count); @@ -5226,6 +5263,7 @@ extern "C" BINARYNINJACOREAPI void BNFreePendingBasicBlockEdgeList(BNPendingBasicBlockEdge* edges); BINARYNINJACOREAPI void BNClearBasicBlockPendingOutgoingEdges(BNBasicBlock* block); BINARYNINJACOREAPI void BNBasicBlockSetUndeterminedOutgoingEdges(BNBasicBlock* block, bool value); + BINARYNINJACOREAPI const bool BNBasicBlockHasInstructionData(BNBasicBlock* block); BINARYNINJACOREAPI const uint8_t* BNBasicBlockGetInstructionData(BNBasicBlock* block, uint64_t addr, size_t* len); BINARYNINJACOREAPI void BNBasicBlockAddInstructionData(BNBasicBlock* block, const void* data, size_t len); BINARYNINJACOREAPI void BNBasicBlockSetFallThroughToFunction(BNBasicBlock* block, bool value); @@ -6343,11 +6381,13 @@ extern "C" BINARYNINJACOREAPI void BNLowLevelILMarkLabel(BNLowLevelILFunction* func, BNLowLevelILLabel* label); BINARYNINJACOREAPI void BNFinalizeLowLevelILFunction(BNLowLevelILFunction* func); BINARYNINJACOREAPI void BNGenerateLowLevelILSSAForm(BNLowLevelILFunction* func); + BINARYNINJACOREAPI bool BNLowLevelILFunctionHasIndirectBranches(BNLowLevelILFunction* func); BINARYNINJACOREAPI void BNPrepareToCopyLowLevelILFunction(BNLowLevelILFunction* func, BNLowLevelILFunction* src); BINARYNINJACOREAPI void BNPrepareToCopyLowLevelILBasicBlock(BNLowLevelILFunction* func, BNBasicBlock* block); BINARYNINJACOREAPI BNLowLevelILLabel* BNGetLabelForLowLevelILSourceInstruction( BNLowLevelILFunction* func, size_t instr); + BINARYNINJACOREAPI BNBasicBlock** BNPrepareToCopyForeignFunction(BNLowLevelILFunction* dst, BNLowLevelILFunction* src, size_t* count); BINARYNINJACOREAPI size_t BNLowLevelILAddLabelMap( BNLowLevelILFunction* func, uint64_t* values, BNLowLevelILLabel** labels, size_t count); @@ -6374,6 +6414,7 @@ extern "C" BNLowLevelILFunction* func, BNArchitecture* arch, uint64_t addr); BINARYNINJACOREAPI BNLowLevelILLabel* BNGetLowLevelILLabelForAddress( BNLowLevelILFunction* func, BNArchitecture* arch, uint64_t addr); + BINARYNINJACOREAPI void BNPrepareBlockTranslation(BNLowLevelILFunction* func, BNArchitecture* arch, uint64_t addr); BINARYNINJACOREAPI bool BNGetLowLevelILExprText( BNLowLevelILFunction* func, BNArchitecture* arch, size_t i, BNDisassemblySettings* settings, diff --git a/binaryview.cpp b/binaryview.cpp index 282fff7339..85a7cfbe35 100644 --- a/binaryview.cpp +++ b/binaryview.cpp @@ -5829,6 +5829,16 @@ optional> BinaryView::StringifyUnicodeData(Architectu } +Ref BinaryView::GetNextRelocation(uint64_t addr, uint64_t maxAddr) +{ + BNRelocation* reloc = BNGetNextRelocation(m_object, addr, maxAddr); + if (!reloc) + return nullptr; + + return new Relocation(reloc); +} + + Relocation::Relocation(BNRelocation* reloc) { m_object = reloc; diff --git a/defaultabb/plugin.cpp 
b/defaultabb/plugin.cpp deleted file mode 100644 index 917a03ee33..0000000000 --- a/defaultabb/plugin.cpp +++ /dev/null @@ -1,12 +0,0 @@ -#include "binaryninjaapi.h" -#include "binaryninjacore.h" - -using namespace BinaryNinja; - -extern "C" { - BN_DECLARE_CORE_ABI_VERSION - BINARYNINJAPLUGIN bool CorePluginInit() - { - return BNArchitectureSetDefaultAnalyzeBasicBlocksCallback((void *)Architecture::DefaultAnalyzeBasicBlocksCallback); - } -} diff --git a/defaultabb.cpp b/defaultarch.cpp similarity index 57% rename from defaultabb.cpp rename to defaultarch.cpp index 71823dd1c2..7a24b31ac3 100644 --- a/defaultabb.cpp +++ b/defaultarch.cpp @@ -643,3 +643,474 @@ void Architecture::DefaultAnalyzeBasicBlocksCallback(BNFunction* function, BNBas BasicBlockAnalysisContext abbc(context); Architecture::DefaultAnalyzeBasicBlocks(func, abbc); } + + +static void ApplyExternPointerForRelocation( + int64_t operand, LowLevelILFunction& il, size_t start, size_t end, Ref relocation, Ref logger) +{ + ExprId id = (ExprId)-1; + uint64_t offset = 0; + size_t size = 0; + + uint64_t relocStart = relocation->GetAddress(); + uint64_t relocEnd = relocStart + relocation->GetInfo().size; + + if (operand == BN_AUTOCOERCE_EXTERN_PTR) + { + // Go through all expressions looking for just one LLIL_CONST expression + size_t count = 0; + for (size_t i = start; i < end; i++) + { + auto instr = il.GetInstruction(i); + + // because multiple instructions can be lifted at once, we want to ensure that + // each relocation is only checked against IL instructions that potentially + // overlap. this is hard/impossible to do robustly (reloc will not always be + // at the start of an instruction), but we can at least rule out instructions + // that start after the candidate reloc ends (as in MIPS delay slots, which this + // fixes) + if (instr.address >= relocEnd) + continue; + + instr.VisitExprs([&](const LowLevelILInstruction& expr) { + switch (expr.operation) + { + case LLIL_CONST: + case LLIL_CONST_PTR: + id = expr.exprIndex; + offset = expr.operands[0]; + size = expr.size; + count++; + break; + default: + break; + } + return true; + }); + // If there is more than one LLIL_CONST then we don't know which one to set + // as an external pointer. 
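+				// (for example, a single lifted bundle that produces two constants is ambiguous),
+				// so the IL is left untouched rather than coercing the wrong expression.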
+ if (count > 1) + return; + } + if (count != 1) + return; + } + else + { + for (size_t i = start; i < end; i++) + { + auto instr = il.GetInstruction(i); + instr.VisitExprs([&](const LowLevelILInstruction& expr) { + if (expr.sourceOperand == operand) + { + switch (expr.operation) + { + case LLIL_CONST: + case LLIL_CONST_PTR: + id = expr.exprIndex; + offset = expr.operands[0]; + size = expr.size; + return false; + default: + break; + } + } + return true; // Parse any subexpressions + }); + if (id != (ExprId)-1) + break; + } + } + + if (id == (ExprId)-1) + { + logger->LogWarn("Unable to find const or const_ptr in expresssion @ %08x:%d", il.GetCurrentAddress(), start); + return; + } + offset = offset - relocation->GetTarget(); + il.ReplaceExpr(id, il.ExternPointer(size, relocation->GetTarget(), offset)); +} + + +bool Architecture::DefaultLiftFunction(LowLevelILFunction* function, FunctionLifterContext& context) +{ + std::unique_ptr> instrData; + Ref data = context.GetView(); + Ref logger = context.GetLogger(); + Ref platform = context.GetPlatform(); + std::set noReturnCalls = context.GetNoReturnCalls(); + std::vector> blocks = context.GetBasicBlocks(); + std::map contextualReturns = context.GetContextualReturns(); + std::map inlinedRemapping = context.GetInlinedRemapping(); + std::optional> indirectSource; + std::map> userIndirectBranches = context.GetUserIndirectBranches(); + std::map> autoIndirectBranches = context.GetAutoIndirectBranches(); + for (auto& i: blocks) + { + function->SetCurrentSourceBlock(i); + + auto relocationHandler = i->GetArchitecture()->GetRelocationHandler(data->GetTypeName()); + Ref nextRelocation; + if (relocationHandler) + nextRelocation = data->GetNextRelocation(i->GetStart()); + + context.PrepareBlockTranslation(function, i->GetArchitecture(), i->GetStart()); + BNLowLevelILLabel* label = function->GetLabelForAddress(i->GetArchitecture(), i->GetStart()); + if (label) + function->MarkLabel(*label); + + size_t beginInstrCount = function->GetInstructionCount(); + + // Generate IL for each instruction in the block + for (uint64_t addr = i->GetStart(); addr < i->GetEnd();) { + if (data->AnalysisIsAborted()) + return false; + + ArchAndAddr cur(i->GetArchitecture(), addr); + function->SetCurrentAddress(i->GetArchitecture(), addr); + function->ClearIndirectBranches(); + + if (auto it = inlinedRemapping.find(cur); it != inlinedRemapping.end()) + { + indirectSource = *it; + } + else + { + if (auto brit = userIndirectBranches.find(cur); brit != userIndirectBranches.end()) + { + const auto& s = brit->second; + function->SetIndirectBranches(std::vector(s.begin(), s.end())); + } + else if (auto brit = autoIndirectBranches.find(cur); brit != autoIndirectBranches.end()) + { + const auto& s = brit->second; + function->SetIndirectBranches(std::vector(s.begin(), s.end())); + } + } + + size_t len = 0; + const uint8_t* opcode; + + if (i->HasInstructionData()) + { + opcode = i->GetInstructionData(addr, &len); + + if (len == 0) + { + // Instruction data not found, emit undefined IL instruction + function->AddInstruction(function->AddExpr(LLIL_UNDEF, 0, 0)); + logger->LogDebug("Instruction data not found, inserted LLIL_UNDEF at %#" PRIx64, addr); + break; + } + } + else + { + if (!instrData) + instrData = std::make_unique>(blocks); + + DataBuffer& buffer = (*instrData)[i]; + if (buffer.GetLength() == 0) + buffer = data->ReadBuffer(i->GetStart(), i->GetEnd() - i->GetStart()); + + if (addr < i->GetStart() || addr >= (i->GetStart() + buffer.GetLength())) + { + // Instruction data not found, 
emit undefined IL instruction + function->AddInstruction(function->AddExpr(LLIL_UNDEF, 0, 0)); + logger->LogDebug("Instruction data not found, inserted LLIL_UNDEF at %#" PRIx64, addr); + break; + } + + len = (i->GetStart() + buffer.GetLength()) - addr; + opcode = (const uint8_t*)buffer.GetDataAt(addr - i->GetStart()); + } + + size_t instrCountBefore = function->GetInstructionCount(); + bool status = i->GetArchitecture()->GetInstructionLowLevelIL(opcode, addr, len, *function); + size_t instrCountAfter = function->GetInstructionCount(); + while (nextRelocation && nextRelocation->GetAddress() >= addr && nextRelocation->GetAddress() < addr + len) + { + if (data->IsOffsetExternSemantics(nextRelocation->GetTarget())) + { + int64_t operand = relocationHandler->GetOperandForExternalRelocation( + opcode, addr, len, function, nextRelocation); + if (operand != BN_NOCOERCE_EXTERN_PTR) + { + ApplyExternPointerForRelocation( + operand, *function, instrCountBefore, instrCountAfter, nextRelocation, logger); + } + } + nextRelocation = data->GetNextRelocation(nextRelocation->GetAddress() + 1, i->GetEnd()); + } + + // Conditional Call Support (Part 2) + // Replace the emitted GOTO with a noreturn expression + if (((instrCountAfter - instrCountBefore) >= 3) + && noReturnCalls.count(ArchAndAddr(i->GetArchitecture(), addr))) + { + for (size_t instrIndex = instrCountBefore; instrIndex < (instrCountAfter - 1); instrIndex++) + { + if (function->GetInstruction(instrIndex).operation != LLIL_CALL) + continue; + LowLevelILInstruction instr = function->GetInstruction(instrIndex + 1); + if (instr.operation == LLIL_GOTO) + function->ReplaceExpr(instr.exprIndex, function->AddExpr(LLIL_NORET, 0, 0)); + } + } + + uint64_t prevAddr = addr; + addr += len; + + context.CheckForInlinedCall(i, instrCountBefore, instrCountAfter, prevAddr, addr, opcode, len, indirectSource); + + // Indirect branch information informs when to translate non-standard returns into jumps + if (auto lastInstr = function->GetInstruction(instrCountAfter - 1); (lastInstr.operation == LLIL_RET) + && (function->HasIndirectBranches() || !function->GetFunction()->CanReturn().GetValue())) + { + auto addressSize = platform->GetAddressSize(); + lastInstr.Replace(function->SetRegister(addressSize, LLIL_TEMP(0), lastInstr.GetDestExpr().exprIndex)); + function->AddInstruction(function->Jump(function->Register(addressSize, LLIL_TEMP(0)), lastInstr)); + //lastInstr.Replace(m_liftedIL->Jump(lastInstr.GetDestExpr().exprIndex, lastInstr)); + } + + if (!status) + { + // Invalid instruction, emit undefined IL instruction + function->AddInstruction(function->AddExpr(LLIL_UNDEF, 0, 0)); + logger->LogDebug("Invalid instruction, inserted LLIL_UNDEF at %#" PRIx64, addr); + break; + } + } + + function->ClearIndirectBranches(); + + // Support for contextual function returns. This is mainly used for ARM/Thumb with 'blx lr'. It's most common for this to be treated + // as a function return, however it can also be a function call. 
For now this transform is described as follows: + // 1) Architecture lifts a call instruction as LLIL_CALL with a branch type of FunctionReturn + // 2) By default, contextualFunctionReturns is used to translate this to a LLIL_RET (conservative) + // 3) Downstream analysis uses dataflow to validate the return target + // 4) If the target is not the ReturnAddressValue, then we avoid the translation to a return and leave the instruction as a call + if (LowLevelILInstruction prevInstr = function->GetInstruction(function->GetInstructionCount() - 1); prevInstr.operation == LLIL_CALL) + { + if (auto itr = contextualReturns.find(ArchAndAddr(i->GetArchitecture(), prevInstr.address)); itr != contextualReturns.end() && itr->second) + prevInstr.Replace(function->Return(prevInstr.GetDestExpr().exprIndex, prevInstr)); + } + + // If basic block does not end in a jump or undefined instruction, add jump to the next block + size_t endInstrCount = function->GetInstructionCount(); + if (endInstrCount == beginInstrCount) + { + // Basic block must have instructions to be valid + function->AddInstruction(function->AddExpr(LLIL_UNDEF, 0, 0)); + logger->LogDebug( + "Basic block must have instructions to be valid, inserted LLIL_UNDEF at %#" PRIx64, i->GetStart()); + } + else if ((i->GetOutgoingEdges().size() == 0) && !i->CanExit() && !i->IsFallThroughToFunction()) + { + // Basic block does not exit + function->AddInstruction(function->AddExpr(LLIL_NORET, 0, 0)); + } + else + { + BNLowLevelILLabel* exitLabel = function->GetLabelForAddress(i->GetArchitecture(), i->GetEnd()); + if (exitLabel) + function->AddInstruction(function->Goto(*exitLabel)); + else + { + size_t dest = + function->AddExpr(LLIL_CONST_PTR, platform->GetAddressSize(), 0, i->GetEnd()); + function->AddInstruction(function->AddExpr(LLIL_JUMP, 0, 0, dest)); + } + } + } + + if (function->GetInstructionCount() == 0) + { + // If no instructions, make it undefined + function->AddInstruction(function->AddExpr(LLIL_UNDEF, 0, 0)); + logger->LogDebug("No instructions found, inserted LLIL_UNDEF at %#" PRIx64, + function->GetFunction()->GetStart()); + } + + function->Finalize(); + return true; +} + + +void FunctionLifterContext::CheckForInlinedCall(BasicBlock* block, size_t instrCountBefore, size_t instrCountAfter, + uint64_t prevAddr, uint64_t addr, const uint8_t* opcode, size_t len, + std::optional> indirectSource) +{ + // Check for direct inlined calls + // TODO: Handle indirect calls where the address is constant + if (instrCountAfter > instrCountBefore) + { + LowLevelILInstruction lastInstr = m_function->GetInstruction(instrCountAfter - 1); + if ((lastInstr.operation == LLIL_CALL || lastInstr.operation == LLIL_JUMP) + && (lastInstr.GetDestExpr().operation == LLIL_CONST || lastInstr.GetDestExpr().operation == LLIL_CONST_PTR)) + { + InstructionInfo info; + if (!block->GetArchitecture()->GetInstructionInfo(opcode, prevAddr, len, info)) + return; + + uint64_t target = lastInstr.GetDestExpr().GetConstant(); + Ref platform = + info.archTransitionByTargetAddr ? m_platform->GetAssociatedPlatformByAddress(target) : m_platform; + if (!platform) + return; + + // Avoid inline recursion + if (m_inlinedCalls.count(target) != 0) + return; + + Ref targetFunc = m_view->GetAnalysisFunction(platform, target); + if (!targetFunc) + return; + + auto inlineDuringAnalysis = targetFunc->GetInlinedDuringAnalysis().GetValue(); + if (inlineDuringAnalysis == DoNotInlineCall) + return; + + // Must not be a conditional call. + // TODO: Expand support to allow these. 
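+			// The scan below inspects the IL emitted for this single instruction (everything
+			// between instrCountBefore and the trailing call/jump); any LLIL_IF or LLIL_GOTO
+			// means the call is predicated, and predicated calls are not inlined.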
+ bool hasBranches = false; + for (size_t instrIndex = instrCountBefore; instrIndex < instrCountAfter - 1; instrIndex++) + { + LowLevelILInstruction instr = m_function->GetInstruction(instrIndex); + if (instr.operation == LLIL_IF || instr.operation == LLIL_GOTO) + { + hasBranches = true; + break; + } + } + if (hasBranches) + return; + + // Get lifted IL for the target function + m_inlinedCalls.insert(target); + Ref targetIL = GetForeignFunctionLiftedIL(targetFunc); + m_inlinedCalls.erase(target); + if (!targetIL) + { + // Lifting of inlined function failed, do not inline + return; + } + + // Replace call with a goto to the inlined code + LowLevelILLabel start, end; + m_function->MarkLabel(start); + m_function->ReplaceExpr(lastInstr.exprIndex, m_function->Goto(start, lastInstr)); + + if (lastInstr.operation == LLIL_CALL) + { + // Set up return address according to the architecture + // TODO: Handle architectures that use a nonstandard way of calling functions + uint32_t linkReg = m_platform->GetArchitecture()->GetLinkRegister(); + if (linkReg == BN_INVALID_REGISTER) + { + // No link register, push return address onto stack + // XXX: hey, this is one of the things making bad datavars inside functions, look into this + size_t addrSize = m_platform->GetAddressSize(); + ExprId pushExpr = + m_function->Push(addrSize, m_function->ConstPointer(addrSize, addr, lastInstr), 0, lastInstr); + m_function->SetExprAttributes(pushExpr, ILAllowDeadStoreElimination); + m_function->AddInstruction(pushExpr); + } + else + { + // Set link register to return address + BNRegisterInfo regInfo = m_platform->GetArchitecture()->GetRegisterInfo(linkReg); + + uint64_t addrToSet = addr; + if (block->GetArchitecture()->GetName() == "thumb2") + addrToSet |= 1; // XXX: hack moved here from lowlevelilfunction.cpp + + ExprId linkExpr = m_function->SetRegister( + regInfo.size, linkReg, m_function->ConstPointer(regInfo.size, addrToSet, lastInstr), 0, lastInstr); + m_function->SetExprAttributes(linkExpr, ILAllowDeadStoreElimination); + m_function->AddInstruction(linkExpr); + } + } + + // Copy the inlined code from the target function + auto blocks = PrepareToCopyForeignFunction(targetIL); + auto unresolvedIndirectBranches = targetFunc->GetUnresolvedIndirectBranches(); + auto sourceLocation = inlineDuringAnalysis == InlineUsingCallAddress ? ILSourceLocation(lastInstr) : ILSourceLocation(); + for (auto& block : blocks) + { + m_function->PrepareToCopyBlock(block); + for (size_t instrIndex = block->GetStart(); instrIndex < block->GetEnd(); instrIndex++) + { + LowLevelILInstruction instr = targetIL->GetInstruction(instrIndex); + ArchAndAddr loc(block->GetArchitecture(), instr.address); + + if (lastInstr.operation == LLIL_CALL && instr.operation == LLIL_RET) + { + // If the instruction is a return, emit the computation of the target + // location (it may affect the stack pointer) but go directly to the + // return label instead of emitting a return instruction. 
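+					// For example, an LLIL_RET whose target is a Pop still needs its stack
+					// adjustment, so the destination expression is copied before the Goto(end).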
+ // TODO: Handle architectures that don't use LLIL_RET and functions + // that jump to the return address in nonstandard ways + //m_liftedIL->AddInstruction(m_liftedIL->Jump(instr.GetDestExpr().CopyTo(m_liftedIL), instr)); + m_function->AddInstruction(instr.GetDestExpr().CopyTo(m_function, sourceLocation)); + m_function->AddInstruction(m_function->Goto(end, sourceLocation)); + } + else if (lastInstr.operation == LLIL_CALL && instr.operation == LLIL_JUMP + && block->GetOutgoingEdges().empty() && (unresolvedIndirectBranches.count(loc) == 0)) + { + // Jump without outgoing edges in the graph, and it is not marked as having + // unresolved branches, and this is the end of the function. This implies + // that this is a tail call. Copy tail calls as a call followed by a goto to + // the end of the inlined section. If the architecture places the return + // address on the stack, ensure to pop it off before emitting the call, as + // this implicitly places a return address onto the stack. We do not need + // to worry about nested inlining here because that is already resolved at + // this point. + uint32_t linkReg = m_platform->GetArchitecture()->GetLinkRegister(); + if (linkReg == BN_INVALID_REGISTER) + { + size_t addrSize = m_platform->GetAddressSize(); + m_function->AddInstruction(m_function->Pop(addrSize, 0, sourceLocation)); + } + m_function->AddInstruction( + m_function->Call(instr.GetDestExpr().CopyTo(m_function), sourceLocation)); + m_function->AddInstruction(m_function->Goto(end, sourceLocation)); + } + else + { + if (indirectSource.has_value() && indirectSource->second == loc) + { + ArchAndAddr cur(indirectSource->first); + if (auto brit = m_userIndirectBranches.find(cur); brit != m_userIndirectBranches.end()) + { + const auto& s = brit->second; + m_function->SetIndirectBranches(std::vector(s.begin(), s.end())); + } + else if (auto brit = m_autoIndirectBranches.find(cur); brit != m_autoIndirectBranches.end()) + { + const auto& s = brit->second; + m_function->SetIndirectBranches(std::vector(s.begin(), s.end())); + } + + m_function->SetCurrentAddress(loc.arch, loc.address); + } + + // Other instructions are copied directly + m_function->AddInstruction(instr.CopyTo(m_function, sourceLocation)); + } + } + } + + // Mark end of inlined code, execution will resume at the instruction following the call + m_function->MarkLabel(end); + *m_containsInlinedFunctions = true; + } + } +} + + +bool Architecture::DefaultLiftFunctionCallback(BNLowLevelILFunction* function, BNFunctionLifterContext* context) +{ + Ref func(new LowLevelILFunction(BNNewLowLevelILFunctionReference(function))); + FunctionLifterContext flc(func, context); + return DefaultLiftFunction(func, flc); +} diff --git a/defaultabb/CMakeLists.txt b/defaultarch/CMakeLists.txt similarity index 59% rename from defaultabb/CMakeLists.txt rename to defaultarch/CMakeLists.txt index b81d14bed0..d355f3179b 100644 --- a/defaultabb/CMakeLists.txt +++ b/defaultarch/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.13 FATAL_ERROR) -project(defaultabb) +project(defaultarch) if(NOT BN_INTERNAL_BUILD) add_subdirectory(${PROJECT_SOURCE_DIR}/../.. 
${PROJECT_BINARY_DIR}/api) @@ -11,28 +11,28 @@ file(GLOB SOURCES CONFIGURE_DEPENDS *.h) if(DEMO) - add_library(defaultabb STATIC ${SOURCES}) + add_library(defaultarch STATIC ${SOURCES}) else() - add_library(defaultabb SHARED ${SOURCES}) + add_library(defaultarch SHARED ${SOURCES}) endif() -target_include_directories(defaultabb +target_include_directories(defaultarch PRIVATE ${PROJECT_SOURCE_DIR} ) if(WIN32) - target_link_directories(defaultabb + target_link_directories(defaultarch PRIVATE ${BN_INSTALL_DIR}) - target_link_libraries(defaultabb binaryninjaapi) + target_link_libraries(defaultarch binaryninjaapi) else() - target_link_libraries(defaultabb binaryninjaapi) + target_link_libraries(defaultarch binaryninjaapi) endif() if(UNIX AND NOT APPLE) - target_link_options(defaultabb PRIVATE "LINKER:--exclude-libs,ALL") + target_link_options(defaultarch PRIVATE "LINKER:--exclude-libs,ALL") endif() -set_target_properties(defaultabb PROPERTIES +set_target_properties(defaultarch PROPERTIES CXX_STANDARD 20 CXX_VISIBILITY_PRESET hidden CXX_STANDARD_REQUIRED ON @@ -43,8 +43,8 @@ set_target_properties(defaultabb PROPERTIES POSITION_INDEPENDENT_CODE ON) if(BN_INTERNAL_BUILD) - plugin_rpath(defaultabb) - set_target_properties(defaultabb PROPERTIES + plugin_rpath(defaultarch) + set_target_properties(defaultarch PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${BN_CORE_PLUGIN_DIR} RUNTIME_OUTPUT_DIRECTORY ${BN_CORE_PLUGIN_DIR}) endif() diff --git a/defaultarch/plugin.cpp b/defaultarch/plugin.cpp new file mode 100644 index 0000000000..3e6e66a8c5 --- /dev/null +++ b/defaultarch/plugin.cpp @@ -0,0 +1,16 @@ +#include "binaryninjaapi.h" +#include "binaryninjacore.h" + +using namespace BinaryNinja; + +extern "C" +{ + BN_DECLARE_CORE_ABI_VERSION + BINARYNINJAPLUGIN bool CorePluginInit() + { + if (!BNArchitectureSetDefaultAnalyzeBasicBlocksCallback((void*)Architecture::DefaultAnalyzeBasicBlocksCallback)) + return false; + + return BNArchitectureSetDefaultLiftFunctionCallback((void*)Architecture::DefaultLiftFunctionCallback); + } +} diff --git a/function.cpp b/function.cpp index 3f5f5b2ea3..a8ab3b9985 100644 --- a/function.cpp +++ b/function.cpp @@ -1644,14 +1644,13 @@ Ref Function::GetCalleeForAnalysis(Ref platform, uint64_t ad } -vector Function::GetUnresolvedIndirectBranches() +set Function::GetUnresolvedIndirectBranches() { size_t count; BNArchitectureAndAddress* addresses = BNGetUnresolvedIndirectBranches(m_object, &count); - vector result; - result.reserve(count); + set result; for (size_t i = 0; i < count; i++) - result.push_back({new CoreArchitecture(addresses[i].arch), addresses[i].address}); + result.insert({new CoreArchitecture(addresses[i].arch), addresses[i].address}); BNFreeArchitectureAndAddressList(addresses); return result; } diff --git a/lowlevelil.cpp b/lowlevelil.cpp index 514595bbff..cc1cb6d017 100644 --- a/lowlevelil.cpp +++ b/lowlevelil.cpp @@ -97,6 +97,12 @@ void LowLevelILFunction::SetCurrentAddress(Architecture* arch, uint64_t addr) } +void LowLevelILFunction::SetCurrentSourceBlock(BasicBlock* source) +{ + BNLowLevelILSetCurrentSourceBlock(m_object, source->GetObject()); +} + + size_t LowLevelILFunction::GetInstructionStart(Architecture* arch, uint64_t addr) { return BNLowLevelILGetInstructionStart(m_object, arch ? 
arch->GetObject() : nullptr, addr); @@ -145,6 +151,12 @@ void LowLevelILFunction::SetIndirectBranches(const vector& branches } +bool LowLevelILFunction::HasIndirectBranches() const +{ + return BNLowLevelILFunctionHasIndirectBranches(m_object); +} + + std::vector LowLevelILFunction::GetRegisters() { std::vector result; diff --git a/python/architecture.py b/python/architecture.py index 57d327c0b1..2d085d9e30 100644 --- a/python/architecture.py +++ b/python/architecture.py @@ -41,6 +41,7 @@ from . import binaryview from . import variable from . import basicblock +from . import log RegisterIndex = NewType('RegisterIndex', int) RegisterStackIndex = NewType('RegisterStackIndex', int) @@ -412,6 +413,117 @@ def finalize(self) -> None: core.BNAnalyzeBasicBlocksContextFinalize(self._handle) +@dataclass +class FunctionLifterContext: + """Used by ``lift_function`` and contains contextual information for function-level lifting + + .. note:: This class is meant to be used by Architecture plugins only + """ + + _handle: core.BNFunctionLifterContext + _function: "function.Function" + _platform: "platform.Platform" + _logger: "log.Logger" + _blocks: List["basicblock.BasicBlock"] + _contextual_returns: Dict["function.ArchAndAddr", bool] + _inline_remapping: Dict["function.ArchAndAddr", "function.ArchAndAddr"] + _user_indirect_branches: Dict["function.ArchAndAddr", Set["function.ArchAndAddr"]] + _auto_indirect_branches: Dict["function.ArchAndAddr", Set["function.ArchAndAddr"]] + _inlined_calls: Set[int] + + @staticmethod + def from_core_struct(func: core.BNLowLevelILFunction, + bn_fl_context: core.BNFunctionLifterContext) -> "FunctionLifterContext": + """Create a FunctionLifterContext from a core.BNFunctionLifterContext structure.""" + + session_id = core.BNLoggerGetSessionId(bn_fl_context.logger) + name = core.BNLoggerGetName(bn_fl_context.logger) + logger = log.Logger(session_id, name, handle=core.BNNewLoggerReference(bn_fl_context.logger)) + + plat = platform.CorePlatform._from_cache(core.BNNewPlatformReference(bn_fl_context.platform)) + blocks = [] + for i in range(0, bn_fl_context.basicBlockCount): + blocks.append( + basicblock.BasicBlock( + core.BNNewBasicBlockReference(bn_fl_context.basicBlocks[i]) + ) + ) + + contextual_returns = {} + for i in range(0, bn_fl_context.contextualFunctionReturnCount): + loc = function.ArchAndAddr( + CoreArchitecture._from_cache(bn_fl_context.contextualFunctionReturnLocations[i].arch), + bn_fl_context.contextualFunctionReturnLocations[i].address, + ) + + contextual_returns[loc] = bn_fl_context._contextualFunctionReturnValues[i] + + inline_remapping = {} + for i in range(0, bn_fl_context.inlinedRemappingEntryCount): + key = function.ArchAndAddr( + CoreArchitecture._from_cache(bn_fl_context.inlinedRemappingKeys[i].arch), + bn_fl_context.inlinedRemappingKeys[i].address, + ) + dest = function.ArchAndAddr( + CoreArchitecture._from_cache(bn_fl_context.inlinedRemappingEntries[i].destination.arch), + bn_fl_context.inlinedRemappingEntries[i].destination.address, + ) + inline_remapping[src] = dest + + user_indirect_branches = {} + auto_indirect_branches = {} + for i in range(0, bn_fl_context.indirectBranchesCount): + src = function.ArchAndAddr( + CoreArchitecture._from_cache(bn_fl_context.indirectBranches[i].sourceArch), + bn_fl_context.indirectBranches[i].sourceAddr, + ) + + dest = function.ArchAndAddr( + CoreArchitecture._from_cache(bn_fl_context.indirectBranches[i].destArch), + bn_fl_context.indirectBranches[i].dests[j].destAddr, + ) + + if 
bn_fl_context.indirectBranches[i].dests[j].isAutoDefined: + if src not in auto_indirect_branches: + auto_indirect_branches[src] = set() + auto_indirect_branches[src].add(dest) + else: + if src not in user_indirect_branches: + user_indirect_branches[src] = set() + user_indirect_branches[src].add(dest) + + + inlined_calls = set() + for i in range(0, bn_fl_context.inlinedCallsCount): + inlined_calls.add(bn_fl_context.inlinedCalls[i]) + + return FunctionLifterContext( + _handle=bn_fl_context, + _function=lowlevelil.LowLevelILFunction( + plat.arch, core.BNNewLowLevelILFunctionReference(func) + ), + _platform=plat, + _logger=logger, + _blocks=blocks, + _contextual_returns=contextual_returns, + _inline_remapping=inline_remapping, + _user_indirect_branches=user_indirect_branches, + _auto_indirect_branches=auto_indirect_branches, + _inlined_calls=inlined_calls, + ) + + def prepare_block_translation(self, function, arch, address): + """Prepare block for translation""" + + core.BNPrepareBlockTranslation(function.handle, arch.handle, address) + + @property + def blocks(self) -> List["basicblock.BasicBlock"]: + """Get the list of basic blocks in this context.""" + + return self._blocks + + @dataclass(frozen=True) class RegisterInfo: full_width_reg: RegisterName @@ -611,6 +723,7 @@ def __init__(self): self._get_instruction_low_level_il ) self._cb.analyzeBasicBlocks = self._cb.analyzeBasicBlocks.__class__(self._analyze_basic_blocks) + self._cb.liftFunction = self._cb.liftFunction.__class__(self._lift_function) self._cb.getRegisterName = self._cb.getRegisterName.__class__(self._get_register_name) self._cb.getFlagName = self._cb.getFlagName.__class__(self._get_flag_name) self._cb.getFlagWriteTypeName = self._cb.getFlagWriteTypeName.__class__(self._get_flag_write_type_name) @@ -1084,6 +1197,15 @@ def _analyze_basic_blocks(self, ctx, func, ptr_bn_bb_context): except: log_error_for_exception("Unhandled Python exception in Architecture._analyze_basic_blocks") + def _lift_function(self, ctx, func, ptr_bn_fl_context): + try: + bn_fl_context = ptr_bn_fl_context.contents + context = FunctionLifterContext.from_core_struct(func, bn_fl_context) + return self.lift_function(lowlevelil.LowLevelILFunction(arch=self, handle=core.BNNewLowLevelILFunctionReference(func)), context) + except: + log_error_for_exception("Unhandled Python exception in Architecture._lift_function") + return False + def _get_register_name(self, ctxt, reg): try: if reg in self._regs_by_index: @@ -1814,6 +1936,23 @@ def analyze_basic_blocks(self, func: 'function.Function', context: BasicBlockAna except: log_error_for_exception("Unhandled Python exception in Architecture.analyze_basic_blocks") + def lift_function(self, func: "lowlevelil.LowLevelILFunction", context: FunctionLifterContext) -> bool: + """ + ``lift_function`` performs lifting of the function and commits the results to the function analysis + + .. 
note:: Architecture subclasses should only implement this method if function-level analysis is required + + :param LowLevelILFunction func: the function to analyze + :param FunctionLifterContext context: the lifting context + :return: True on success, False otherwise + """ + + try: + return core.BNArchitectureDefaultLiftFunction(func.handle, context._handle) + except: + log_error_for_exception("Unhandled Python exception in Architecture.lift_function") + return False + def get_low_level_il_from_bytes(self, data: bytes, addr: int) -> 'lowlevelil.LowLevelILInstruction': """ ``get_low_level_il_from_bytes`` converts the instruction in bytes to ``il`` at the given virtual address diff --git a/python/log.py b/python/log.py index 2f5dda49e0..13c65c8194 100644 --- a/python/log.py +++ b/python/log.py @@ -365,10 +365,13 @@ def close_logs(): class Logger: - def __init__(self, session_id: int, logger_name: str): + def __init__(self, session_id: int, logger_name: str, handle=None): self.session_id = session_id self.logger_name = logger_name - self.handle = core.BNLogCreateLogger(logger_name, session_id) + if handle: + self.handle = handle + else: + self.handle = core.BNLogCreateLogger(logger_name, session_id) def log(self, level: LogLevel, message: str) -> None: log(level, message, self.logger_name, self.session_id) diff --git a/rust/src/architecture.rs b/rust/src/architecture.rs index 669b84c8ad..315341fe01 100644 --- a/rust/src/architecture.rs +++ b/rust/src/architecture.rs @@ -218,6 +218,16 @@ pub trait Architecture: 'static + Sized + AsRef { } } + fn lift_function( + &self, + function: LowLevelILMutableFunction, + context: &mut FunctionLifterContext, + ) -> bool { + unsafe { + BNArchitectureDefaultLiftFunction(function.handle, context.handle) + } + } + /// Fallback flag value calculation path. This method is invoked when the core is unable to /// recover the flag using semantics and resorts to emitting instructions that explicitly set each /// observed flag to the value of an expression returned by this function. @@ -542,6 +552,18 @@ pub trait Architecture: 'static + Sized + AsRef { fn handle(&self) -> Self::Handle; } +pub struct FunctionLifterContext { + pub(crate) handle: *mut BNFunctionLifterContext, +} + +impl FunctionLifterContext { + pub unsafe fn from_raw(handle: *mut BNFunctionLifterContext) -> Self { + debug_assert!(!handle.is_null()); + + FunctionLifterContext { handle } + } +} + // TODO: WTF?!?!?!? 
pub struct CoreArchitectureList(*mut *mut BNArchitecture, usize); @@ -725,6 +747,16 @@ impl Architecture for CoreArchitecture { } } + fn lift_function( + &self, + function: LowLevelILMutableFunction, + context: &mut FunctionLifterContext, + ) -> bool { + unsafe { + BNArchitectureLiftFunction(self.handle, function.handle, context.handle) + } + } + fn flag_write_llil<'a>( &self, _flag: Self::Flag, @@ -1440,6 +1472,23 @@ where custom_arch.analyze_basic_blocks(&mut function, &mut context); } + extern "C" fn cb_lift_function( + ctxt: *mut c_void, + function: *mut BNLowLevelILFunction, + context: *mut BNFunctionLifterContext, + ) -> bool + where + A: 'static + Architecture> + Send + Sync, + { + let custom_arch = unsafe { &*(ctxt as *mut A) }; + let function = unsafe { + LowLevelILMutableFunction::from_raw_with_arch(function, Some(*custom_arch.as_ref())) + }; + let mut context: FunctionLifterContext = + unsafe { FunctionLifterContext::from_raw(context) }; + custom_arch.lift_function(function, &mut context) + } + extern "C" fn cb_reg_name(ctxt: *mut c_void, reg: u32) -> *mut c_char where A: 'static + Architecture> + Send + Sync, @@ -2359,6 +2408,7 @@ where freeInstructionText: Some(cb_free_instruction_text), getInstructionLowLevelIL: Some(cb_instruction_llil::), analyzeBasicBlocks: Some(cb_analyze_basic_blocks::), + liftFunction: Some(cb_lift_function::), getRegisterName: Some(cb_reg_name::), getFlagName: Some(cb_flag_name::),
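Taken together, the C++, Python, and Rust hooks all funnel into the same per-block flow that DefaultLiftFunction implements. A condensed C++ skeleton of that flow, distilled from the default implementation above, is shown here for reference; MyArchitecture is a placeholder, and relocation coercion, no-return rewriting, contextual returns, and call inlining are deliberately omitted because the default lifter already handles them.

    bool MyArchitecture::LiftFunction(LowLevelILFunction* function, FunctionLifterContext& context)
    {
        Ref<BinaryView> view = context.GetView();
        Ref<Logger> logger = context.GetLogger();
        for (auto& block : context.GetBasicBlocks())
        {
            function->SetCurrentSourceBlock(block);
            context.PrepareBlockTranslation(function, block->GetArchitecture(), block->GetStart());
            if (BNLowLevelILLabel* label = function->GetLabelForAddress(block->GetArchitecture(), block->GetStart()))
                function->MarkLabel(*label);

            // Read the block bytes once; blocks carrying their own bytes would use
            // HasInstructionData()/GetInstructionData() instead, as the default lifter does.
            DataBuffer buffer = view->ReadBuffer(block->GetStart(), block->GetEnd() - block->GetStart());
            if (buffer.GetLength() == 0)
            {
                // Unreadable block: keep it non-empty so the IL graph stays well formed.
                function->AddInstruction(function->AddExpr(LLIL_UNDEF, 0, 0));
                continue;
            }

            for (uint64_t addr = block->GetStart(); addr < block->GetEnd();)
            {
                function->SetCurrentAddress(block->GetArchitecture(), addr);
                size_t len = (block->GetStart() + buffer.GetLength()) - addr;
                const uint8_t* opcode = (const uint8_t*)buffer.GetDataAt(addr - block->GetStart());

                // A delay-slot architecture would look ahead across the bundle here and emit IL
                // for several instructions at once instead of strictly one at a time.
                if (!block->GetArchitecture()->GetInstructionLowLevelIL(opcode, addr, len, *function))
                {
                    function->AddInstruction(function->AddExpr(LLIL_UNDEF, 0, 0));
                    logger->LogDebug("Invalid instruction, inserted LLIL_UNDEF at %#" PRIx64, addr);
                    break;
                }
                addr += len;
            }

            // Fall through to the next block, mirroring the default implementation's epilogue.
            if (BNLowLevelILLabel* exitLabel = function->GetLabelForAddress(block->GetArchitecture(), block->GetEnd()))
                function->AddInstruction(function->Goto(*exitLabel));
        }
        function->Finalize();
        return true;
    }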