From 9252262693432b33dbe4ffc60d79bbdc6fbb5f66 Mon Sep 17 00:00:00 2001 From: Christoph Egger Date: Thu, 8 Jan 2015 18:42:04 +0100 Subject: [PATCH] Split blocks before finishing function --- src/disassembler/Function.hxx | 10 ++++++++++ src/disassembler/llvm/LLVMDisassembler.cxx | 21 ++++++++++++++------- src/disassembler/llvm/LLVMDisassembler.hxx | 2 +- src/disassembler/llvm/LLVMFunction.hxx | 3 --- 4 files changed, 25 insertions(+), 11 deletions(-) diff --git a/src/disassembler/Function.hxx b/src/disassembler/Function.hxx index ccca54c..57833ba 100644 --- a/src/disassembler/Function.hxx +++ b/src/disassembler/Function.hxx @@ -2,6 +2,7 @@ #define INCLUDE__Function_hxx #include "disassembler/BasicBlock.hxx" +#include <map> class Function { public: @@ -17,9 +18,18 @@ public: std::string getName() const { return name; } + + void addBasicBlock(BasicBlock* block) { + _blocks.insert(std::make_pair(block->getStartAddress(), block)); + } + + std::map<uint64_t, BasicBlock*>& blocks() { + return _blocks; + } private: std::string name; uint64_t start_address; + std::map<uint64_t, BasicBlock*> _blocks; }; #endif diff --git a/src/disassembler/llvm/LLVMDisassembler.cxx b/src/disassembler/llvm/LLVMDisassembler.cxx index 79c7a54..c73e703 100644 --- a/src/disassembler/llvm/LLVMDisassembler.cxx +++ b/src/disassembler/llvm/LLVMDisassembler.cxx @@ -138,7 +138,7 @@ Function* LLVMDisassembler::disassembleFunctionAt(uint64_t address, const std::s text_section.getAddress(base_address); text_section.getSize(size); - if (address < base_address || + if (address < base_address || address >= base_address + size) { return NULL; } @@ -174,6 +174,7 @@ void LLVMDisassembler::disassembleFunction(LLVMFunction* function) { LLVMBasicBlock * block = new LLVMBasicBlock(function->getStartAddress(), this); remaining_blocks.push(block); blocks.insert(std::make_pair(block->getStartAddress(), block)); + function->addBasicBlock(block); while (remaining_blocks.size()) { LLVMBasicBlock * current_block = remaining_blocks.top(); @@ -206,6 +207,7 @@ void
LLVMDisassembler::disassembleFunction(LLVMFunction* function) { if (blocks.find(jmptarget) == blocks.end()) { LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this); blocks.insert(std::make_pair(block->getStartAddress(), block)); + function->addBasicBlock(block); remaining_blocks.push(block); } if (MIA->isConditionalBranch(inst)) { @@ -214,6 +216,7 @@ void LLVMDisassembler::disassembleFunction(LLVMFunction* function) { if (blocks.find(jmptarget) == blocks.end()) { LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this); blocks.insert(std::make_pair(block->getStartAddress(), block)); + function->addBasicBlock(block); remaining_blocks.push(block); } } @@ -234,6 +237,7 @@ void LLVMDisassembler::disassembleFunction(LLVMFunction* function) { current_address += inst_size; } } + splitBlocks(function); LOG4CXX_DEBUG(logger, "Finished function " << function->getName()); manager->signal_new_function(function); } @@ -293,19 +297,19 @@ void LLVMDisassembler::disassemble() { LOG4CXX_INFO(logger, "No Symbols found, starting at the beginning of the text segment"); disassembleFunctionAt(text_entry); } - - splitBlocks(); } -void LLVMDisassembler::splitBlocks() { +void LLVMDisassembler::splitBlocks(LLVMFunction* function) { SectionRef text_section = sections[".text"]; StringRef bytes; text_section.getContents(bytes); StringRefMemoryObject ref(bytes); // Split blocks where jumps are going inside the block - for (auto it = blocks.begin(); it != blocks.end(); ++it) { - LLVMBasicBlock * current_block = it->second; + for (auto it = function->blocks().begin(); + it != function->blocks().end(); + ++it) { + BasicBlock * current_block = it->second; uint64_t inst_size; uint64_t base_address; text_section.getAddress(base_address); @@ -317,8 +321,10 @@ void LLVMDisassembler::splitBlocks() { if(llvm::MCDisassembler::Success == DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) { + // See if some other block starts here auto other = 
blocks.find(current_address + inst_size + base_address); + // Special case, other block starts here but we are at the end anyway if (other != blocks.end()) { uint64_t endaddress = current_address + inst_size + base_address; if (endaddress != current_block->getEndAddress()) { @@ -377,7 +383,8 @@ void LLVMDisassembler::forEachFunction(std::function } void LLVMDisassembler::printEachInstruction(uint64_t start, uint64_t end, - std::function fun) { + std::function fun) { SectionRef text_section = sections[".text"]; uint64_t base_address; text_section.getAddress(base_address); diff --git a/src/disassembler/llvm/LLVMDisassembler.hxx b/src/disassembler/llvm/LLVMDisassembler.hxx index 107767d..b0ab082 100644 --- a/src/disassembler/llvm/LLVMDisassembler.hxx +++ b/src/disassembler/llvm/LLVMDisassembler.hxx @@ -41,7 +41,7 @@ protected: private: // http://llvm.org/docs/doxygen/html/MCObjectDisassembler_8cpp_source.html +197 void disassembleFunction(LLVMFunction* function); - void splitBlocks(); + void splitBlocks(LLVMFunction* fun); void disassemble(); void readSymbols(); diff --git a/src/disassembler/llvm/LLVMFunction.hxx b/src/disassembler/llvm/LLVMFunction.hxx index 3bc813a..0bdb1dc 100644 --- a/src/disassembler/llvm/LLVMFunction.hxx +++ b/src/disassembler/llvm/LLVMFunction.hxx @@ -11,6 +11,3 @@ private: }; #endif - - - -- 2.39.2