From c55f9e2b5f5ff83817ab5187eb0122600a4c5cea Mon Sep 17 00:00:00 2001 From: Christoph Egger Date: Tue, 27 May 2014 14:34:51 +0200 Subject: [PATCH] Add interface to iterate over functions --- src/disassembler/Disassembler.hxx | 4 +- src/disassembler/Function.hxx | 14 ++-- src/disassembler/llvm/LLVMDisassembler.cxx | 76 +++++++++++++++------- src/disassembler/llvm/LLVMDisassembler.hxx | 9 +-- src/disassembler/llvm/LLVMFunction.hxx | 7 +- 5 files changed, 68 insertions(+), 42 deletions(-) diff --git a/src/disassembler/Disassembler.hxx b/src/disassembler/Disassembler.hxx index 1408326..b2576f7 100644 --- a/src/disassembler/Disassembler.hxx +++ b/src/disassembler/Disassembler.hxx @@ -5,6 +5,7 @@ #include #include "disassembler/BasicBlock.hxx" +#include "disassembler/Function.hxx" class Disassembler { @@ -15,7 +16,8 @@ public: void getSymbols(); uint64_t entryAddress(); - virtual void forEachInstruction(const std::string& name, std::function callback) = 0; + virtual void forEachFunction(std::function callback) = 0; + // virtual void forEachInstruction(const std::string& name, std::function callback) = 0; // http://llvm.org/docs/doxygen/html/MCObjectDisassembler_8cpp_source.html +197 BasicBlock * generateControlFlowGraph(const std::string& name); diff --git a/src/disassembler/Function.hxx b/src/disassembler/Function.hxx index 6cad001..ccca54c 100644 --- a/src/disassembler/Function.hxx +++ b/src/disassembler/Function.hxx @@ -5,19 +5,21 @@ class Function { public: - Function(const std::string& name) { - this->name = name; + Function(const std::string& name, uint64_t start_address) + : name(name) + , start_address(start_address) { + } + + uint64_t getStartAddress() const { + return start_address; } std::string getName() const { return name; } - - BasicBlock * getEntry(); - private: std::string name; - BasicBlock * start; + uint64_t start_address; }; #endif diff --git a/src/disassembler/llvm/LLVMDisassembler.cxx b/src/disassembler/llvm/LLVMDisassembler.cxx index e157075..3494b79 100644 --- a/src/disassembler/llvm/LLVMDisassembler.cxx +++ b/src/disassembler/llvm/LLVMDisassembler.cxx @@ -13,7 +13,6 @@ using namespace llvm::object; * ist sondern z.B. einfach nur Instruktionen oder ein Bootsektor oder * foo */ - LLVMDisassembler::LLVMDisassembler(const std::string& filename) : Disassembler(filename) , logger(log4cxx::Logger::getLogger("LLVMDisassembler")) @@ -117,24 +116,29 @@ void LLVMDisassembler::disassemble() { std::stack remaining_blocks; SectionRef text_section = sections[".text"]; - std::for_each(symbols.begin(), symbols.end(), - [&](std::pair x) { - uint64_t result; - bool contains; - SymbolRef::Type symbol_type; + for (auto x = symbols.begin(); x != symbols.end(); ++x) { + uint64_t result; + bool contains; + SymbolRef::Type symbol_type; + +/* + * TODO: If we jump into some Basic Block we need to split it there into two + */ - if (text_section.containsSymbol(x.second, contains) || !contains) - return; + if (text_section.containsSymbol(x->second, contains) || !contains) + continue; - if (x.second.getType(symbol_type) - || SymbolRef::ST_Function != symbol_type) - return; + if (x->second.getType(symbol_type) + || SymbolRef::ST_Function != symbol_type) + continue; - if (!x.second.getAddress(result)) { - remaining_functions.push(new LLVMFunction(x.first, result)); - LOG4CXX_DEBUG(logger, "Disasembling " << x.first); - } - }); + if (!x->second.getAddress(result)) { + LLVMFunction * fun = new LLVMFunction(x->first, result); + remaining_functions.push(fun); + functions.insert(std::make_pair(result, fun)); + LOG4CXX_DEBUG(logger, "Disasembling " << x->first); + } + } StringRef bytes; text_section.getContents(bytes); @@ -149,7 +153,9 @@ void LLVMDisassembler::disassemble() { // if ("_start" != current_function->getName()) // continue; - remaining_blocks.push(new LLVMBasicBlock(current_function->getStartAddress())); + LLVMBasicBlock * block = new LLVMBasicBlock(current_function->getStartAddress()); + remaining_blocks.push(block); + blocks.insert(std::make_pair(block->getStartAddress(), block)); while (remaining_blocks.size()) { LLVMBasicBlock * current_block = remaining_blocks.top(); @@ -187,14 +193,24 @@ void LLVMDisassembler::disassemble() { jmptarget += base_address; if (!MIA->isIndirectBranch(inst)) { if (MIA->isCall(inst)) { - if (blocks.find(jmptarget) == blocks.end()) - remaining_functions.push(new LLVMFunction("", jmptarget)); + if (functions.find(jmptarget) == functions.end()) { + std::stringstream s; + s << ""; + LLVMFunction * fun = new LLVMFunction(s.str(), jmptarget); + functions.insert(std::make_pair(jmptarget, fun)); + remaining_functions.push(fun); + } } else { - if (blocks.find(jmptarget) == blocks.end()) - remaining_blocks.push(new LLVMBasicBlock(jmptarget)); + if (blocks.find(jmptarget) == blocks.end()) { + LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget); + blocks.insert(std::make_pair(block->getStartAddress(), block)); + remaining_blocks.push(block); + } if (MIA->isConditionalBranch(inst)) { jmptarget = base_address + current_address + inst_size; - if (blocks.find(jmptarget) == blocks.end()) + if (blocks.find(jmptarget) == blocks.end()) { + LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget); + blocks.insert(std::make_pair(block->getStartAddress(), block)); remaining_blocks.push(new LLVMBasicBlock(jmptarget)); } } @@ -207,8 +223,9 @@ void LLVMDisassembler::disassemble() { if (inst_size == 0 || MIA->isTerminator(inst) || MIA->isBranch(inst)) { current_block->setEndAddress(current_address + base_address); - blocks.insert(std::make_pair(current_block->getStartAddress(), current_block)); - LOG4CXX_DEBUG(logger, "Finished Block at " << current_block->getEndAddress()); + LOG4CXX_DEBUG(logger, "Finished Block at " << std::hex << + current_block->getEndAddress()); + } break; } current_address += inst_size; @@ -247,6 +264,15 @@ void LLVMDisassembler::readSections() { } -BasicBlock * LLVMDisassembler::generateControlFlowGraph(uint64_t address) { +void LLVMDisassembler::forEachFunction(std::function callback) { + std::for_each(functions.begin(), functions.end(), + [&](std::pair x) { + callback(x.first, x.second); + }); +} + + + +void LLVMDisassembler::generateControlFlowGraph(uint64_t address) { } diff --git a/src/disassembler/llvm/LLVMDisassembler.hxx b/src/disassembler/llvm/LLVMDisassembler.hxx index 4f58a0e..87dbefa 100644 --- a/src/disassembler/llvm/LLVMDisassembler.hxx +++ b/src/disassembler/llvm/LLVMDisassembler.hxx @@ -8,6 +8,8 @@ #include "include_llvm.hxx" #include "disassembler/Disassembler.hxx" +#include "disassembler/BasicBlock.hxx" +#include "disassembler/Function.hxx" #include "disassembler/llvm/LLVMBasicBlock.hxx" @@ -20,9 +22,7 @@ public: void getSymbols(); uint64_t entryAddress(); - void forEachInstruction(const std::string& name, - std::function callback) - {} + void forEachFunction(std::function callback); BasicBlock * generateControlFlowGraph(const std::string& name); BasicBlock * generateControlFlowGraph(uint64_t address); @@ -38,7 +38,8 @@ private: void readSections(); log4cxx::LoggerPtr logger; - std::map blocks; + std::map blocks; + std::map functions; llvm::Triple triple; std::shared_ptr binary; diff --git a/src/disassembler/llvm/LLVMFunction.hxx b/src/disassembler/llvm/LLVMFunction.hxx index 0ac3ead..3bc813a 100644 --- a/src/disassembler/llvm/LLVMFunction.hxx +++ b/src/disassembler/llvm/LLVMFunction.hxx @@ -6,13 +6,8 @@ class LLVMFunction : public Function { public: LLVMFunction(const std::string& name, uint64_t start_address) - :Function(name) - , start_address(start_address) { - } - - uint64_t getStartAddress() const {return start_address;} + :Function(name, start_address) {} private: - uint64_t start_address; }; #endif -- 2.39.2