From d84d4109b1f5bd5f87732b7e49860639d963a553 Mon Sep 17 00:00:00 2001 From: Christoph Egger Date: Mon, 26 May 2014 15:23:23 +0200 Subject: [PATCH] Recursive disassembler Disassembler is properly recursive. Doesn't yet really build the necessary data structure. Also doesn't handle jump targets in the middle of preexisting basic blocks properly. --- src/disassembler/BasicBlock.hxx | 35 +++++ src/disassembler/Disassembler.cxx | 3 + src/disassembler/Disassembler.hxx | 3 +- src/disassembler/Function.hxx | 23 +++ src/disassembler/llvm/LLVMBasicBlock.hxx | 17 +++ src/disassembler/llvm/LLVMDisassembler.cxx | 159 ++++++++++++++++++++- src/disassembler/llvm/LLVMDisassembler.hxx | 12 +- src/disassembler/llvm/LLVMFunction.hxx | 21 +++ 8 files changed, 268 insertions(+), 5 deletions(-) create mode 100644 src/disassembler/BasicBlock.hxx create mode 100644 src/disassembler/Function.hxx create mode 100644 src/disassembler/llvm/LLVMBasicBlock.hxx create mode 100644 src/disassembler/llvm/LLVMFunction.hxx diff --git a/src/disassembler/BasicBlock.hxx b/src/disassembler/BasicBlock.hxx new file mode 100644 index 0000000..c994d9e --- /dev/null +++ b/src/disassembler/BasicBlock.hxx @@ -0,0 +1,35 @@ +#ifndef INCLUDE__BasicBlock_hxx +#define INCLUDE__BasicBlock_hxx + +class BasicBlock { +public: + BasicBlock() {} + + uint64_t getStartAddress() const { + return start_address; + } + + uint64_t getEndAddress() const { + return end_address; + } + + BasicBlock * const * getNextBlocks() const { + return next_blocks; + } + + void setStartAddress(uint64_t address) { + start_address = address; + } + + void setEndAddress(uint64_t address) { + end_address = address; + } + +private: + uint64_t start_address; + uint64_t end_address; + + BasicBlock * next_blocks[2]; +}; + +#endif diff --git a/src/disassembler/Disassembler.cxx b/src/disassembler/Disassembler.cxx index 968ebe2..e3a48a2 100644 --- a/src/disassembler/Disassembler.cxx +++ b/src/disassembler/Disassembler.cxx @@ -1,3 +1,6 @@ #include 
"disassembler/Disassembler.hxx" +BasicBlock * Disassembler::generateControlFlowGraph(uint64_t address) { + +} diff --git a/src/disassembler/Disassembler.hxx b/src/disassembler/Disassembler.hxx index 9986ac8..1408326 100644 --- a/src/disassembler/Disassembler.hxx +++ b/src/disassembler/Disassembler.hxx @@ -4,7 +4,8 @@ #include #include -class BasicBlock {}; +#include "disassembler/BasicBlock.hxx" + class Disassembler { public: diff --git a/src/disassembler/Function.hxx b/src/disassembler/Function.hxx new file mode 100644 index 0000000..6cad001 --- /dev/null +++ b/src/disassembler/Function.hxx @@ -0,0 +1,23 @@ +#ifndef INCLUDE__Function_hxx +#define INCLUDE__Function_hxx + +#include "disassembler/BasicBlock.hxx" + +class Function { +public: + Function(const std::string& name) { + this->name = name; + } + + std::string getName() const { + return name; + } + + BasicBlock * getEntry(); + +private: + std::string name; + BasicBlock * start; +}; + +#endif diff --git a/src/disassembler/llvm/LLVMBasicBlock.hxx b/src/disassembler/llvm/LLVMBasicBlock.hxx new file mode 100644 index 0000000..ede89d6 --- /dev/null +++ b/src/disassembler/llvm/LLVMBasicBlock.hxx @@ -0,0 +1,17 @@ +#ifndef INCLUDE__LLVMBasicBlock_hxx +#define INCLUDE__LLVMBasicBlock_hxx + +#include "disassembler/BasicBlock.hxx" + +class LLVMBasicBlock : public BasicBlock { +public: + LLVMBasicBlock(uint64_t start_address) { + setStartAddress(start_address); + } + + +private: +}; + + +#endif diff --git a/src/disassembler/llvm/LLVMDisassembler.cxx b/src/disassembler/llvm/LLVMDisassembler.cxx index 881c662..e60e0c2 100644 --- a/src/disassembler/llvm/LLVMDisassembler.cxx +++ b/src/disassembler/llvm/LLVMDisassembler.cxx @@ -1,4 +1,9 @@ #include "disassembler/llvm/LLVMDisassembler.hxx" +#include "disassembler/llvm/LLVMBasicBlock.hxx" +#include "disassembler/llvm/LLVMFunction.hxx" + +#include +#include using namespace llvm; using namespace llvm::object; @@ -31,13 +36,13 @@ LLVMDisassembler::LLVMDisassembler(const 
std::string& filename) triple.setArch(Triple::ArchType(o->getArch())); std::string tripleName(triple.getTriple()); - LOG4CXX_INFO(logger, "Architecture " << tripleName); + LOG4CXX_INFO(logger, "Architecture " << tripleName); - std::string es; + std::string es; target = TargetRegistry::lookupTarget("", triple, es); if (!target) { - LOG4CXX_ERROR(logger, es); + LOG4CXX_ERROR(logger, es); return; } @@ -94,7 +99,155 @@ LLVMDisassembler::LLVMDisassembler(const std::string& filename) return; } + IP->setPrintImmHex(llvm::HexStyle::C); + IP->setPrintImmHex(true); + OwningPtr OD( new MCObjectDisassembler(*o, *DisAsm, *MIA)); Mod.reset(OD->buildModule(false)); + + readSymbols(); + readSections(); + disassemble(); +} + + +void LLVMDisassembler::disassemble() { + std::stack remaining_functions; + std::stack remaining_blocks; + SectionRef text_section = sections[".text"]; + + std::for_each(symbols.begin(), symbols.end(), + [&](std::pair x) { + uint64_t result; + bool contains; + SymbolRef::Type symbol_type; + + if (text_section.containsSymbol(x.second, contains) || !contains) + return; + + if (x.second.getType(symbol_type) + || SymbolRef::ST_Function != symbol_type) + return; + + if (!x.second.getAddress(result)) { + remaining_functions.push(new LLVMFunction(x.first, result)); + LOG4CXX_DEBUG(logger, "Disasembling " << x.first); + } + }); + + StringRef bytes; + text_section.getContents(bytes); + StringRefMemoryObject ref(bytes); + + while (remaining_functions.size()) { + LLVMFunction * current_function = remaining_functions.top(); + remaining_functions.pop(); + + LOG4CXX_INFO(logger, "Handling function " << current_function->getName()); + + // if ("_start" != current_function->getName()) + // continue; + + remaining_blocks.push(new LLVMBasicBlock(current_function->getStartAddress())); + + while (remaining_blocks.size()) { + LLVMBasicBlock * current_block = remaining_blocks.top(); + remaining_blocks.pop(); + + LOG4CXX_INFO(logger, "Handling Block starting at " << std::hex << 
current_block->getStartAddress()); + + uint64_t inst_size; + uint64_t base_address; + text_section.getAddress(base_address); + uint64_t current_address = current_block->getStartAddress() - base_address; + while(true) { + MCInst inst; + std::string buf; + llvm::raw_string_ostream s(buf); + + if(llvm::MCDisassembler::Success == + DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) { + LOG4CXX_DEBUG(logger, "Inst Size " << inst_size); + + uint8_t bytes[inst_size+2]; + ref.readBytes(current_address, inst_size, bytes); + s << '\t'; + for(uint8_t* cur = bytes; cur < bytes + inst_size; ++cur) { + s.write_hex(*cur); + s << ' '; + } + s << '\t'; + + IP->printInst(&inst, s, ""); + + LOG4CXX_DEBUG(logger, std::hex << current_address + base_address << s.str()); + + uint64_t jmptarget; + if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) { + jmptarget += base_address; + if (!MIA->isIndirectBranch(inst)) { + if (MIA->isCall(inst)) { + if (blocks.find(jmptarget) == blocks.end()) + remaining_functions.push(new LLVMFunction("", jmptarget)); + } else { + if (blocks.find(jmptarget) == blocks.end()) + remaining_blocks.push(new LLVMBasicBlock(jmptarget)); + if (MIA->isConditionalBranch(inst)) { + jmptarget = base_address + current_address + inst_size; + if (blocks.find(jmptarget) == blocks.end()) + remaining_blocks.push(new LLVMBasicBlock(jmptarget)); + } + } + } + } + } else { + inst_size = 0; + } + + + if (inst_size == 0 || MIA->isTerminator(inst) || MIA->isBranch(inst)) { + current_block->setEndAddress(current_address + base_address); + blocks.insert(std::make_pair(current_block->getStartAddress(), current_block)); + LOG4CXX_INFO(logger, "Finished Block at " << current_block->getEndAddress()); + break; + } + current_address += inst_size; + } + } + LOG4CXX_INFO(logger, "Finished function " << current_function->getName()); + } +} + +void LLVMDisassembler::readSymbols() { + error_code ec; + symbol_iterator si(o->symbol_begin()), 
se(o->symbol_end()); + for (; si != se; ++si) { + StringRef name; + if ((ec = si->getName(name))) { + LOG4CXX_ERROR(logger, ec.message()); + break; + } + LOG4CXX_DEBUG(logger, "Added symbol " << name.str()); + symbols.insert(make_pair(name.str(), *si)); + } +} + +void LLVMDisassembler::readSections() { + error_code ec; + section_iterator i(o->section_begin()), e(o->section_end()); + for (; i != e; ++i) { + StringRef name; + if ((ec = i->getName(name))) { + LOG4CXX_ERROR(logger, ec.message()); + break; + } + LOG4CXX_DEBUG(logger, "Added section " << name.str()); + sections.insert(make_pair(name.str(), *i)); + } + +} + +BasicBlock * LLVMDisassembler::generateControlFlowGraph(uint64_t address) { + } diff --git a/src/disassembler/llvm/LLVMDisassembler.hxx b/src/disassembler/llvm/LLVMDisassembler.hxx index ff81be5..4f58a0e 100644 --- a/src/disassembler/llvm/LLVMDisassembler.hxx +++ b/src/disassembler/llvm/LLVMDisassembler.hxx @@ -2,11 +2,13 @@ #define INCLUDE__LLVMDisassembler_hxx #include +#include #include #include "include_llvm.hxx" #include "disassembler/Disassembler.hxx" +#include "disassembler/llvm/LLVMBasicBlock.hxx" class LLVMDisassembler @@ -18,7 +20,9 @@ public: void getSymbols(); uint64_t entryAddress(); - void forEachInstruction(const std::string& name, std::function callback) {} + void forEachInstruction(const std::string& name, + std::function callback) + {} BasicBlock * generateControlFlowGraph(const std::string& name); BasicBlock * generateControlFlowGraph(uint64_t address); @@ -28,7 +32,13 @@ protected: bool isJump(uint64_t address) {return false;} private: + void disassemble(); + + void readSymbols(); + void readSections(); + log4cxx::LoggerPtr logger; + std::map blocks; llvm::Triple triple; std::shared_ptr binary; diff --git a/src/disassembler/llvm/LLVMFunction.hxx b/src/disassembler/llvm/LLVMFunction.hxx new file mode 100644 index 0000000..0ac3ead --- /dev/null +++ b/src/disassembler/llvm/LLVMFunction.hxx @@ -0,0 +1,21 @@ +#ifndef 
INCLUDE__LLVMFunction_hxx +#define INCLUDE__LLVMFunction_hxx + +#include "disassembler/Function.hxx" + +class LLVMFunction : public Function { +public: + LLVMFunction(const std::string& name, uint64_t start_address) + :Function(name) + , start_address(start_address) { + } + + uint64_t getStartAddress() const {return start_address;} +private: + uint64_t start_address; +}; + +#endif + + + -- 2.39.2