Recursive disassembler
author Christoph Egger <siccegge@cs.fau.de>
Mon, 26 May 2014 13:23:23 +0000 (15:23 +0200)
committer Christoph Egger <siccegge@cs.fau.de>
Mon, 26 May 2014 13:23:23 +0000 (15:23 +0200)
The disassembler is now properly recursive. It doesn't yet really build
the necessary data structure. It also doesn't properly handle jump
targets in the middle of preexisting basic blocks.

src/disassembler/BasicBlock.hxx [new file with mode: 0644]
src/disassembler/Disassembler.cxx
src/disassembler/Disassembler.hxx
src/disassembler/Function.hxx [new file with mode: 0644]
src/disassembler/llvm/LLVMBasicBlock.hxx [new file with mode: 0644]
src/disassembler/llvm/LLVMDisassembler.cxx
src/disassembler/llvm/LLVMDisassembler.hxx
src/disassembler/llvm/LLVMFunction.hxx [new file with mode: 0644]

diff --git a/src/disassembler/BasicBlock.hxx b/src/disassembler/BasicBlock.hxx
new file mode 100644 (file)
index 0000000..c994d9e
--- /dev/null
@@ -0,0 +1,35 @@
+#ifndef INCLUDE__BasicBlock_hxx
+#define INCLUDE__BasicBlock_hxx
+
+class BasicBlock {
+public:
+    BasicBlock() {}
+
+    uint64_t getStartAddress() const {
+        return start_address;
+    }
+
+    uint64_t getEndAddress() const {
+        return end_address;
+    }
+
+       BasicBlock * const * getNextBlocks() const {
+               return next_blocks;
+       }
+
+       void setStartAddress(uint64_t address) {
+               start_address = address;
+       }
+
+       void setEndAddress(uint64_t address) {
+               end_address = address;
+       }
+
+private:
+    uint64_t start_address;
+    uint64_t end_address;
+
+    BasicBlock * next_blocks[2];
+};
+
+#endif
index 968ebe2522bb765d958dc55346dcb74ecde690c5..e3a48a262ce6c30e82512b22f3f3d8199ffde171 100644 (file)
@@ -1,3 +1,6 @@
 #include "disassembler/Disassembler.hxx"
 
 
+/**
+ * Placeholder for control-flow-graph construction starting at @p address.
+ *
+ * No graph is built yet, so the argument is ignored and a null pointer is
+ * returned; the explicit return keeps a call from running off the end of
+ * a non-void function.
+ */
+BasicBlock * Disassembler::generateControlFlowGraph(uint64_t address) {
+    return nullptr;
+}
index 9986ac8e3a0fd4f61f94c25b3da364b36d1a901b..14083262f5027ac7b22057c009d4dfb1feeb8ff0 100644 (file)
@@ -4,7 +4,8 @@
 #include <string>
 #include <functional>
 
-class BasicBlock {};
+#include "disassembler/BasicBlock.hxx"
+
 
 class Disassembler {
 public:
diff --git a/src/disassembler/Function.hxx b/src/disassembler/Function.hxx
new file mode 100644 (file)
index 0000000..6cad001
--- /dev/null
@@ -0,0 +1,23 @@
+#ifndef INCLUDE__Function_hxx
+#define INCLUDE__Function_hxx
+
+#include "disassembler/BasicBlock.hxx"
+
+class Function {
+public:
+       Function(const std::string& name) {
+               this->name = name;
+       }
+
+       std::string getName() const {
+               return name;
+       }
+
+       BasicBlock * getEntry();
+
+private:
+       std::string name;
+       BasicBlock * start;
+};
+
+#endif
diff --git a/src/disassembler/llvm/LLVMBasicBlock.hxx b/src/disassembler/llvm/LLVMBasicBlock.hxx
new file mode 100644 (file)
index 0000000..ede89d6
--- /dev/null
@@ -0,0 +1,17 @@
+#ifndef INCLUDE__LLVMBasicBlock_hxx
+#define INCLUDE__LLVMBasicBlock_hxx
+
+#include "disassembler/BasicBlock.hxx"
+
+/**
+ * BasicBlock subclass whose constructor takes the block's start address.
+ */
+class LLVMBasicBlock : public BasicBlock {
+public:
+    /**
+     * Default-constructs the base and records @p start_address through the
+     * inherited setStartAddress(); the end address is not touched here.
+     */
+    LLVMBasicBlock(uint64_t start_address)
+        : BasicBlock() {
+        this->setStartAddress(start_address);
+    }
+};
+
+
+#endif
index 881c662961717041f2d6a9b8ab60b47cf7782f6f..e60e0c28bf635e0f0ac21f6fd34232f2ec4d3251 100644 (file)
@@ -1,4 +1,9 @@
 #include "disassembler/llvm/LLVMDisassembler.hxx"
+#include "disassembler/llvm/LLVMBasicBlock.hxx"
+#include "disassembler/llvm/LLVMFunction.hxx"
+
+#include <stack>
+#include <algorithm>
 
 using namespace llvm;
 using namespace llvm::object;
@@ -31,13 +36,13 @@ LLVMDisassembler::LLVMDisassembler(const std::string& filename)
     triple.setArch(Triple::ArchType(o->getArch()));
     std::string tripleName(triple.getTriple());
 
-       LOG4CXX_INFO(logger, "Architecture " << tripleName);
+    LOG4CXX_INFO(logger, "Architecture " << tripleName);
 
 
-       std::string es;
+    std::string es;
     target = TargetRegistry::lookupTarget("", triple, es);
     if (!target) {
-               LOG4CXX_ERROR(logger, es);
+        LOG4CXX_ERROR(logger, es);
         return;
     }
 
@@ -94,7 +99,155 @@ LLVMDisassembler::LLVMDisassembler(const std::string& filename)
         return;
     }
 
+    IP->setPrintImmHex(llvm::HexStyle::C);
+    IP->setPrintImmHex(true);
+
     OwningPtr<MCObjectDisassembler> OD(
         new MCObjectDisassembler(*o, *DisAsm, *MIA));
     Mod.reset(OD->buildModule(false));
+
+    readSymbols();
+    readSections();
+    disassemble();
+}
+
+
+// Work-list driven disassembly of the ".text" section.
+//
+// 1. Every symbol that lies in ".text", has type ST_Function and yields an
+//    address is queued as an LLVMFunction.
+// 2. For each queued function a block starting at the function's start
+//    address is queued; blocks are decoded instruction by instruction until
+//    a terminator/branch is hit or decoding fails.
+// 3. While decoding, evaluable non-indirect call targets are queued as new
+//    functions, and other evaluable branch targets (plus the fall-through
+//    address after a conditional branch) are queued as new blocks.
+// Finished blocks are recorded in `blocks`, keyed by their start address.
+//
+// NOTE(review): within this function the popped LLVMFunction objects are
+// neither stored nor deleted, and a block pointer whose key already exists
+// in `blocks` is not released either -- confirm who is meant to own them.
+void LLVMDisassembler::disassemble() {
+    std::stack<LLVMFunction*> remaining_functions;
+    std::stack<LLVMBasicBlock*> remaining_blocks;
+    // NOTE(review): operator[] is used for the lookup; if `sections` is a
+    // std::map this presumably default-inserts when ".text" is absent -- confirm.
+    SectionRef text_section = sections[".text"];
+
+    // Seed the function work list from the symbol table. The element is taken
+    // by value, so each (name, SymbolRef) pair is copied per iteration.
+    std::for_each(symbols.begin(), symbols.end(),
+                  [&](std::pair<const std::string, SymbolRef> x) {
+                      uint64_t result;
+                      bool contains;
+                      SymbolRef::Type symbol_type;
+
+                      // A truthy return from the query is treated as failure;
+                      // symbols outside ".text" are skipped as well.
+                      if (text_section.containsSymbol(x.second, contains) || !contains)
+                          return;
+
+                      // Only symbols typed as functions are of interest.
+                      if (x.second.getType(symbol_type)
+                          || SymbolRef::ST_Function != symbol_type)
+                          return;
+
+                      // Falsy return == address lookup succeeded.
+                      if (!x.second.getAddress(result)) {
+                          remaining_functions.push(new LLVMFunction(x.first, result));
+                          LOG4CXX_DEBUG(logger, "Disasembling " << x.first);
+                      }
+                  });
+
+    // Wrap the raw section contents so the decoder can read from them.
+    // The result of getContents() is not checked.
+    StringRef bytes;
+    text_section.getContents(bytes);
+    StringRefMemoryObject ref(bytes);
+
+    while (remaining_functions.size()) {
+        LLVMFunction * current_function = remaining_functions.top();
+        remaining_functions.pop();
+
+        LOG4CXX_INFO(logger, "Handling function " << current_function->getName());
+
+        // if ("_start" != current_function->getName())
+        //  continue;
+
+        // The function's entry block is the first block to decode.
+        remaining_blocks.push(new LLVMBasicBlock(current_function->getStartAddress()));
+
+        while (remaining_blocks.size()) {
+            LLVMBasicBlock * current_block = remaining_blocks.top();
+            remaining_blocks.pop();
+
+            LOG4CXX_INFO(logger, "Handling Block starting at " << std::hex << current_block->getStartAddress());
+
+            uint64_t inst_size;
+            uint64_t base_address;
+            text_section.getAddress(base_address);
+            // current_address is an offset into the section contents, i.e. the
+            // block's start address minus the section's base address.
+            uint64_t current_address = current_block->getStartAddress() - base_address;
+            while(true) {
+                MCInst inst;
+                std::string buf;
+                llvm::raw_string_ostream s(buf);
+
+                if(llvm::MCDisassembler::Success ==
+                   DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
+                    LOG4CXX_DEBUG(logger, "Inst Size " << inst_size);
+
+                    // NOTE(review): an array with a run-time bound is a compiler
+                    // extension, not standard C++.
+                    uint8_t bytes[inst_size+2];
+                    ref.readBytes(current_address, inst_size, bytes);
+                    // Build "<tab>hex bytes<tab>mnemonic" for the debug log line.
+                    s << '\t';
+                    for(uint8_t* cur = bytes; cur < bytes + inst_size; ++cur) {
+                        s.write_hex(*cur);
+                        s << ' ';
+                    }
+                    s << '\t';
+
+                    IP->printInst(&inst, s, "");
+
+                    LOG4CXX_DEBUG(logger, std::hex << current_address + base_address << s.str());
+
+                    uint64_t jmptarget;
+                    if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
+                        // The branch was evaluated with the section-relative
+                        // address, so add the base back before using the target.
+                                               jmptarget += base_address;
+                        if (!MIA->isIndirectBranch(inst)) {
+                            if (MIA->isCall(inst)) {
+                                // Call target: queue it as a function unless a
+                                // finished block already starts there.
+                                                               if (blocks.find(jmptarget) == blocks.end())
+                                                                       remaining_functions.push(new LLVMFunction("<Unnamed>", jmptarget));
+                            } else {
+                                // NOTE(review): this `if` guards only the push on
+                                // the next line; the conditional-branch check below
+                                // runs regardless of it.
+                                                               if (blocks.find(jmptarget) == blocks.end())
+                                remaining_blocks.push(new LLVMBasicBlock(jmptarget));
+                                if (MIA->isConditionalBranch(inst)) {
+                                    // Fall-through successor: the address right
+                                    // after this instruction.
+                                                                       jmptarget = base_address + current_address + inst_size;
+                                                                       if (blocks.find(jmptarget) == blocks.end())
+                                                                               remaining_blocks.push(new LLVMBasicBlock(jmptarget));
+                                }
+                            }
+                        }
+                    }
+                } else {
+                    // Decoding failed: a size of 0 ends the block below.
+                    inst_size = 0;
+                }
+
+
+                // End the block on decode failure, a terminator or any branch.
+                // The stored end address is that of the last instruction looked
+                // at (its start), since current_address has not been advanced yet.
+                if (inst_size == 0 || MIA->isTerminator(inst) || MIA->isBranch(inst)) {
+                    current_block->setEndAddress(current_address + base_address);
+                    // Only finished blocks are recorded, so a target seen again
+                    // before its block finishes is not found by the lookups above.
+                                       blocks.insert(std::make_pair(current_block->getStartAddress(), current_block));
+                    LOG4CXX_INFO(logger, "Finished Block at " << current_block->getEndAddress());
+                    break;
+                }
+                current_address += inst_size;
+            }
+        }
+        LOG4CXX_INFO(logger, "Finished function " << current_function->getName());
+    }
+}
+
+void LLVMDisassembler::readSymbols() {
+    error_code ec;
+    symbol_iterator si(o->symbol_begin()), se(o->symbol_end());
+    for (; si != se; ++si) {
+        StringRef name;
+        if ((ec = si->getName(name))) {
+            LOG4CXX_ERROR(logger, ec.message());
+            break;
+        }
+        LOG4CXX_DEBUG(logger, "Added symbol " << name.str());
+        symbols.insert(make_pair(name.str(), *si));
+    }
+}
+
+void LLVMDisassembler::readSections() {
+    error_code ec;
+    section_iterator i(o->section_begin()), e(o->section_end());
+    for (; i != e; ++i) {
+        StringRef name;
+        if ((ec = i->getName(name))) {
+            LOG4CXX_ERROR(logger, ec.message());
+            break;
+        }
+        LOG4CXX_DEBUG(logger, "Added section " << name.str());
+        sections.insert(make_pair(name.str(), *i));
+    }
+
+}
+
+// Placeholder for control-flow-graph construction starting at `address`.
+// No graph is built yet: the argument is ignored and a null pointer is
+// returned, so a call no longer runs off the end of a non-void function.
+BasicBlock * LLVMDisassembler::generateControlFlowGraph(uint64_t address) {
+    return nullptr;
 }
index ff81be55385890a7ca642d9fa7d50a469ea001ac..4f58a0eb8450078758d5dd6486138cc8d500afda 100644 (file)
@@ -2,11 +2,13 @@
 #define INCLUDE__LLVMDisassembler_hxx
 
 #include <memory>
+#include <map>
 #include <log4cxx/logger.h>
 
 #include "include_llvm.hxx"
 
 #include "disassembler/Disassembler.hxx"
+#include "disassembler/llvm/LLVMBasicBlock.hxx"
 
 
 class LLVMDisassembler
@@ -18,7 +20,9 @@ public:
        void getSymbols();
        uint64_t entryAddress();
 
-    void forEachInstruction(const std::string& name, std::function<void (long, std::string, std::string)> callback) {}
+    // Stub: the body is empty, so neither `name` nor `callback` is used and
+    // the callback is never invoked.
+    void forEachInstruction(const std::string& name,
+                            std::function<void (long, std::string, std::string)> callback) {
+    }
 
        BasicBlock * generateControlFlowGraph(const std::string& name);
        BasicBlock * generateControlFlowGraph(uint64_t address);
@@ -28,7 +32,13 @@ protected:
     bool isJump(uint64_t address) {return false;}
 
 private:
+       void disassemble();
+
+       void readSymbols();
+       void readSections();
+
     log4cxx::LoggerPtr logger;
+       // Finished basic blocks keyed by their 64-bit start address. The key
+       // must be as wide as the addresses used for lookup and insertion; a
+       // narrower key would silently truncate them and make unrelated
+       // blocks collide.
+       std::map<uint64_t, LLVMBasicBlock*> blocks;
 
     llvm::Triple triple;
     std::shared_ptr<llvm::object::Binary> binary;
diff --git a/src/disassembler/llvm/LLVMFunction.hxx b/src/disassembler/llvm/LLVMFunction.hxx
new file mode 100644 (file)
index 0000000..0ac3ead
--- /dev/null
@@ -0,0 +1,21 @@
+#ifndef INCLUDE__LLVMFunction_hxx
+#define INCLUDE__LLVMFunction_hxx
+
+#include "disassembler/Function.hxx"
+
+class LLVMFunction : public Function {
+public:
+       LLVMFunction(const std::string& name, uint64_t start_address)
+               :Function(name)
+               , start_address(start_address) {
+       }
+
+       uint64_t getStartAddress() const {return start_address;}
+private:
+       uint64_t start_address;
+};
+
+#endif
+
+
+