Recursive disassembler
[frida/frida.git] / src / disassembler / llvm / LLVMDisassembler.cxx
index 881c662961717041f2d6a9b8ab60b47cf7782f6f..e60e0c28bf635e0f0ac21f6fd34232f2ec4d3251 100644 (file)
@@ -1,4 +1,9 @@
 #include "disassembler/llvm/LLVMDisassembler.hxx"
+#include "disassembler/llvm/LLVMBasicBlock.hxx"
+#include "disassembler/llvm/LLVMFunction.hxx"
+
+#include <stack>
+#include <algorithm>
 
 using namespace llvm;
 using namespace llvm::object;
@@ -31,13 +36,13 @@ LLVMDisassembler::LLVMDisassembler(const std::string& filename)
     triple.setArch(Triple::ArchType(o->getArch()));
     std::string tripleName(triple.getTriple());
 
-       LOG4CXX_INFO(logger, "Architecture " << tripleName);
+    LOG4CXX_INFO(logger, "Architecture " << tripleName);
 
 
-       std::string es;
+    std::string es;
     target = TargetRegistry::lookupTarget("", triple, es);
     if (!target) {
-               LOG4CXX_ERROR(logger, es);
+        LOG4CXX_ERROR(logger, es);
         return;
     }
 
@@ -94,7 +99,155 @@ LLVMDisassembler::LLVMDisassembler(const std::string& filename)
         return;
     }
 
+    IP->setPrintImmHex(llvm::HexStyle::C);
+    IP->setPrintImmHex(true);
+
     OwningPtr<MCObjectDisassembler> OD(
         new MCObjectDisassembler(*o, *DisAsm, *MIA));
     Mod.reset(OD->buildModule(false));
+
+    readSymbols();
+    readSections();
+    disassemble();
+}
+
+
+void LLVMDisassembler::disassemble() {
+    std::stack<LLVMFunction*> remaining_functions;
+    std::stack<LLVMBasicBlock*> remaining_blocks;
+    SectionRef text_section = sections[".text"];
+
+    std::for_each(symbols.begin(), symbols.end(),
+                  [&](std::pair<const std::string, SymbolRef> x) {
+                      uint64_t result;
+                      bool contains;
+                      SymbolRef::Type symbol_type;
+
+                      if (text_section.containsSymbol(x.second, contains) || !contains)
+                          return;
+
+                      if (x.second.getType(symbol_type)
+                          || SymbolRef::ST_Function != symbol_type)
+                          return;
+
+                      if (!x.second.getAddress(result)) {
+                          remaining_functions.push(new LLVMFunction(x.first, result));
+                          LOG4CXX_DEBUG(logger, "Disasembling " << x.first);
+                      }
+                  });
+
+    StringRef bytes;
+    text_section.getContents(bytes);
+    StringRefMemoryObject ref(bytes);
+
+    while (remaining_functions.size()) {
+        LLVMFunction * current_function = remaining_functions.top();
+        remaining_functions.pop();
+
+        LOG4CXX_INFO(logger, "Handling function " << current_function->getName());
+
+        // if ("_start" != current_function->getName())
+        //  continue;
+
+        remaining_blocks.push(new LLVMBasicBlock(current_function->getStartAddress()));
+
+        while (remaining_blocks.size()) {
+            LLVMBasicBlock * current_block = remaining_blocks.top();
+            remaining_blocks.pop();
+
+            LOG4CXX_INFO(logger, "Handling Block starting at " << std::hex << current_block->getStartAddress());
+
+            uint64_t inst_size;
+            uint64_t base_address;
+            text_section.getAddress(base_address);
+            uint64_t current_address = current_block->getStartAddress() - base_address;
+            while(true) {
+                MCInst inst;
+                std::string buf;
+                llvm::raw_string_ostream s(buf);
+
+                if(llvm::MCDisassembler::Success ==
+                   DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
+                    LOG4CXX_DEBUG(logger, "Inst Size " << inst_size);
+
+                    uint8_t bytes[inst_size+2];
+                    ref.readBytes(current_address, inst_size, bytes);
+                    s << '\t';
+                    for(uint8_t* cur = bytes; cur < bytes + inst_size; ++cur) {
+                        s.write_hex(*cur);
+                        s << ' ';
+                    }
+                    s << '\t';
+
+                    IP->printInst(&inst, s, "");
+
+                    LOG4CXX_DEBUG(logger, std::hex << current_address + base_address << s.str());
+
+                    uint64_t jmptarget;
+                    if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
+                                               jmptarget += base_address;
+                        if (!MIA->isIndirectBranch(inst)) {
+                            if (MIA->isCall(inst)) {
+                                                               if (blocks.find(jmptarget) == blocks.end())
+                                                                       remaining_functions.push(new LLVMFunction("<Unnamed>", jmptarget));
+                            } else {
+                                                               if (blocks.find(jmptarget) == blocks.end())
+                                remaining_blocks.push(new LLVMBasicBlock(jmptarget));
+                                if (MIA->isConditionalBranch(inst)) {
+                                                                       jmptarget = base_address + current_address + inst_size;
+                                                                       if (blocks.find(jmptarget) == blocks.end())
+                                                                               remaining_blocks.push(new LLVMBasicBlock(jmptarget));
+                                }
+                            }
+                        }
+                    }
+                } else {
+                    inst_size = 0;
+                }
+
+
+                if (inst_size == 0 || MIA->isTerminator(inst) || MIA->isBranch(inst)) {
+                    current_block->setEndAddress(current_address + base_address);
+                                       blocks.insert(std::make_pair(current_block->getStartAddress(), current_block));
+                    LOG4CXX_INFO(logger, "Finished Block at " << current_block->getEndAddress());
+                    break;
+                }
+                current_address += inst_size;
+            }
+        }
+        LOG4CXX_INFO(logger, "Finished function " << current_function->getName());
+    }
+}
+
+void LLVMDisassembler::readSymbols() {
+    error_code ec;
+    symbol_iterator si(o->symbol_begin()), se(o->symbol_end());
+    for (; si != se; ++si) {
+        StringRef name;
+        if ((ec = si->getName(name))) {
+            LOG4CXX_ERROR(logger, ec.message());
+            break;
+        }
+        LOG4CXX_DEBUG(logger, "Added symbol " << name.str());
+        symbols.insert(make_pair(name.str(), *si));
+    }
+}
+
+void LLVMDisassembler::readSections() {
+    error_code ec;
+    section_iterator i(o->section_begin()), e(o->section_end());
+    for (; i != e; ++i) {
+        StringRef name;
+        if ((ec = i->getName(name))) {
+            LOG4CXX_ERROR(logger, ec.message());
+            break;
+        }
+        LOG4CXX_DEBUG(logger, "Added section " << name.str());
+        sections.insert(make_pair(name.str(), *i));
+    }
+
+}
+
+/**
+ * Placeholder for control-flow-graph construction; not implemented yet.
+ *
+ * @param address currently unused
+ * @return always a null pointer
+ */
+BasicBlock * LLVMDisassembler::generateControlFlowGraph(uint64_t address) {
+    // Flowing off the end of a non-void function is undefined behaviour, so
+    // return an explicit "no graph" value until this is implemented.
+    return nullptr;
 }