]> git.siccegge.de Git - frida/frida.git/blobdiff - src/disassembler/llvm/LLVMDisassembler.cxx
Avoid memory leak
[frida/frida.git] / src / disassembler / llvm / LLVMDisassembler.cxx
index 881c662961717041f2d6a9b8ab60b47cf7782f6f..7bce1cf6ab2107d4ff9d237149919769b8431a0f 100644 (file)
@@ -1,4 +1,9 @@
 #include "disassembler/llvm/LLVMDisassembler.hxx"
+#include "disassembler/llvm/LLVMBasicBlock.hxx"
+#include "disassembler/llvm/LLVMFunction.hxx"
+
+#include <stack>
+#include <algorithm>
 
 using namespace llvm;
 using namespace llvm::object;
@@ -8,7 +13,6 @@ using namespace llvm::object;
  * ist sondern z.B. einfach nur Instruktionen oder ein Bootsektor oder
  * foo
  */
-
 LLVMDisassembler::LLVMDisassembler(const std::string& filename)
     : Disassembler(filename)
     , logger(log4cxx::Logger::getLogger("LLVMDisassembler"))
@@ -31,13 +35,13 @@ LLVMDisassembler::LLVMDisassembler(const std::string& filename)
     triple.setArch(Triple::ArchType(o->getArch()));
     std::string tripleName(triple.getTriple());
 
-       LOG4CXX_INFO(logger, "Architecture " << tripleName);
+    LOG4CXX_INFO(logger, "Architecture " << tripleName);
 
 
-       std::string es;
+    std::string es;
     target = TargetRegistry::lookupTarget("", triple, es);
     if (!target) {
-               LOG4CXX_ERROR(logger, es);
+        LOG4CXX_ERROR(logger, es);
         return;
     }
 
@@ -94,7 +98,191 @@ LLVMDisassembler::LLVMDisassembler(const std::string& filename)
         return;
     }
 
+    IP->setPrintImmHex(llvm::HexStyle::C);
+    IP->setPrintImmHex(true);
+
     OwningPtr<MCObjectDisassembler> OD(
         new MCObjectDisassembler(*o, *DisAsm, *MIA));
     Mod.reset(OD->buildModule(false));
+
+    readSymbols();
+    readSections();
+    disassemble();
+}
+
+LLVMDisassembler::~LLVMDisassembler() {
+       std::for_each(functions.begin(), functions.end(),
+                                 [](std::pair<uint64_t,LLVMFunction*> it) {
+                                         delete it.second;
+                                 });
+       std::for_each(blocks.begin(), blocks.end(),
+                                 [](std::pair<uint64_t, LLVMBasicBlock*> it) {
+                                         delete it.second;
+                                 });
+}
+
+void LLVMDisassembler::disassemble() {
+    std::stack<LLVMFunction*> remaining_functions;
+    std::stack<LLVMBasicBlock*> remaining_blocks;
+    SectionRef text_section = sections[".text"];
+
+       for (auto x = symbols.begin(); x != symbols.end(); ++x) {
+               uint64_t result;
+               bool contains;
+               SymbolRef::Type symbol_type;
+
+/*
+ * TODO: If we jump into some Basic Block we need to split it there into two
+ */
+
+               if (text_section.containsSymbol(x->second, contains) || !contains)
+                       continue;
+
+               if (x->second.getType(symbol_type)
+                       || SymbolRef::ST_Function != symbol_type)
+                       continue;
+
+               if (!x->second.getAddress(result)) {
+                       LLVMFunction * fun = new LLVMFunction(x->first, result);
+                       remaining_functions.push(fun);
+                       functions.insert(std::make_pair(result, fun));
+                       LOG4CXX_DEBUG(logger, "Disasembling " << x->first);
+               }
+       }
+
+    StringRef bytes;
+    text_section.getContents(bytes);
+    StringRefMemoryObject ref(bytes);
+
+    while (remaining_functions.size()) {
+        LLVMFunction * current_function = remaining_functions.top();
+        remaining_functions.pop();
+
+        LOG4CXX_DEBUG(logger, "Handling function " << current_function->getName());
+
+        // if ("_start" != current_function->getName())
+        //  continue;
+
+               LLVMBasicBlock * block = new LLVMBasicBlock(current_function->getStartAddress());
+        remaining_blocks.push(block);
+               blocks.insert(std::make_pair(block->getStartAddress(), block));
+
+        while (remaining_blocks.size()) {
+            LLVMBasicBlock * current_block = remaining_blocks.top();
+            remaining_blocks.pop();
+
+            LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex << current_block->getStartAddress());
+
+            uint64_t inst_size;
+            uint64_t base_address;
+            text_section.getAddress(base_address);
+            uint64_t current_address = current_block->getStartAddress() - base_address;
+            while(true) {
+                MCInst inst;
+                std::string buf;
+                llvm::raw_string_ostream s(buf);
+
+                if(llvm::MCDisassembler::Success ==
+                   DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
+
+                    uint8_t bytes[inst_size+2];
+                    ref.readBytes(current_address, inst_size, bytes);
+                    s << '\t';
+                    for(uint8_t* cur = bytes; cur < bytes + inst_size; ++cur) {
+                        s.write_hex(*cur);
+                        s << ' ';
+                    }
+                    s << '\t';
+
+                    IP->printInst(&inst, s, "");
+
+                    LOG4CXX_DEBUG(logger, std::hex << current_address + base_address << s.str());
+
+                    uint64_t jmptarget;
+                    if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
+                                               jmptarget += base_address;
+                        if (!MIA->isIndirectBranch(inst)) {
+                            if (MIA->isCall(inst)) {
+                                                               if (functions.find(jmptarget) == functions.end()) {
+                                                                       std::stringstream s;
+                                                                       s << "<Unnamed 0x" << std::hex << jmptarget << ">";
+                                                                       LLVMFunction * fun = new LLVMFunction(s.str(), jmptarget);
+                                                                       functions.insert(std::make_pair(jmptarget, fun));
+                                                                       remaining_functions.push(fun);
+                                                               }
+                            } else {
+                                                               if (blocks.find(jmptarget) == blocks.end()) {
+                                                                       LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget);
+                                                                       blocks.insert(std::make_pair(block->getStartAddress(), block));
+                                                                       remaining_blocks.push(block);
+                                                               }
+                                if (MIA->isConditionalBranch(inst)) {
+                                                                       jmptarget = base_address + current_address + inst_size;
+                                                                       if (blocks.find(jmptarget) == blocks.end()) {
+                                                                               LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget);
+                                                                               blocks.insert(std::make_pair(block->getStartAddress(), block));
+                                                                               remaining_blocks.push(new LLVMBasicBlock(jmptarget));
+                                }
+                            }
+                        }
+                    }
+                } else {
+                    inst_size = 0;
+                }
+
+
+                if (inst_size == 0 || MIA->isTerminator(inst) || MIA->isBranch(inst)) {
+                    current_block->setEndAddress(current_address + base_address);
+                    LOG4CXX_DEBUG(logger, "Finished Block at " << std::hex << 
+                                                                 current_block->getEndAddress());
+                                                                       }
+                    break;
+                }
+                current_address += inst_size;
+            }
+        }
+        LOG4CXX_DEBUG(logger, "Finished function " << current_function->getName());
+    }
+}
+
+void LLVMDisassembler::readSymbols() {
+    error_code ec;
+    symbol_iterator si(o->symbol_begin()), se(o->symbol_end());
+    for (; si != se; ++si) {
+        StringRef name;
+        if ((ec = si->getName(name))) {
+            LOG4CXX_ERROR(logger, ec.message());
+            break;
+        }
+        LOG4CXX_DEBUG(logger, "Added symbol " << name.str());
+        symbols.insert(make_pair(name.str(), *si));
+    }
+}
+
+void LLVMDisassembler::readSections() {
+    error_code ec;
+    section_iterator i(o->section_begin()), e(o->section_end());
+    for (; i != e; ++i) {
+        StringRef name;
+        if ((ec = i->getName(name))) {
+            LOG4CXX_ERROR(logger, ec.message());
+            break;
+        }
+        LOG4CXX_DEBUG(logger, "Added section " << name.str());
+        sections.insert(make_pair(name.str(), *i));
+    }
+
+}
+
+void LLVMDisassembler::forEachFunction(std::function<void (uint64_t, Function*)> callback) {
+       std::for_each(functions.begin(), functions.end(),
+                                 [&](std::pair<uint64_t, LLVMFunction*> x) {
+                                         callback(x.first, x.second);
+                                 });
+}
+
+
+
+void LLVMDisassembler::generateControlFlowGraph(uint64_t address) {
+    // Empty body in this revision: nothing is generated and address is unused.
 }