]> git.siccegge.de Git - frida/frida.git/blobdiff - src/disassembler/llvm/LLVMDisassembler.cxx
Only disassemble the text segment
[frida/frida.git] / src / disassembler / llvm / LLVMDisassembler.cxx
index 881c662961717041f2d6a9b8ab60b47cf7782f6f..6b3402eb0b2414d56c4b415f7467e3c29d7e33cd 100644 (file)
@@ -1,14 +1,19 @@
 #include "disassembler/llvm/LLVMDisassembler.hxx"
+#include "disassembler/llvm/LLVMBasicBlock.hxx"
+#include "disassembler/llvm/LLVMFunction.hxx"
+
+#include <stack>
+#include <algorithm>
 
 using namespace llvm;
 using namespace llvm::object;
+using std::error_code;
 
 /*
  * TODO: fallback code in case the file is not an ELF/PE/COFF/Mach-O/..
  * binary but e.g. just raw instructions or a boot sector or something
  * similar
  */
-
 LLVMDisassembler::LLVMDisassembler(const std::string& filename)
     : Disassembler(filename)
     , logger(log4cxx::Logger::getLogger("LLVMDisassembler"))
@@ -31,13 +36,13 @@ LLVMDisassembler::LLVMDisassembler(const std::string& filename)
     triple.setArch(Triple::ArchType(o->getArch()));
     std::string tripleName(triple.getTriple());
 
-       LOG4CXX_INFO(logger, "Architecture " << tripleName);
+    LOG4CXX_INFO(logger, "Architecture " << tripleName);
 
 
-       std::string es;
+    std::string es;
     target = TargetRegistry::lookupTarget("", triple, es);
     if (!target) {
-               LOG4CXX_ERROR(logger, es);
+        LOG4CXX_ERROR(logger, es);
         return;
     }
 
@@ -68,24 +73,30 @@ LLVMDisassembler::LLVMDisassembler(const std::string& filename)
         return;
     }
 
-    DisAsm.reset(target->createMCDisassembler(*STI));
+    MOFI.reset(new MCObjectFileInfo);
+    MCContext Ctx(AsmInfo.get(), MRI.get(), MOFI.get());
+
+    DisAsm.reset(target->createMCDisassembler(*STI, Ctx));
     if (!DisAsm) {
         LOG4CXX_ERROR(logger, "no disassembler for target " << tripleName);
         return;
     }
-
-    MOFI.reset(new MCObjectFileInfo);
-    Ctx.reset(new MCContext(AsmInfo.get(), MRI.get(), MOFI.get()));
     RelInfo.reset(
-        target->createMCRelocationInfo(tripleName, *Ctx.get()));
+        target->createMCRelocationInfo(tripleName, Ctx));
     if (RelInfo) {
         Symzer.reset(
-            MCObjectSymbolizer::createObjectSymbolizer(*Ctx.get(), RelInfo, o));
+            MCObjectSymbolizer::createObjectSymbolizer(Ctx, std::move(RelInfo), o));
         if (Symzer)
-            DisAsm->setSymbolizer(Symzer);
+            DisAsm->setSymbolizer(std::move(Symzer));
     }
+    RelInfo.release();
+    Symzer.release();
 
     MIA.reset(target->createMCInstrAnalysis(MII.get()));
+    if (!MIA) {
+        LOG4CXX_ERROR(logger, "no instruction analysis for target " << tripleName);
+        return;
+    }
 
     int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
     IP.reset(target->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI));
@@ -94,7 +105,310 @@ LLVMDisassembler::LLVMDisassembler(const std::string& filename)
         return;
     }
 
-    OwningPtr<MCObjectDisassembler> OD(
+    IP->setPrintImmHex(llvm::HexStyle::C);
+    IP->setPrintImmHex(true);
+
+    std::unique_ptr<MCObjectDisassembler> OD(
         new MCObjectDisassembler(*o, *DisAsm, *MIA));
     Mod.reset(OD->buildModule(false));
+
+    readSymbols();
+    readSections();
+    disassemble();
+}
+
+/*
+ * Deletes every LLVMFunction stored in the functions map and every
+ * LLVMBasicBlock stored in the blocks map; both maps hold raw pointers.
+ */
+LLVMDisassembler::~LLVMDisassembler() {
+    for (auto& known_function : functions) {
+        delete known_function.second;
+    }
+    for (auto& known_block : blocks) {
+        delete known_block.second;
+    }
+}
+
+/*
+ * Returns the function registered for the given start address,
+ * creating and disassembling it first if it is not known yet.
+ *
+ * address: absolute start address of the function; it has to lie
+ *          inside the .text section
+ * name:    name for a newly created function; if empty a placeholder
+ *          of the form "<Unnamed 0x...>" is generated from the address
+ *
+ * Returns NULL if the address is outside of the .text section or if
+ * the section's address or size cannot be queried.
+ */
+Function* LLVMDisassembler::disassembleFunctionAt(uint64_t address, const std::string& name) {
+    SectionRef text_section = sections[".text"];
+    uint64_t base_address = 0;
+    uint64_t size = 0;
+
+    // A set error code means the out-parameters carry no usable value,
+    // so a failed query is handled like an address outside the section.
+    if (text_section.getAddress(base_address) || text_section.getSize(size)) {
+        return NULL;
+    }
+
+    // Compare the offset against the size so that base_address + size
+    // cannot wrap around.
+    if (address < base_address || address - base_address >= size) {
+        return NULL;
+    }
+
+    // Single lookup: reuse the iterator instead of find() + operator[].
+    auto known = functions.find(address);
+    if (known != functions.end()) {
+        return known->second;
+    }
+
+    std::string function_name(name);
+    if (function_name.empty()) {
+        std::stringstream s;
+        s << "<Unnamed 0x" << std::hex << address << ">";
+        function_name = s.str();
+    }
+
+    LLVMFunction * function = new LLVMFunction(function_name, address);
+    functions.insert(std::make_pair(address, function));
+
+    disassembleFunction(function);
+
+    return function;
+}
+
+/*
+ * Discovers the basic blocks reachable from the start address of the
+ * given function by decoding the .text section instruction by
+ * instruction.
+ *
+ * Blocks still to be decoded are kept on a stack. For every decoded
+ * instruction whose branch target can be evaluated and that is not an
+ * indirect branch:
+ *  - a call makes sure the target is disassembled as a function,
+ *  - any other branch records the target as successor 0 of the current
+ *    block and, for conditional branches, the address behind the
+ *    instruction as successor 1.
+ * A block ends at the first terminator, branch or undecodable byte.
+ *
+ * A block is only created and queued if no block with the same start
+ * address is registered yet. This also holds for the entry block, so
+ * no block object is allocated that the blocks map would then refuse.
+ */
+void LLVMDisassembler::disassembleFunction(LLVMFunction* function) {
+    std::stack<LLVMBasicBlock*> remaining_blocks;
+    SectionRef text_section = sections[".text"];
+    StringRef bytes;
+    text_section.getContents(bytes);
+    StringRefMemoryObject ref(bytes);
+
+    // The section address is the same for every block, query it once.
+    uint64_t base_address = 0;
+    text_section.getAddress(base_address);
+
+    // Registers a block for an absolute address and queues it for
+    // decoding unless a block starting there already exists.
+    auto enqueue_block = [&](uint64_t start_address) {
+        if (blocks.find(start_address) != blocks.end())
+            return;
+        LLVMBasicBlock * fresh = new LLVMBasicBlock(start_address, this);
+        blocks.insert(std::make_pair(fresh->getStartAddress(), fresh));
+        remaining_blocks.push(fresh);
+    };
+
+    LOG4CXX_DEBUG(logger, "Handling function " << function->getName());
+
+    enqueue_block(function->getStartAddress());
+
+    while (!remaining_blocks.empty()) {
+        LLVMBasicBlock * current_block = remaining_blocks.top();
+        remaining_blocks.pop();
+
+        LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex << current_block->getStartAddress());
+
+        uint64_t inst_size = 0;
+        // Instructions are decoded using offsets relative to the section start.
+        uint64_t current_address = current_block->getStartAddress() - base_address;
+        while(true) {
+            MCInst inst;
+
+            if(llvm::MCDisassembler::Success ==
+               DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
+                uint64_t jmptarget;
+
+                if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
+                    // The target was computed from a section offset.
+                    jmptarget += base_address;
+                    if (!MIA->isIndirectBranch(inst)) {
+                        if (MIA->isCall(inst)) {
+                            if (functions.find(jmptarget) == functions.end()) {
+                                disassembleFunctionAt(jmptarget);
+                            }
+                        } else {
+                            current_block->setNextBlock(0, jmptarget);
+                            enqueue_block(jmptarget);
+                            if (MIA->isConditionalBranch(inst)) {
+                                // Not-taken path: the instruction right behind the branch.
+                                const uint64_t fallthrough = base_address + current_address + inst_size;
+                                current_block->setNextBlock(1, fallthrough);
+                                enqueue_block(fallthrough);
+                            }
+                        }
+                    }
+                }
+            } else {
+                // Marks the decode failure for the end-of-block check below.
+                inst_size = 0;
+            }
+
+            if (inst_size == 0 || MIA->isTerminator(inst) || MIA->isBranch(inst)) {
+                current_block->setEndAddress(current_address + base_address + inst_size);
+                LOG4CXX_DEBUG(logger, "Finished Block at " << std::hex <<
+                              current_block->getEndAddress());
+                break;
+            }
+            current_address += inst_size;
+        }
+    }
+    LOG4CXX_DEBUG(logger, "Finished function " << function->getName());
+}
+
+/*
+ * Drives the disassembly of the whole binary:
+ *  1. every function symbol located in the .text section becomes a
+ *     function and is disassembled,
+ *  2. for ELF files the entry point read from the ELF header is
+ *     disassembled as well,
+ *  3. if no function is known afterwards, disassembly starts at the
+ *     beginning of the .text section,
+ *  4. finally blocks that are jumped into are split.
+ */
+void LLVMDisassembler::disassemble() {
+    SectionRef text_section = sections[".text"];
+    std::vector<LLVMFunction*> remaining_functions;
+
+    // Assume all function symbols actually start a real function
+    for (auto& symbol : symbols) {
+        uint64_t result = 0;
+        bool contains = false;
+        SymbolRef::Type symbol_type;
+
+        if (text_section.containsSymbol(symbol.second, contains) || !contains)
+            continue;
+
+        if (symbol.second.getType(symbol_type)
+            || SymbolRef::ST_Function != symbol_type)
+            continue;
+
+        if (symbol.second.getAddress(result))
+            continue;
+
+        LLVMFunction * fun = new LLVMFunction(symbol.first, result);
+        if (!functions.insert(std::make_pair(result, fun)).second) {
+            // Another symbol already registered a function at this
+            // address; the map keeps that one, so drop the duplicate
+            // instead of leaking it.
+            delete fun;
+            continue;
+        }
+        remaining_functions.push_back(fun);
+        LOG4CXX_DEBUG(logger, "Disasembling " << symbol.first);
+    }
+
+    for (LLVMFunction* function : remaining_functions) {
+        disassembleFunction(function);
+    }
+
+    if (binary->isELF()) {
+        // Byte 4 of the ELF identification is the file class, 2 = 64 bit.
+        const bool is64bit = (binary->getData()[4] == 0x02);
+        const bool little_endian = binary->isLittleEndian();
+        const int entry_size = is64bit ? 8 : 4;
+
+        // The entry point field starts at offset 0x18 of the ELF header.
+        uint64_t entry(0);
+        for (int i(0); i < entry_size; ++i) {
+            // Widen to 64 bit before shifting: a 32 bit operand must not
+            // be shifted by 32 or more and would lose the upper half of
+            // a 64 bit entry point.
+            const uint64_t byte =
+                static_cast<unsigned char>(binary->getData()[0x18 + i]);
+            if (little_endian) {
+                entry |= byte << (8 * i);
+            } else {
+                entry = (entry << 8) | byte;
+            }
+        }
+        LOG4CXX_DEBUG(logger, "Adding entry at: " << std::hex << entry);
+        std::stringstream s;
+        s << "<_start 0x" << std::hex << entry << ">";
+
+        disassembleFunctionAt(entry, s.str());
+    }
+
+    if (functions.empty()) {
+        uint64_t text_entry = 0;
+        text_section.getAddress(text_entry);
+        LOG4CXX_INFO(logger, "No Symbols found, starting at the beginning of the text segment");
+        disassembleFunctionAt(text_entry);
+    }
+
+    splitBlocks();
+}
+
+/*
+ * Splits basic blocks that contain the start of another block.
+ *
+ * Every registered block is decoded again from its start to its end
+ * address. If an instruction inside the block is directly followed by
+ * the start address of another registered block, the current block is
+ * shortened to end there: the other block becomes successor 0 and
+ * successor 1 is reset to 0. Undecodable bytes are skipped one at a
+ * time.
+ */
+void LLVMDisassembler::splitBlocks() {
+    SectionRef text_section = sections[".text"];
+    StringRef bytes;
+    text_section.getContents(bytes);
+    StringRefMemoryObject ref(bytes);
+
+    // The section address is the same for every block, query it once.
+    uint64_t base_address = 0;
+    text_section.getAddress(base_address);
+
+    // Split blocks where jumps are going inside the block
+    for (auto& entry : blocks) {
+        LLVMBasicBlock * current_block = entry.second;
+        uint64_t inst_size = 0;
+        // Instructions are decoded using offsets relative to the section start.
+        uint64_t current_address = current_block->getStartAddress() - base_address;
+        while(current_block->getEndAddress() - base_address > current_address) {
+            MCInst inst;
+
+            if(llvm::MCDisassembler::Success ==
+               DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
+                // Absolute address of the instruction following this one.
+                const uint64_t endaddress = current_address + inst_size + base_address;
+                auto other = blocks.find(endaddress);
+
+                if (other != blocks.end()
+                    && endaddress != current_block->getEndAddress()) {
+                    LOG4CXX_DEBUG(logger, "Shortening block starting at "
+                                  << std::hex
+                                  << current_block->getStartAddress()
+                                  << " now ending at "
+                                  << other->first);
+                    current_block->setEndAddress(endaddress);
+                    current_block->setNextBlock(0, other->first);
+                    current_block->setNextBlock(1, 0);
+                }
+            } else {
+                inst_size = 1;
+            }
+            current_address += inst_size;
+        }
+    }
+}
+
+/*
+ * Copies the symbols of the object file into the symbols map, keyed
+ * by symbol name. If the name of a symbol cannot be read the error is
+ * logged and the remaining symbols are not visited.
+ */
+void LLVMDisassembler::readSymbols() {
+    for (symbol_iterator current(o->symbol_begin()), last(o->symbol_end());
+         current != last; ++current) {
+        StringRef symbol_name;
+        const error_code failure = current->getName(symbol_name);
+        if (failure) {
+            LOG4CXX_ERROR(logger, failure.message());
+            break;
+        }
+
+        const std::string key(symbol_name.str());
+        LOG4CXX_DEBUG(logger, "Added symbol " << key);
+        symbols.insert(std::make_pair(key, *current));
+    }
+}
+
+/*
+ * Copies the sections of the object file into the sections map, keyed
+ * by section name. If the name of a section cannot be read the error
+ * is logged and the remaining sections are not visited.
+ */
+void LLVMDisassembler::readSections() {
+    for (section_iterator current(o->section_begin()), last(o->section_end());
+         current != last; ++current) {
+        StringRef section_name;
+        const error_code failure = current->getName(section_name);
+        if (failure) {
+            LOG4CXX_ERROR(logger, failure.message());
+            break;
+        }
+
+        const std::string key(section_name.str());
+        LOG4CXX_DEBUG(logger, "Added section " << key);
+        sections.insert(std::make_pair(key, *current));
+    }
+}
+
+/*
+ * Invokes the callback once for every entry of the functions map,
+ * passing the key (the address the function was registered under) and
+ * the function object.
+ */
+void LLVMDisassembler::forEachFunction(std::function<void (uint64_t, Function*)> callback) {
+    for (const auto& entry : functions) {
+        callback(entry.first, entry.second);
+    }
+}
+
+/*
+ * Decodes the instructions in the address range [start, end) of the
+ * .text section and reports each of them to the callback.
+ *
+ * start, end: absolute addresses delimiting the range
+ * fun:        called per instruction with a pointer to its raw bytes,
+ *             its size and its printed text; the pointer is only valid
+ *             during the call. For an undecodable byte it is called
+ *             with (NULL, 0, "Invalid Byte") and decoding continues
+ *             with the next byte.
+ *
+ * If the branch target of an instruction can be evaluated, the target
+ * address is passed to the instruction printer as annotation.
+ */
+void LLVMDisassembler::printEachInstruction(uint64_t start, uint64_t end,
+                                            std::function<void (uint8_t*, size_t, const std::string&)> fun) {
+    SectionRef text_section = sections[".text"];
+    uint64_t base_address = 0;
+    text_section.getAddress(base_address);
+    // Instructions are decoded using offsets relative to the section start.
+    uint64_t current_address = start - base_address;
+
+    StringRef bytes;
+    text_section.getContents(bytes);
+    StringRefMemoryObject ref(bytes);
+
+    while (current_address < end - base_address) {
+        uint64_t inst_size = 0;
+        MCInst inst;
+        std::string buf;
+        llvm::raw_string_ostream s(buf);
+
+        if(llvm::MCDisassembler::Success ==
+           DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
+
+            // Heap buffer instead of a variable-length array, which is
+            // not standard C++. The two spare bytes are kept from the
+            // original buffer size -- NOTE(review): reason unknown,
+            // confirm whether they are still needed.
+            std::vector<uint8_t> raw_bytes(inst_size + 2);
+            ref.readBytes(current_address, inst_size, raw_bytes.data());
+
+            uint64_t jmptarget;
+            if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
+                std::stringstream stream;
+                stream << std::hex << (base_address + jmptarget);
+                IP->printInst(&inst, s, stream.str());
+            } else {
+                IP->printInst(&inst, s, "");
+            }
+
+            fun(raw_bytes.data(), inst_size, s.str());
+        } else {
+            LOG4CXX_WARN(logger, "Invalid byte at" << std::hex << current_address + base_address);
+            fun(NULL, 0, "Invalid Byte");
+            inst_size = 1;
+        }
+
+        current_address += inst_size;
+    }
 }