]> git.siccegge.de Git - frida/frida.git/blobdiff - src/disassembler/llvm/LLVMDisassembler.cxx
Only disassemble the text segment
[frida/frida.git] / src / disassembler / llvm / LLVMDisassembler.cxx
index a8c2867f50784255a27246bb10be5db9312d2e87..6b3402eb0b2414d56c4b415f7467e3c29d7e33cd 100644 (file)
@@ -7,6 +7,7 @@
 
 using namespace llvm;
 using namespace llvm::object;
+using std::error_code;
 
 /*
  * TODO: fallback code falls die Datei kein ELF/PE/COFF/MacO/.. binary
@@ -72,24 +73,30 @@ LLVMDisassembler::LLVMDisassembler(const std::string& filename)
         return;
     }
 
-    DisAsm.reset(target->createMCDisassembler(*STI));
+    MOFI.reset(new MCObjectFileInfo);
+    MCContext Ctx(AsmInfo.get(), MRI.get(), MOFI.get());
+
+    DisAsm.reset(target->createMCDisassembler(*STI, Ctx));
     if (!DisAsm) {
         LOG4CXX_ERROR(logger, "no disassembler for target " << tripleName);
         return;
     }
-
-    MOFI.reset(new MCObjectFileInfo);
-    Ctx.reset(new MCContext(AsmInfo.get(), MRI.get(), MOFI.get()));
     RelInfo.reset(
-        target->createMCRelocationInfo(tripleName, *Ctx.get()));
+        target->createMCRelocationInfo(tripleName, Ctx));
     if (RelInfo) {
         Symzer.reset(
-            MCObjectSymbolizer::createObjectSymbolizer(*Ctx.get(), RelInfo, o));
+            MCObjectSymbolizer::createObjectSymbolizer(Ctx, std::move(RelInfo), o));
         if (Symzer)
-            DisAsm->setSymbolizer(Symzer);
+            DisAsm->setSymbolizer(std::move(Symzer));
     }
+    RelInfo.release();
+    Symzer.release();
 
     MIA.reset(target->createMCInstrAnalysis(MII.get()));
+    if (!MIA) {
+        LOG4CXX_ERROR(logger, "no instruction analysis for target " << tripleName);
+        return;
+    }
 
     int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
     IP.reset(target->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI));
@@ -101,7 +108,7 @@ LLVMDisassembler::LLVMDisassembler(const std::string& filename)
     IP->setPrintImmHex(llvm::HexStyle::C);
     IP->setPrintImmHex(true);
 
-    OwningPtr<MCObjectDisassembler> OD(
+    std::unique_ptr<MCObjectDisassembler> OD(
         new MCObjectDisassembler(*o, *DisAsm, *MIA));
     Mod.reset(OD->buildModule(false));
 
@@ -111,139 +118,220 @@ LLVMDisassembler::LLVMDisassembler(const std::string& filename)
 }
 
 LLVMDisassembler::~LLVMDisassembler() {
-       std::for_each(functions.begin(), functions.end(),
-                                 [](std::pair<uint64_t,LLVMFunction*> it) {
-                                         delete it.second;
-                                 });
-       std::for_each(blocks.begin(), blocks.end(),
-                                 [](std::pair<uint64_t, LLVMBasicBlock*> it) {
-                                         delete it.second;
-                                 });
+    std::for_each(functions.begin(), functions.end(),
+                  [](std::pair<uint64_t,LLVMFunction*> it) {
+                      delete it.second;
+                  });
+    std::for_each(blocks.begin(), blocks.end(),
+                  [](std::pair<uint64_t, LLVMBasicBlock*> it) {
+                      delete it.second;
+                  });
 }
 
-/*
- * TODO: If we jump into some Basic Block we need to split it there into two
- */
-void LLVMDisassembler::disassemble() {
-    std::stack<LLVMFunction*> remaining_functions;
-    std::stack<LLVMBasicBlock*> remaining_blocks;
+Function* LLVMDisassembler::disassembleFunctionAt(uint64_t address, const std::string& name) {
     SectionRef text_section = sections[".text"];
+       uint64_t base_address, size;
+       text_section.getAddress(base_address);
+       text_section.getSize(size);
 
-       for (auto x = symbols.begin(); x != symbols.end(); ++x) {
-               uint64_t result;
-               bool contains;
-               SymbolRef::Type symbol_type;
+       if (address < base_address || 
+               address >= base_address + size) {
+               return NULL;
+       }
 
+    if (functions.find(address) != functions.end()) {
+        return functions[address];
+    }
 
-               if (text_section.containsSymbol(x->second, contains) || !contains)
-                       continue;
+    LLVMFunction * function;
+    if (name == "") {
+        std::stringstream s;
+        s << "<Unnamed 0x" << std::hex << address << ">";
+        function = new LLVMFunction(s.str(), address);
+    } else {
+        function = new LLVMFunction(name, address);
+    }
+    functions.insert(std::make_pair(address, function));
 
-               if (x->second.getType(symbol_type)
-                       || SymbolRef::ST_Function != symbol_type)
-                       continue;
+    disassembleFunction(function);
 
-               if (!x->second.getAddress(result)) {
-                       LLVMFunction * fun = new LLVMFunction(x->first, result);
-                       remaining_functions.push(fun);
-                       functions.insert(std::make_pair(result, fun));
-                       LOG4CXX_DEBUG(logger, "Disasembling " << x->first);
-               }
-       }
+    return function;
+}
 
+void LLVMDisassembler::disassembleFunction(LLVMFunction* function) {
+    std::stack<LLVMBasicBlock*> remaining_blocks;
+    SectionRef text_section = sections[".text"];
     StringRef bytes;
     text_section.getContents(bytes);
     StringRefMemoryObject ref(bytes);
 
-    while (remaining_functions.size()) {
-        LLVMFunction * current_function = remaining_functions.top();
-        remaining_functions.pop();
+    LOG4CXX_DEBUG(logger, "Handling function " << function->getName());
+
+    LLVMBasicBlock * block = new LLVMBasicBlock(function->getStartAddress(), this);
+    remaining_blocks.push(block);
+    blocks.insert(std::make_pair(block->getStartAddress(), block));
+
+    while (remaining_blocks.size()) {
+        LLVMBasicBlock * current_block = remaining_blocks.top();
+        remaining_blocks.pop();
+
+        LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex << current_block->getStartAddress());
+
+        uint64_t inst_size;
+        uint64_t base_address;
+        text_section.getAddress(base_address);
+        uint64_t current_address = current_block->getStartAddress() - base_address;
+        while(true) {
+            MCInst inst;
+            std::string buf;
+            llvm::raw_string_ostream s(buf);
+
+            if(llvm::MCDisassembler::Success ==
+               DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
+                uint64_t jmptarget;
+
+                if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
+                    jmptarget += base_address;
+                    if (!MIA->isIndirectBranch(inst)) {
+                        if (MIA->isCall(inst)) {
+                            if (functions.find(jmptarget) == functions.end()) {
+                                disassembleFunctionAt(jmptarget);
+                            }
+                        } else {
+                            current_block->setNextBlock(0, jmptarget);
+                            if (blocks.find(jmptarget) == blocks.end()) {
+                                LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this);
+                                blocks.insert(std::make_pair(block->getStartAddress(), block));
+                                remaining_blocks.push(block);
+                            }
+                            if (MIA->isConditionalBranch(inst)) {
+                                jmptarget = base_address + current_address + inst_size;
+                                current_block->setNextBlock(1, jmptarget);
+                                if (blocks.find(jmptarget) == blocks.end()) {
+                                    LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this);
+                                    blocks.insert(std::make_pair(block->getStartAddress(), block));
+                                    remaining_blocks.push(block);
+                                }
+                            }
+                        }
+                    }
+                }
+            } else {
+                inst_size = 0;
+            }
 
-        LOG4CXX_DEBUG(logger, "Handling function " << current_function->getName());
 
-        // if ("_start" != current_function->getName())
-        //  continue;
+            if (inst_size == 0 || MIA->isTerminator(inst) || MIA->isBranch(inst)) {
+                current_block->setEndAddress(current_address + base_address + inst_size);
+                LOG4CXX_DEBUG(logger, "Finished Block at " << std::hex <<
+                              current_block->getEndAddress());
+                break;
+            }
+            current_address += inst_size;
+        }
+    }
+    LOG4CXX_DEBUG(logger, "Finished function " << function->getName());
+}
 
-               LLVMBasicBlock * block = new LLVMBasicBlock(current_function->getStartAddress());
-        remaining_blocks.push(block);
-               blocks.insert(std::make_pair(block->getStartAddress(), block));
+void LLVMDisassembler::disassemble() {
+    SectionRef text_section = sections[".text"];
+    std::vector<LLVMFunction*> remaining_functions;
 
-        while (remaining_blocks.size()) {
-            LLVMBasicBlock * current_block = remaining_blocks.top();
-            remaining_blocks.pop();
+    // Assume all function symbols actually start a real function
+    for (auto x = symbols.begin(); x != symbols.end(); ++x) {
+        uint64_t result;
+        bool contains;
+        SymbolRef::Type symbol_type;
 
-            LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex << current_block->getStartAddress());
 
-            uint64_t inst_size;
-            uint64_t base_address;
-            text_section.getAddress(base_address);
-            uint64_t current_address = current_block->getStartAddress() - base_address;
-            while(true) {
-                MCInst inst;
-                std::string buf;
-                llvm::raw_string_ostream s(buf);
+        if (text_section.containsSymbol(x->second, contains) || !contains)
+            continue;
 
-                if(llvm::MCDisassembler::Success ==
-                   DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
+        if (x->second.getType(symbol_type)
+            || SymbolRef::ST_Function != symbol_type)
+            continue;
 
-                    uint8_t bytes[inst_size+2];
-                    ref.readBytes(current_address, inst_size, bytes);
-                    s << '\t';
-                    for(uint8_t* cur = bytes; cur < bytes + inst_size; ++cur) {
-                        s.write_hex(*cur);
-                        s << ' ';
-                    }
-                    s << '\t';
-
-                    IP->printInst(&inst, s, "");
-
-                    LOG4CXX_DEBUG(logger, std::hex << current_address + base_address << s.str());
-
-                    uint64_t jmptarget;
-                    if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
-                                               jmptarget += base_address;
-                        if (!MIA->isIndirectBranch(inst)) {
-                            if (MIA->isCall(inst)) {
-                                                               if (functions.find(jmptarget) == functions.end()) {
-                                                                       std::stringstream s;
-                                                                       s << "<Unnamed 0x" << std::hex << jmptarget << ">";
-                                                                       LLVMFunction * fun = new LLVMFunction(s.str(), jmptarget);
-                                                                       functions.insert(std::make_pair(jmptarget, fun));
-                                                                       remaining_functions.push(fun);
-                                                               }
-                            } else {
-                                                               if (blocks.find(jmptarget) == blocks.end()) {
-                                                                       LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget);
-                                                                       blocks.insert(std::make_pair(block->getStartAddress(), block));
-                                                                       current_block->setNextBlock(0, block->getStartAddress());
-                                                                       remaining_blocks.push(block);
-                                                               }
-                                if (MIA->isConditionalBranch(inst)) {
-                                                                       jmptarget = base_address + current_address + inst_size;
-                                                                       if (blocks.find(jmptarget) == blocks.end()) {
-                                                                               LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget);
-                                                                               blocks.insert(std::make_pair(block->getStartAddress(), block));
-                                                                               current_block->setNextBlock(1, block->getStartAddress());
-                                                                               remaining_blocks.push(new LLVMBasicBlock(jmptarget));
-                                                                       }
-                                }
-                            }
-                        }
-                    }
-                } else {
-                    inst_size = 0;
-                }
+        if (!x->second.getAddress(result)) {
+            LLVMFunction * fun = new LLVMFunction(x->first, result);
+            remaining_functions.push_back(fun);
+            functions.insert(std::make_pair(result, fun));
+            LOG4CXX_DEBUG(logger, "Disasembling " << x->first);
+        }
+    }
+
+    for (LLVMFunction* function : remaining_functions) {
+        disassembleFunction(function);
+    }
+
+    if (binary->isELF()) {
+        // e_ident[EI_CLASS] (byte 4) is 2 for ELFCLASS64; e_entry sits at offset 0x18.
+        bool is64bit = (binary->getData()[4] == 0x02);
+
+        uint64_t entry(0);
+        for (int i(0); i < (is64bit? 8 : 4); ++i) {
+            // Widen to 64 bit before shifting: an unsigned int shifted by 32..56
+            // bits is undefined and dropped the upper half of 64-bit entry points.
+            uint64_t byte = (unsigned char)binary->getData()[0x18 + i];
+            if (binary->isLittleEndian()) entry |= byte << 8*i;
+            else entry = (entry << 8) | byte;
+        }
+        LOG4CXX_DEBUG(logger, "Adding entry at: " << std::hex << entry);
+        std::stringstream s;
+        s << "<_start 0x" << std::hex << entry << ">";
+
+        disassembleFunctionAt(entry, s.str());
+    }
+
+    if (functions.empty()) {
+        uint64_t text_entry;
+        text_section.getAddress(text_entry);
+        LOG4CXX_INFO(logger, "No Symbols found, starting at the beginning of the text segment");
+        disassembleFunctionAt(text_entry);
+    }
+
+    splitBlocks();
+}
 
+void LLVMDisassembler::splitBlocks() {
+    SectionRef text_section = sections[".text"];
+    StringRef bytes;
+    text_section.getContents(bytes);
+    StringRefMemoryObject ref(bytes);
 
-                if (inst_size == 0 || MIA->isTerminator(inst) || MIA->isBranch(inst)) {
-                    current_block->setEndAddress(current_address + base_address);
-                    LOG4CXX_DEBUG(logger, "Finished Block at " << std::hex << 
-                                                                 current_block->getEndAddress());
-                    break;
+    // Shorten each block so that it ends where another known block begins (i.e. a jump lands inside it)
+    for (auto it = blocks.begin(); it != blocks.end(); ++it) {
+        LLVMBasicBlock * current_block = it->second;
+        uint64_t inst_size;
+        uint64_t base_address;
+        text_section.getAddress(base_address);
+        uint64_t current_address = current_block->getStartAddress() - base_address;
+        while(current_block->getEndAddress() - base_address > current_address) {
+            MCInst inst;
+            std::string buf;
+            llvm::raw_string_ostream s(buf);
+
+            if(llvm::MCDisassembler::Success ==
+               DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
+                auto other = blocks.find(current_address + inst_size + base_address);
+
+                if (other != blocks.end()) {
+                    uint64_t endaddress = current_address + inst_size + base_address;
+                    if (endaddress != current_block->getEndAddress()) {
+                        LOG4CXX_DEBUG(logger, "Shortening block starting at "
+                                      << std::hex
+                                      << current_block->getStartAddress()
+                                      << " now ending at "
+                                      << other->first);
+                        current_block->setEndAddress(endaddress);
+                        current_block->setNextBlock(0, other->first);
+                        current_block->setNextBlock(1, 0);
+                    }
                 }
-                current_address += inst_size;
+            } else {
+                inst_size = 1;
             }
+            current_address += inst_size;
         }
-        LOG4CXX_DEBUG(logger, "Finished function " << current_function->getName());
     }
 }
 
@@ -277,8 +365,50 @@ void LLVMDisassembler::readSections() {
 }
 
 void LLVMDisassembler::forEachFunction(std::function<void (uint64_t, Function*)> callback) {
-       std::for_each(functions.begin(), functions.end(),
-                                 [&](std::pair<uint64_t, LLVMFunction*> x) {
-                                         callback(x.first, x.second);
-                                 });
+    std::for_each(functions.begin(), functions.end(),
+                  [&](std::pair<uint64_t, LLVMFunction*> x) {
+                      callback(x.first, x.second);
+                  });
+}
+
+// Walks [start, end) of the .text section one instruction at a time and hands
+// each instruction's raw bytes, byte count and printed text to `fun`.
+// Undecodable bytes are reported as (NULL, 0, "Invalid Byte") and skipped.
+void LLVMDisassembler::printEachInstruction(uint64_t start, uint64_t end,
+        std::function<void (uint8_t*, size_t, const std::string&)> fun) {
+    SectionRef text_section = sections[".text"];
+    uint64_t base_address;
+    text_section.getAddress(base_address);
+    StringRef contents;
+    text_section.getContents(contents);
+    StringRefMemoryObject ref(contents);
+
+    // Addresses handed to the disassembler are offsets into the section.
+    uint64_t current_address = start - base_address;
+    while (current_address < end - base_address) {
+        uint64_t inst_size;
+        MCInst inst;
+        std::string buf;
+        llvm::raw_string_ostream s(buf);
+
+        if(llvm::MCDisassembler::Success ==
+           DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
+            // Heap buffer instead of a variable-length array (not standard C++).
+            std::vector<uint8_t> raw(inst_size);
+            ref.readBytes(current_address, inst_size, raw.data());
+
+            uint64_t jmptarget;
+            std::stringstream annotation;
+            if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget))
+                annotation << std::hex << (base_address + jmptarget);
+            IP->printInst(&inst, s, annotation.str());
+
+            fun(raw.data(), inst_size, s.str());
+        } else {
+            LOG4CXX_WARN(logger, "Invalid byte at " << std::hex << current_address + base_address);
+            fun(NULL, 0, "Invalid Byte");
+            inst_size = 1;
+        }
+        current_address += inst_size;
+    }
 }