]> git.siccegge.de Git - frida/frida.git/blobdiff - src/disassembler/llvm/LLVMDisassembler.cxx
Only disassemble the text segment
[frida/frida.git] / src / disassembler / llvm / LLVMDisassembler.cxx
index 240e5c560952b35a1de3719aead77058775a7de8..6b3402eb0b2414d56c4b415f7467e3c29d7e33cd 100644 (file)
@@ -7,6 +7,7 @@
 
 using namespace llvm;
 using namespace llvm::object;
+using std::error_code;
 
 /*
  * TODO: fallback code falls die Datei kein ELF/PE/COFF/MacO/.. binary
@@ -72,24 +73,30 @@ LLVMDisassembler::LLVMDisassembler(const std::string& filename)
         return;
     }
 
-    DisAsm.reset(target->createMCDisassembler(*STI));
+    MOFI.reset(new MCObjectFileInfo);
+    MCContext Ctx(AsmInfo.get(), MRI.get(), MOFI.get());
+
+    DisAsm.reset(target->createMCDisassembler(*STI, Ctx));
     if (!DisAsm) {
         LOG4CXX_ERROR(logger, "no disassembler for target " << tripleName);
         return;
     }
-
-    MOFI.reset(new MCObjectFileInfo);
-    Ctx.reset(new MCContext(AsmInfo.get(), MRI.get(), MOFI.get()));
     RelInfo.reset(
-        target->createMCRelocationInfo(tripleName, *Ctx.get()));
+        target->createMCRelocationInfo(tripleName, Ctx));
     if (RelInfo) {
         Symzer.reset(
-            MCObjectSymbolizer::createObjectSymbolizer(*Ctx.get(), RelInfo, o));
+            MCObjectSymbolizer::createObjectSymbolizer(Ctx, std::move(RelInfo), o));
         if (Symzer)
-            DisAsm->setSymbolizer(Symzer);
+            DisAsm->setSymbolizer(std::move(Symzer));
     }
+    RelInfo.release();
+    Symzer.release();
 
     MIA.reset(target->createMCInstrAnalysis(MII.get()));
+    if (!MIA) {
+        LOG4CXX_ERROR(logger, "no instruction analysis for target " << tripleName);
+        return;
+    }
 
     int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
     IP.reset(target->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI));
@@ -101,7 +108,7 @@ LLVMDisassembler::LLVMDisassembler(const std::string& filename)
     IP->setPrintImmHex(llvm::HexStyle::C);
     IP->setPrintImmHex(true);
 
-    OwningPtr<MCObjectDisassembler> OD(
+    std::unique_ptr<MCObjectDisassembler> OD(
         new MCObjectDisassembler(*o, *DisAsm, *MIA));
     Mod.reset(OD->buildModule(false));
 
@@ -121,12 +128,116 @@ LLVMDisassembler::~LLVMDisassembler() {
                   });
 }
 
-void LLVMDisassembler::disassemble() {
-    std::stack<LLVMFunction*> remaining_functions;
+/**
+ * Return the function starting at @p address, disassembling it first if it
+ * has not been registered in `functions` yet.
+ *
+ * @param address  Start address of the function. It is checked against the
+ *                 address range reported for the ".text" section.
+ * @param name     Name for a newly created function. When empty, a
+ *                 placeholder of the form "<Unnamed 0x...>" is generated.
+ * @return The already registered or newly created function, or nullptr when
+ *         @p address lies outside ".text" or the section bounds cannot be
+ *         queried.
+ */
+Function* LLVMDisassembler::disassembleFunctionAt(uint64_t address, const std::string& name) {
+    SectionRef text_section = sections[".text"];
+    uint64_t base_address = 0;
+    uint64_t size = 0;
+
+    // Both getters report failure through their return value. Without this
+    // check the range test below would read values that were never written.
+    if (text_section.getAddress(base_address) || text_section.getSize(size)) {
+        return nullptr;
+    }
+
+    // Written as a subtraction so that `base_address + size` cannot wrap.
+    if (address < base_address || address - base_address >= size) {
+        return nullptr;
+    }
+
+    // Reuse the iterator instead of looking the address up a second time.
+    auto known = functions.find(address);
+    if (known != functions.end()) {
+        return known->second;
+    }
+
+    std::string function_name(name);
+    if (function_name.empty()) {
+        std::stringstream s;
+        s << "<Unnamed 0x" << std::hex << address << ">";
+        function_name = s.str();
+    }
+
+    // Register the function before walking it so that calls back to this
+    // address found during the walk see it as already known.
+    LLVMFunction * function = new LLVMFunction(function_name, address);
+    functions.insert(std::make_pair(address, function));
+
+    disassembleFunction(function);
+
+    return function;
+}
+
+/**
+ * Discover the basic blocks of @p function by following direct control flow
+ * from its start address.
+ *
+ * Blocks are taken from a work list. Each one is decoded instruction by
+ * instruction until an undecodable byte, a terminator or a branch is reached.
+ * Direct jump targets, and the fall-through address of conditional branches,
+ * are queued as new blocks. Direct call targets that are not known functions
+ * are handed to disassembleFunctionAt(). Indirect branches are not followed.
+ *
+ * @param function  Function whose start address seeds the work list.
+ */
+void LLVMDisassembler::disassembleFunction(LLVMFunction* function) {
+    std::stack<LLVMBasicBlock*> remaining_blocks;
+    SectionRef text_section = sections[".text"];
+    StringRef bytes;
+    text_section.getContents(bytes);
+    StringRefMemoryObject ref(bytes);
+
+    // The section start does not change while walking, so query it once.
+    uint64_t base_address = 0;
+    text_section.getAddress(base_address);
+
+    // Create, register and queue a block for `address` unless `blocks`
+    // already has one. A block that is already registered was queued by the
+    // walk that created it, so queueing it again would only repeat that work.
+    auto enqueue_block = [&](uint64_t address) {
+        if (blocks.find(address) == blocks.end()) {
+            LLVMBasicBlock * block = new LLVMBasicBlock(address, this);
+            blocks.insert(std::make_pair(block->getStartAddress(), block));
+            remaining_blocks.push(block);
+        }
+    };
+
+    LOG4CXX_DEBUG(logger, "Handling function " << function->getName());
+
+    // Going through enqueue_block also covers a function that starts at an
+    // address another walk has already turned into a block.
+    enqueue_block(function->getStartAddress());
+
+    while (!remaining_blocks.empty()) {
+        LLVMBasicBlock * current_block = remaining_blocks.top();
+        remaining_blocks.pop();
+
+        LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex << current_block->getStartAddress());
+
+        uint64_t inst_size = 0;
+        // Instructions are decoded at offsets relative to the section start.
+        uint64_t current_address = current_block->getStartAddress() - base_address;
+        while(true) {
+            MCInst inst;
+
+            if(llvm::MCDisassembler::Success ==
+               DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
+                uint64_t jmptarget;
+
+                if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)
+                    && !MIA->isIndirectBranch(inst)) {
+                    // The target is section-relative like current_address.
+                    jmptarget += base_address;
+                    if (MIA->isCall(inst)) {
+                        if (functions.find(jmptarget) == functions.end()) {
+                            disassembleFunctionAt(jmptarget);
+                        }
+                    } else {
+                        current_block->setNextBlock(0, jmptarget);
+                        enqueue_block(jmptarget);
+                        if (MIA->isConditionalBranch(inst)) {
+                            // A conditional branch may also fall through to
+                            // the instruction that follows it.
+                            const uint64_t fallthrough = base_address + current_address + inst_size;
+                            current_block->setNextBlock(1, fallthrough);
+                            enqueue_block(fallthrough);
+                        }
+                    }
+                }
+            } else {
+                // Decoding failed: a size of 0 ends the block right here.
+                inst_size = 0;
+            }
+
+            if (inst_size == 0 || MIA->isTerminator(inst) || MIA->isBranch(inst)) {
+                current_block->setEndAddress(current_address + base_address + inst_size);
+                LOG4CXX_DEBUG(logger, "Finished Block at " << std::hex <<
+                              current_block->getEndAddress());
+                break;
+            }
+            current_address += inst_size;
+        }
+    }
+    LOG4CXX_DEBUG(logger, "Finished function " << function->getName());
+}
+
+void LLVMDisassembler::disassemble() {
+    SectionRef text_section = sections[".text"];
+    std::vector<LLVMFunction*> remaining_functions;
 
-       // Assume all function symbols actually start a real function
+    // Assume all function symbols actually start a real function
     for (auto x = symbols.begin(); x != symbols.end(); ++x) {
         uint64_t result;
         bool contains;
@@ -142,127 +253,86 @@ void LLVMDisassembler::disassemble() {
 
         if (!x->second.getAddress(result)) {
             LLVMFunction * fun = new LLVMFunction(x->first, result);
-            remaining_functions.push(fun);
+            remaining_functions.push_back(fun);
             functions.insert(std::make_pair(result, fun));
             LOG4CXX_DEBUG(logger, "Disasembling " << x->first);
         }
     }
 
+    for (LLVMFunction* function : remaining_functions) {
+        disassembleFunction(function);
+    }
+
+    // For ELF files, also start disassembling at the entry point recorded in
+    // the file header.
+    if (binary->isELF()) {
+        // Header byte 4 is e_ident[EI_CLASS]; the value 2 means ELFCLASS64.
+        const bool is64bit = (binary->getData()[4] == 0x02);
+        // e_entry sits at offset 0x18 and is 8 bytes wide in 64-bit files,
+        // 4 bytes wide otherwise.
+        const int entry_size = is64bit ? 8 : 4;
+
+        uint64_t entry(0);
+        for (int i(0); i < entry_size; ++i) {
+            // Widen to 64 bits before shifting: shifting a 32-bit value by
+            // 32 or more bits is undefined and dropped the upper half of
+            // 64-bit entry addresses.
+            const uint64_t byte =
+                static_cast<unsigned char>(binary->getData()[0x18 + i]);
+            if (binary->isLittleEndian()) {
+                entry |= byte << 8*i;
+            } else {
+                entry = (entry << 8) | byte;
+            }
+        }
+        LOG4CXX_DEBUG(logger, "Adding entry at: " << std::hex << entry);
+        std::stringstream s;
+        s << "<_start 0x" << std::hex << entry << ">";
+
+        disassembleFunctionAt(entry, s.str());
+    }
+
+    if (functions.empty()) {
+        uint64_t text_entry;
+        text_section.getAddress(text_entry);
+        LOG4CXX_INFO(logger, "No Symbols found, starting at the beginning of the text segment");
+        disassembleFunctionAt(text_entry);
+    }
+
+    splitBlocks();
+}
+
+void LLVMDisassembler::splitBlocks() {
+    SectionRef text_section = sections[".text"];
     StringRef bytes;
     text_section.getContents(bytes);
     StringRefMemoryObject ref(bytes);
 
-    while (remaining_functions.size()) {
-        LLVMFunction * current_function = remaining_functions.top();
-        remaining_functions.pop();
-
-        LOG4CXX_DEBUG(logger, "Handling function " << current_function->getName());
-
-        LLVMBasicBlock * block = new LLVMBasicBlock(current_function->getStartAddress(), this);
-        remaining_blocks.push(block);
-        blocks.insert(std::make_pair(block->getStartAddress(), block));
-
-        while (remaining_blocks.size()) {
-            LLVMBasicBlock * current_block = remaining_blocks.top();
-            remaining_blocks.pop();
-
-            LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex << current_block->getStartAddress());
-
-            uint64_t inst_size;
-            uint64_t base_address;
-            text_section.getAddress(base_address);
-            uint64_t current_address = current_block->getStartAddress() - base_address;
-            while(true) {
-                MCInst inst;
-                std::string buf;
-                llvm::raw_string_ostream s(buf);
-
-                if(llvm::MCDisassembler::Success ==
-                   DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
-
-                    uint64_t jmptarget;
-                    if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
-                        jmptarget += base_address;
-                        if (!MIA->isIndirectBranch(inst)) {
-                            if (MIA->isCall(inst)) {
-                                if (functions.find(jmptarget) == functions.end()) {
-                                    std::stringstream s;
-                                    s << "<Unnamed 0x" << std::hex << jmptarget << ">";
-                                    LLVMFunction * fun = new LLVMFunction(s.str(), jmptarget);
-                                    functions.insert(std::make_pair(jmptarget, fun));
-                                    remaining_functions.push(fun);
-                                }
-                            } else {
-                                                               current_block->setNextBlock(0, jmptarget);
-                                if (blocks.find(jmptarget) == blocks.end()) {
-                                    LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this);
-                                    blocks.insert(std::make_pair(block->getStartAddress(), block));
-                                    remaining_blocks.push(block);
-                                }
-                                if (MIA->isConditionalBranch(inst)) {
-                                    jmptarget = base_address + current_address + inst_size;
-                                                                       current_block->setNextBlock(1, jmptarget);
-                                    if (blocks.find(jmptarget) == blocks.end()) {
-                                        LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this);
-                                        blocks.insert(std::make_pair(block->getStartAddress(), block));
-                                        remaining_blocks.push(block);
-                                    }
-                                }
-                            }
-                        }
+    // Split blocks where jumps are going inside the block
+    for (auto it = blocks.begin(); it != blocks.end(); ++it) {
+        LLVMBasicBlock * current_block = it->second;
+        uint64_t inst_size;
+        uint64_t base_address;
+        text_section.getAddress(base_address);
+        uint64_t current_address = current_block->getStartAddress() - base_address;
+        while(current_block->getEndAddress() - base_address > current_address) {
+            MCInst inst;
+            std::string buf;
+            llvm::raw_string_ostream s(buf);
+
+            if(llvm::MCDisassembler::Success ==
+               DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
+                auto other = blocks.find(current_address + inst_size + base_address);
+
+                if (other != blocks.end()) {
+                    uint64_t endaddress = current_address + inst_size + base_address;
+                    if (endaddress != current_block->getEndAddress()) {
+                        LOG4CXX_DEBUG(logger, "Shortening block starting at "
+                                      << std::hex
+                                      << current_block->getStartAddress()
+                                      << " now ending at "
+                                      << other->first);
+                        current_block->setEndAddress(endaddress);
+                        current_block->setNextBlock(0, other->first);
+                        current_block->setNextBlock(1, 0);
                     }
-                } else {
-                    inst_size = 0;
-                }
-
-
-                if (inst_size == 0 || MIA->isTerminator(inst) || MIA->isBranch(inst)) {
-                    current_block->setEndAddress(current_address + base_address + inst_size);
-                    LOG4CXX_DEBUG(logger, "Finished Block at " << std::hex <<
-                                  current_block->getEndAddress());
-                    break;
                 }
-                current_address += inst_size;
+            } else {
+                inst_size = 1;
             }
+            current_address += inst_size;
         }
-        LOG4CXX_DEBUG(logger, "Finished function " << current_function->getName());
     }
-
-       // Split blocks where jumps are going inside the block
-       for (auto it = blocks.begin(); it != blocks.end(); ++it) {
-               LLVMBasicBlock * current_block = it->second;
-               uint64_t inst_size;
-               uint64_t base_address;
-               text_section.getAddress(base_address);
-               uint64_t current_address = current_block->getStartAddress() - base_address;
-               while(current_block->getEndAddress() - base_address > current_address) {
-                       MCInst inst;
-                       std::string buf;
-                       llvm::raw_string_ostream s(buf);
-
-                       if(llvm::MCDisassembler::Success ==
-                          DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
-                               auto other = blocks.find(current_address + inst_size + base_address);
-
-                               if (other != blocks.end()) {
-                                       uint64_t endaddress = current_address + inst_size + base_address;
-                                       if (endaddress != current_block->getEndAddress()) {
-                                               LOG4CXX_DEBUG(logger, "Shortening block starting at "
-                                                                         << std::hex
-                                                                         << current_block->getStartAddress()
-                                                                         << " now ending at "
-                                                                         << other->first);
-                                               current_block->setEndAddress(endaddress);
-                                               current_block->setNextBlock(0, other->first);
-                                               current_block->setNextBlock(1, 0);
-                                       }
-                               }
-                       } else {
-                               inst_size = 1;
-                       }
-                       current_address += inst_size;
-               }
-       }
 }
 
 void LLVMDisassembler::readSymbols() {
@@ -324,7 +394,14 @@ void LLVMDisassembler::printEachInstruction(uint64_t start, uint64_t end,
             uint8_t bytes[inst_size+2];
             ref.readBytes(current_address, inst_size, bytes);
 
-            IP->printInst(&inst, s, "");
+                       uint64_t jmptarget;
+                       if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
+                               std::stringstream stream;
+                               stream << std::hex << (base_address + jmptarget);
+                               IP->printInst(&inst, s, stream.str());
+                       } else
+                               IP->printInst(&inst, s, "");
+
                        fun(bytes, inst_size, s.str());
         } else {
                        LOG4CXX_WARN(logger, "Invalid byte at" << std::hex << current_address + base_address);