]> git.siccegge.de Git - frida/frida.git/blobdiff - src/disassembler/llvm/LLVMDisassembler.cxx
Only disassemble the text segment
[frida/frida.git] / src / disassembler / llvm / LLVMDisassembler.cxx
index 6e76c91bc27c8121f7088460401ae3c20d4faf47..6b3402eb0b2414d56c4b415f7467e3c29d7e33cd 100644 (file)
@@ -7,6 +7,7 @@
 
 using namespace llvm;
 using namespace llvm::object;
+using std::error_code;
 
 /*
  * TODO: fallback code falls die Datei kein ELF/PE/COFF/MacO/.. binary
@@ -72,24 +73,30 @@ LLVMDisassembler::LLVMDisassembler(const std::string& filename)
         return;
     }
 
-    DisAsm.reset(target->createMCDisassembler(*STI));
+    MOFI.reset(new MCObjectFileInfo);
+    MCContext Ctx(AsmInfo.get(), MRI.get(), MOFI.get());
+
+    DisAsm.reset(target->createMCDisassembler(*STI, Ctx));
     if (!DisAsm) {
         LOG4CXX_ERROR(logger, "no disassembler for target " << tripleName);
         return;
     }
-
-    MOFI.reset(new MCObjectFileInfo);
-    Ctx.reset(new MCContext(AsmInfo.get(), MRI.get(), MOFI.get()));
     RelInfo.reset(
-        target->createMCRelocationInfo(tripleName, *Ctx.get()));
+        target->createMCRelocationInfo(tripleName, Ctx));
     if (RelInfo) {
         Symzer.reset(
-            MCObjectSymbolizer::createObjectSymbolizer(*Ctx.get(), RelInfo, o));
+            MCObjectSymbolizer::createObjectSymbolizer(Ctx, std::move(RelInfo), o));
         if (Symzer)
-            DisAsm->setSymbolizer(Symzer);
+            DisAsm->setSymbolizer(std::move(Symzer));
     }
+    RelInfo.release();
+    Symzer.release();
 
     MIA.reset(target->createMCInstrAnalysis(MII.get()));
+    if (!MIA) {
+        LOG4CXX_ERROR(logger, "no instruction analysis for target " << tripleName);
+        return;
+    }
 
     int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
     IP.reset(target->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI));
@@ -101,7 +108,7 @@ LLVMDisassembler::LLVMDisassembler(const std::string& filename)
     IP->setPrintImmHex(llvm::HexStyle::C);
     IP->setPrintImmHex(true);
 
-    OwningPtr<MCObjectDisassembler> OD(
+    std::unique_ptr<MCObjectDisassembler> OD(
         new MCObjectDisassembler(*o, *DisAsm, *MIA));
     Mod.reset(OD->buildModule(false));
 
@@ -121,14 +128,116 @@ LLVMDisassembler::~LLVMDisassembler() {
                   });
 }
 
-/*
- * TODO: If we jump into some Basic Block we need to split it there into two
- */
-void LLVMDisassembler::disassemble() {
-    std::stack<LLVMFunction*> remaining_functions;
+Function* LLVMDisassembler::disassembleFunctionAt(uint64_t address, const std::string& name) { // Returns the function registered at `address`, creating and disassembling it on first request; NULL if `address` is outside .text.
+    SectionRef text_section = sections[".text"];
+       uint64_t base_address, size;
+       text_section.getAddress(base_address);
+       text_section.getSize(size);
+       // Only addresses inside [base_address, base_address + size) of the .text section are accepted.
+       if (address < base_address || 
+               address >= base_address + size) {
+               return NULL;
+       }
+    // Reuse the entry already registered for this address, if there is one.
+    if (functions.find(address) != functions.end()) {
+        return functions[address];
+    }
+    // An empty `name` yields a synthetic "<Unnamed 0x...>" label built from the address.
+    LLVMFunction * function;
+    if (name == "") {
+        std::stringstream s;
+        s << "<Unnamed 0x" << std::hex << address << ">";
+        function = new LLVMFunction(s.str(), address);
+    } else {
+        function = new LLVMFunction(name, address);
+    }
+    functions.insert(std::make_pair(address, function));
+    // Registered before disassembling, so a request for the same address made during disassembly (e.g. a self-call) hits the lookup above.
+    disassembleFunction(function);
+
+    return function;
+}
+
+void LLVMDisassembler::disassembleFunction(LLVMFunction* function) { // Discovers the basic blocks reachable from the function's start address via direct branches.
     std::stack<LLVMBasicBlock*> remaining_blocks;
     SectionRef text_section = sections[".text"];
+    StringRef bytes;
+    text_section.getContents(bytes);
+    StringRefMemoryObject ref(bytes);
+    // `ref` wraps the contents of .text; the decoder below is handed offsets relative to the section start.
+    LOG4CXX_DEBUG(logger, "Handling function " << function->getName());
+    // Seed the worklist with the block that begins at the function's start address.
+    LLVMBasicBlock * block = new LLVMBasicBlock(function->getStartAddress(), this);
+    remaining_blocks.push(block);
+    blocks.insert(std::make_pair(block->getStartAddress(), block));
+    // Worklist loop: every popped block is decoded linearly until the instruction that ends it.
+    while (remaining_blocks.size()) {
+        LLVMBasicBlock * current_block = remaining_blocks.top();
+        remaining_blocks.pop();
+
+        LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex << current_block->getStartAddress());
+        // current_address is an offset from the start of .text, not an absolute address.
+        uint64_t inst_size;
+        uint64_t base_address;
+        text_section.getAddress(base_address);
+        uint64_t current_address = current_block->getStartAddress() - base_address;
+        while(true) {
+            MCInst inst;
+            std::string buf; // buf and s are declared but never written to in this function
+            llvm::raw_string_ostream s(buf);
+
+            if(llvm::MCDisassembler::Success ==
+               DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
+                uint64_t jmptarget;
+                // The target is evaluated from the section-relative offset and rebased to an absolute address afterwards.
+                if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
+                    jmptarget += base_address;
+                    if (!MIA->isIndirectBranch(inst)) {
+                        if (MIA->isCall(inst)) { // direct call: the target is treated as a function start
+                            if (functions.find(jmptarget) == functions.end()) {
+                                disassembleFunctionAt(jmptarget); // followed immediately, i.e. recursively
+                            }
+                        } else { // direct jump: the target starts a basic block and becomes successor 0
+                            current_block->setNextBlock(0, jmptarget);
+                            if (blocks.find(jmptarget) == blocks.end()) {
+                                LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this);
+                                blocks.insert(std::make_pair(block->getStartAddress(), block));
+                                remaining_blocks.push(block);
+                            }
+                            if (MIA->isConditionalBranch(inst)) { // conditional: the fall-through address becomes successor 1
+                                jmptarget = base_address + current_address + inst_size;
+                                current_block->setNextBlock(1, jmptarget);
+                                if (blocks.find(jmptarget) == blocks.end()) {
+                                    LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this);
+                                    blocks.insert(std::make_pair(block->getStartAddress(), block));
+                                    remaining_blocks.push(block);
+                                }
+                            }
+                        }
+                    }
+                }
+            } else {
+                inst_size = 0; // decode failure: forces the block to end in the check below
+            }
+            // A block ends at a decode failure, a terminator or any branch.
+            // Its end address is the address right after that instruction (the failing address itself on decode failure).
+            if (inst_size == 0 || MIA->isTerminator(inst) || MIA->isBranch(inst)) {
+                current_block->setEndAddress(current_address + base_address + inst_size);
+                LOG4CXX_DEBUG(logger, "Finished Block at " << std::hex <<
+                              current_block->getEndAddress());
+                break;
+            }
+            current_address += inst_size;
+        }
+    }
+    LOG4CXX_DEBUG(logger, "Finished function " << function->getName());
+}
+
+void LLVMDisassembler::disassemble() { // Entry point: disassembles symbol functions, then the ELF entry point, then falls back to the start of .text.
+    SectionRef text_section = sections[".text"];
+    std::vector<LLVMFunction*> remaining_functions;
 
+    // Assume all function symbols actually start a real function
     for (auto x = symbols.begin(); x != symbols.end(); ++x) {
         uint64_t result;
         bool contains;
@@ -144,106 +253,85 @@ void LLVMDisassembler::disassemble() {
 
         if (!x->second.getAddress(result)) {
             LLVMFunction * fun = new LLVMFunction(x->first, result);
-            remaining_functions.push(fun);
+            remaining_functions.push_back(fun);
             functions.insert(std::make_pair(result, fun));
             LOG4CXX_DEBUG(logger, "Disasembling " << x->first);
         }
     }
 
-    StringRef bytes;
-    text_section.getContents(bytes);
-    StringRefMemoryObject ref(bytes);
-
-    while (remaining_functions.size()) {
-        LLVMFunction * current_function = remaining_functions.top();
-        remaining_functions.pop();
-
-        LOG4CXX_DEBUG(logger, "Handling function " << current_function->getName());
+    for (LLVMFunction* function : remaining_functions) { // all symbol functions are registered before any of them is disassembled
+        disassembleFunction(function);
+    }
 
-        // if ("_start" != current_function->getName())
-        //  continue;
+    if (binary->isELF()) { // ELF: additionally start at the entry point stored in the file header
+        bool is64bit = (binary->getData()[4] == 0x02); // e_ident[EI_CLASS] == 2 means ELFCLASS64
 
-        LLVMBasicBlock * block = new LLVMBasicBlock(current_function->getStartAddress(), this);
-        remaining_blocks.push(block);
-        blocks.insert(std::make_pair(block->getStartAddress(), block));
+        uint64_t entry(0); // e_entry sits at file offset 0x18 and is 4 (ELF32) or 8 (ELF64) bytes wide
+        for (int i(0); i < (is64bit? 8 : 4); ++i) {
+            if (binary->isLittleEndian()) { // widen to 64 bit before shifting: bytes 4..7 need shift counts >= 32
+                entry |= (uint64_t)((unsigned char)binary->getData()[0x18 + i]) << 8*i;
+            } else {
+                entry = entry << 8;
+                entry |= (unsigned char)binary->getData()[0x18 + i];
+            }
+        }
+        LOG4CXX_DEBUG(logger, "Adding entry at: " << std::hex << entry);
+        std::stringstream s;
+        s << "<_start 0x" << std::hex << entry << ">";
 
-        while (remaining_blocks.size()) {
-            LLVMBasicBlock * current_block = remaining_blocks.top();
-            remaining_blocks.pop();
+        disassembleFunctionAt(entry, s.str());
+    }
 
-            LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex << current_block->getStartAddress());
+    if (functions.empty()) { // nothing registered by the symbol pass or the ELF entry point
+        uint64_t text_entry;
+        text_section.getAddress(text_entry);
+        LOG4CXX_INFO(logger, "No Symbols found, starting at the beginning of the text segment");
+        disassembleFunctionAt(text_entry);
+    }
 
-            uint64_t inst_size;
-            uint64_t base_address;
-            text_section.getAddress(base_address);
-            uint64_t current_address = current_block->getStartAddress() - base_address;
-            while(true) {
-                MCInst inst;
-                std::string buf;
-                llvm::raw_string_ostream s(buf);
+    splitBlocks(); // post-pass over all discovered blocks
+}
 
-                if(llvm::MCDisassembler::Success ==
-                   DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
+void LLVMDisassembler::splitBlocks() { // Post-pass: shortens every block that runs into the start of another known block.
+    SectionRef text_section = sections[".text"];
+    StringRef bytes;
+    text_section.getContents(bytes);
+    StringRefMemoryObject ref(bytes);
 
-                    uint8_t bytes[inst_size+2];
-                    ref.readBytes(current_address, inst_size, bytes);
-                    s << '\t';
-                    for(uint8_t* cur = bytes; cur < bytes + inst_size; ++cur) {
-                        s.write_hex(*cur);
-                        s << ' ';
-                    }
-                    s << '\t';
-
-                    IP->printInst(&inst, s, "");
-
-                    LOG4CXX_DEBUG(logger, std::hex << current_address + base_address << s.str());
-
-                    uint64_t jmptarget;
-                    if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
-                        jmptarget += base_address;
-                        if (!MIA->isIndirectBranch(inst)) {
-                            if (MIA->isCall(inst)) {
-                                if (functions.find(jmptarget) == functions.end()) {
-                                    std::stringstream s;
-                                    s << "<Unnamed 0x" << std::hex << jmptarget << ">";
-                                    LLVMFunction * fun = new LLVMFunction(s.str(), jmptarget);
-                                    functions.insert(std::make_pair(jmptarget, fun));
-                                    remaining_functions.push(fun);
-                                }
-                            } else {
-                                if (blocks.find(jmptarget) == blocks.end()) {
-                                    LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this);
-                                    blocks.insert(std::make_pair(block->getStartAddress(), block));
-                                    current_block->setNextBlock(0, block->getStartAddress());
-                                    remaining_blocks.push(block);
-                                }
-                                if (MIA->isConditionalBranch(inst)) {
-                                    jmptarget = base_address + current_address + inst_size;
-                                    if (blocks.find(jmptarget) == blocks.end()) {
-                                        LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this);
-                                        blocks.insert(std::make_pair(block->getStartAddress(), block));
-                                        current_block->setNextBlock(1, block->getStartAddress());
-                                        remaining_blocks.push(block);
-                                    }
-                                }
-                            }
-                        }
+    // Split blocks where jumps are going inside the block
+    for (auto it = blocks.begin(); it != blocks.end(); ++it) {
+        LLVMBasicBlock * current_block = it->second;
+        uint64_t inst_size;
+        uint64_t base_address;
+        text_section.getAddress(base_address);
+        uint64_t current_address = current_block->getStartAddress() - base_address; // offset relative to the start of .text
+        while(current_block->getEndAddress() - base_address > current_address) { // re-decode the block one instruction at a time
+            MCInst inst;
+            std::string buf; // buf and s are declared but never written to in this function
+            llvm::raw_string_ostream s(buf);
+            // Look for a known block that begins at the address right after the current instruction.
+            if(llvm::MCDisassembler::Success ==
+               DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
+                auto other = blocks.find(current_address + inst_size + base_address);
+                // If such a block exists before this block's recorded end, cut this block there.
+                if (other != blocks.end()) {
+                    uint64_t endaddress = current_address + inst_size + base_address;
+                    if (endaddress != current_block->getEndAddress()) { // equal means the block already ends there
+                        LOG4CXX_DEBUG(logger, "Shortening block starting at "
+                                      << std::hex
+                                      << current_block->getStartAddress()
+                                      << " now ending at "
+                                      << other->first);
+                        current_block->setEndAddress(endaddress);
+                        current_block->setNextBlock(0, other->first); // successor 0 is now the block that follows
+                        current_block->setNextBlock(1, 0); // successor 1 is reset to 0
                     }
-                } else {
-                    inst_size = 0;
                 }
-
-
-                if (inst_size == 0 || MIA->isTerminator(inst) || MIA->isBranch(inst)) {
-                    current_block->setEndAddress(current_address + base_address + inst_size);
-                    LOG4CXX_DEBUG(logger, "Finished Block at " << std::hex <<
-                                  current_block->getEndAddress());
-                    break;
-                }
-                current_address += inst_size;
+            } else {
+                inst_size = 1; // undecodable byte: advance by one byte and keep scanning
             }
+            current_address += inst_size;
         }
-        LOG4CXX_DEBUG(logger, "Finished function " << current_function->getName());
     }
 }
 
@@ -283,7 +371,8 @@ void LLVMDisassembler::forEachFunction(std::function<void (uint64_t, Function*)>
                   });
 }
 
-void LLVMDisassembler::printEachInstruction(uint64_t start, uint64_t end, std::function<void (const std::string&)> fun) {
+void LLVMDisassembler::printEachInstruction(uint64_t start, uint64_t end,
+                                                                                       std::function<void (uint8_t*, size_t, const std::string&)> fun) {
     SectionRef text_section = sections[".text"];
     uint64_t base_address;
     text_section.getAddress(base_address);
@@ -304,16 +393,19 @@ void LLVMDisassembler::printEachInstruction(uint64_t start, uint64_t end, std::f
 
             uint8_t bytes[inst_size+2];
             ref.readBytes(current_address, inst_size, bytes);
-            for(uint8_t* cur = bytes; cur < bytes + inst_size; ++cur) {
-                s.write_hex(*cur);
-                s << ' ';
-            }
-            s << '\t';
 
-            IP->printInst(&inst, s, "");
-                       fun(s.str());
+                       uint64_t jmptarget;
+                       if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
+                               std::stringstream stream;
+                               stream << std::hex << (base_address + jmptarget);
+                               IP->printInst(&inst, s, stream.str());
+                       } else
+                               IP->printInst(&inst, s, "");
+
+                       fun(bytes, inst_size, s.str());
         } else {
-                       fun("Invalid Byte");
+                       LOG4CXX_WARN(logger, "Invalid byte at" << std::hex << current_address + base_address);
+                       fun(NULL, 0, "Invalid Byte");
                        inst_size = 1;
                }