git.siccegge.de Git - frida/frida.git/blobdiff - src/disassembler/llvm/LLVMDisassembler.cxx
Change from list to vector
[frida/frida.git] / src / disassembler / llvm / LLVMDisassembler.cxx
index 96d08830e4f26087df5cd3feb373f5f47a781bf5..990ed4f53c19c6f24255d2fb735d1bedb4ba8722 100644 (file)
@@ -1,3 +1,4 @@
+#include "disassembler/Instruction.hxx"
 #include "disassembler/llvm/LLVMDisassembler.hxx"
 #include "core/InformationManager.hxx"
 #include "core/Function.hxx"
@@ -15,6 +16,10 @@ namespace {
        class COFFT {
 
        };
+
+       class MACHOT { // Empty tag type: passed as the template argument of LLVMDisassembler for Mach-O objects
+               // Intentionally has no members; in the visible code it is only used to pick template specializations.
+       };
 }
 
 /*
@@ -44,6 +49,9 @@ Disassembler * createLLVMDisassembler(const std::string& filename, InformationMa
        if (COFFObjectFile * object = dyn_cast<COFFObjectFile>(op)) {
                return new LLVMDisassembler<COFFT>(filename, manager, object);
        }
+       if (MachOObjectFile * object = dyn_cast<MachOObjectFile>(op)) {
+               return new LLVMDisassembler<MACHOT>(filename, manager, object);
+       }
 
        return NULL;
 }
@@ -177,7 +185,7 @@ LLVMDisassembler<ELFT>::~LLVMDisassembler() {}
 template <typename ELFT>
 Function* LLVMDisassembler<ELFT>::disassembleFunctionAt(uint64_t address, const std::string& name) {
        Function * function;
-       SectionRef text_section = sections[".text"];
+       SectionRef text_section = getTextSection();
        uint64_t base_address, size;
        text_section.getAddress(base_address);
        text_section.getSize(size);
@@ -213,7 +221,7 @@ void LLVMDisassembler<ELFT>::disassembleFunction(Function* function) {
         * the other ones at the end of the function!
         */
        std::map<uint64_t, BasicBlock*> new_blocks;
-       SectionRef text_section = sections[".text"];
+       SectionRef text_section = getTextSection();
        StringRef bytes;
        text_section.getContents(bytes);
        StringRefMemoryObject ref(bytes);
@@ -225,6 +233,11 @@ void LLVMDisassembler<ELFT>::disassembleFunction(Function* function) {
        new_blocks.insert(std::make_pair(block->getStartAddress(), block));
        function->addBasicBlock(block);
 
+       uint64_t base_address, size;
+       text_section.getAddress(base_address);
+       text_section.getSize(size);
+       LOG4CXX_DEBUG(logger, "Text section at " << std::hex << base_address << " with size " << size);
+
        while (remaining_blocks.size()) {
                BasicBlock * current_block = remaining_blocks.top();
                remaining_blocks.pop();
@@ -233,8 +246,6 @@ void LLVMDisassembler<ELFT>::disassembleFunction(Function* function) {
                              << current_block->getStartAddress());
 
                uint64_t inst_size;
-               uint64_t base_address;
-               text_section.getAddress(base_address);
                uint64_t current_address = current_block->getStartAddress() - base_address;
                while(true) {
                        MCInst inst;
@@ -305,7 +316,7 @@ void LLVMDisassembler<ELFT>::disassembleFunction(Function* function) {
 
 template <typename ELFT>
 void LLVMDisassembler<ELFT>::disassemble() {
-       SectionRef text_section = sections[".text"];
+       SectionRef text_section = getTextSection();
        std::vector<Function*> remaining_functions;
 
        // Assume all function symbols actually start a real function
@@ -324,9 +335,14 @@ void LLVMDisassembler<ELFT>::disassemble() {
 
                if (!x->second.getAddress(result)) {
                        Function * fun = manager->newFunction(result);
-                       fun->setName(x->first);
-                       remaining_functions.push_back(fun);
-                       LOG4CXX_DEBUG(logger, "Disasembling " << x->first);
+                       if (fun) {
+                               fun->setName(x->first);
+                               remaining_functions.push_back(fun);
+                               LOG4CXX_DEBUG(logger, "Disasembling " << x->first);
+                       } else {
+                               LOG4CXX_DEBUG(logger, "Function at " << std::hex << result
+                                             << " already disassembled as " << manager->getFunction(result)->getName());
+                       }
                }
        }
 
@@ -368,6 +384,12 @@ uint64_t LLVMDisassembler<COFFT>::entryAddress() {
        }
 }
 
+template<>
+uint64_t LLVMDisassembler<MACHOT>::entryAddress() {
+       // TODO: entry point lookup is not implemented for Mach-O objects yet,
+       // so a fixed placeholder address is reported instead.
+       const uint64_t placeholder_entry = 0;
+       return placeholder_entry;
+}
+
 template <typename ELFT>
 uint64_t LLVMDisassembler<ELFT>::entryAddress() {
        const auto elffile = dyn_cast<ELFObjectFile<ELFT>>(o)->getELFFile();
@@ -378,16 +400,21 @@ uint64_t LLVMDisassembler<ELFT>::entryAddress() {
 
 template <typename ELFT>
 void LLVMDisassembler<ELFT>::splitBlocks(Function* function) {
-       SectionRef text_section = sections[".text"];
+       SectionRef text_section = getTextSection();
        StringRef bytes;
        text_section.getContents(bytes);
        StringRefMemoryObject ref(bytes);
 
+       LOG4CXX_DEBUG(logger, "Splitting Blocks in Function " << function->getName());
        // Split blocks where jumps are going inside the block
        for (auto it = function->blocks().begin();
             it != function->blocks().end();
             ++it) {
                BasicBlock * current_block = it->second;
+               if (current_block->getEndAddress() == 0) {
+                       LOG4CXX_ERROR(logger, "UNFINISHED BLOCK " << std::hex << current_block->getStartAddress());
+                       break;
+               }
                uint64_t inst_size;
                uint64_t base_address;
                text_section.getAddress(base_address);
@@ -432,6 +459,11 @@ void LLVMDisassembler<COFFT>::readDynamicSymbols() {
        //TODO
 }
 
+template<> // explicit specialization used for Mach-O objects
+void LLVMDisassembler<MACHOT>::readDynamicSymbols() {
+       // TODO: not implemented for Mach-O yet; this specialization currently does nothing
+}
+
 template <typename ELFT>
 void LLVMDisassembler<ELFT>::readDynamicSymbols() {
        const auto elffile = dyn_cast<ELFObjectFile<ELFT>>(o)->getELFFile();
@@ -444,7 +476,11 @@ void LLVMDisassembler<ELFT>::readDynamicSymbols() {
                        // TODO: Error handling
                        std::string symbolname = *(elffile->getSymbolName(it));
                        std::string symbolversion = *(elffile->getSymbolVersion(nullptr, &*it, is_default));
-                       manager->signal_new_dyn_symbol(symbolname + (is_default? "@@" : "@") + symbolversion);
+                       // TODO: actually get the symbol address from relocations
+                       Function* f = manager->newDynamicFunction(0);
+                       f->setName(symbolname + (is_default? "@@" : "@") + symbolversion);
+                       manager->finishFunction(f);
+
                        LOG4CXX_DEBUG(logger, "Adding dynamic Symbol " << symbolname << (is_default? "@@" : "@") << symbolversion);
                }
        }
@@ -489,12 +525,66 @@ void LLVMDisassembler<ELFT>::readSections() {
 //     //               });
 // }
 
+template <typename ELFT>
+std::vector<Instruction> LLVMDisassembler<ELFT>::getInstructions(const BasicBlock *block) {
+       std::vector<Instruction> result;
+       SectionRef text_section = getTextSection();
+       uint64_t base_address;
+       text_section.getAddress(base_address);
+       uint64_t current_address = block->getStartAddress() - base_address;
+       uint64_t end_position = block->getEndAddress() - base_address;
+
+       StringRef bytes;
+       text_section.getContents(bytes);
+       StringRefMemoryObject ref(bytes);
+
+       while (current_address < end_position) {
+               uint64_t inst_size;
+               MCInst inst;
+               std::string buf;
+               llvm::raw_string_ostream s(buf);
+
+               if(llvm::MCDisassembler::Success ==
+                  DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
+
+                       uint8_t bytes[inst_size+2];
+                       ref.readBytes(current_address, inst_size, bytes);
+
+                       uint64_t jmptarget;
+                       std::string ref("");
+                       IP->printInst(&inst, s, "");
+                       if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
+                               std::stringstream stream;
+                               if (MIA->isCall(inst))
+                                       stream << "function:";
+                               else
+                                       stream << "block:";
+
+                               stream << std::hex << (base_address + jmptarget);
+                               ref = stream.str();
+                       }
+                       result.push_back(Instruction(current_address + base_address, s.str(),
+                                                    std::vector<uint8_t>(bytes, bytes+inst_size), ref));
+               } else {
+                       LOG4CXX_WARN(logger, "Invalid byte at" << std::hex << current_address + base_address);
+                       uint8_t bytes[1];
+                       ref.readBytes(current_address, 1, bytes);
+                       result.push_back(Instruction(current_address + base_address, "Invalid Instruction",
+                                                    std::vector<uint8_t>(bytes, bytes+1), ""));
+                       inst_size = 1;
+               }
+
+               current_address += inst_size;
+       }
+       return result;
+}
+
 template <typename ELFT>
 void LLVMDisassembler<ELFT>::printEachInstruction(uint64_t start, uint64_t end,
                                                   std::function<void (uint8_t*, size_t,
                                                                          const std::string&,
                                                                          const std::string&)> fun) {
-       SectionRef text_section = sections[".text"];
+       SectionRef text_section = getTextSection();
        uint64_t base_address;
        text_section.getAddress(base_address);
        uint64_t current_address = start - base_address;
@@ -540,3 +630,13 @@ void LLVMDisassembler<ELFT>::printEachInstruction(uint64_t start, uint64_t end,
                current_address += inst_size;
        }
 }
+
+template <typename ELFT>
+SectionRef LLVMDisassembler<ELFT>::getTextSection() {
+       // Generic case: the section is looked up in `sections` under the name ".text".
+       SectionRef text = sections[".text"];
+       return text;
+}
+
+template <>
+SectionRef LLVMDisassembler<MACHOT>::getTextSection() {
+       // Mach-O specialization: the section is looked up in `sections` under the name "__text".
+       SectionRef text = sections["__text"];
+       return text;
+}