]> git.siccegge.de Git - frida/frida.git/blobdiff - src/disassembler/llvm/LLVMDisassembler.cxx
Preliminary LLVM-3.6 support
[frida/frida.git] / src / disassembler / llvm / LLVMDisassembler.cxx
index 1f1cdff1d17670d6b87b900523ef33b7f85bbc6a..e673c371650027021455b63f64b94e422dcc7a5d 100644 (file)
@@ -1,7 +1,9 @@
+#include "disassembler/Instruction.hxx"
 #include "disassembler/llvm/LLVMDisassembler.hxx"
 #include "core/InformationManager.hxx"
 #include "core/Function.hxx"
 #include "core/BasicBlock.hxx"
+#include <boost/algorithm/string.hpp>
 
 #include <stack>
 #include <algorithm>
@@ -15,18 +17,35 @@ namespace {
        class COFFT {
 
        };
+
+       class MACHOT { // empty tag type: used as the template argument of LLVMDisassembler for Mach-O object files
+
+       };
 }
 
 /*
  *
  */
 Disassembler * createLLVMDisassembler(const std::string& filename, InformationManager* manager) {
+       log4cxx::LoggerPtr logger(log4cxx::Logger::getLogger("disassembler.LLVMDisassembler"));
        if (filename == "")
                return NULL;
 
-       std::unique_ptr<Binary> o;
-       o.reset(createBinary(filename).get());
-       Binary * op = o.release();
+       auto retval = createBinary(filename);
+       if (error_code ec = retval.getError()) {
+               LOG4CXX_ERROR(logger, ec.message());
+               return NULL;
+       }
+#if defined(LLVM_35)
+       Binary * op = retval.get();
+#elif defined(LLVM_36)
+       OwningBinary<Binary> ob;
+       ob  = std::move(retval.get());
+       Binary* op = ob.getBinary();
+       auto foo = ob.takeBinary();
+       foo.first.release();
+       foo.second.release();
+#endif
 
        // ELFType<endian, maxalign, 64bit>
        if (ELF32LEObjectFile * object = dyn_cast<ELF32LEObjectFile>(op)) {
@@ -44,6 +63,9 @@ Disassembler * createLLVMDisassembler(const std::string& filename, InformationMa
        if (COFFObjectFile * object = dyn_cast<COFFObjectFile>(op)) {
                return new LLVMDisassembler<COFFT>(filename, manager, object);
        }
+       if (MachOObjectFile * object = dyn_cast<MachOObjectFile>(op)) {
+               return new LLVMDisassembler<MACHOT>(filename, manager, object);
+       }
 
        return NULL;
 }
@@ -58,7 +80,7 @@ LLVMDisassembler<ELFT>::LLVMDisassembler(const std::string& filename,
                                          InformationManager* manager,
                                          ObjectFile* file)
        : Disassembler()
-       , logger(log4cxx::Logger::getLogger("LLVMDisassembler"))
+       , logger(log4cxx::Logger::getLogger("disassembler.LLVMDisassembler"))
        , triple("unknown-unknown-unknown")
        , manager(manager)
 {
@@ -74,7 +96,15 @@ LLVMDisassembler<ELFT>::LLVMDisassembler(const std::string& filename,
                        return;
                }
 
+#if defined(LLVM_35)
                binary.reset(result.get());
+#elif defined(LLVM_36)
+               OwningBinary<Binary> ob;
+               ob  = std::move(result.get());
+               Binary* op = ob.getBinary();
+
+               binary.reset(op);
+#endif
 
                o = dyn_cast<ObjectFile>(binary.get());
        } else {
@@ -157,9 +187,9 @@ LLVMDisassembler<ELFT>::LLVMDisassembler(const std::string& filename,
        IP->setPrintImmHex(llvm::HexStyle::C);
        IP->setPrintImmHex(true);
 
-       std::unique_ptr<MCObjectDisassembler> OD(
-               new MCObjectDisassembler(*o, *DisAsm, *MIA));
-       Mod.reset(OD->buildModule(false));
+//     std::unique_ptr<MCObjectDisassembler> OD(
+//             new MCObjectDisassembler(*o, *DisAsm, *MIA));
+       //Mod.reset(OD->buildModule(false));
 
        readSections();
 }
@@ -177,11 +207,15 @@ LLVMDisassembler<ELFT>::~LLVMDisassembler() {}
 template <typename ELFT>
 Function* LLVMDisassembler<ELFT>::disassembleFunctionAt(uint64_t address, const std::string& name) {
        Function * function;
-       SectionRef text_section = sections[".text"];
+       SectionRef text_section = getTextSection();
        uint64_t base_address, size;
+#if defined(LLVM_35)
        text_section.getAddress(base_address);
        text_section.getSize(size);
-
+#elif defined(LLVM_36)
+       base_address = text_section.getAddress();
+       size = text_section.getSize();
+#endif
        if (address < base_address ||
            address >= base_address + size) {
                return NULL;
@@ -213,10 +247,17 @@ void LLVMDisassembler<ELFT>::disassembleFunction(Function* function) {
         * the other ones at the end of the function!
         */
        std::map<uint64_t, BasicBlock*> new_blocks;
-       SectionRef text_section = sections[".text"];
+       SectionRef text_section = getTextSection();
        StringRef bytes;
        text_section.getContents(bytes);
+#if defined(LLVM_35)
        StringRefMemoryObject ref(bytes);
+#elif defined(LLVM_36)
+       ArrayRef<uint8_t> bytearray(reinterpret_cast<const uint8_t *>(bytes.data()),
+                          bytes.size());
+#else
+#error LLVM != 3.5 | 3.6 not supported
+#endif
 
        LOG4CXX_DEBUG(logger, "Handling function " << function->getName());
 
@@ -225,6 +266,16 @@ void LLVMDisassembler<ELFT>::disassembleFunction(Function* function) {
        new_blocks.insert(std::make_pair(block->getStartAddress(), block));
        function->addBasicBlock(block);
 
+       uint64_t base_address, size;
+#if defined(LLVM_35)
+       text_section.getAddress(base_address);
+       text_section.getSize(size);
+#elif defined(LLVM_36)
+       base_address = text_section.getAddress();
+       size = text_section.getSize();
+#endif
+       LOG4CXX_DEBUG(logger, "Text section at " << std::hex << base_address << " with size " << size);
+
        while (remaining_blocks.size()) {
                BasicBlock * current_block = remaining_blocks.top();
                remaining_blocks.pop();
@@ -233,8 +284,6 @@ void LLVMDisassembler<ELFT>::disassembleFunction(Function* function) {
                              << current_block->getStartAddress());
 
                uint64_t inst_size;
-               uint64_t base_address;
-               text_section.getAddress(base_address);
                uint64_t current_address = current_block->getStartAddress() - base_address;
                while(true) {
                        MCInst inst;
@@ -242,7 +291,14 @@ void LLVMDisassembler<ELFT>::disassembleFunction(Function* function) {
                        llvm::raw_string_ostream s(buf);
 
                        if(llvm::MCDisassembler::Success ==
+#if defined(LLVM_35)
                           DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
+#elif defined(LLVM_36)
+                               DisAsm->getInstruction(inst, inst_size,
+                                                      bytearray.slice(current_address),
+                                                      base_address + current_address,
+                                                      nulls(), nulls())) {
+#endif
                                uint64_t jmptarget;
 
                                if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
@@ -305,7 +361,7 @@ void LLVMDisassembler<ELFT>::disassembleFunction(Function* function) {
 
 template <typename ELFT>
 void LLVMDisassembler<ELFT>::disassemble() {
-       SectionRef text_section = sections[".text"];
+       SectionRef text_section = getTextSection();
        std::vector<Function*> remaining_functions;
 
        // Assume all function symbols actually start a real function
@@ -314,8 +370,11 @@ void LLVMDisassembler<ELFT>::disassemble() {
                bool contains;
                SymbolRef::Type symbol_type;
 
-
+#if defined(LLVM_35)
                if (text_section.containsSymbol(x->second, contains) || !contains)
+#elif defined(LLVM_36)
+               if (text_section.containsSymbol(x->second))
+#endif
                        continue;
 
                if (x->second.getType(symbol_type)
@@ -324,9 +383,14 @@ void LLVMDisassembler<ELFT>::disassemble() {
 
                if (!x->second.getAddress(result)) {
                        Function * fun = manager->newFunction(result);
-                       fun->setName(x->first);
-                       remaining_functions.push_back(fun);
-                       LOG4CXX_DEBUG(logger, "Disasembling " << x->first);
+                       if (fun) {
+                               fun->setName(x->first);
+                               remaining_functions.push_back(fun);
+                               LOG4CXX_DEBUG(logger, "Disasembling " << x->first);
+                       } else {
+                               LOG4CXX_DEBUG(logger, "Function at " << std::hex << result
+                                             << " already disassembled as " << manager->getFunction(result)->getName());
+                       }
                }
        }
 
@@ -346,7 +410,11 @@ void LLVMDisassembler<ELFT>::disassemble() {
 
        if (!manager->hasFunctions()) {
                uint64_t text_entry;
+#if defined(LLVM_35)
                text_section.getAddress(text_entry);
+#elif defined(LLVM_36)
+               text_entry = text_section.getAddress();
+#endif
                LOG4CXX_INFO(logger, "No Symbols found, starting at the beginning of the text segment");
                disassembleFunctionAt(text_entry);
        }
@@ -368,6 +436,12 @@ uint64_t LLVMDisassembler<COFFT>::entryAddress() {
        }
 }
 
+template<> // explicit specialization used when the disassembler is instantiated with the MACHOT tag
+uint64_t LLVMDisassembler<MACHOT>::entryAddress() {
+       // TODO: look up the real entry point for Mach-O files; not implemented yet
+       return 0; // placeholder value, always 0 for Mach-O at the moment
+}
+
 template <typename ELFT>
 uint64_t LLVMDisassembler<ELFT>::entryAddress() {
        const auto elffile = dyn_cast<ELFObjectFile<ELFT>>(o)->getELFFile();
@@ -378,19 +452,34 @@ uint64_t LLVMDisassembler<ELFT>::entryAddress() {
 
 template <typename ELFT>
 void LLVMDisassembler<ELFT>::splitBlocks(Function* function) {
-       SectionRef text_section = sections[".text"];
+       SectionRef text_section = getTextSection();
        StringRef bytes;
        text_section.getContents(bytes);
+#if defined(LLVM_35)
        StringRefMemoryObject ref(bytes);
+#elif defined(LLVM_36)
+       ArrayRef<uint8_t> bytearray(reinterpret_cast<const uint8_t *>(bytes.data()),
+                          bytes.size());
+#endif
+
 
+       LOG4CXX_DEBUG(logger, "Splitting Blocks in Function " << function->getName());
        // Split blocks where jumps are going inside the block
        for (auto it = function->blocks().begin();
             it != function->blocks().end();
             ++it) {
                BasicBlock * current_block = it->second;
+               if (current_block->getEndAddress() == 0) {
+                       LOG4CXX_ERROR(logger, "UNFINISHED BLOCK " << std::hex << current_block->getStartAddress());
+                       break;
+               }
                uint64_t inst_size;
                uint64_t base_address;
+#if defined(LLVM_35)
                text_section.getAddress(base_address);
+#elif defined(LLVM_36)
+               base_address = text_section.getAddress();
+#endif
                uint64_t current_address = current_block->getStartAddress() - base_address;
                while(current_block->getEndAddress() - base_address > current_address) {
                        MCInst inst;
@@ -398,7 +487,15 @@ void LLVMDisassembler<ELFT>::splitBlocks(Function* function) {
                        llvm::raw_string_ostream s(buf);
 
                        if(llvm::MCDisassembler::Success ==
+#if defined(LLVM_35)
                           DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
+#elif defined(LLVM_36)
+                               DisAsm->getInstruction(inst, inst_size,
+                                                      bytearray.slice(current_address),
+                                                      base_address + current_address,
+                                                      nulls(), nulls())) {
+#endif
+
                                // See if some other block starts here
                                BasicBlock* other = manager->getBasicBlock(current_address
                                                                           + inst_size
@@ -432,6 +529,11 @@ void LLVMDisassembler<COFFT>::readDynamicSymbols() {
        //TODO
 }
 
+template<> // explicit specialization used when the disassembler is instantiated with the MACHOT tag
+void LLVMDisassembler<MACHOT>::readDynamicSymbols() {
+       //TODO: dynamic symbols are not read for Mach-O yet; this specialization is deliberately a no-op
+}
+
 template <typename ELFT>
 void LLVMDisassembler<ELFT>::readDynamicSymbols() {
        const auto elffile = dyn_cast<ELFObjectFile<ELFT>>(o)->getELFFile();
@@ -444,7 +546,11 @@ void LLVMDisassembler<ELFT>::readDynamicSymbols() {
                        // TODO: Error handling
                        std::string symbolname = *(elffile->getSymbolName(it));
                        std::string symbolversion = *(elffile->getSymbolVersion(nullptr, &*it, is_default));
-                       manager->signal_new_dyn_symbol(symbolname + (is_default? "@@" : "@") + symbolversion);
+                       // TODO: actually get the symbol address from relocations
+                       Function* f = manager->newDynamicFunction(0);
+                       f->setName(symbolname + (is_default? "@@" : "@") + symbolversion);
+                       manager->finishFunction(f);
+
                        LOG4CXX_DEBUG(logger, "Adding dynamic Symbol " << symbolname << (is_default? "@@" : "@") << symbolversion);
                }
        }
@@ -489,19 +595,113 @@ void LLVMDisassembler<ELFT>::readSections() {
 //     //               });
 // }
 
+template <typename ELFT>
+std::vector<Instruction> LLVMDisassembler<ELFT>::getInstructions(const BasicBlock *block) {
+       std::vector<Instruction> result;
+       SectionRef text_section = getTextSection();
+       uint64_t base_address;
+#if defined(LLVM_35)
+       text_section.getAddress(base_address);
+#elif defined(LLVM_36)
+       base_address = text_section.getAddress();
+#endif
+
+       uint64_t current_address = block->getStartAddress() - base_address;
+       uint64_t end_position = block->getEndAddress() - base_address;
+
+       StringRef bytes;
+       text_section.getContents(bytes);
+#if defined(LLVM_35)
+       StringRefMemoryObject ref(bytes);
+#elif defined(LLVM_36)
+       ArrayRef<uint8_t> bytearray(reinterpret_cast<const uint8_t *>(bytes.data()),
+                          bytes.size());
+#endif
+
+
+       while (current_address < end_position) {
+               uint64_t inst_size;
+               MCInst inst;
+               std::string buf;
+               llvm::raw_string_ostream s(buf);
+
+               if(llvm::MCDisassembler::Success ==
+#if defined(LLVM_35)
+                          DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
+#elif defined(LLVM_36)
+                               DisAsm->getInstruction(inst, inst_size,
+                                                      bytearray.slice(current_address),
+                                                      base_address + current_address,
+                                                      nulls(), nulls())) {
+#endif
+
+                       uint8_t bytes[inst_size+2];
+#if defined(LLVM_35)
+                       ref.readBytes(current_address, inst_size, bytes);
+#elif defined(LLVM_36)
+                       size_t bytesindex(0);
+                       for (uint8_t byte : bytearray.slice(current_address, inst_size)) {
+                               bytes[bytesindex++] = byte;
+                       }
+#endif
+
+                       uint64_t jmptarget;
+                       std::string ref("");
+                       IP->printInst(&inst, s, "");
+                       if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
+                               std::stringstream stream;
+                               if (MIA->isCall(inst))
+                                       stream << "function:";
+                               else
+                                       stream << "block:";
+
+                               stream << std::hex << (base_address + jmptarget);
+                               ref = stream.str();
+                       }
+                       result.push_back(Instruction(current_address + base_address, boost::algorithm::trim_copy(s.str()),
+                                                    std::vector<uint8_t>(bytes, bytes+inst_size), ref));
+               } else {
+                       LOG4CXX_WARN(logger, "Invalid byte at" << std::hex << current_address + base_address);
+                       uint8_t bytes[1];
+#if defined(LLVM_35)
+                       ref.readBytes(current_address, 1, bytes);
+#elif defined(LLVM_36)
+                       bytes[0] = bytearray[current_address];
+#endif
+                       result.push_back(Instruction(current_address + base_address, "Invalid Instruction",
+                                                    std::vector<uint8_t>(bytes, bytes+1), ""));
+                       inst_size = 1;
+               }
+
+               current_address += inst_size;
+       }
+       return result;
+}
+
 template <typename ELFT>
 void LLVMDisassembler<ELFT>::printEachInstruction(uint64_t start, uint64_t end,
                                                   std::function<void (uint8_t*, size_t,
                                                                          const std::string&,
                                                                          const std::string&)> fun) {
-       SectionRef text_section = sections[".text"];
+       SectionRef text_section = getTextSection();
        uint64_t base_address;
+#if defined(LLVM_35)
        text_section.getAddress(base_address);
+#elif defined(LLVM_36)
+       base_address = text_section.getAddress();
+#endif
+
        uint64_t current_address = start - base_address;
 
        StringRef bytes;
        text_section.getContents(bytes);
+#if defined(LLVM_35)
        StringRefMemoryObject ref(bytes);
+#elif defined(LLVM_36)
+       ArrayRef<uint8_t> bytearray(reinterpret_cast<const uint8_t *>(bytes.data()),
+                          bytes.size());
+#endif
+
 
        while (current_address < end - base_address) {
                uint64_t inst_size;
@@ -510,10 +710,24 @@ void LLVMDisassembler<ELFT>::printEachInstruction(uint64_t start, uint64_t end,
                llvm::raw_string_ostream s(buf);
 
                if(llvm::MCDisassembler::Success ==
-                  DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
+#if defined(LLVM_35)
+                          DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
+#elif defined(LLVM_36)
+                               DisAsm->getInstruction(inst, inst_size,
+                                                      bytearray.slice(current_address),
+                                                      base_address + current_address,
+                                                      nulls(), nulls())) {
+#endif
 
                        uint8_t bytes[inst_size+2];
+#if defined(LLVM_35)
                        ref.readBytes(current_address, inst_size, bytes);
+#elif defined(LLVM_36)
+                       size_t bytesindex(0);
+                       for (uint8_t byte : bytearray.slice(current_address, inst_size)) {
+                               bytes[bytesindex++] = byte;
+                       }
+#endif
 
                        uint64_t jmptarget;
                        std::string ref("");
@@ -540,3 +754,13 @@ void LLVMDisassembler<ELFT>::printEachInstruction(uint64_t start, uint64_t end,
                current_address += inst_size;
        }
 }
+
+template <typename ELFT> // generic version; the MACHOT instantiation has its own specialization
+SectionRef LLVMDisassembler<ELFT>::getTextSection() {
+       return sections[".text"]; // section stored under the name ".text" (presumably filled by readSections() - confirm)
+}
+
+template <> // Mach-O specialization: the section is looked up as "__text" instead of ".text"
+SectionRef LLVMDisassembler<MACHOT>::getTextSection() {
+       return sections["__text"]; // NOTE(review): callers appear earlier in this file - confirm this specialization is declared before first use
+}