]> git.siccegge.de Git - frida/frida.git/blobdiff - src/disassembler/llvm/LLVMDisassembler.cxx
Some code cleanup
[frida/frida.git] / src / disassembler / llvm / LLVMDisassembler.cxx
index b97f93f7df0618d5bf2ca460d6e071726f5ecfe9..831a1a9da7a09f0e34ca055e49e8c4bb640fcc3d 100644 (file)
@@ -3,6 +3,7 @@
 #include "core/InformationManager.hxx"
 #include "core/Function.hxx"
 #include "core/BasicBlock.hxx"
+#include "core/Exception.hxx"
 #include <boost/algorithm/string.hpp>
 
 #include <stack>
@@ -27,33 +28,57 @@ namespace {
  *
  */
 Disassembler * createLLVMDisassembler(const std::string& filename, InformationManager* manager) {
+       // Opens `filename` via createBinary() and wraps it in the LLVMDisassembler
+       // instantiation matching its object format (ELF 32/64 LE/BE, COFF, Mach-O).
+       // Returns NULL for an empty filename, a createBinary() error, an unmatched
+       // format, or when the constructor throws BinaryNotSupported.
+       log4cxx::LoggerPtr logger(log4cxx::Logger::getLogger("disassembler.LLVMDisassembler"));
        if (filename == "")
                return NULL;
 
-       std::unique_ptr<Binary> o;
-       o.reset(createBinary(filename).get());
-       Binary * op = o.release();
-
-       // ELFType<endian, maxalign, 64bit>
-       if (ELF32LEObjectFile * object = dyn_cast<ELF32LEObjectFile>(op)) {
-               return new LLVMDisassembler<ELFType<support::little, 2, false>>(filename, manager, object);
-       }
-       if (ELF64LEObjectFile * object = dyn_cast<ELF64LEObjectFile>(op)) {
-               return new LLVMDisassembler<ELFType<support::little, 2, true>>(filename, manager, object);
-       }
-       if (ELF32BEObjectFile * object = dyn_cast<ELF32BEObjectFile>(op)) {
-               return new LLVMDisassembler<ELFType<support::big, 2, false>>(filename, manager, object);
-       }
-       if (ELF64BEObjectFile * object = dyn_cast<ELF64BEObjectFile>(op)) {
-               return new LLVMDisassembler<ELFType<support::big, 2, true>>(filename, manager, object);
-       }
-       if (COFFObjectFile * object = dyn_cast<COFFObjectFile>(op)) {
-               return new LLVMDisassembler<COFFT>(filename, manager, object);
+       auto retval = createBinary(filename);
+       if (error_code ec = retval.getError()) {
+               LOG4CXX_ERROR(logger, ec.message());
+               return NULL;
        }
-       if (MachOObjectFile * object = dyn_cast<MachOObjectFile>(op)) {
-               return new LLVMDisassembler<MACHOT>(filename, manager, object);
+#if defined(LLVM_35)
+       Binary * op = retval.get();
+#elif defined(LLVM_36)
+       // Detach the Binary and its backing buffer from the OwningBinary so that
+       // neither is destroyed when `retval` goes out of scope; both are handed
+       // over as raw pointers on purpose.
+       // NOTE(review): nothing frees them again if no format below matches.
+       auto detached = retval.get().takeBinary();
+       Binary* op = detached.first.release();
+       detached.second.release();
+#else
+#error LLVM != 3.5 | 3.6 not supported
+#endif
+
+       try {
+               // ELFType<endian, maxalign, 64bit>
+               if (ELF32LEObjectFile * object = dyn_cast<ELF32LEObjectFile>(op)) {
+                       return new LLVMDisassembler<ELFType<support::little, 2, false>>(filename, manager, object);
+               }
+               if (ELF64LEObjectFile * object = dyn_cast<ELF64LEObjectFile>(op)) {
+                       return new LLVMDisassembler<ELFType<support::little, 2, true>>(filename, manager, object);
+               }
+               if (ELF32BEObjectFile * object = dyn_cast<ELF32BEObjectFile>(op)) {
+                       return new LLVMDisassembler<ELFType<support::big, 2, false>>(filename, manager, object);
+               }
+               if (ELF64BEObjectFile * object = dyn_cast<ELF64BEObjectFile>(op)) {
+                       return new LLVMDisassembler<ELFType<support::big, 2, true>>(filename, manager, object);
+               }
+               if (COFFObjectFile * object = dyn_cast<COFFObjectFile>(op)) {
+                       return new LLVMDisassembler<COFFT>(filename, manager, object);
+               }
+               if (MachOObjectFile * object = dyn_cast<MachOObjectFile>(op)) {
+                       return new LLVMDisassembler<MACHOT>(filename, manager, object);
+               }
+       } catch (const BinaryNotSupported&) {
+               // Thrown by the LLVMDisassembler constructor when target setup fails.
+               return NULL;
        }
-
        return NULL;
 }
 
@@ -83,7 +100,20 @@ LLVMDisassembler<ELFT>::LLVMDisassembler(const std::string& filename,
                        return;
                }
 
+#if defined(LLVM_35)
                binary.reset(result.get());
+#elif defined(LLVM_36)
+               // Move the Binary out of the OwningBinary before `binary` adopts it.
+               // Resetting `binary` to ob.getBinary() while a local OwningBinary still
+               // owned the object meant it was destroyed at the end of this scope,
+               // leaving `binary` and `o` pointing at a destroyed object.
+               auto detached = result.get().takeBinary();
+               binary.reset(detached.first.release());
+               // Released rather than destroyed, as createLLVMDisassembler() does.
+               // NOTE(review): presumably the Binary still reads from this buffer;
+               // keep it in a member instead of leaking it if one is available.
+               detached.second.release();
+#endif
 
                o = dyn_cast<ObjectFile>(binary.get());
        } else {
@@ -101,7 +126,8 @@ LLVMDisassembler<ELFT>::LLVMDisassembler(const std::string& filename,
        target = TargetRegistry::lookupTarget("", triple, es);
        if (!target) {
                LOG4CXX_ERROR(logger, es);
-               return;
+               BinaryNotSupported e;
+               throw e;
        }
 
        LOG4CXX_INFO(logger, "Target " << target->getName());
@@ -109,26 +135,30 @@ LLVMDisassembler<ELFT>::LLVMDisassembler(const std::string& filename,
        MRI.reset(target->createMCRegInfo(tripleName));
        if (!MRI) {
                LOG4CXX_ERROR(logger, "no register info for target " << tripleName);
-               return;
+               BinaryNotSupported e;
+               throw e;
        }
 
        // Set up disassembler.
        AsmInfo.reset(target->createMCAsmInfo(*MRI, tripleName));
        if (!AsmInfo) {
                LOG4CXX_ERROR(logger, "no assembly info for target " << tripleName);
-               return;
+               BinaryNotSupported e;
+               throw e;
        }
 
        STI.reset(target->createMCSubtargetInfo(tripleName, "", ""));
        if (!STI) {
                LOG4CXX_ERROR(logger, "no subtarget info for target " << tripleName);
-               return;
+               BinaryNotSupported e;
+               throw e;
        }
 
        MII.reset(target->createMCInstrInfo());
        if (!MII) {
                LOG4CXX_ERROR(logger, "no instruction info for target " << tripleName);
-               return;
+                               BinaryNotSupported e;
+               throw e;
        }
 
        MOFI.reset(new MCObjectFileInfo);
@@ -137,7 +167,8 @@ LLVMDisassembler<ELFT>::LLVMDisassembler(const std::string& filename,
        DisAsm.reset(target->createMCDisassembler(*STI, Ctx));
        if (!DisAsm) {
                LOG4CXX_ERROR(logger, "no disassembler for target " << tripleName);
-               return;
+               BinaryNotSupported e;
+               throw e;
        }
        RelInfo.reset(
                target->createMCRelocationInfo(tripleName, Ctx));
@@ -153,22 +184,24 @@ LLVMDisassembler<ELFT>::LLVMDisassembler(const std::string& filename,
        MIA.reset(target->createMCInstrAnalysis(MII.get()));
        if (!MIA) {
                LOG4CXX_ERROR(logger, "no instruction analysis for target " << tripleName);
-               return;
+               BinaryNotSupported e;
+               throw e;
        }
 
        int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
        IP.reset(target->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI));
        if (!IP) {
                LOG4CXX_ERROR(logger, "no instruction printer for target " << tripleName);
-               return;
+               BinaryNotSupported e;
+               throw e;
        }
 
        IP->setPrintImmHex(llvm::HexStyle::C);
        IP->setPrintImmHex(true);
 
-       std::unique_ptr<MCObjectDisassembler> OD(
-               new MCObjectDisassembler(*o, *DisAsm, *MIA));
-       Mod.reset(OD->buildModule(false));
+//     std::unique_ptr<MCObjectDisassembler> OD(
+//             new MCObjectDisassembler(*o, *DisAsm, *MIA));
+       //Mod.reset(OD->buildModule(false));
 
        readSections();
 }
@@ -188,9 +221,13 @@ Function* LLVMDisassembler<ELFT>::disassembleFunctionAt(uint64_t address, const
        Function * function;
        SectionRef text_section = getTextSection();
        uint64_t base_address, size;
+#if defined(LLVM_35)
        text_section.getAddress(base_address);
        text_section.getSize(size);
-
+#elif defined(LLVM_36)
+       base_address = text_section.getAddress();
+       size = text_section.getSize();
+#endif
        if (address < base_address ||
            address >= base_address + size) {
                return NULL;
@@ -224,19 +261,35 @@ void LLVMDisassembler<ELFT>::disassembleFunction(Function* function) {
        std::map<uint64_t, BasicBlock*> new_blocks;
        SectionRef text_section = getTextSection();
        StringRef bytes;
+       uint64_t base_address, size;
        text_section.getContents(bytes);
+#if defined(LLVM_35)
        StringRefMemoryObject ref(bytes);
+       text_section.getAddress(base_address);
+       text_section.getSize(size);
+#elif defined(LLVM_36)
+       ArrayRef<uint8_t> bytearray(reinterpret_cast<const uint8_t *>(bytes.data()),
+                          bytes.size());
+       base_address = text_section.getAddress();
+       size = text_section.getSize();
+#else
+#error LLVM != 3.5 | 3.6 not supported
+#endif
 
        LOG4CXX_DEBUG(logger, "Handling function " << function->getName());
 
+       // The text section covers [base_address, base_address + size): the end
+       // address itself is already outside, the same bound disassembleFunctionAt uses.
+       if(function->getStartAddress() < base_address || function->getStartAddress() >= base_address + size) {
+               LOG4CXX_INFO(logger, "Trying to disassemble function " << function->getName() << " but start address " << std::hex << function->getStartAddress() << " is located outside the text segment");
+               return;
+       }
+
        BasicBlock * block = manager->newBasicBlock(function->getStartAddress());
        remaining_blocks.push(block);
        new_blocks.insert(std::make_pair(block->getStartAddress(), block));
        function->addBasicBlock(block);
 
-       uint64_t base_address, size;
-       text_section.getAddress(base_address);
-       text_section.getSize(size);
        LOG4CXX_DEBUG(logger, "Text section at " << std::hex << base_address << " with size " << size);
 
        while (remaining_blocks.size()) {
@@ -254,7 +305,14 @@ void LLVMDisassembler<ELFT>::disassembleFunction(Function* function) {
                        llvm::raw_string_ostream s(buf);
 
                        if(llvm::MCDisassembler::Success ==
+#if defined(LLVM_35)
                           DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
+#elif defined(LLVM_36)
+                               DisAsm->getInstruction(inst, inst_size,
+                                                      bytearray.slice(current_address),
+                                                      base_address + current_address,
+                                                      nulls(), nulls())) {
+#endif
                                uint64_t jmptarget;
 
                                if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
@@ -326,8 +384,11 @@ void LLVMDisassembler<ELFT>::disassemble() {
                bool contains;
                SymbolRef::Type symbol_type;
 
-
+#if defined(LLVM_35)
                if (text_section.containsSymbol(x->second, contains) || !contains)
+#elif defined(LLVM_36)
+               if (!text_section.containsSymbol(x->second))
+#endif
                        continue;
 
                if (x->second.getType(symbol_type)
@@ -363,7 +424,11 @@ void LLVMDisassembler<ELFT>::disassemble() {
 
        if (!manager->hasFunctions()) {
                uint64_t text_entry;
+#if defined(LLVM_35)
                text_section.getAddress(text_entry);
+#elif defined(LLVM_36)
+               text_entry = text_section.getAddress();
+#endif
                LOG4CXX_INFO(logger, "No Symbols found, starting at the beginning of the text segment");
                disassembleFunctionAt(text_entry);
        }
@@ -404,7 +469,13 @@ void LLVMDisassembler<ELFT>::splitBlocks(Function* function) {
        SectionRef text_section = getTextSection();
        StringRef bytes;
        text_section.getContents(bytes);
+#if defined(LLVM_35)
        StringRefMemoryObject ref(bytes);
+#elif defined(LLVM_36)
+       ArrayRef<uint8_t> bytearray(reinterpret_cast<const uint8_t *>(bytes.data()),
+                          bytes.size());
+#endif
+
 
        LOG4CXX_DEBUG(logger, "Splitting Blocks in Function " << function->getName());
        // Split blocks where jumps are going inside the block
@@ -418,7 +489,11 @@ void LLVMDisassembler<ELFT>::splitBlocks(Function* function) {
                }
                uint64_t inst_size;
                uint64_t base_address;
+#if defined(LLVM_35)
                text_section.getAddress(base_address);
+#elif defined(LLVM_36)
+               base_address = text_section.getAddress();
+#endif
                uint64_t current_address = current_block->getStartAddress() - base_address;
                while(current_block->getEndAddress() - base_address > current_address) {
                        MCInst inst;
@@ -426,7 +501,15 @@ void LLVMDisassembler<ELFT>::splitBlocks(Function* function) {
                        llvm::raw_string_ostream s(buf);
 
                        if(llvm::MCDisassembler::Success ==
+#if defined(LLVM_35)
                           DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
+#elif defined(LLVM_36)
+                               DisAsm->getInstruction(inst, inst_size,
+                                                      bytearray.slice(current_address),
+                                                      base_address + current_address,
+                                                      nulls(), nulls())) {
+#endif
+
                                // See if some other block starts here
                                BasicBlock* other = manager->getBasicBlock(current_address
                                                                           + inst_size
@@ -493,11 +576,15 @@ void LLVMDisassembler<ELFT>::readSymbols() {
        symbol_iterator si(o->symbol_begin()), se(o->symbol_end());
        for (; si != se; ++si) {
                StringRef name;
+               // The result of getAddress() is ignored, so give `address` a defined value
+               // for the debug line below in case the call leaves it untouched.
+               uint64_t address = 0;
+               si->getAddress(address);
                if ((ec = si->getName(name))) {
                        LOG4CXX_ERROR(logger, ec.message());
                        break;
                }
-               LOG4CXX_DEBUG(logger, "Added symbol " << name.str());
+               LOG4CXX_DEBUG(logger, "Added symbol " << name.str() << " at address " << std::hex << address);
                symbols.insert(make_pair(name.str(), *si));
        }
 }
@@ -531,13 +616,24 @@ std::vector<Instruction> LLVMDisassembler<ELFT>::getInstructions(const BasicBloc
        std::vector<Instruction> result;
        SectionRef text_section = getTextSection();
        uint64_t base_address;
+#if defined(LLVM_35)
        text_section.getAddress(base_address);
+#elif defined(LLVM_36)
+       base_address = text_section.getAddress();
+#endif
+
        uint64_t current_address = block->getStartAddress() - base_address;
        uint64_t end_position = block->getEndAddress() - base_address;
 
        StringRef bytes;
        text_section.getContents(bytes);
+#if defined(LLVM_35)
        StringRefMemoryObject ref(bytes);
+#elif defined(LLVM_36)
+       ArrayRef<uint8_t> bytearray(reinterpret_cast<const uint8_t *>(bytes.data()),
+                          bytes.size());
+#endif
+
 
        while (current_address < end_position) {
                uint64_t inst_size;
@@ -546,10 +642,24 @@ std::vector<Instruction> LLVMDisassembler<ELFT>::getInstructions(const BasicBloc
                llvm::raw_string_ostream s(buf);
 
                if(llvm::MCDisassembler::Success ==
-                  DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
+#if defined(LLVM_35)
+                          DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
+#elif defined(LLVM_36)
+                               DisAsm->getInstruction(inst, inst_size,
+                                                      bytearray.slice(current_address),
+                                                      base_address + current_address,
+                                                      nulls(), nulls())) {
+#endif
 
                        uint8_t bytes[inst_size+2];
+#if defined(LLVM_35)
                        ref.readBytes(current_address, inst_size, bytes);
+#elif defined(LLVM_36)
+                       size_t bytesindex(0);
+                       for (uint8_t byte : bytearray.slice(current_address, inst_size)) {
+                               bytes[bytesindex++] = byte;
+                       }
+#endif
 
                        uint64_t jmptarget;
                        std::string ref("");
@@ -569,7 +679,11 @@ std::vector<Instruction> LLVMDisassembler<ELFT>::getInstructions(const BasicBloc
                } else {
                        LOG4CXX_WARN(logger, "Invalid byte at" << std::hex << current_address + base_address);
                        uint8_t bytes[1];
+#if defined(LLVM_35)
                        ref.readBytes(current_address, 1, bytes);
+#elif defined(LLVM_36)
+                       bytes[0] = bytearray[current_address];
+#endif
                        result.push_back(Instruction(current_address + base_address, "Invalid Instruction",
                                                     std::vector<uint8_t>(bytes, bytes+1), ""));
                        inst_size = 1;
@@ -587,12 +701,23 @@ void LLVMDisassembler<ELFT>::printEachInstruction(uint64_t start, uint64_t end,
                                                                          const std::string&)> fun) {
        SectionRef text_section = getTextSection();
        uint64_t base_address;
+#if defined(LLVM_35)
        text_section.getAddress(base_address);
+#elif defined(LLVM_36)
+       base_address = text_section.getAddress();
+#endif
+
        uint64_t current_address = start - base_address;
 
        StringRef bytes;
        text_section.getContents(bytes);
+#if defined(LLVM_35)
        StringRefMemoryObject ref(bytes);
+#elif defined(LLVM_36)
+       ArrayRef<uint8_t> bytearray(reinterpret_cast<const uint8_t *>(bytes.data()),
+                          bytes.size());
+#endif
+
 
        while (current_address < end - base_address) {
                uint64_t inst_size;
@@ -601,10 +726,24 @@ void LLVMDisassembler<ELFT>::printEachInstruction(uint64_t start, uint64_t end,
                llvm::raw_string_ostream s(buf);
 
                if(llvm::MCDisassembler::Success ==
-                  DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
+#if defined(LLVM_35)
+                          DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
+#elif defined(LLVM_36)
+                               DisAsm->getInstruction(inst, inst_size,
+                                                      bytearray.slice(current_address),
+                                                      base_address + current_address,
+                                                      nulls(), nulls())) {
+#endif
 
                        uint8_t bytes[inst_size+2];
+#if defined(LLVM_35)
                        ref.readBytes(current_address, inst_size, bytes);
+#elif defined(LLVM_36)
+                       size_t bytesindex(0);
+                       for (uint8_t byte : bytearray.slice(current_address, inst_size)) {
+                               bytes[bytesindex++] = byte;
+                       }
+#endif
 
                        uint64_t jmptarget;
                        std::string ref("");