]> git.siccegge.de Git - frida/frida.git/blobdiff - src/disassembler/llvm/LLVMDisassembler.cxx
Preliminary LLVM-3.6 support
[frida/frida.git] / src / disassembler / llvm / LLVMDisassembler.cxx
index 773a5189a27a33d35085da9f698864970b3ac9ec..e673c371650027021455b63f64b94e422dcc7a5d 100644 (file)
@@ -1,7 +1,9 @@
+#include "disassembler/Instruction.hxx"
 #include "disassembler/llvm/LLVMDisassembler.hxx"
 #include "core/InformationManager.hxx"
 #include "core/Function.hxx"
 #include "core/BasicBlock.hxx"
+#include <boost/algorithm/string.hpp>
 
 #include <stack>
 #include <algorithm>
@@ -11,16 +13,39 @@ using namespace llvm;
 using namespace llvm::object;
 using std::error_code;
 
+namespace {
+       class COFFT {
+
+       };
+
+       class MACHOT {
+
+       };
+}
+
 /*
  *
  */
 Disassembler * createLLVMDisassembler(const std::string& filename, InformationManager* manager) {
+       log4cxx::LoggerPtr logger(log4cxx::Logger::getLogger("disassembler.LLVMDisassembler"));
        if (filename == "")
                return NULL;
 
-       std::unique_ptr<Binary> o;
-       o.reset(createBinary(filename).get());
-       Binary * op = o.release();
+       auto retval = createBinary(filename);
+       if (error_code ec = retval.getError()) {
+               LOG4CXX_ERROR(logger, ec.message());
+               return NULL;
+       }
+#if defined(LLVM_35)
+       Binary * op = retval.get();
+#elif defined(LLVM_36)
+       OwningBinary<Binary> ob;
+       ob  = std::move(retval.get());
+       Binary* op = ob.getBinary();
+       auto foo = ob.takeBinary();
+       foo.first.release();
+       foo.second.release();
+#endif
 
        // ELFType<endian, maxalign, 64bit>
        if (ELF32LEObjectFile * object = dyn_cast<ELF32LEObjectFile>(op)) {
@@ -35,6 +60,12 @@ Disassembler * createLLVMDisassembler(const std::string& filename, InformationMa
        if (ELF64BEObjectFile * object = dyn_cast<ELF64BEObjectFile>(op)) {
                return new LLVMDisassembler<ELFType<support::big, 2, true>>(filename, manager, object);
        }
+       if (COFFObjectFile * object = dyn_cast<COFFObjectFile>(op)) {
+               return new LLVMDisassembler<COFFT>(filename, manager, object);
+       }
+       if (MachOObjectFile * object = dyn_cast<MachOObjectFile>(op)) {
+               return new LLVMDisassembler<MACHOT>(filename, manager, object);
+       }
 
        return NULL;
 }
@@ -47,13 +78,13 @@ Disassembler * createLLVMDisassembler(const std::string& filename, InformationMa
 template <typename ELFT>
 LLVMDisassembler<ELFT>::LLVMDisassembler(const std::string& filename,
                                          InformationManager* manager,
-                                         ELFObjectFile<ELFT>* file)
-       : Disassembler(filename, manager)
-       , logger(log4cxx::Logger::getLogger("LLVMDisassembler"))
+                                         ObjectFile* file)
+       : Disassembler()
+       , logger(log4cxx::Logger::getLogger("disassembler.LLVMDisassembler"))
        , triple("unknown-unknown-unknown")
        , manager(manager)
 {
-       LOG4CXX_DEBUG(logger, "Handling file" << filename);
+       LOG4CXX_DEBUG(logger, "Handling file " << filename);
 
        if (!file) {
                auto result = createBinary(filename);
@@ -65,9 +96,17 @@ LLVMDisassembler<ELFT>::LLVMDisassembler(const std::string& filename,
                        return;
                }
 
+#if defined(LLVM_35)
                binary.reset(result.get());
+#elif defined(LLVM_36)
+               // Take ownership; a local OwningBinary would free the Binary on scope exit.
+               auto owned = result.get().takeBinary();
+               // The buffer is released, not freed, exactly as createLLVMDisassembler does.
+               owned.second.release();
+               binary.reset(owned.first.release());
+#endif
 
-               o = dyn_cast<ELFObjectFile<ELFT>>(binary.get());
+               o = dyn_cast<ObjectFile>(binary.get());
        } else {
                o = file;
                binary.reset(file);
@@ -124,10 +163,10 @@ LLVMDisassembler<ELFT>::LLVMDisassembler(const std::string& filename,
        RelInfo.reset(
                target->createMCRelocationInfo(tripleName, Ctx));
        if (RelInfo) {
-               Symzer.reset(
-                       MCObjectSymbolizer::createObjectSymbolizer(Ctx, std::move(RelInfo), o));
-               if (Symzer)
-                       DisAsm->setSymbolizer(std::move(Symzer));
+               // Symzer.reset(
+               //      MCObjectSymbolizer::createObjectSymbolizer(Ctx, std::move(RelInfo), o));
+               // if (Symzer)
+               //      DisAsm->setSymbolizer(std::move(Symzer));
        }
        RelInfo.release();
        Symzer.release();
@@ -148,15 +187,16 @@ LLVMDisassembler<ELFT>::LLVMDisassembler(const std::string& filename,
        IP->setPrintImmHex(llvm::HexStyle::C);
        IP->setPrintImmHex(true);
 
-       std::unique_ptr<MCObjectDisassembler> OD(
-               new MCObjectDisassembler(*o, *DisAsm, *MIA));
-       Mod.reset(OD->buildModule(false));
+//     std::unique_ptr<MCObjectDisassembler> OD(
+//             new MCObjectDisassembler(*o, *DisAsm, *MIA));
+       //Mod.reset(OD->buildModule(false));
+
+       readSections();
 }
 
 template <typename ELFT>
 void LLVMDisassembler<ELFT>::start() {
        readSymbols();
-       readSections();
        disassemble();
        readDynamicSymbols();
 }
@@ -167,11 +207,15 @@ LLVMDisassembler<ELFT>::~LLVMDisassembler() {}
 template <typename ELFT>
 Function* LLVMDisassembler<ELFT>::disassembleFunctionAt(uint64_t address, const std::string& name) {
        Function * function;
-       SectionRef text_section = sections[".text"];
+       SectionRef text_section = getTextSection();
        uint64_t base_address, size;
+#if defined(LLVM_35)
        text_section.getAddress(base_address);
        text_section.getSize(size);
-
+#elif defined(LLVM_36)
+       base_address = text_section.getAddress();
+       size = text_section.getSize();
+#endif
        if (address < base_address ||
            address >= base_address + size) {
                return NULL;
@@ -189,7 +233,6 @@ Function* LLVMDisassembler<ELFT>::disassembleFunctionAt(uint64_t address, const
                        function->setName(name);
                }
                disassembleFunction(function);
-               manager->finishFunction(function);
        }
 
        return function;
@@ -197,16 +240,24 @@ Function* LLVMDisassembler<ELFT>::disassembleFunctionAt(uint64_t address, const
 
 template <typename ELFT>
 void LLVMDisassembler<ELFT>::disassembleFunction(Function* function) {
+       std::vector<uint64_t> called_functions;
        std::stack<BasicBlock*> remaining_blocks;
        /* TODO:
         * Do all blocks get added properly? We should take care to remove
         * the other ones at the end of the function!
         */
        std::map<uint64_t, BasicBlock*> new_blocks;
-       SectionRef text_section = sections[".text"];
+       SectionRef text_section = getTextSection();
        StringRef bytes;
        text_section.getContents(bytes);
+#if defined(LLVM_35)
        StringRefMemoryObject ref(bytes);
+#elif defined(LLVM_36)
+       ArrayRef<uint8_t> bytearray(reinterpret_cast<const uint8_t *>(bytes.data()),
+                          bytes.size());
+#else
+#error LLVM != 3.5 | 3.6 not supported
+#endif
 
        LOG4CXX_DEBUG(logger, "Handling function " << function->getName());
 
@@ -215,6 +266,16 @@ void LLVMDisassembler<ELFT>::disassembleFunction(Function* function) {
        new_blocks.insert(std::make_pair(block->getStartAddress(), block));
        function->addBasicBlock(block);
 
+       uint64_t base_address, size;
+#if defined(LLVM_35)
+       text_section.getAddress(base_address);
+       text_section.getSize(size);
+#elif defined(LLVM_36)
+       base_address = text_section.getAddress();
+       size = text_section.getSize();
+#endif
+       LOG4CXX_DEBUG(logger, "Text section at " << std::hex << base_address << " with size " << size);
+
        while (remaining_blocks.size()) {
                BasicBlock * current_block = remaining_blocks.top();
                remaining_blocks.pop();
@@ -223,8 +284,6 @@ void LLVMDisassembler<ELFT>::disassembleFunction(Function* function) {
                              << current_block->getStartAddress());
 
                uint64_t inst_size;
-               uint64_t base_address;
-               text_section.getAddress(base_address);
                uint64_t current_address = current_block->getStartAddress() - base_address;
                while(true) {
                        MCInst inst;
@@ -232,7 +291,14 @@ void LLVMDisassembler<ELFT>::disassembleFunction(Function* function) {
                        llvm::raw_string_ostream s(buf);
 
                        if(llvm::MCDisassembler::Success ==
+#if defined(LLVM_35)
                           DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
+#elif defined(LLVM_36)
+                               DisAsm->getInstruction(inst, inst_size,
+                                                      bytearray.slice(current_address),
+                                                      base_address + current_address,
+                                                      nulls(), nulls())) {
+#endif
                                uint64_t jmptarget;
 
                                if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
@@ -240,7 +306,7 @@ void LLVMDisassembler<ELFT>::disassembleFunction(Function* function) {
                                        if (!MIA->isIndirectBranch(inst)) {
                                                if (MIA->isCall(inst)) {
                                                        if (NULL == manager->getFunction(jmptarget))
-                                                               disassembleFunctionAt(jmptarget);
+                                                               called_functions.push_back(jmptarget);
                                                } else {
                                                        current_block->setNextBlock(0, jmptarget);
                                                        if (new_blocks.find(jmptarget) == new_blocks.end()) {
@@ -288,12 +354,14 @@ void LLVMDisassembler<ELFT>::disassembleFunction(Function* function) {
        }
        splitBlocks(function);
        LOG4CXX_DEBUG(logger, "Finished function " << function->getName());
-       manager->signal_new_function(function);
+       manager->finishFunction(function);
+       for (uint64_t address : called_functions)
+               disassembleFunctionAt(address);
 }
 
 template <typename ELFT>
 void LLVMDisassembler<ELFT>::disassemble() {
-       SectionRef text_section = sections[".text"];
+       SectionRef text_section = getTextSection();
        std::vector<Function*> remaining_functions;
 
        // Assume all function symbols actually start a real function
@@ -302,8 +370,11 @@ void LLVMDisassembler<ELFT>::disassemble() {
                bool contains;
                SymbolRef::Type symbol_type;
 
-
+#if defined(LLVM_35)
                if (text_section.containsSymbol(x->second, contains) || !contains)
+#elif defined(LLVM_36)
+               if (!text_section.containsSymbol(x->second))
+#endif
                        continue;
 
                if (x->second.getType(symbol_type)
@@ -312,9 +383,14 @@ void LLVMDisassembler<ELFT>::disassemble() {
 
                if (!x->second.getAddress(result)) {
                        Function * fun = manager->newFunction(result);
-                       fun->setName(x->first);
-                       remaining_functions.push_back(fun);
-                       LOG4CXX_DEBUG(logger, "Disasembling " << x->first);
+                       if (fun) {
+                               fun->setName(x->first);
+                               remaining_functions.push_back(fun);
+                               LOG4CXX_DEBUG(logger, "Disasembling " << x->first);
+                       } else {
+                               LOG4CXX_DEBUG(logger, "Function at " << std::hex << result
+                                             << " already disassembled as " << manager->getFunction(result)->getName());
+                       }
                }
        }
 
@@ -324,10 +400,7 @@ void LLVMDisassembler<ELFT>::disassemble() {
        }
 
        if (binary->isELF()) {
-               const ELFO * elffile = o->getELFFile();
-               const typename ELFO::Elf_Ehdr * header = elffile->getHeader();
-
-               _entryAddress = header->e_entry;
+               uint64_t _entryAddress = entryAddress();
                LOG4CXX_DEBUG(logger, "Adding entryAddress at: " << std::hex << _entryAddress);
                std::stringstream s;
                s << "<_start 0x" << std::hex << _entryAddress << ">";
@@ -335,27 +408,78 @@ void LLVMDisassembler<ELFT>::disassemble() {
                disassembleFunctionAt(_entryAddress, s.str());
        }
 
-       uint64_t text_entry;
-       text_section.getAddress(text_entry);
-       LOG4CXX_INFO(logger, "No Symbols found, starting at the beginning of the text segment");
-       disassembleFunctionAt(text_entry);
+       if (!manager->hasFunctions()) {
+               uint64_t text_entry;
+#if defined(LLVM_35)
+               text_section.getAddress(text_entry);
+#elif defined(LLVM_36)
+               text_entry = text_section.getAddress();
+#endif
+               LOG4CXX_INFO(logger, "No Symbols found, starting at the beginning of the text segment");
+               disassembleFunctionAt(text_entry);
+       }
+}
+
+template <>
+uint64_t LLVMDisassembler<COFFT>::entryAddress() {
+       const auto coffobject = dyn_cast<COFFObjectFile>(o);
+       const struct pe32_header* pe32_header = nullptr;
+       const struct pe32plus_header* pe32plus_header = nullptr;
+
+       coffobject->getPE32PlusHeader(pe32plus_header);
+       if (pe32plus_header)
+               return pe32plus_header->AddressOfEntryPoint;
+       // A missing PE32 header yields 0 (like the Mach-O stub), not a null dereference.
+       coffobject->getPE32Header(pe32_header);
+       if (!pe32_header)
+               return 0;
+       return pe32_header->AddressOfEntryPoint;
+}
+
+template<>
+uint64_t LLVMDisassembler<MACHOT>::entryAddress() {
+       // TODO
+       return 0;
+}
+
+template <typename ELFT>
+uint64_t LLVMDisassembler<ELFT>::entryAddress() {
+       const auto elffile = dyn_cast<ELFObjectFile<ELFT>>(o)->getELFFile();
+       const auto * header = elffile->getHeader();
+
+       return header->e_entry;
 }
 
 template <typename ELFT>
 void LLVMDisassembler<ELFT>::splitBlocks(Function* function) {
-       SectionRef text_section = sections[".text"];
+       SectionRef text_section = getTextSection();
        StringRef bytes;
        text_section.getContents(bytes);
+#if defined(LLVM_35)
        StringRefMemoryObject ref(bytes);
+#elif defined(LLVM_36)
+       ArrayRef<uint8_t> bytearray(reinterpret_cast<const uint8_t *>(bytes.data()),
+                          bytes.size());
+#endif
 
+
+       LOG4CXX_DEBUG(logger, "Splitting Blocks in Function " << function->getName());
        // Split blocks where jumps are going inside the block
        for (auto it = function->blocks().begin();
             it != function->blocks().end();
             ++it) {
                BasicBlock * current_block = it->second;
+               if (current_block->getEndAddress() == 0) {
+                       LOG4CXX_ERROR(logger, "UNFINISHED BLOCK " << std::hex << current_block->getStartAddress());
+                       break;
+               }
                uint64_t inst_size;
                uint64_t base_address;
+#if defined(LLVM_35)
                text_section.getAddress(base_address);
+#elif defined(LLVM_36)
+               base_address = text_section.getAddress();
+#endif
                uint64_t current_address = current_block->getStartAddress() - base_address;
                while(current_block->getEndAddress() - base_address > current_address) {
                        MCInst inst;
@@ -363,7 +487,15 @@ void LLVMDisassembler<ELFT>::splitBlocks(Function* function) {
                        llvm::raw_string_ostream s(buf);
 
                        if(llvm::MCDisassembler::Success ==
+#if defined(LLVM_35)
                           DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
+#elif defined(LLVM_36)
+                               DisAsm->getInstruction(inst, inst_size,
+                                                      bytearray.slice(current_address),
+                                                      base_address + current_address,
+                                                      nulls(), nulls())) {
+#endif
+
                                // See if some other block starts here
                                BasicBlock* other = manager->getBasicBlock(current_address
                                                                           + inst_size
@@ -392,11 +524,20 @@ void LLVMDisassembler<ELFT>::splitBlocks(Function* function) {
        }
 }
 
+template<>
+void LLVMDisassembler<COFFT>::readDynamicSymbols() {
+       //TODO
+}
+
+template<>
+void LLVMDisassembler<MACHOT>::readDynamicSymbols() {
+       //TODO
+}
+
 template <typename ELFT>
 void LLVMDisassembler<ELFT>::readDynamicSymbols() {
-       const ELFO * elffile = o->getELFFile();
-       for (typename ELFO::Elf_Sym_Iter
-                    it = elffile->begin_dynamic_symbols(),
+       const auto elffile = dyn_cast<ELFObjectFile<ELFT>>(o)->getELFFile();
+       for (auto it = elffile->begin_dynamic_symbols(),
                     end = elffile->end_dynamic_symbols();
             it != end;
             ++it) {
@@ -405,7 +546,11 @@ void LLVMDisassembler<ELFT>::readDynamicSymbols() {
                        // TODO: Error handling
                        std::string symbolname = *(elffile->getSymbolName(it));
                        std::string symbolversion = *(elffile->getSymbolVersion(nullptr, &*it, is_default));
-                       manager->signal_new_dyn_symbol(symbolname + (is_default? "@@" : "@") + symbolversion);
+                       // TODO: actually get the symbol address from relocations
+                       Function* f = manager->newDynamicFunction(0);
+                       f->setName(symbolname + (is_default? "@@" : "@") + symbolversion);
+                       manager->finishFunction(f);
+
                        LOG4CXX_DEBUG(logger, "Adding dynamic Symbol " << symbolname << (is_default? "@@" : "@") << symbolversion);
                }
        }
@@ -450,19 +595,113 @@ void LLVMDisassembler<ELFT>::readSections() {
 //     //               });
 // }
 
+template <typename ELFT>
+std::vector<Instruction> LLVMDisassembler<ELFT>::getInstructions(const BasicBlock *block) {
+       std::vector<Instruction> result;
+       SectionRef text_section = getTextSection();
+       uint64_t base_address;
+#if defined(LLVM_35)
+       text_section.getAddress(base_address);
+#elif defined(LLVM_36)
+       base_address = text_section.getAddress();
+#endif
+
+       uint64_t current_address = block->getStartAddress() - base_address;
+       uint64_t end_position = block->getEndAddress() - base_address;
+
+       StringRef bytes;
+       text_section.getContents(bytes);
+#if defined(LLVM_35)
+       StringRefMemoryObject ref(bytes);
+#elif defined(LLVM_36)
+       ArrayRef<uint8_t> bytearray(reinterpret_cast<const uint8_t *>(bytes.data()),
+                          bytes.size());
+#endif
+
+
+       while (current_address < end_position) {
+               uint64_t inst_size;
+               MCInst inst;
+               std::string buf;
+               llvm::raw_string_ostream s(buf);
+
+               if(llvm::MCDisassembler::Success ==
+#if defined(LLVM_35)
+                          DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
+#elif defined(LLVM_36)
+                               DisAsm->getInstruction(inst, inst_size,
+                                                      bytearray.slice(current_address),
+                                                      base_address + current_address,
+                                                      nulls(), nulls())) {
+#endif
+
+                       uint8_t bytes[inst_size+2];
+#if defined(LLVM_35)
+                       ref.readBytes(current_address, inst_size, bytes);
+#elif defined(LLVM_36)
+                       size_t bytesindex(0);
+                       for (uint8_t byte : bytearray.slice(current_address, inst_size)) {
+                               bytes[bytesindex++] = byte;
+                       }
+#endif
+
+                       uint64_t jmptarget;
+                       std::string ref("");
+                       IP->printInst(&inst, s, "");
+                       if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
+                               std::stringstream stream;
+                               if (MIA->isCall(inst))
+                                       stream << "function:";
+                               else
+                                       stream << "block:";
+
+                               stream << std::hex << (base_address + jmptarget);
+                               ref = stream.str();
+                       }
+                       result.push_back(Instruction(current_address + base_address, boost::algorithm::trim_copy(s.str()),
+                                                    std::vector<uint8_t>(bytes, bytes+inst_size), ref));
+               } else {
+                       LOG4CXX_WARN(logger, "Invalid byte at " << std::hex << current_address + base_address);
+                       uint8_t bytes[1];
+#if defined(LLVM_35)
+                       ref.readBytes(current_address, 1, bytes);
+#elif defined(LLVM_36)
+                       bytes[0] = bytearray[current_address];
+#endif
+                       result.push_back(Instruction(current_address + base_address, "Invalid Instruction",
+                                                    std::vector<uint8_t>(bytes, bytes+1), ""));
+                       inst_size = 1;
+               }
+
+               current_address += inst_size;
+       }
+       return result;
+}
+
 template <typename ELFT>
 void LLVMDisassembler<ELFT>::printEachInstruction(uint64_t start, uint64_t end,
                                                   std::function<void (uint8_t*, size_t,
                                                                          const std::string&,
                                                                          const std::string&)> fun) {
-       SectionRef text_section = sections[".text"];
+       SectionRef text_section = getTextSection();
        uint64_t base_address;
+#if defined(LLVM_35)
        text_section.getAddress(base_address);
+#elif defined(LLVM_36)
+       base_address = text_section.getAddress();
+#endif
+
        uint64_t current_address = start - base_address;
 
        StringRef bytes;
        text_section.getContents(bytes);
+#if defined(LLVM_35)
        StringRefMemoryObject ref(bytes);
+#elif defined(LLVM_36)
+       ArrayRef<uint8_t> bytearray(reinterpret_cast<const uint8_t *>(bytes.data()),
+                          bytes.size());
+#endif
+
 
        while (current_address < end - base_address) {
                uint64_t inst_size;
@@ -471,10 +710,24 @@ void LLVMDisassembler<ELFT>::printEachInstruction(uint64_t start, uint64_t end,
                llvm::raw_string_ostream s(buf);
 
                if(llvm::MCDisassembler::Success ==
-                  DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
+#if defined(LLVM_35)
+                          DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
+#elif defined(LLVM_36)
+                               DisAsm->getInstruction(inst, inst_size,
+                                                      bytearray.slice(current_address),
+                                                      base_address + current_address,
+                                                      nulls(), nulls())) {
+#endif
 
                        uint8_t bytes[inst_size+2];
+#if defined(LLVM_35)
                        ref.readBytes(current_address, inst_size, bytes);
+#elif defined(LLVM_36)
+                       size_t bytesindex(0);
+                       for (uint8_t byte : bytearray.slice(current_address, inst_size)) {
+                               bytes[bytesindex++] = byte;
+                       }
+#endif
 
                        uint64_t jmptarget;
                        std::string ref("");
@@ -501,3 +754,13 @@ void LLVMDisassembler<ELFT>::printEachInstruction(uint64_t start, uint64_t end,
                current_address += inst_size;
        }
 }
+
+template <typename ELFT>
+SectionRef LLVMDisassembler<ELFT>::getTextSection() {
+       return sections[".text"];
+}
+
+template <>
+SectionRef LLVMDisassembler<MACHOT>::getTextSection() {
+       return sections["__text"];
+}