git.siccegge.de Git - frida/frida.git/blobdiff - src/disassembler/llvm/LLVMDisassembler.cxx
Change from list to vector
[frida/frida.git] / src / disassembler / llvm / LLVMDisassembler.cxx
index abfafa48e61d566de36c3b8879c555de84290561..990ed4f53c19c6f24255d2fb735d1bedb4ba8722 100644 (file)
@@ -1,3 +1,4 @@
+#include "disassembler/Instruction.hxx"
 #include "disassembler/llvm/LLVMDisassembler.hxx"
 #include "core/InformationManager.hxx"
 #include "core/Function.hxx"
@@ -11,6 +12,16 @@ using namespace llvm;
 using namespace llvm::object;
 using std::error_code;
 
+namespace {
+       /* Tag type: LLVMDisassembler<COFFT> is the instantiation used for
+        * COFF object files. It carries no data of its own. */
+       class COFFT {};
+
+       /* Tag type: LLVMDisassembler<MACHOT> is the instantiation used for
+        * Mach-O object files. It carries no data of its own. */
+       class MACHOT {};
+}
+
 /*
  *
  */
@@ -35,6 +46,12 @@ Disassembler * createLLVMDisassembler(const std::string& filename, InformationMa
        if (ELF64BEObjectFile * object = dyn_cast<ELF64BEObjectFile>(op)) {
                return new LLVMDisassembler<ELFType<support::big, 2, true>>(filename, manager, object);
        }
+       if (COFFObjectFile * object = dyn_cast<COFFObjectFile>(op)) {
+               return new LLVMDisassembler<COFFT>(filename, manager, object);
+       }
+       if (MachOObjectFile * object = dyn_cast<MachOObjectFile>(op)) {
+               return new LLVMDisassembler<MACHOT>(filename, manager, object);
+       }
 
        return NULL;
 }
@@ -47,13 +64,13 @@ Disassembler * createLLVMDisassembler(const std::string& filename, InformationMa
 template <typename ELFT>
 LLVMDisassembler<ELFT>::LLVMDisassembler(const std::string& filename,
                                          InformationManager* manager,
-                                         ELFObjectFile<ELFT>* file)
+                                         ObjectFile* file)
        : Disassembler()
-       , logger(log4cxx::Logger::getLogger("LLVMDisassembler"))
+       , logger(log4cxx::Logger::getLogger("disassembler.LLVMDisassembler"))
        , triple("unknown-unknown-unknown")
        , manager(manager)
 {
-       LOG4CXX_DEBUG(logger, "Handling file" << filename);
+       LOG4CXX_DEBUG(logger, "Handling file " << filename);
 
        if (!file) {
                auto result = createBinary(filename);
@@ -67,7 +84,7 @@ LLVMDisassembler<ELFT>::LLVMDisassembler(const std::string& filename,
 
                binary.reset(result.get());
 
-               o = dyn_cast<ELFObjectFile<ELFT>>(binary.get());
+               o = dyn_cast<ObjectFile>(binary.get());
        } else {
                o = file;
                binary.reset(file);
@@ -124,10 +141,10 @@ LLVMDisassembler<ELFT>::LLVMDisassembler(const std::string& filename,
        RelInfo.reset(
                target->createMCRelocationInfo(tripleName, Ctx));
        if (RelInfo) {
-               Symzer.reset(
-                       MCObjectSymbolizer::createObjectSymbolizer(Ctx, std::move(RelInfo), o));
-               if (Symzer)
-                       DisAsm->setSymbolizer(std::move(Symzer));
+               // Symzer.reset(
+               //      MCObjectSymbolizer::createObjectSymbolizer(Ctx, std::move(RelInfo), o));
+               // if (Symzer)
+               //      DisAsm->setSymbolizer(std::move(Symzer));
        }
        RelInfo.release();
        Symzer.release();
@@ -151,12 +168,13 @@ LLVMDisassembler<ELFT>::LLVMDisassembler(const std::string& filename,
        std::unique_ptr<MCObjectDisassembler> OD(
                new MCObjectDisassembler(*o, *DisAsm, *MIA));
        Mod.reset(OD->buildModule(false));
+
+       readSections();
 }
 
 template <typename ELFT>
 void LLVMDisassembler<ELFT>::start() { // runs the passes below in this order; readSections() is no longer part of it (this change calls it at the end of the constructor instead)
        readSymbols(); // first pass
-       readSections();
        disassemble(); // second pass
        readDynamicSymbols(); // last pass
 }
@@ -167,7 +185,7 @@ LLVMDisassembler<ELFT>::~LLVMDisassembler() {}
 template <typename ELFT>
 Function* LLVMDisassembler<ELFT>::disassembleFunctionAt(uint64_t address, const std::string& name) {
        Function * function;
-       SectionRef text_section = sections[".text"];
+       SectionRef text_section = getTextSection();
        uint64_t base_address, size;
        text_section.getAddress(base_address);
        text_section.getSize(size);
@@ -189,7 +207,6 @@ Function* LLVMDisassembler<ELFT>::disassembleFunctionAt(uint64_t address, const
                        function->setName(name);
                }
                disassembleFunction(function);
-               manager->finishFunction(function);
        }
 
        return function;
@@ -197,13 +214,14 @@ Function* LLVMDisassembler<ELFT>::disassembleFunctionAt(uint64_t address, const
 
 template <typename ELFT>
 void LLVMDisassembler<ELFT>::disassembleFunction(Function* function) {
+       std::vector<uint64_t> called_functions;
        std::stack<BasicBlock*> remaining_blocks;
        /* TODO:
         * Do all blocks get added properly? We should take care to remove
         * the other ones at the end of the function!
         */
        std::map<uint64_t, BasicBlock*> new_blocks;
-       SectionRef text_section = sections[".text"];
+       SectionRef text_section = getTextSection();
        StringRef bytes;
        text_section.getContents(bytes);
        StringRefMemoryObject ref(bytes);
@@ -215,6 +233,11 @@ void LLVMDisassembler<ELFT>::disassembleFunction(Function* function) {
        new_blocks.insert(std::make_pair(block->getStartAddress(), block));
        function->addBasicBlock(block);
 
+       uint64_t base_address, size;
+       text_section.getAddress(base_address);
+       text_section.getSize(size);
+       LOG4CXX_DEBUG(logger, "Text section at " << std::hex << base_address << " with size " << size);
+
        while (remaining_blocks.size()) {
                BasicBlock * current_block = remaining_blocks.top();
                remaining_blocks.pop();
@@ -223,8 +246,6 @@ void LLVMDisassembler<ELFT>::disassembleFunction(Function* function) {
                              << current_block->getStartAddress());
 
                uint64_t inst_size;
-               uint64_t base_address;
-               text_section.getAddress(base_address);
                uint64_t current_address = current_block->getStartAddress() - base_address;
                while(true) {
                        MCInst inst;
@@ -240,7 +261,7 @@ void LLVMDisassembler<ELFT>::disassembleFunction(Function* function) {
                                        if (!MIA->isIndirectBranch(inst)) {
                                                if (MIA->isCall(inst)) {
                                                        if (NULL == manager->getFunction(jmptarget))
-                                                               disassembleFunctionAt(jmptarget);
+                                                               called_functions.push_back(jmptarget);
                                                } else {
                                                        current_block->setNextBlock(0, jmptarget);
                                                        if (new_blocks.find(jmptarget) == new_blocks.end()) {
@@ -288,12 +309,14 @@ void LLVMDisassembler<ELFT>::disassembleFunction(Function* function) {
        }
        splitBlocks(function);
        LOG4CXX_DEBUG(logger, "Finished function " << function->getName());
-       manager->signal_new_function(function);
+       manager->finishFunction(function);
+       for (uint64_t address : called_functions)
+               disassembleFunctionAt(address);
 }
 
 template <typename ELFT>
 void LLVMDisassembler<ELFT>::disassemble() {
-       SectionRef text_section = sections[".text"];
+       SectionRef text_section = getTextSection();
        std::vector<Function*> remaining_functions;
 
        // Assume all function symbols actually start a real function
@@ -312,9 +335,14 @@ void LLVMDisassembler<ELFT>::disassemble() {
 
                if (!x->second.getAddress(result)) {
                        Function * fun = manager->newFunction(result);
-                       fun->setName(x->first);
-                       remaining_functions.push_back(fun);
-                       LOG4CXX_DEBUG(logger, "Disasembling " << x->first);
+                       if (fun) {
+                               fun->setName(x->first);
+                               remaining_functions.push_back(fun);
+                               LOG4CXX_DEBUG(logger, "Disasembling " << x->first);
+                       } else {
+                               LOG4CXX_DEBUG(logger, "Function at " << std::hex << result
+                                             << " already disassembled as " << manager->getFunction(result)->getName());
+                       }
                }
        }
 
@@ -324,10 +352,7 @@ void LLVMDisassembler<ELFT>::disassemble() {
        }
 
        if (binary->isELF()) {
-               const ELFO * elffile = o->getELFFile();
-               const typename ELFO::Elf_Ehdr * header = elffile->getHeader();
-
-               _entryAddress = header->e_entry;
+               uint64_t _entryAddress = entryAddress();
                LOG4CXX_DEBUG(logger, "Adding entryAddress at: " << std::hex << _entryAddress);
                std::stringstream s;
                s << "<_start 0x" << std::hex << _entryAddress << ">";
@@ -343,18 +368,53 @@ void LLVMDisassembler<ELFT>::disassemble() {
        }
 }
 
+/**
+ * Entry point of a PE/COFF image as recorded in its optional header.
+ *
+ * Reads AddressOfEntryPoint from the PE32+ header if the file has one,
+ * otherwise from the PE32 header.
+ *
+ * NOTE(review): the PE format defines AddressOfEntryPoint relative to the
+ * image base -- confirm callers expect that rather than an absolute address.
+ *
+ * @return the header's AddressOfEntryPoint, or 0 if the object is not a
+ *         COFF file or carries neither optional header
+ */
+template <>
+uint64_t LLVMDisassembler<COFFT>::entryAddress() {
+       const auto * coffobject = dyn_cast_or_null<COFFObjectFile>(o);
+       if (!coffobject)
+               return 0;
+
+       // Both pointers start out null so that a failed or empty lookup
+       // reads as "header absent" instead of leaving them indeterminate.
+       const struct pe32plus_header* pe32plus_header = nullptr;
+       if (!coffobject->getPE32PlusHeader(pe32plus_header) && pe32plus_header)
+               return pe32plus_header->AddressOfEntryPoint;
+
+       const struct pe32_header* pe32_header = nullptr;
+       if (!coffobject->getPE32Header(pe32_header) && pe32_header)
+               return pe32_header->AddressOfEntryPoint;
+
+       // Neither optional header is present, so there is no entry point to report.
+       return 0;
+}
+
+template<>
+uint64_t LLVMDisassembler<MACHOT>::entryAddress() {
+       // TODO: look up the real Mach-O entry point; until that is implemented this specialisation always reports 0
+       return 0;
+}
+
+/**
+ * Entry point of an ELF object, taken from the e_entry field of its
+ * ELF header.
+ *
+ * @return e_entry, or 0 if the loaded object is not an ELF file of
+ *         type ELFT
+ */
+template <typename ELFT>
+uint64_t LLVMDisassembler<ELFT>::entryAddress() {
+       // dyn_cast yields null on a type mismatch; check before dereferencing.
+       const auto * elfobject = dyn_cast_or_null<ELFObjectFile<ELFT>>(o);
+       if (!elfobject)
+               return 0;
+
+       const auto * header = elfobject->getELFFile()->getHeader();
+       return header->e_entry;
+}
+
 template <typename ELFT>
 void LLVMDisassembler<ELFT>::splitBlocks(Function* function) {
-       SectionRef text_section = sections[".text"];
+       SectionRef text_section = getTextSection();
        StringRef bytes;
        text_section.getContents(bytes);
        StringRefMemoryObject ref(bytes);
 
+       LOG4CXX_DEBUG(logger, "Splitting Blocks in Function " << function->getName());
        // Split blocks where jumps are going inside the block
        for (auto it = function->blocks().begin();
             it != function->blocks().end();
             ++it) {
                BasicBlock * current_block = it->second;
+               if (current_block->getEndAddress() == 0) {
+                       LOG4CXX_ERROR(logger, "UNFINISHED BLOCK " << std::hex << current_block->getStartAddress());
+                       break;
+               }
                uint64_t inst_size;
                uint64_t base_address;
                text_section.getAddress(base_address);
@@ -394,11 +454,20 @@ void LLVMDisassembler<ELFT>::splitBlocks(Function* function) {
        }
 }
 
+template<>
+void LLVMDisassembler<COFFT>::readDynamicSymbols() {
+       //TODO: not implemented for COFF objects yet; this specialisation is deliberately a no-op
+}
+
+template<>
+void LLVMDisassembler<MACHOT>::readDynamicSymbols() {
+       //TODO: not implemented for Mach-O objects yet; this specialisation is deliberately a no-op
+}
+
 template <typename ELFT>
 void LLVMDisassembler<ELFT>::readDynamicSymbols() {
-       const ELFO * elffile = o->getELFFile();
-       for (typename ELFO::Elf_Sym_Iter
-                    it = elffile->begin_dynamic_symbols(),
+       const auto elffile = dyn_cast<ELFObjectFile<ELFT>>(o)->getELFFile();
+       for (auto it = elffile->begin_dynamic_symbols(),
                     end = elffile->end_dynamic_symbols();
             it != end;
             ++it) {
@@ -407,7 +476,11 @@ void LLVMDisassembler<ELFT>::readDynamicSymbols() {
                        // TODO: Error handling
                        std::string symbolname = *(elffile->getSymbolName(it));
                        std::string symbolversion = *(elffile->getSymbolVersion(nullptr, &*it, is_default));
-                       manager->signal_new_dyn_symbol(symbolname + (is_default? "@@" : "@") + symbolversion);
+                       // TODO: actually get the symbol address from relocations
+                       Function* f = manager->newDynamicFunction(0);
+                       f->setName(symbolname + (is_default? "@@" : "@") + symbolversion);
+                       manager->finishFunction(f);
+
                        LOG4CXX_DEBUG(logger, "Adding dynamic Symbol " << symbolname << (is_default? "@@" : "@") << symbolversion);
                }
        }
@@ -452,12 +525,66 @@ void LLVMDisassembler<ELFT>::readSections() {
 //     //               });
 // }
 
+/**
+ * Decode every instruction of a basic block into printable form.
+ *
+ * Walks the text section from the block's start address up to (not
+ * including) its end address. Each successfully decoded instruction is
+ * returned with its absolute address, its printed text, its raw bytes and,
+ * for branches whose target can be evaluated, a reference string of the
+ * form "function:<hex>" (calls) or "block:<hex>" (other branches). A
+ * position that cannot be decoded yields a one-byte "Invalid Instruction"
+ * entry and decoding resumes at the next byte.
+ *
+ * @param block block to decode; its start/end addresses are absolute
+ * @return the instructions in address order; empty if the block has no
+ *         usable extent inside the text section
+ */
+template <typename ELFT>
+std::vector<Instruction> LLVMDisassembler<ELFT>::getInstructions(const BasicBlock *block) {
+       std::vector<Instruction> result;
+       SectionRef text_section = getTextSection();
+       uint64_t base_address = 0;
+       text_section.getAddress(base_address);
+
+       StringRef section_bytes;
+       text_section.getContents(section_bytes);
+       StringRefMemoryObject memory(section_bytes);
+
+       // A block that starts before the section or ends before it starts
+       // (an unfinished block has end address 0) would make the unsigned
+       // offsets below wrap around, so refuse it up front.
+       if (block->getStartAddress() < base_address
+           || block->getEndAddress() < block->getStartAddress()) {
+               LOG4CXX_ERROR(logger, "Cannot list instructions of block " << std::hex
+                             << block->getStartAddress() << ": invalid extent");
+               return result;
+       }
+
+       // Offsets relative to the start of the text section's contents.
+       uint64_t current_address = block->getStartAddress() - base_address;
+       uint64_t end_position = block->getEndAddress() - base_address;
+       // Never walk past the bytes the section actually provides.
+       if (end_position > section_bytes.size())
+               end_position = section_bytes.size();
+
+       while (current_address < end_position) {
+               uint64_t inst_size = 0;
+               MCInst inst;
+               std::string text;
+               llvm::raw_string_ostream text_stream(text);
+
+               // A "successful" decode of zero bytes is treated as invalid so
+               // that the loop always makes progress.
+               if (llvm::MCDisassembler::Success ==
+                   DisAsm->getInstruction(inst, inst_size, memory, current_address, nulls(), nulls())
+                   && inst_size > 0) {
+
+                       std::vector<uint8_t> raw(inst_size);
+                       memory.readBytes(current_address, inst_size, raw.data());
+
+                       uint64_t jmptarget;
+                       std::string reference;
+                       IP->printInst(&inst, text_stream, "");
+                       if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
+                               std::stringstream stream;
+                               if (MIA->isCall(inst))
+                                       stream << "function:";
+                               else
+                                       stream << "block:";
+
+                               // The target is section-relative; report it as an absolute address.
+                               stream << std::hex << (base_address + jmptarget);
+                               reference = stream.str();
+                       }
+                       result.push_back(Instruction(current_address + base_address, text_stream.str(),
+                                                    std::move(raw), reference));
+               } else {
+                       LOG4CXX_WARN(logger, "Invalid byte at " << std::hex << current_address + base_address);
+                       uint8_t invalid[1] = { 0 };
+                       memory.readBytes(current_address, 1, invalid);
+                       result.push_back(Instruction(current_address + base_address, "Invalid Instruction",
+                                                    std::vector<uint8_t>(invalid, invalid+1), ""));
+                       // Skip exactly one byte and try to resynchronise.
+                       inst_size = 1;
+               }
+
+               current_address += inst_size;
+       }
+       return result;
+}
+
 template <typename ELFT>
 void LLVMDisassembler<ELFT>::printEachInstruction(uint64_t start, uint64_t end,
                                                   std::function<void (uint8_t*, size_t,
                                                                          const std::string&,
                                                                          const std::string&)> fun) {
-       SectionRef text_section = sections[".text"];
+       SectionRef text_section = getTextSection();
        uint64_t base_address;
        text_section.getAddress(base_address);
        uint64_t current_address = start - base_address;
@@ -503,3 +630,13 @@ void LLVMDisassembler<ELFT>::printEachInstruction(uint64_t start, uint64_t end,
                current_address += inst_size;
        }
 }
+
+/**
+ * Fetch the section this disassembler treats as executable code.
+ *
+ * Generic variant: the section is looked up under the name ".text".
+ */
+template <typename ELFT>
+SectionRef LLVMDisassembler<ELFT>::getTextSection() {
+       const char * const section_name = ".text";
+       return sections[section_name];
+}
+
+/**
+ * Fetch the section this disassembler treats as executable code.
+ *
+ * Mach-O variant: the section is looked up under the name "__text".
+ */
+template <>
+SectionRef LLVMDisassembler<MACHOT>::getTextSection() {
+       const char * const section_name = "__text";
+       return sections[section_name];
+}