X-Git-Url: https://git.siccegge.de//index.cgi?p=frida%2Ffrida.git;a=blobdiff_plain;f=src%2Fdisassembler%2Fllvm%2FLLVMDisassembler.cxx;h=049f1fc90477b82d1a30427bbd7352a9d7a98cf3;hp=96d08830e4f26087df5cd3feb373f5f47a781bf5;hb=5d65588185f3d19fc6fa311f642cc5fd78966087;hpb=cab6494b97d1626c7b9285b69df324d9c9953614 diff --git a/src/disassembler/llvm/LLVMDisassembler.cxx b/src/disassembler/llvm/LLVMDisassembler.cxx index 96d0883..049f1fc 100644 --- a/src/disassembler/llvm/LLVMDisassembler.cxx +++ b/src/disassembler/llvm/LLVMDisassembler.cxx @@ -1,7 +1,10 @@ +#include "disassembler/Instruction.hxx" #include "disassembler/llvm/LLVMDisassembler.hxx" #include "core/InformationManager.hxx" #include "core/Function.hxx" #include "core/BasicBlock.hxx" +#include "core/Exception.hxx" +#include #include #include @@ -15,36 +18,59 @@ namespace { class COFFT { }; + + class MACHOT { + + }; } /* * */ Disassembler * createLLVMDisassembler(const std::string& filename, InformationManager* manager) { + log4cxx::LoggerPtr logger(log4cxx::Logger::getLogger("disassembler.LLVMDisassembler")); if (filename == "") return NULL; - std::unique_ptr o; - o.reset(createBinary(filename).get()); - Binary * op = o.release(); - - // ELFType - if (ELF32LEObjectFile * object = dyn_cast(op)) { - return new LLVMDisassembler>(filename, manager, object); - } - if (ELF64LEObjectFile * object = dyn_cast(op)) { - return new LLVMDisassembler>(filename, manager, object); - } - if (ELF32BEObjectFile * object = dyn_cast(op)) { - return new LLVMDisassembler>(filename, manager, object); - } - if (ELF64BEObjectFile * object = dyn_cast(op)) { - return new LLVMDisassembler>(filename, manager, object); + auto retval = createBinary(filename); + if (error_code ec = retval.getError()) { + LOG4CXX_ERROR(logger, ec.message()); + return NULL; } - if (COFFObjectFile * object = dyn_cast(op)) { - return new LLVMDisassembler(filename, manager, object); +#if defined(LLVM_35) + Binary * op = retval.get(); +#elif defined(LLVM_36) + OwningBinary ob; + ob = std::move(retval.get()); + Binary* op = ob.getBinary(); + auto foo = ob.takeBinary(); + foo.first.release(); + foo.second.release(); +#endif + + try { + // ELFType + if (ELF32LEObjectFile * object = dyn_cast(op)) { + return new LLVMDisassembler>(filename, manager, object); + } + if (ELF64LEObjectFile * object = dyn_cast(op)) { + return new LLVMDisassembler>(filename, manager, object); + } + if (ELF32BEObjectFile * object = dyn_cast(op)) { + return new LLVMDisassembler>(filename, manager, object); + } + if (ELF64BEObjectFile * object = dyn_cast(op)) { + return new LLVMDisassembler>(filename, manager, object); + } + if (COFFObjectFile * object = dyn_cast(op)) { + return new LLVMDisassembler(filename, manager, object); + } + if (MachOObjectFile * object = dyn_cast(op)) { + return new LLVMDisassembler(filename, manager, object); + } + } catch (BinaryNotSupported& e) { + return NULL; } - return NULL; } @@ -74,7 +100,15 @@ LLVMDisassembler::LLVMDisassembler(const std::string& filename, return; } +#if defined(LLVM_35) binary.reset(result.get()); +#elif defined(LLVM_36) + OwningBinary ob; + ob = std::move(result.get()); + Binary* op = ob.getBinary(); + + binary.reset(op); +#endif o = dyn_cast(binary.get()); } else { @@ -92,7 +126,8 @@ LLVMDisassembler::LLVMDisassembler(const std::string& filename, target = TargetRegistry::lookupTarget("", triple, es); if (!target) { LOG4CXX_ERROR(logger, es); - return; + BinaryNotSupported e; + throw e; } LOG4CXX_INFO(logger, "Target " << target->getName()); @@ -100,26 +135,30 @@ LLVMDisassembler::LLVMDisassembler(const std::string& filename, MRI.reset(target->createMCRegInfo(tripleName)); if (!MRI) { LOG4CXX_ERROR(logger, "no register info for target " << tripleName); - return; + BinaryNotSupported e; + throw e; } // Set up disassembler. AsmInfo.reset(target->createMCAsmInfo(*MRI, tripleName)); if (!AsmInfo) { LOG4CXX_ERROR(logger, "no assembly info for target " << tripleName); - return; + BinaryNotSupported e; + throw e; } STI.reset(target->createMCSubtargetInfo(tripleName, "", "")); if (!STI) { LOG4CXX_ERROR(logger, "no subtarget info for target " << tripleName); - return; + BinaryNotSupported e; + throw e; } MII.reset(target->createMCInstrInfo()); if (!MII) { LOG4CXX_ERROR(logger, "no instruction info for target " << tripleName); - return; + BinaryNotSupported e; + throw e; } MOFI.reset(new MCObjectFileInfo); @@ -128,7 +167,8 @@ LLVMDisassembler::LLVMDisassembler(const std::string& filename, DisAsm.reset(target->createMCDisassembler(*STI, Ctx)); if (!DisAsm) { LOG4CXX_ERROR(logger, "no disassembler for target " << tripleName); - return; + BinaryNotSupported e; + throw e; } RelInfo.reset( target->createMCRelocationInfo(tripleName, Ctx)); @@ -144,22 +184,24 @@ LLVMDisassembler::LLVMDisassembler(const std::string& filename, MIA.reset(target->createMCInstrAnalysis(MII.get())); if (!MIA) { LOG4CXX_ERROR(logger, "no instruction analysis for target " << tripleName); - return; + BinaryNotSupported e; + throw e; } int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); IP.reset(target->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI)); if (!IP) { LOG4CXX_ERROR(logger, "no instruction printer for target " << tripleName); - return; + BinaryNotSupported e; + throw e; } IP->setPrintImmHex(llvm::HexStyle::C); IP->setPrintImmHex(true); - std::unique_ptr OD( - new MCObjectDisassembler(*o, *DisAsm, *MIA)); - Mod.reset(OD->buildModule(false)); +// std::unique_ptr OD( +// new MCObjectDisassembler(*o, *DisAsm, *MIA)); + //Mod.reset(OD->buildModule(false)); readSections(); } @@ -177,11 +219,15 @@ LLVMDisassembler::~LLVMDisassembler() {} template Function* LLVMDisassembler::disassembleFunctionAt(uint64_t address, const std::string& name) { Function * function; - SectionRef text_section = sections[".text"]; + SectionRef text_section = getTextSection(); uint64_t base_address, size; +#if defined(LLVM_35) text_section.getAddress(base_address); text_section.getSize(size); - +#elif defined(LLVM_36) + base_address = text_section.getAddress(); + size = text_section.getSize(); +#endif if (address < base_address || address >= base_address + size) { return NULL; @@ -213,18 +259,37 @@ void LLVMDisassembler::disassembleFunction(Function* function) { * the other ones at the end of the function! */ std::map new_blocks; - SectionRef text_section = sections[".text"]; + SectionRef text_section = getTextSection(); StringRef bytes; + uint64_t base_address, size; text_section.getContents(bytes); +#if defined(LLVM_35) StringRefMemoryObject ref(bytes); + text_section.getAddress(base_address); + text_section.getSize(size); +#elif defined(LLVM_36) + ArrayRef bytearray(reinterpret_cast(bytes.data()), + bytes.size()); + base_address = text_section.getAddress(); + size = text_section.getSize(); +#else +#error LLVM != 3.5 | 3.6 not supported +#endif LOG4CXX_DEBUG(logger, "Handling function " << function->getName()); + if(function->getStartAddress() < base_address || function->getStartAddress() > base_address + size) { + LOG4CXX_INFO(logger, "Trying to disassemble function " << function->getName() << " but start address " << std::hex << function->getStartAddress() << " is located outside the text segment"); + return; + } + BasicBlock * block = manager->newBasicBlock(function->getStartAddress()); remaining_blocks.push(block); new_blocks.insert(std::make_pair(block->getStartAddress(), block)); function->addBasicBlock(block); + LOG4CXX_DEBUG(logger, "Text section at " << std::hex << base_address << " with size " << size); + while (remaining_blocks.size()) { BasicBlock * current_block = remaining_blocks.top(); remaining_blocks.pop(); @@ -233,8 +298,6 @@ void LLVMDisassembler::disassembleFunction(Function* function) { << current_block->getStartAddress()); uint64_t inst_size; - uint64_t base_address; - text_section.getAddress(base_address); uint64_t current_address = current_block->getStartAddress() - base_address; while(true) { MCInst inst; @@ -242,7 +305,14 @@ void LLVMDisassembler::disassembleFunction(Function* function) { llvm::raw_string_ostream s(buf); if(llvm::MCDisassembler::Success == +#if defined(LLVM_35) DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) { +#elif defined(LLVM_36) + DisAsm->getInstruction(inst, inst_size, + bytearray.slice(current_address), + base_address + current_address, + nulls(), nulls())) { +#endif uint64_t jmptarget; if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) { @@ -305,7 +375,7 @@ void LLVMDisassembler::disassembleFunction(Function* function) { template void LLVMDisassembler::disassemble() { - SectionRef text_section = sections[".text"]; + SectionRef text_section = getTextSection(); std::vector remaining_functions; // Assume all function symbols actually start a real function @@ -314,8 +384,11 @@ void LLVMDisassembler::disassemble() { bool contains; SymbolRef::Type symbol_type; - +#if defined(LLVM_35) if (text_section.containsSymbol(x->second, contains) || !contains) +#elif defined(LLVM_36) + if (!text_section.containsSymbol(x->second)) +#endif continue; if (x->second.getType(symbol_type) @@ -324,9 +397,14 @@ void LLVMDisassembler::disassemble() { if (!x->second.getAddress(result)) { Function * fun = manager->newFunction(result); - fun->setName(x->first); - remaining_functions.push_back(fun); - LOG4CXX_DEBUG(logger, "Disasembling " << x->first); + if (fun) { + fun->setName(x->first); + remaining_functions.push_back(fun); + LOG4CXX_DEBUG(logger, "Disasembling " << x->first); + } else { + LOG4CXX_DEBUG(logger, "Function at " << std::hex << result + << " already disassembled as " << manager->getFunction(result)->getName()); + } } } @@ -346,7 +424,11 @@ void LLVMDisassembler::disassemble() { if (!manager->hasFunctions()) { uint64_t text_entry; +#if defined(LLVM_35) text_section.getAddress(text_entry); +#elif defined(LLVM_36) + text_entry = text_section.getAddress(); +#endif LOG4CXX_INFO(logger, "No Symbols found, starting at the beginning of the text segment"); disassembleFunctionAt(text_entry); } @@ -368,6 +450,12 @@ uint64_t LLVMDisassembler::entryAddress() { } } +template<> +uint64_t LLVMDisassembler::entryAddress() { + // TODO + return 0; +} + template uint64_t LLVMDisassembler::entryAddress() { const auto elffile = dyn_cast>(o)->getELFFile(); @@ -378,19 +466,34 @@ uint64_t LLVMDisassembler::entryAddress() { template void LLVMDisassembler::splitBlocks(Function* function) { - SectionRef text_section = sections[".text"]; + SectionRef text_section = getTextSection(); StringRef bytes; text_section.getContents(bytes); +#if defined(LLVM_35) StringRefMemoryObject ref(bytes); +#elif defined(LLVM_36) + ArrayRef bytearray(reinterpret_cast(bytes.data()), + bytes.size()); +#endif + + LOG4CXX_DEBUG(logger, "Splitting Blocks in Function " << function->getName()); // Split blocks where jumps are going inside the block for (auto it = function->blocks().begin(); it != function->blocks().end(); ++it) { BasicBlock * current_block = it->second; + if (current_block->getEndAddress() == 0) { + LOG4CXX_ERROR(logger, "UNFINISHED BLOCK " << std::hex << current_block->getStartAddress()); + break; + } uint64_t inst_size; uint64_t base_address; +#if defined(LLVM_35) text_section.getAddress(base_address); +#elif defined(LLVM_36) + base_address = text_section.getAddress(); +#endif uint64_t current_address = current_block->getStartAddress() - base_address; while(current_block->getEndAddress() - base_address > current_address) { MCInst inst; @@ -398,7 +501,15 @@ void LLVMDisassembler::splitBlocks(Function* function) { llvm::raw_string_ostream s(buf); if(llvm::MCDisassembler::Success == +#if defined(LLVM_35) DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) { +#elif defined(LLVM_36) + DisAsm->getInstruction(inst, inst_size, + bytearray.slice(current_address), + base_address + current_address, + nulls(), nulls())) { +#endif + // See if some other block starts here BasicBlock* other = manager->getBasicBlock(current_address + inst_size @@ -432,6 +543,11 @@ void LLVMDisassembler::readDynamicSymbols() { //TODO } +template<> +void LLVMDisassembler::readDynamicSymbols() { + //TODO +} + template void LLVMDisassembler::readDynamicSymbols() { const auto elffile = dyn_cast>(o)->getELFFile(); @@ -444,7 +560,11 @@ void LLVMDisassembler::readDynamicSymbols() { // TODO: Error handling std::string symbolname = *(elffile->getSymbolName(it)); std::string symbolversion = *(elffile->getSymbolVersion(nullptr, &*it, is_default)); - manager->signal_new_dyn_symbol(symbolname + (is_default? "@@" : "@") + symbolversion); + // TODO: actually get the symbol address from relocations + Function* f = manager->newDynamicFunction(0); + f->setName(symbolname + (is_default? "@@" : "@") + symbolversion); + manager->finishFunction(f); + LOG4CXX_DEBUG(logger, "Adding dynamic Symbol " << symbolname << (is_default? "@@" : "@") << symbolversion); } } @@ -456,11 +576,13 @@ void LLVMDisassembler::readSymbols() { symbol_iterator si(o->symbol_begin()), se(o->symbol_end()); for (; si != se; ++si) { StringRef name; + uint64_t address; + si->getAddress(address); if ((ec = si->getName(name))) { LOG4CXX_ERROR(logger, ec.message()); break; } - LOG4CXX_DEBUG(logger, "Added symbol " << name.str()); + LOG4CXX_DEBUG(logger, "Added symbol " << name.str() << " at address " << std::hex << address); symbols.insert(make_pair(name.str(), *si)); } } @@ -490,30 +612,54 @@ void LLVMDisassembler::readSections() { // } template -void LLVMDisassembler::printEachInstruction(uint64_t start, uint64_t end, - std::function fun) { - SectionRef text_section = sections[".text"]; +std::vector LLVMDisassembler::getInstructions(const BasicBlock *block) { + std::vector result; + SectionRef text_section = getTextSection(); uint64_t base_address; +#if defined(LLVM_35) text_section.getAddress(base_address); - uint64_t current_address = start - base_address; +#elif defined(LLVM_36) + base_address = text_section.getAddress(); +#endif + + uint64_t current_address = block->getStartAddress() - base_address; + uint64_t end_position = block->getEndAddress() - base_address; StringRef bytes; text_section.getContents(bytes); +#if defined(LLVM_35) StringRefMemoryObject ref(bytes); +#elif defined(LLVM_36) + ArrayRef bytearray(reinterpret_cast(bytes.data()), + bytes.size()); +#endif - while (current_address < end - base_address) { + + while (current_address < end_position) { uint64_t inst_size; MCInst inst; std::string buf; llvm::raw_string_ostream s(buf); if(llvm::MCDisassembler::Success == - DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) { +#if defined(LLVM_35) + DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) { +#elif defined(LLVM_36) + DisAsm->getInstruction(inst, inst_size, + bytearray.slice(current_address), + base_address + current_address, + nulls(), nulls())) { +#endif uint8_t bytes[inst_size+2]; +#if defined(LLVM_35) ref.readBytes(current_address, inst_size, bytes); +#elif defined(LLVM_36) + size_t bytesindex(0); + for (uint8_t byte : bytearray.slice(current_address, inst_size)) { + bytes[bytesindex++] = byte; + } +#endif uint64_t jmptarget; std::string ref(""); @@ -528,15 +674,32 @@ void LLVMDisassembler::printEachInstruction(uint64_t start, uint64_t end, stream << std::hex << (base_address + jmptarget); ref = stream.str(); } - - - fun(bytes, inst_size, s.str(), ref); + result.push_back(Instruction(current_address + base_address, boost::algorithm::trim_copy(s.str()), + std::vector(bytes, bytes+inst_size), ref)); } else { LOG4CXX_WARN(logger, "Invalid byte at" << std::hex << current_address + base_address); - fun(NULL, 0, "Invalid Byte", ""); + uint8_t bytes[1]; +#if defined(LLVM_35) + ref.readBytes(current_address, 1, bytes); +#elif defined(LLVM_36) + bytes[0] = bytearray[current_address]; +#endif + result.push_back(Instruction(current_address + base_address, "Invalid Instruction", + std::vector(bytes, bytes+1), "")); inst_size = 1; } current_address += inst_size; } + return result; +} + +template +SectionRef LLVMDisassembler::getTextSection() { + return sections[".text"]; +} + +template <> +SectionRef LLVMDisassembler::getTextSection() { + return sections["__text"]; }