X-Git-Url: https://git.siccegge.de//index.cgi?p=frida%2Ffrida.git;a=blobdiff_plain;f=src%2Fdisassembler%2Fllvm%2FLLVMDisassembler.cxx;h=45cd04e25566d97f830df05be7a00f42ffbe29a8;hp=cdcd3cb2cd516883400d08a9b47bef5c94a0341d;hb=9d5b0bc7a1a5670e30637c1e71c432a4fd9255ca;hpb=4b2336f2066f76d2be017ad9731865ac6743abcb diff --git a/src/disassembler/llvm/LLVMDisassembler.cxx b/src/disassembler/llvm/LLVMDisassembler.cxx index cdcd3cb..45cd04e 100644 --- a/src/disassembler/llvm/LLVMDisassembler.cxx +++ b/src/disassembler/llvm/LLVMDisassembler.cxx @@ -1,7 +1,9 @@ +#include "disassembler/Instruction.hxx" #include "disassembler/llvm/LLVMDisassembler.hxx" #include "core/InformationManager.hxx" #include "core/Function.hxx" #include "core/BasicBlock.hxx" +#include #include #include @@ -15,18 +17,32 @@ namespace { class COFFT { }; + + class MACHOT { + + }; } /* * */ Disassembler * createLLVMDisassembler(const std::string& filename, InformationManager* manager) { + log4cxx::LoggerPtr logger(log4cxx::Logger::getLogger("disassembler.LLVMDisassembler")); if (filename == "") return NULL; - std::unique_ptr o; - o.reset(createBinary(filename).get()); - Binary * op = o.release(); + auto retval = createBinary(filename); + if (error_code ec = retval.getError()) { + LOG4CXX_ERROR(logger, ec.message()); + return NULL; + } + + Binary * op = retval.get(); + + if (!op) { + LOG4CXX_ERROR(logger, "Could not open " << filename); + return NULL; + } // ELFType if (ELF32LEObjectFile * object = dyn_cast(op)) { @@ -44,6 +60,9 @@ Disassembler * createLLVMDisassembler(const std::string& filename, InformationMa if (COFFObjectFile * object = dyn_cast(op)) { return new LLVMDisassembler(filename, manager, object); } + if (MachOObjectFile * object = dyn_cast(op)) { + return new LLVMDisassembler(filename, manager, object); + } return NULL; } @@ -58,7 +77,7 @@ LLVMDisassembler::LLVMDisassembler(const std::string& filename, InformationManager* manager, ObjectFile* file) : Disassembler() - , logger(log4cxx::Logger::getLogger("LLVMDisassembler")) + , logger(log4cxx::Logger::getLogger("disassembler.LLVMDisassembler")) , triple("unknown-unknown-unknown") , manager(manager) { @@ -133,10 +152,10 @@ LLVMDisassembler::LLVMDisassembler(const std::string& filename, RelInfo.reset( target->createMCRelocationInfo(tripleName, Ctx)); if (RelInfo) { - Symzer.reset( - MCObjectSymbolizer::createObjectSymbolizer(Ctx, std::move(RelInfo), o)); - if (Symzer) - DisAsm->setSymbolizer(std::move(Symzer)); + // Symzer.reset( + // MCObjectSymbolizer::createObjectSymbolizer(Ctx, std::move(RelInfo), o)); + // if (Symzer) + // DisAsm->setSymbolizer(std::move(Symzer)); } RelInfo.release(); Symzer.release(); @@ -159,7 +178,7 @@ LLVMDisassembler::LLVMDisassembler(const std::string& filename, std::unique_ptr OD( new MCObjectDisassembler(*o, *DisAsm, *MIA)); - Mod.reset(OD->buildModule(false)); + //Mod.reset(OD->buildModule(false)); readSections(); } @@ -177,7 +196,7 @@ LLVMDisassembler::~LLVMDisassembler() {} template Function* LLVMDisassembler::disassembleFunctionAt(uint64_t address, const std::string& name) { Function * function; - SectionRef text_section = sections[".text"]; + SectionRef text_section = getTextSection(); uint64_t base_address, size; text_section.getAddress(base_address); text_section.getSize(size); @@ -199,7 +218,6 @@ Function* LLVMDisassembler::disassembleFunctionAt(uint64_t address, const function->setName(name); } disassembleFunction(function); - manager->finishFunction(function); } return function; @@ -207,13 +225,14 @@ Function* LLVMDisassembler::disassembleFunctionAt(uint64_t address, const template void LLVMDisassembler::disassembleFunction(Function* function) { + std::vector called_functions; std::stack remaining_blocks; /* TODO: * Do all blocks get added properly? We should take care to remove * the other ones at the end of the function! */ std::map new_blocks; - SectionRef text_section = sections[".text"]; + SectionRef text_section = getTextSection(); StringRef bytes; text_section.getContents(bytes); StringRefMemoryObject ref(bytes); @@ -225,6 +244,11 @@ void LLVMDisassembler::disassembleFunction(Function* function) { new_blocks.insert(std::make_pair(block->getStartAddress(), block)); function->addBasicBlock(block); + uint64_t base_address, size; + text_section.getAddress(base_address); + text_section.getSize(size); + LOG4CXX_DEBUG(logger, "Text section at " << std::hex << base_address << " with size " << size); + while (remaining_blocks.size()) { BasicBlock * current_block = remaining_blocks.top(); remaining_blocks.pop(); @@ -233,8 +257,6 @@ void LLVMDisassembler::disassembleFunction(Function* function) { << current_block->getStartAddress()); uint64_t inst_size; - uint64_t base_address; - text_section.getAddress(base_address); uint64_t current_address = current_block->getStartAddress() - base_address; while(true) { MCInst inst; @@ -250,7 +272,7 @@ void LLVMDisassembler::disassembleFunction(Function* function) { if (!MIA->isIndirectBranch(inst)) { if (MIA->isCall(inst)) { if (NULL == manager->getFunction(jmptarget)) - disassembleFunctionAt(jmptarget); + called_functions.push_back(jmptarget); } else { current_block->setNextBlock(0, jmptarget); if (new_blocks.find(jmptarget) == new_blocks.end()) { @@ -298,12 +320,14 @@ void LLVMDisassembler::disassembleFunction(Function* function) { } splitBlocks(function); LOG4CXX_DEBUG(logger, "Finished function " << function->getName()); - manager->signal_new_function(function); + manager->finishFunction(function); + for (uint64_t address : called_functions) + disassembleFunctionAt(address); } template void LLVMDisassembler::disassemble() { - SectionRef text_section = sections[".text"]; + SectionRef text_section = getTextSection(); std::vector remaining_functions; // Assume all function symbols actually start a real function @@ -322,9 +346,14 @@ void LLVMDisassembler::disassemble() { if (!x->second.getAddress(result)) { Function * fun = manager->newFunction(result); - fun->setName(x->first); - remaining_functions.push_back(fun); - LOG4CXX_DEBUG(logger, "Disasembling " << x->first); + if (fun) { + fun->setName(x->first); + remaining_functions.push_back(fun); + LOG4CXX_DEBUG(logger, "Disasembling " << x->first); + } else { + LOG4CXX_DEBUG(logger, "Function at " << std::hex << result + << " already disassembled as " << manager->getFunction(result)->getName()); + } } } @@ -366,6 +395,12 @@ uint64_t LLVMDisassembler::entryAddress() { } } +template<> +uint64_t LLVMDisassembler::entryAddress() { + // TODO + return 0; +} + template uint64_t LLVMDisassembler::entryAddress() { const auto elffile = dyn_cast>(o)->getELFFile(); @@ -376,16 +411,21 @@ uint64_t LLVMDisassembler::entryAddress() { template void LLVMDisassembler::splitBlocks(Function* function) { - SectionRef text_section = sections[".text"]; + SectionRef text_section = getTextSection(); StringRef bytes; text_section.getContents(bytes); StringRefMemoryObject ref(bytes); + LOG4CXX_DEBUG(logger, "Splitting Blocks in Function " << function->getName()); // Split blocks where jumps are going inside the block for (auto it = function->blocks().begin(); it != function->blocks().end(); ++it) { BasicBlock * current_block = it->second; + if (current_block->getEndAddress() == 0) { + LOG4CXX_ERROR(logger, "UNFINISHED BLOCK " << std::hex << current_block->getStartAddress()); + break; + } uint64_t inst_size; uint64_t base_address; text_section.getAddress(base_address); @@ -430,6 +470,11 @@ void LLVMDisassembler::readDynamicSymbols() { //TODO } +template<> +void LLVMDisassembler::readDynamicSymbols() { + //TODO +} + template void LLVMDisassembler::readDynamicSymbols() { const auto elffile = dyn_cast>(o)->getELFFile(); @@ -442,7 +487,11 @@ void LLVMDisassembler::readDynamicSymbols() { // TODO: Error handling std::string symbolname = *(elffile->getSymbolName(it)); std::string symbolversion = *(elffile->getSymbolVersion(nullptr, &*it, is_default)); - manager->signal_new_dyn_symbol(symbolname + (is_default? "@@" : "@") + symbolversion); + // TODO: actually get the symbol address from relocations + Function* f = manager->newDynamicFunction(0); + f->setName(symbolname + (is_default? "@@" : "@") + symbolversion); + manager->finishFunction(f); + LOG4CXX_DEBUG(logger, "Adding dynamic Symbol " << symbolname << (is_default? "@@" : "@") << symbolversion); } } @@ -487,12 +536,66 @@ void LLVMDisassembler::readSections() { // // }); // } +template +std::vector LLVMDisassembler::getInstructions(const BasicBlock *block) { + std::vector result; + SectionRef text_section = getTextSection(); + uint64_t base_address; + text_section.getAddress(base_address); + uint64_t current_address = block->getStartAddress() - base_address; + uint64_t end_position = block->getEndAddress() - base_address; + + StringRef bytes; + text_section.getContents(bytes); + StringRefMemoryObject ref(bytes); + + while (current_address < end_position) { + uint64_t inst_size; + MCInst inst; + std::string buf; + llvm::raw_string_ostream s(buf); + + if(llvm::MCDisassembler::Success == + DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) { + + uint8_t bytes[inst_size+2]; + ref.readBytes(current_address, inst_size, bytes); + + uint64_t jmptarget; + std::string ref(""); + IP->printInst(&inst, s, ""); + if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) { + std::stringstream stream; + if (MIA->isCall(inst)) + stream << "function:"; + else + stream << "block:"; + + stream << std::hex << (base_address + jmptarget); + ref = stream.str(); + } + result.push_back(Instruction(current_address + base_address, boost::algorithm::trim_copy(s.str()), + std::vector(bytes, bytes+inst_size), ref)); + } else { + LOG4CXX_WARN(logger, "Invalid byte at" << std::hex << current_address + base_address); + uint8_t bytes[1]; + ref.readBytes(current_address, 1, bytes); + result.push_back(Instruction(current_address + base_address, "Invalid Instruction", + std::vector(bytes, bytes+1), "")); + inst_size = 1; + } + + current_address += inst_size; + } + return result; +} + template void LLVMDisassembler::printEachInstruction(uint64_t start, uint64_t end, std::function fun) { - SectionRef text_section = sections[".text"]; + SectionRef text_section = getTextSection(); uint64_t base_address; text_section.getAddress(base_address); uint64_t current_address = start - base_address; @@ -538,3 +641,13 @@ void LLVMDisassembler::printEachInstruction(uint64_t start, uint64_t end, current_address += inst_size; } } + +template +SectionRef LLVMDisassembler::getTextSection() { + return sections[".text"]; +} + +template <> +SectionRef LLVMDisassembler::getTextSection() { + return sections["__text"]; +}