X-Git-Url: https://git.siccegge.de//index.cgi?p=frida%2Ffrida.git;a=blobdiff_plain;f=src%2Fdisassembler%2Fllvm%2FLLVMDisassembler.cxx;h=6b3402eb0b2414d56c4b415f7467e3c29d7e33cd;hp=e1570758da27831e02d1ca63fe85fa326297bda1;hb=cc7580dd344851907ef3003b838da0aa41f6aaf2;hpb=30ce0f02e529603965d15a3afa71c4612dd40c4a diff --git a/src/disassembler/llvm/LLVMDisassembler.cxx b/src/disassembler/llvm/LLVMDisassembler.cxx index e157075..6b3402e 100644 --- a/src/disassembler/llvm/LLVMDisassembler.cxx +++ b/src/disassembler/llvm/LLVMDisassembler.cxx @@ -7,13 +7,13 @@ using namespace llvm; using namespace llvm::object; +using std::error_code; /* * TODO: fallback code falls die Datei kein ELF/PE/COFF/MacO/.. binary * ist sondern z.B. einfach nur Instruktionen oder ein Bootsektor oder * foo */ - LLVMDisassembler::LLVMDisassembler(const std::string& filename) : Disassembler(filename) , logger(log4cxx::Logger::getLogger("LLVMDisassembler")) @@ -73,24 +73,30 @@ LLVMDisassembler::LLVMDisassembler(const std::string& filename) return; } - DisAsm.reset(target->createMCDisassembler(*STI)); + MOFI.reset(new MCObjectFileInfo); + MCContext Ctx(AsmInfo.get(), MRI.get(), MOFI.get()); + + DisAsm.reset(target->createMCDisassembler(*STI, Ctx)); if (!DisAsm) { LOG4CXX_ERROR(logger, "no disassembler for target " << tripleName); return; } - - MOFI.reset(new MCObjectFileInfo); - Ctx.reset(new MCContext(AsmInfo.get(), MRI.get(), MOFI.get())); RelInfo.reset( - target->createMCRelocationInfo(tripleName, *Ctx.get())); + target->createMCRelocationInfo(tripleName, Ctx)); if (RelInfo) { Symzer.reset( - MCObjectSymbolizer::createObjectSymbolizer(*Ctx.get(), RelInfo, o)); + MCObjectSymbolizer::createObjectSymbolizer(Ctx, std::move(RelInfo), o)); if (Symzer) - DisAsm->setSymbolizer(Symzer); + DisAsm->setSymbolizer(std::move(Symzer)); } + RelInfo.release(); + Symzer.release(); MIA.reset(target->createMCInstrAnalysis(MII.get())); + if (!MIA) { + LOG4CXX_ERROR(logger, "no instruction analysis for target " << tripleName); + return; + } int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); IP.reset(target->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI)); @@ -102,7 +108,7 @@ LLVMDisassembler::LLVMDisassembler(const std::string& filename) IP->setPrintImmHex(llvm::HexStyle::C); IP->setPrintImmHex(true); - OwningPtr OD( + std::unique_ptr OD( new MCObjectDisassembler(*o, *DisAsm, *MIA)); Mod.reset(OD->buildModule(false)); @@ -111,110 +117,221 @@ LLVMDisassembler::LLVMDisassembler(const std::string& filename) disassemble(); } +LLVMDisassembler::~LLVMDisassembler() { + std::for_each(functions.begin(), functions.end(), + [](std::pair it) { + delete it.second; + }); + std::for_each(blocks.begin(), blocks.end(), + [](std::pair it) { + delete it.second; + }); +} -void LLVMDisassembler::disassemble() { - std::stack remaining_functions; - std::stack remaining_blocks; +Function* LLVMDisassembler::disassembleFunctionAt(uint64_t address, const std::string& name) { SectionRef text_section = sections[".text"]; + uint64_t base_address, size; + text_section.getAddress(base_address); + text_section.getSize(size); + + if (address < base_address || + address >= base_address + size) { + return NULL; + } - std::for_each(symbols.begin(), symbols.end(), - [&](std::pair x) { - uint64_t result; - bool contains; - SymbolRef::Type symbol_type; + if (functions.find(address) != functions.end()) { + return functions[address]; + } - if (text_section.containsSymbol(x.second, contains) || !contains) - return; + LLVMFunction * function; + if (name == "") { + std::stringstream s; + s << ""; + function = new LLVMFunction(s.str(), address); + } else { + function = new LLVMFunction(name, address); + } + functions.insert(std::make_pair(address, function)); - if (x.second.getType(symbol_type) - || SymbolRef::ST_Function != symbol_type) - return; + disassembleFunction(function); - if (!x.second.getAddress(result)) { - remaining_functions.push(new LLVMFunction(x.first, result)); - LOG4CXX_DEBUG(logger, "Disasembling " << x.first); - } - }); + return function; +} +void LLVMDisassembler::disassembleFunction(LLVMFunction* function) { + std::stack remaining_blocks; + SectionRef text_section = sections[".text"]; StringRef bytes; text_section.getContents(bytes); StringRefMemoryObject ref(bytes); - while (remaining_functions.size()) { - LLVMFunction * current_function = remaining_functions.top(); - remaining_functions.pop(); + LOG4CXX_DEBUG(logger, "Handling function " << function->getName()); + + LLVMBasicBlock * block = new LLVMBasicBlock(function->getStartAddress(), this); + remaining_blocks.push(block); + blocks.insert(std::make_pair(block->getStartAddress(), block)); + + while (remaining_blocks.size()) { + LLVMBasicBlock * current_block = remaining_blocks.top(); + remaining_blocks.pop(); + + LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex << current_block->getStartAddress()); + + uint64_t inst_size; + uint64_t base_address; + text_section.getAddress(base_address); + uint64_t current_address = current_block->getStartAddress() - base_address; + while(true) { + MCInst inst; + std::string buf; + llvm::raw_string_ostream s(buf); + + if(llvm::MCDisassembler::Success == + DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) { + uint64_t jmptarget; + + if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) { + jmptarget += base_address; + if (!MIA->isIndirectBranch(inst)) { + if (MIA->isCall(inst)) { + if (functions.find(jmptarget) == functions.end()) { + disassembleFunctionAt(jmptarget); + } + } else { + current_block->setNextBlock(0, jmptarget); + if (blocks.find(jmptarget) == blocks.end()) { + LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this); + blocks.insert(std::make_pair(block->getStartAddress(), block)); + remaining_blocks.push(block); + } + if (MIA->isConditionalBranch(inst)) { + jmptarget = base_address + current_address + inst_size; + current_block->setNextBlock(1, jmptarget); + if (blocks.find(jmptarget) == blocks.end()) { + LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this); + blocks.insert(std::make_pair(block->getStartAddress(), block)); + remaining_blocks.push(block); + } + } + } + } + } + } else { + inst_size = 0; + } - LOG4CXX_DEBUG(logger, "Handling function " << current_function->getName()); - // if ("_start" != current_function->getName()) - // continue; + if (inst_size == 0 || MIA->isTerminator(inst) || MIA->isBranch(inst)) { + current_block->setEndAddress(current_address + base_address + inst_size); + LOG4CXX_DEBUG(logger, "Finished Block at " << std::hex << + current_block->getEndAddress()); + break; + } + current_address += inst_size; + } + } + LOG4CXX_DEBUG(logger, "Finished function " << function->getName()); +} - remaining_blocks.push(new LLVMBasicBlock(current_function->getStartAddress())); +void LLVMDisassembler::disassemble() { + SectionRef text_section = sections[".text"]; + std::vector remaining_functions; - while (remaining_blocks.size()) { - LLVMBasicBlock * current_block = remaining_blocks.top(); - remaining_blocks.pop(); + // Assume all function symbols actually start a real function + for (auto x = symbols.begin(); x != symbols.end(); ++x) { + uint64_t result; + bool contains; + SymbolRef::Type symbol_type; - LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex << current_block->getStartAddress()); - uint64_t inst_size; - uint64_t base_address; - text_section.getAddress(base_address); - uint64_t current_address = current_block->getStartAddress() - base_address; - while(true) { - MCInst inst; - std::string buf; - llvm::raw_string_ostream s(buf); + if (text_section.containsSymbol(x->second, contains) || !contains) + continue; - if(llvm::MCDisassembler::Success == - DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) { + if (x->second.getType(symbol_type) + || SymbolRef::ST_Function != symbol_type) + continue; - uint8_t bytes[inst_size+2]; - ref.readBytes(current_address, inst_size, bytes); - s << '\t'; - for(uint8_t* cur = bytes; cur < bytes + inst_size; ++cur) { - s.write_hex(*cur); - s << ' '; - } - s << '\t'; - - IP->printInst(&inst, s, ""); - - LOG4CXX_DEBUG(logger, std::hex << current_address + base_address << s.str()); - - uint64_t jmptarget; - if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) { - jmptarget += base_address; - if (!MIA->isIndirectBranch(inst)) { - if (MIA->isCall(inst)) { - if (blocks.find(jmptarget) == blocks.end()) - remaining_functions.push(new LLVMFunction("", jmptarget)); - } else { - if (blocks.find(jmptarget) == blocks.end()) - remaining_blocks.push(new LLVMBasicBlock(jmptarget)); - if (MIA->isConditionalBranch(inst)) { - jmptarget = base_address + current_address + inst_size; - if (blocks.find(jmptarget) == blocks.end()) - remaining_blocks.push(new LLVMBasicBlock(jmptarget)); - } - } - } - } - } else { - inst_size = 0; - } + if (!x->second.getAddress(result)) { + LLVMFunction * fun = new LLVMFunction(x->first, result); + remaining_functions.push_back(fun); + functions.insert(std::make_pair(result, fun)); + LOG4CXX_DEBUG(logger, "Disasembling " << x->first); + } + } + + for (LLVMFunction* function : remaining_functions) { + disassembleFunction(function); + } + if (binary->isELF()) { + bool is64bit = (binary->getData()[4] == 0x02); - if (inst_size == 0 || MIA->isTerminator(inst) || MIA->isBranch(inst)) { - current_block->setEndAddress(current_address + base_address); - blocks.insert(std::make_pair(current_block->getStartAddress(), current_block)); - LOG4CXX_DEBUG(logger, "Finished Block at " << current_block->getEndAddress()); - break; + uint64_t entry(0); + for (int i(0); i < (is64bit? 8 : 4); ++i) { + if (binary->isLittleEndian()) { + entry |= (unsigned int)((unsigned char)binary->getData()[0x18 + i]) << 8*i; + } else { + entry = entry << 8; + entry |= (unsigned char)binary->getData()[0x18 + i]; + } + } + LOG4CXX_DEBUG(logger, "Adding entry at: " << std::hex << entry); + std::stringstream s; + s << "<_start 0x" << std::hex << entry << ">"; + + disassembleFunctionAt(entry, s.str()); + } + + if (functions.empty()) { + uint64_t text_entry; + text_section.getAddress(text_entry); + LOG4CXX_INFO(logger, "No Symbols found, starting at the beginning of the text segment"); + disassembleFunctionAt(text_entry); + } + + splitBlocks(); +} + +void LLVMDisassembler::splitBlocks() { + SectionRef text_section = sections[".text"]; + StringRef bytes; + text_section.getContents(bytes); + StringRefMemoryObject ref(bytes); + + // Split blocks where jumps are going inside the block + for (auto it = blocks.begin(); it != blocks.end(); ++it) { + LLVMBasicBlock * current_block = it->second; + uint64_t inst_size; + uint64_t base_address; + text_section.getAddress(base_address); + uint64_t current_address = current_block->getStartAddress() - base_address; + while(current_block->getEndAddress() - base_address > current_address) { + MCInst inst; + std::string buf; + llvm::raw_string_ostream s(buf); + + if(llvm::MCDisassembler::Success == + DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) { + auto other = blocks.find(current_address + inst_size + base_address); + + if (other != blocks.end()) { + uint64_t endaddress = current_address + inst_size + base_address; + if (endaddress != current_block->getEndAddress()) { + LOG4CXX_DEBUG(logger, "Shortening block starting at " + << std::hex + << current_block->getStartAddress() + << " now ending at " + << other->first); + current_block->setEndAddress(endaddress); + current_block->setNextBlock(0, other->first); + current_block->setNextBlock(1, 0); + } } - current_address += inst_size; + } else { + inst_size = 1; } + current_address += inst_size; } - LOG4CXX_DEBUG(logger, "Finished function " << current_function->getName()); } } @@ -247,6 +364,51 @@ void LLVMDisassembler::readSections() { } -BasicBlock * LLVMDisassembler::generateControlFlowGraph(uint64_t address) { +void LLVMDisassembler::forEachFunction(std::function callback) { + std::for_each(functions.begin(), functions.end(), + [&](std::pair x) { + callback(x.first, x.second); + }); +} + +void LLVMDisassembler::printEachInstruction(uint64_t start, uint64_t end, + std::function fun) { + SectionRef text_section = sections[".text"]; + uint64_t base_address; + text_section.getAddress(base_address); + uint64_t current_address = start - base_address; + + StringRef bytes; + text_section.getContents(bytes); + StringRefMemoryObject ref(bytes); + while (current_address < end - base_address) { + uint64_t inst_size; + MCInst inst; + std::string buf; + llvm::raw_string_ostream s(buf); + + if(llvm::MCDisassembler::Success == + DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) { + + uint8_t bytes[inst_size+2]; + ref.readBytes(current_address, inst_size, bytes); + + uint64_t jmptarget; + if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) { + std::stringstream stream; + stream << std::hex << (base_address + jmptarget); + IP->printInst(&inst, s, stream.str()); + } else + IP->printInst(&inst, s, ""); + + fun(bytes, inst_size, s.str()); + } else { + LOG4CXX_WARN(logger, "Invalid byte at" << std::hex << current_address + base_address); + fun(NULL, 0, "Invalid Byte"); + inst_size = 1; + } + + current_address += inst_size; + } }