X-Git-Url: https://git.siccegge.de//index.cgi?p=frida%2Ffrida.git;a=blobdiff_plain;f=src%2Fdisassembler%2Fllvm%2FLLVMDisassembler.cxx;h=b40cdd4a3cd81f59dc111515ca79d169acf8fd37;hp=a8c2867f50784255a27246bb10be5db9312d2e87;hb=c79a955e4fe1cad85b526bdc4af86d1226f99329;hpb=1d66010fc38eab99ec29d25606e3d44a3297e6e6 diff --git a/src/disassembler/llvm/LLVMDisassembler.cxx b/src/disassembler/llvm/LLVMDisassembler.cxx index a8c2867..b40cdd4 100644 --- a/src/disassembler/llvm/LLVMDisassembler.cxx +++ b/src/disassembler/llvm/LLVMDisassembler.cxx @@ -1,134 +1,303 @@ #include "disassembler/llvm/LLVMDisassembler.hxx" -#include "disassembler/llvm/LLVMBasicBlock.hxx" -#include "disassembler/llvm/LLVMFunction.hxx" +#include "core/InformationManager.hxx" +#include "core/Function.hxx" +#include "core/BasicBlock.hxx" #include #include +#include using namespace llvm; using namespace llvm::object; +using std::error_code; + +/* + * + */ +Disassembler * createLLVMDisassembler(const std::string& filename, InformationManager* manager) { + if (filename == "") + return NULL; + + std::unique_ptr o; + o.reset(createBinary(filename).get()); + Binary * op = o.release(); + + // ELFType + if (ELF32LEObjectFile * object = dyn_cast(op)) { + return new LLVMDisassembler>(filename, manager, object); + } + if (ELF64LEObjectFile * object = dyn_cast(op)) { + return new LLVMDisassembler>(filename, manager, object); + } + if (ELF32BEObjectFile * object = dyn_cast(op)) { + return new LLVMDisassembler>(filename, manager, object); + } + if (ELF64BEObjectFile * object = dyn_cast(op)) { + return new LLVMDisassembler>(filename, manager, object); + } + + return NULL; +} /* * TODO: fallback code falls die Datei kein ELF/PE/COFF/MacO/.. binary * ist sondern z.B. einfach nur Instruktionen oder ein Bootsektor oder * foo */ -LLVMDisassembler::LLVMDisassembler(const std::string& filename) - : Disassembler(filename) - , logger(log4cxx::Logger::getLogger("LLVMDisassembler")) - , triple("unknown-unknown-unknown") +template +LLVMDisassembler::LLVMDisassembler(const std::string& filename, + InformationManager* manager, + ELFObjectFile* file) + : Disassembler() + , logger(log4cxx::Logger::getLogger("LLVMDisassembler")) + , triple("unknown-unknown-unknown") + , manager(manager) { - LOG4CXX_DEBUG(logger, "Handling file" << filename); - auto result = createBinary(filename); - - error_code ec; - if ((ec = result.getError())) { - LOG4CXX_ERROR(logger, "Failed to load Binary" << ec.message()); - binary = NULL; - return; - } - - binary.reset(result.get()); - - o = dyn_cast(binary.get()); - - triple.setArch(Triple::ArchType(o->getArch())); - std::string tripleName(triple.getTriple()); - - LOG4CXX_INFO(logger, "Architecture " << tripleName); - - - std::string es; - target = TargetRegistry::lookupTarget("", triple, es); - if (!target) { - LOG4CXX_ERROR(logger, es); - return; - } - - LOG4CXX_INFO(logger, "Target " << target->getName()); - - MRI.reset(target->createMCRegInfo(tripleName)); - if (!MRI) { - LOG4CXX_ERROR(logger, "no register info for target " << tripleName); - return; - } - - // Set up disassembler. - AsmInfo.reset(target->createMCAsmInfo(*MRI, tripleName)); - if (!AsmInfo) { - LOG4CXX_ERROR(logger, "no assembly info for target " << tripleName); - return; - } - - STI.reset(target->createMCSubtargetInfo(tripleName, "", "")); - if (!STI) { - LOG4CXX_ERROR(logger, "no subtarget info for target " << tripleName); - return; - } - - MII.reset(target->createMCInstrInfo()); - if (!MII) { - LOG4CXX_ERROR(logger, "no instruction info for target " << tripleName); - return; - } - - DisAsm.reset(target->createMCDisassembler(*STI)); - if (!DisAsm) { - LOG4CXX_ERROR(logger, "no disassembler for target " << tripleName); - return; - } - - MOFI.reset(new MCObjectFileInfo); - Ctx.reset(new MCContext(AsmInfo.get(), MRI.get(), MOFI.get())); - RelInfo.reset( - target->createMCRelocationInfo(tripleName, *Ctx.get())); - if (RelInfo) { - Symzer.reset( - MCObjectSymbolizer::createObjectSymbolizer(*Ctx.get(), RelInfo, o)); - if (Symzer) - DisAsm->setSymbolizer(Symzer); - } - - MIA.reset(target->createMCInstrAnalysis(MII.get())); - - int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); - IP.reset(target->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI)); - if (!IP) { - LOG4CXX_ERROR(logger, "no instruction printer for target " << tripleName); - return; - } - - IP->setPrintImmHex(llvm::HexStyle::C); - IP->setPrintImmHex(true); - - OwningPtr OD( - new MCObjectDisassembler(*o, *DisAsm, *MIA)); - Mod.reset(OD->buildModule(false)); - - readSymbols(); - readSections(); - disassemble(); + LOG4CXX_DEBUG(logger, "Handling file" << filename); + + if (!file) { + auto result = createBinary(filename); + + error_code ec; + if ((ec = result.getError())) { + LOG4CXX_ERROR(logger, "Failed to load Binary" << ec.message()); + binary = NULL; + return; + } + + binary.reset(result.get()); + + o = dyn_cast>(binary.get()); + } else { + o = file; + binary.reset(file); + } + + triple.setArch(Triple::ArchType(o->getArch())); + std::string tripleName(triple.getTriple()); + + LOG4CXX_INFO(logger, "Architecture " << tripleName); + + + std::string es; + target = TargetRegistry::lookupTarget("", triple, es); + if (!target) { + LOG4CXX_ERROR(logger, es); + return; + } + + LOG4CXX_INFO(logger, "Target " << target->getName()); + + MRI.reset(target->createMCRegInfo(tripleName)); + if (!MRI) { + LOG4CXX_ERROR(logger, "no register info for target " << tripleName); + return; + } + + // Set up disassembler. + AsmInfo.reset(target->createMCAsmInfo(*MRI, tripleName)); + if (!AsmInfo) { + LOG4CXX_ERROR(logger, "no assembly info for target " << tripleName); + return; + } + + STI.reset(target->createMCSubtargetInfo(tripleName, "", "")); + if (!STI) { + LOG4CXX_ERROR(logger, "no subtarget info for target " << tripleName); + return; + } + + MII.reset(target->createMCInstrInfo()); + if (!MII) { + LOG4CXX_ERROR(logger, "no instruction info for target " << tripleName); + return; + } + + MOFI.reset(new MCObjectFileInfo); + MCContext Ctx(AsmInfo.get(), MRI.get(), MOFI.get()); + + DisAsm.reset(target->createMCDisassembler(*STI, Ctx)); + if (!DisAsm) { + LOG4CXX_ERROR(logger, "no disassembler for target " << tripleName); + return; + } + RelInfo.reset( + target->createMCRelocationInfo(tripleName, Ctx)); + if (RelInfo) { + Symzer.reset( + MCObjectSymbolizer::createObjectSymbolizer(Ctx, std::move(RelInfo), o)); + if (Symzer) + DisAsm->setSymbolizer(std::move(Symzer)); + } + RelInfo.release(); + Symzer.release(); + + MIA.reset(target->createMCInstrAnalysis(MII.get())); + if (!MIA) { + LOG4CXX_ERROR(logger, "no instruction analysis for target " << tripleName); + return; + } + + int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); + IP.reset(target->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI)); + if (!IP) { + LOG4CXX_ERROR(logger, "no instruction printer for target " << tripleName); + return; + } + + IP->setPrintImmHex(llvm::HexStyle::C); + IP->setPrintImmHex(true); + + std::unique_ptr OD( + new MCObjectDisassembler(*o, *DisAsm, *MIA)); + Mod.reset(OD->buildModule(false)); + + readSections(); } -LLVMDisassembler::~LLVMDisassembler() { - std::for_each(functions.begin(), functions.end(), - [](std::pair it) { - delete it.second; - }); - std::for_each(blocks.begin(), blocks.end(), - [](std::pair it) { - delete it.second; - }); +template +void LLVMDisassembler::start() { + readSymbols(); + disassemble(); + readDynamicSymbols(); } -/* - * TODO: If we jump into some Basic Block we need to split it there into two - */ -void LLVMDisassembler::disassemble() { - std::stack remaining_functions; - std::stack remaining_blocks; - SectionRef text_section = sections[".text"]; +template +LLVMDisassembler::~LLVMDisassembler() {} + +template +Function* LLVMDisassembler::disassembleFunctionAt(uint64_t address, const std::string& name) { + Function * function; + SectionRef text_section = sections[".text"]; + uint64_t base_address, size; + text_section.getAddress(base_address); + text_section.getSize(size); + + if (address < base_address || + address >= base_address + size) { + return NULL; + } + + if (NULL == (function = manager->getFunction(address))) { + if (name == "") { + std::stringstream s; + s << ""; + function = manager->newFunction(address); + function->setName(s.str()); + } else { + function = manager->newFunction(address); + function->setName(name); + } + disassembleFunction(function); + manager->finishFunction(function); + } + + return function; +} + +template +void LLVMDisassembler::disassembleFunction(Function* function) { + std::stack remaining_blocks; + /* TODO: + * Do all blocks get added properly? We should take care to remove + * the other ones at the end of the function! + */ + std::map new_blocks; + SectionRef text_section = sections[".text"]; + StringRef bytes; + text_section.getContents(bytes); + StringRefMemoryObject ref(bytes); + + LOG4CXX_DEBUG(logger, "Handling function " << function->getName()); + + BasicBlock * block = manager->newBasicBlock(function->getStartAddress()); + remaining_blocks.push(block); + new_blocks.insert(std::make_pair(block->getStartAddress(), block)); + function->addBasicBlock(block); + + while (remaining_blocks.size()) { + BasicBlock * current_block = remaining_blocks.top(); + remaining_blocks.pop(); + + LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex + << current_block->getStartAddress()); + + uint64_t inst_size; + uint64_t base_address; + text_section.getAddress(base_address); + uint64_t current_address = current_block->getStartAddress() - base_address; + while(true) { + MCInst inst; + std::string buf; + llvm::raw_string_ostream s(buf); + + if(llvm::MCDisassembler::Success == + DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) { + uint64_t jmptarget; + + if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) { + jmptarget += base_address; + if (!MIA->isIndirectBranch(inst)) { + if (MIA->isCall(inst)) { + if (NULL == manager->getFunction(jmptarget)) + disassembleFunctionAt(jmptarget); + } else { + current_block->setNextBlock(0, jmptarget); + if (new_blocks.find(jmptarget) == new_blocks.end()) { + BasicBlock * block = manager->newBasicBlock(jmptarget); + assert(block); + new_blocks.insert(std::make_pair(block->getStartAddress(), block)); + function->addBasicBlock(block); + remaining_blocks.push(block); + } else { + LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex + << current_block->getStartAddress()); + function->addBasicBlock(new_blocks.find(jmptarget)->second); + } + if (MIA->isConditionalBranch(inst)) { + jmptarget = base_address + current_address + inst_size; + current_block->setNextBlock(1, jmptarget); + if (new_blocks.find(jmptarget) == new_blocks.end()) { + BasicBlock * block = manager->newBasicBlock(jmptarget); + assert(block); + new_blocks.insert(std::make_pair(block->getStartAddress(), block)); + function->addBasicBlock(block); + remaining_blocks.push(block); + } else { + LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex + << current_block->getStartAddress()); + function->addBasicBlock(new_blocks.find(jmptarget)->second); + } + } + } + } + } + } else { + inst_size = 0; + } + + + if (inst_size == 0 || MIA->isTerminator(inst) || MIA->isBranch(inst)) { + current_block->setEndAddress(current_address + base_address + inst_size); + LOG4CXX_DEBUG(logger, "Finished Block at " << std::hex << + current_block->getEndAddress()); + break; + } + current_address += inst_size; + } + } + splitBlocks(function); + LOG4CXX_DEBUG(logger, "Finished function " << function->getName()); + manager->signal_new_function(function); +} + +template +void LLVMDisassembler::disassemble() { + SectionRef text_section = sections[".text"]; + std::vector remaining_functions; + + // Assume all function symbols actually start a real function for (auto x = symbols.begin(); x != symbols.end(); ++x) { uint64_t result; bool contains; @@ -139,146 +308,199 @@ void LLVMDisassembler::disassemble() { continue; if (x->second.getType(symbol_type) - || SymbolRef::ST_Function != symbol_type) + || SymbolRef::ST_Function != symbol_type) continue; if (!x->second.getAddress(result)) { - LLVMFunction * fun = new LLVMFunction(x->first, result); - remaining_functions.push(fun); - functions.insert(std::make_pair(result, fun)); + Function * fun = manager->newFunction(result); + fun->setName(x->first); + remaining_functions.push_back(fun); LOG4CXX_DEBUG(logger, "Disasembling " << x->first); } } - StringRef bytes; - text_section.getContents(bytes); - StringRefMemoryObject ref(bytes); - - while (remaining_functions.size()) { - LLVMFunction * current_function = remaining_functions.top(); - remaining_functions.pop(); - - LOG4CXX_DEBUG(logger, "Handling function " << current_function->getName()); - - // if ("_start" != current_function->getName()) - // continue; - - LLVMBasicBlock * block = new LLVMBasicBlock(current_function->getStartAddress()); - remaining_blocks.push(block); - blocks.insert(std::make_pair(block->getStartAddress(), block)); - - while (remaining_blocks.size()) { - LLVMBasicBlock * current_block = remaining_blocks.top(); - remaining_blocks.pop(); - - LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex << current_block->getStartAddress()); - - uint64_t inst_size; - uint64_t base_address; - text_section.getAddress(base_address); - uint64_t current_address = current_block->getStartAddress() - base_address; - while(true) { - MCInst inst; - std::string buf; - llvm::raw_string_ostream s(buf); - - if(llvm::MCDisassembler::Success == - DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) { - - uint8_t bytes[inst_size+2]; - ref.readBytes(current_address, inst_size, bytes); - s << '\t'; - for(uint8_t* cur = bytes; cur < bytes + inst_size; ++cur) { - s.write_hex(*cur); - s << ' '; - } - s << '\t'; - - IP->printInst(&inst, s, ""); - - LOG4CXX_DEBUG(logger, std::hex << current_address + base_address << s.str()); - - uint64_t jmptarget; - if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) { - jmptarget += base_address; - if (!MIA->isIndirectBranch(inst)) { - if (MIA->isCall(inst)) { - if (functions.find(jmptarget) == functions.end()) { - std::stringstream s; - s << ""; - LLVMFunction * fun = new LLVMFunction(s.str(), jmptarget); - functions.insert(std::make_pair(jmptarget, fun)); - remaining_functions.push(fun); - } - } else { - if (blocks.find(jmptarget) == blocks.end()) { - LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget); - blocks.insert(std::make_pair(block->getStartAddress(), block)); - current_block->setNextBlock(0, block->getStartAddress()); - remaining_blocks.push(block); - } - if (MIA->isConditionalBranch(inst)) { - jmptarget = base_address + current_address + inst_size; - if (blocks.find(jmptarget) == blocks.end()) { - LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget); - blocks.insert(std::make_pair(block->getStartAddress(), block)); - current_block->setNextBlock(1, block->getStartAddress()); - remaining_blocks.push(new LLVMBasicBlock(jmptarget)); - } - } - } - } - } - } else { - inst_size = 0; - } - - - if (inst_size == 0 || MIA->isTerminator(inst) || MIA->isBranch(inst)) { - current_block->setEndAddress(current_address + base_address); - LOG4CXX_DEBUG(logger, "Finished Block at " << std::hex << - current_block->getEndAddress()); - break; - } - current_address += inst_size; - } - } - LOG4CXX_DEBUG(logger, "Finished function " << current_function->getName()); - } + for (Function* function : remaining_functions) { + disassembleFunction(function); + manager->finishFunction(function); + } + + if (binary->isELF()) { + const ELFO * elffile = o->getELFFile(); + const typename ELFO::Elf_Ehdr * header = elffile->getHeader(); + + _entryAddress = header->e_entry; + LOG4CXX_DEBUG(logger, "Adding entryAddress at: " << std::hex << _entryAddress); + std::stringstream s; + s << "<_start 0x" << std::hex << _entryAddress << ">"; + + disassembleFunctionAt(_entryAddress, s.str()); + } + + if (!manager->hasFunctions()) { + uint64_t text_entry; + text_section.getAddress(text_entry); + LOG4CXX_INFO(logger, "No Symbols found, starting at the beginning of the text segment"); + disassembleFunctionAt(text_entry); + } } -void LLVMDisassembler::readSymbols() { - error_code ec; - symbol_iterator si(o->symbol_begin()), se(o->symbol_end()); - for (; si != se; ++si) { - StringRef name; - if ((ec = si->getName(name))) { - LOG4CXX_ERROR(logger, ec.message()); - break; - } - LOG4CXX_DEBUG(logger, "Added symbol " << name.str()); - symbols.insert(make_pair(name.str(), *si)); - } +template +void LLVMDisassembler::splitBlocks(Function* function) { + SectionRef text_section = sections[".text"]; + StringRef bytes; + text_section.getContents(bytes); + StringRefMemoryObject ref(bytes); + + // Split blocks where jumps are going inside the block + for (auto it = function->blocks().begin(); + it != function->blocks().end(); + ++it) { + BasicBlock * current_block = it->second; + uint64_t inst_size; + uint64_t base_address; + text_section.getAddress(base_address); + uint64_t current_address = current_block->getStartAddress() - base_address; + while(current_block->getEndAddress() - base_address > current_address) { + MCInst inst; + std::string buf; + llvm::raw_string_ostream s(buf); + + if(llvm::MCDisassembler::Success == + DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) { + // See if some other block starts here + BasicBlock* other = manager->getBasicBlock(current_address + + inst_size + + base_address); + + // Special case, other block starts here but we are at the end anyway + if (other != NULL) { + uint64_t endaddress = current_address + inst_size + base_address; + if (endaddress != current_block->getEndAddress()) { + LOG4CXX_DEBUG(logger, "Shortening block starting at " + << std::hex + << current_block->getStartAddress() + << " now ending at " + << other->getStartAddress()); + function->addBasicBlock(other); + current_block->setEndAddress(endaddress); + current_block->setNextBlock(0, other->getStartAddress()); + current_block->setNextBlock(1, 0); + } + } + } else { + inst_size = 1; + } + current_address += inst_size; + } + } } -void LLVMDisassembler::readSections() { - error_code ec; - section_iterator i(o->section_begin()), e(o->section_end()); - for (; i != e; ++i) { - StringRef name; - if ((ec = i->getName(name))) { - LOG4CXX_ERROR(logger, ec.message()); - break; - } - LOG4CXX_DEBUG(logger, "Added section " << name.str()); - sections.insert(make_pair(name.str(), *i)); - } +template +void LLVMDisassembler::readDynamicSymbols() { + const ELFO * elffile = o->getELFFile(); + for (typename ELFO::Elf_Sym_Iter + it = elffile->begin_dynamic_symbols(), + end = elffile->end_dynamic_symbols(); + it != end; + ++it) { + if (it->getType() == 2) { // Function + bool is_default; + // TODO: Error handling + std::string symbolname = *(elffile->getSymbolName(it)); + std::string symbolversion = *(elffile->getSymbolVersion(nullptr, &*it, is_default)); + manager->signal_new_dyn_symbol(symbolname + (is_default? "@@" : "@") + symbolversion); + LOG4CXX_DEBUG(logger, "Adding dynamic Symbol " << symbolname << (is_default? "@@" : "@") << symbolversion); + } + } +} + +template +void LLVMDisassembler::readSymbols() { + error_code ec; + symbol_iterator si(o->symbol_begin()), se(o->symbol_end()); + for (; si != se; ++si) { + StringRef name; + if ((ec = si->getName(name))) { + LOG4CXX_ERROR(logger, ec.message()); + break; + } + LOG4CXX_DEBUG(logger, "Added symbol " << name.str()); + symbols.insert(make_pair(name.str(), *si)); + } +} + +template +void LLVMDisassembler::readSections() { + error_code ec; + section_iterator i(o->section_begin()), e(o->section_end()); + for (; i != e; ++i) { + StringRef name; + if ((ec = i->getName(name))) { + LOG4CXX_ERROR(logger, ec.message()); + break; + } + LOG4CXX_DEBUG(logger, "Added section " << name.str()); + sections.insert(make_pair(name.str(), *i)); + } } -void LLVMDisassembler::forEachFunction(std::function callback) { - std::for_each(functions.begin(), functions.end(), - [&](std::pair x) { - callback(x.first, x.second); - }); +// template +// void LLVMDisassembler::forEachFunction(std::function callback) { +// // std::for_each(functions.begin(), functions.end(), +// // [&](std::pair x) { +// // callback(x.first, x.second); +// // }); +// } + +template +void LLVMDisassembler::printEachInstruction(uint64_t start, uint64_t end, + std::function fun) { + SectionRef text_section = sections[".text"]; + uint64_t base_address; + text_section.getAddress(base_address); + uint64_t current_address = start - base_address; + + StringRef bytes; + text_section.getContents(bytes); + StringRefMemoryObject ref(bytes); + + while (current_address < end - base_address) { + uint64_t inst_size; + MCInst inst; + std::string buf; + llvm::raw_string_ostream s(buf); + + if(llvm::MCDisassembler::Success == + DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) { + + uint8_t bytes[inst_size+2]; + ref.readBytes(current_address, inst_size, bytes); + + uint64_t jmptarget; + std::string ref(""); + IP->printInst(&inst, s, ""); + if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) { + std::stringstream stream; + if (MIA->isCall(inst)) + stream << "function:"; + else + stream << "block:"; + + stream << std::hex << (base_address + jmptarget); + ref = stream.str(); + } + + + fun(bytes, inst_size, s.str(), ref); + } else { + LOG4CXX_WARN(logger, "Invalid byte at" << std::hex << current_address + base_address); + fun(NULL, 0, "Invalid Byte", ""); + inst_size = 1; + } + + current_address += inst_size; + } }