X-Git-Url: https://git.siccegge.de//index.cgi?p=frida%2Ffrida.git;a=blobdiff_plain;f=src%2Fdisassembler%2Fllvm%2FLLVMDisassembler.cxx;h=e673c371650027021455b63f64b94e422dcc7a5d;hp=18cfe8065eefc0fb5e409d7978408e50dd47a1b2;hb=1a19eafdb36507230a6f421defbc49162d5246e6;hpb=c862852c36741e1be13abff538e526114e3d828c diff --git a/src/disassembler/llvm/LLVMDisassembler.cxx b/src/disassembler/llvm/LLVMDisassembler.cxx index 18cfe80..e673c37 100644 --- a/src/disassembler/llvm/LLVMDisassembler.cxx +++ b/src/disassembler/llvm/LLVMDisassembler.cxx @@ -1,24 +1,51 @@ +#include "disassembler/Instruction.hxx" #include "disassembler/llvm/LLVMDisassembler.hxx" -#include "disassembler/llvm/LLVMBasicBlock.hxx" -#include "disassembler/llvm/LLVMFunction.hxx" +#include "core/InformationManager.hxx" +#include "core/Function.hxx" +#include "core/BasicBlock.hxx" +#include #include #include +#include using namespace llvm; using namespace llvm::object; using std::error_code; +namespace { + class COFFT { + + }; + + class MACHOT { + + }; +} + /* * */ Disassembler * createLLVMDisassembler(const std::string& filename, InformationManager* manager) { + log4cxx::LoggerPtr logger(log4cxx::Logger::getLogger("disassembler.LLVMDisassembler")); if (filename == "") return NULL; - std::unique_ptr o; - o.reset(createBinary(filename).get()); - Binary * op = o.release(); + auto retval = createBinary(filename); + if (error_code ec = retval.getError()) { + LOG4CXX_ERROR(logger, ec.message()); + return NULL; + } +#if defined(LLVM_35) + Binary * op = retval.get(); +#elif defined(LLVM_36) + OwningBinary ob; + ob = std::move(retval.get()); + Binary* op = ob.getBinary(); + auto foo = ob.takeBinary(); + foo.first.release(); + foo.second.release(); +#endif // ELFType if (ELF32LEObjectFile * object = dyn_cast(op)) { @@ -33,6 +60,12 @@ Disassembler * createLLVMDisassembler(const std::string& filename, InformationMa if (ELF64BEObjectFile * object = dyn_cast(op)) { return new LLVMDisassembler>(filename, manager, object); } + if (COFFObjectFile * object = dyn_cast(op)) { + return new LLVMDisassembler(filename, manager, object); + } + if (MachOObjectFile * object = dyn_cast(op)) { + return new LLVMDisassembler(filename, manager, object); + } return NULL; } @@ -45,13 +78,13 @@ Disassembler * createLLVMDisassembler(const std::string& filename, InformationMa template LLVMDisassembler::LLVMDisassembler(const std::string& filename, InformationManager* manager, - ELFObjectFile* file) - : Disassembler(filename, manager) - , logger(log4cxx::Logger::getLogger("LLVMDisassembler")) + ObjectFile* file) + : Disassembler() + , logger(log4cxx::Logger::getLogger("disassembler.LLVMDisassembler")) , triple("unknown-unknown-unknown") , manager(manager) { - LOG4CXX_DEBUG(logger, "Handling file" << filename); + LOG4CXX_DEBUG(logger, "Handling file " << filename); if (!file) { auto result = createBinary(filename); @@ -63,9 +96,17 @@ LLVMDisassembler::LLVMDisassembler(const std::string& filename, return; } +#if defined(LLVM_35) binary.reset(result.get()); +#elif defined(LLVM_36) + OwningBinary ob; + ob = std::move(result.get()); + Binary* op = ob.getBinary(); - o = dyn_cast>(binary.get()); + binary.reset(op); +#endif + + o = dyn_cast(binary.get()); } else { o = file; binary.reset(file); @@ -122,10 +163,10 @@ LLVMDisassembler::LLVMDisassembler(const std::string& filename, RelInfo.reset( target->createMCRelocationInfo(tripleName, Ctx)); if (RelInfo) { - Symzer.reset( - MCObjectSymbolizer::createObjectSymbolizer(Ctx, std::move(RelInfo), o)); - if (Symzer) - DisAsm->setSymbolizer(std::move(Symzer)); + // Symzer.reset( + // MCObjectSymbolizer::createObjectSymbolizer(Ctx, std::move(RelInfo), o)); + // if (Symzer) + // DisAsm->setSymbolizer(std::move(Symzer)); } RelInfo.release(); Symzer.release(); @@ -146,85 +187,103 @@ LLVMDisassembler::LLVMDisassembler(const std::string& filename, IP->setPrintImmHex(llvm::HexStyle::C); IP->setPrintImmHex(true); - std::unique_ptr OD( - new MCObjectDisassembler(*o, *DisAsm, *MIA)); - Mod.reset(OD->buildModule(false)); +// std::unique_ptr OD( +// new MCObjectDisassembler(*o, *DisAsm, *MIA)); + //Mod.reset(OD->buildModule(false)); + + readSections(); } template void LLVMDisassembler::start() { readSymbols(); - readSections(); disassemble(); + readDynamicSymbols(); } template -LLVMDisassembler::~LLVMDisassembler() { - std::for_each(functions.begin(), functions.end(), - [](std::pair it) { - delete it.second; - }); - std::for_each(blocks.begin(), blocks.end(), - [](std::pair it) { - delete it.second; - }); -} +LLVMDisassembler::~LLVMDisassembler() {} template Function* LLVMDisassembler::disassembleFunctionAt(uint64_t address, const std::string& name) { - SectionRef text_section = sections[".text"]; + Function * function; + SectionRef text_section = getTextSection(); uint64_t base_address, size; +#if defined(LLVM_35) text_section.getAddress(base_address); text_section.getSize(size); - +#elif defined(LLVM_36) + base_address = text_section.getAddress(); + size = text_section.getSize(); +#endif if (address < base_address || address >= base_address + size) { return NULL; } - if (functions.find(address) != functions.end()) { - return functions[address]; - } + if (NULL == (function = manager->getFunction(address))) { - LLVMFunction * function; - if (name == "") { - std::stringstream s; - s << ""; - function = new LLVMFunction(s.str(), address); - } else { - function = new LLVMFunction(name, address); + if (name == "") { + std::stringstream s; + s << ""; + function = manager->newFunction(address); + function->setName(s.str()); + } else { + function = manager->newFunction(address); + function->setName(name); + } + disassembleFunction(function); } - functions.insert(std::make_pair(address, function)); - - disassembleFunction(function); return function; } template -void LLVMDisassembler::disassembleFunction(LLVMFunction* function) { - std::stack remaining_blocks; - SectionRef text_section = sections[".text"]; +void LLVMDisassembler::disassembleFunction(Function* function) { + std::vector called_functions; + std::stack remaining_blocks; + /* TODO: + * Do all blocks get added properly? We should take care to remove + * the other ones at the end of the function! + */ + std::map new_blocks; + SectionRef text_section = getTextSection(); StringRef bytes; text_section.getContents(bytes); +#if defined(LLVM_35) StringRefMemoryObject ref(bytes); +#elif defined(LLVM_36) + ArrayRef bytearray(reinterpret_cast(bytes.data()), + bytes.size()); +#else +#error LLVM != 3.5 | 3.6 not supported +#endif LOG4CXX_DEBUG(logger, "Handling function " << function->getName()); - LLVMBasicBlock * block = new LLVMBasicBlock(function->getStartAddress(), this); + BasicBlock * block = manager->newBasicBlock(function->getStartAddress()); remaining_blocks.push(block); - blocks.insert(std::make_pair(block->getStartAddress(), block)); + new_blocks.insert(std::make_pair(block->getStartAddress(), block)); function->addBasicBlock(block); + uint64_t base_address, size; +#if defined(LLVM_35) + text_section.getAddress(base_address); + text_section.getSize(size); +#elif defined(LLVM_36) + base_address = text_section.getAddress(); + size = text_section.getSize(); +#endif + LOG4CXX_DEBUG(logger, "Text section at " << std::hex << base_address << " with size " << size); + while (remaining_blocks.size()) { - LLVMBasicBlock * current_block = remaining_blocks.top(); + BasicBlock * current_block = remaining_blocks.top(); remaining_blocks.pop(); - LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex << current_block->getStartAddress()); + LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex + << current_block->getStartAddress()); uint64_t inst_size; - uint64_t base_address; - text_section.getAddress(base_address); uint64_t current_address = current_block->getStartAddress() - base_address; while(true) { MCInst inst; @@ -232,38 +291,48 @@ void LLVMDisassembler::disassembleFunction(LLVMFunction* function) { llvm::raw_string_ostream s(buf); if(llvm::MCDisassembler::Success == +#if defined(LLVM_35) DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) { +#elif defined(LLVM_36) + DisAsm->getInstruction(inst, inst_size, + bytearray.slice(current_address), + base_address + current_address, + nulls(), nulls())) { +#endif uint64_t jmptarget; if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) { jmptarget += base_address; if (!MIA->isIndirectBranch(inst)) { if (MIA->isCall(inst)) { - if (functions.find(jmptarget) == functions.end()) { - disassembleFunctionAt(jmptarget); - } + if (NULL == manager->getFunction(jmptarget)) + called_functions.push_back(jmptarget); } else { current_block->setNextBlock(0, jmptarget); - if (blocks.find(jmptarget) == blocks.end()) { - LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this); - blocks.insert(std::make_pair(block->getStartAddress(), block)); + if (new_blocks.find(jmptarget) == new_blocks.end()) { + BasicBlock * block = manager->newBasicBlock(jmptarget); + assert(block); + new_blocks.insert(std::make_pair(block->getStartAddress(), block)); function->addBasicBlock(block); remaining_blocks.push(block); } else { - LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex << current_block->getStartAddress()); - function->addBasicBlock(blocks.find(jmptarget)->second); + LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex + << current_block->getStartAddress()); + function->addBasicBlock(new_blocks.find(jmptarget)->second); } if (MIA->isConditionalBranch(inst)) { jmptarget = base_address + current_address + inst_size; current_block->setNextBlock(1, jmptarget); - if (blocks.find(jmptarget) == blocks.end()) { - LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this); - blocks.insert(std::make_pair(block->getStartAddress(), block)); + if (new_blocks.find(jmptarget) == new_blocks.end()) { + BasicBlock * block = manager->newBasicBlock(jmptarget); + assert(block); + new_blocks.insert(std::make_pair(block->getStartAddress(), block)); function->addBasicBlock(block); remaining_blocks.push(block); } else { - LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex << current_block->getStartAddress()); - function->addBasicBlock(blocks.find(jmptarget)->second); + LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex + << current_block->getStartAddress()); + function->addBasicBlock(new_blocks.find(jmptarget)->second); } } } @@ -285,13 +354,15 @@ void LLVMDisassembler::disassembleFunction(LLVMFunction* function) { } splitBlocks(function); LOG4CXX_DEBUG(logger, "Finished function " << function->getName()); - manager->signal_new_function(function); + manager->finishFunction(function); + for (uint64_t address : called_functions) + disassembleFunctionAt(address); } template void LLVMDisassembler::disassemble() { - SectionRef text_section = sections[".text"]; - std::vector remaining_functions; + SectionRef text_section = getTextSection(); + std::vector remaining_functions; // Assume all function symbols actually start a real function for (auto x = symbols.begin(); x != symbols.end(); ++x) { @@ -299,8 +370,11 @@ void LLVMDisassembler::disassemble() { bool contains; SymbolRef::Type symbol_type; - +#if defined(LLVM_35) if (text_section.containsSymbol(x->second, contains) || !contains) +#elif defined(LLVM_36) + if (text_section.containsSymbol(x->second)) +#endif continue; if (x->second.getType(symbol_type) @@ -308,23 +382,25 @@ void LLVMDisassembler::disassemble() { continue; if (!x->second.getAddress(result)) { - LLVMFunction * fun = new LLVMFunction(x->first, result); - remaining_functions.push_back(fun); - functions.insert(std::make_pair(result, fun)); - LOG4CXX_DEBUG(logger, "Disasembling " << x->first); + Function * fun = manager->newFunction(result); + if (fun) { + fun->setName(x->first); + remaining_functions.push_back(fun); + LOG4CXX_DEBUG(logger, "Disasembling " << x->first); + } else { + LOG4CXX_DEBUG(logger, "Function at " << std::hex << result + << " already disassembled as " << manager->getFunction(result)->getName()); + } } } - for (LLVMFunction* function : remaining_functions) { + for (Function* function : remaining_functions) { disassembleFunction(function); + manager->finishFunction(function); } if (binary->isELF()) { - typedef ELFFile ELFO; - const ELFO * elffile = o->getELFFile(); - const typename ELFO::Elf_Ehdr * header = elffile->getHeader(); - - _entryAddress = header->e_entry; + uint64_t _entryAddress = entryAddress(); LOG4CXX_DEBUG(logger, "Adding entryAddress at: " << std::hex << _entryAddress); std::stringstream s; s << "<_start 0x" << std::hex << _entryAddress << ">"; @@ -332,29 +408,78 @@ void LLVMDisassembler::disassemble() { disassembleFunctionAt(_entryAddress, s.str()); } - if (functions.empty()) { + if (!manager->hasFunctions()) { uint64_t text_entry; +#if defined(LLVM_35) text_section.getAddress(text_entry); +#elif defined(LLVM_36) + text_entry = text_section.getAddress(); +#endif LOG4CXX_INFO(logger, "No Symbols found, starting at the beginning of the text segment"); disassembleFunctionAt(text_entry); } } +template <> +uint64_t LLVMDisassembler::entryAddress() { + const auto coffobject = dyn_cast(o); + const struct pe32_header* pe32_header; + const struct pe32plus_header* pe32plus_header; + + coffobject->getPE32PlusHeader(pe32plus_header); + + if (pe32plus_header) { + return pe32plus_header->AddressOfEntryPoint; + } else { + coffobject->getPE32Header(pe32_header); + return pe32_header->AddressOfEntryPoint; + } +} + +template<> +uint64_t LLVMDisassembler::entryAddress() { + // TODO + return 0; +} + template -void LLVMDisassembler::splitBlocks(LLVMFunction* function) { - SectionRef text_section = sections[".text"]; +uint64_t LLVMDisassembler::entryAddress() { + const auto elffile = dyn_cast>(o)->getELFFile(); + const auto * header = elffile->getHeader(); + + return header->e_entry; +} + +template +void LLVMDisassembler::splitBlocks(Function* function) { + SectionRef text_section = getTextSection(); StringRef bytes; text_section.getContents(bytes); +#if defined(LLVM_35) StringRefMemoryObject ref(bytes); +#elif defined(LLVM_36) + ArrayRef bytearray(reinterpret_cast(bytes.data()), + bytes.size()); +#endif + + LOG4CXX_DEBUG(logger, "Splitting Blocks in Function " << function->getName()); // Split blocks where jumps are going inside the block for (auto it = function->blocks().begin(); it != function->blocks().end(); ++it) { BasicBlock * current_block = it->second; + if (current_block->getEndAddress() == 0) { + LOG4CXX_ERROR(logger, "UNFINISHED BLOCK " << std::hex << current_block->getStartAddress()); + break; + } uint64_t inst_size; uint64_t base_address; +#if defined(LLVM_35) text_section.getAddress(base_address); +#elif defined(LLVM_36) + base_address = text_section.getAddress(); +#endif uint64_t current_address = current_block->getStartAddress() - base_address; while(current_block->getEndAddress() - base_address > current_address) { MCInst inst; @@ -362,22 +487,32 @@ void LLVMDisassembler::splitBlocks(LLVMFunction* function) { llvm::raw_string_ostream s(buf); if(llvm::MCDisassembler::Success == +#if defined(LLVM_35) DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) { +#elif defined(LLVM_36) + DisAsm->getInstruction(inst, inst_size, + bytearray.slice(current_address), + base_address + current_address, + nulls(), nulls())) { +#endif + // See if some other block starts here - auto other = blocks.find(current_address + inst_size + base_address); + BasicBlock* other = manager->getBasicBlock(current_address + + inst_size + + base_address); // Special case, other block starts here but we are at the end anyway - if (other != blocks.end()) { + if (other != NULL) { uint64_t endaddress = current_address + inst_size + base_address; if (endaddress != current_block->getEndAddress()) { LOG4CXX_DEBUG(logger, "Shortening block starting at " << std::hex << current_block->getStartAddress() << " now ending at " - << other->first); - function->addBasicBlock(other->second); + << other->getStartAddress()); + function->addBasicBlock(other); current_block->setEndAddress(endaddress); - current_block->setNextBlock(0, other->first); + current_block->setNextBlock(0, other->getStartAddress()); current_block->setNextBlock(1, 0); } } @@ -389,6 +524,38 @@ void LLVMDisassembler::splitBlocks(LLVMFunction* function) { } } +template<> +void LLVMDisassembler::readDynamicSymbols() { + //TODO +} + +template<> +void LLVMDisassembler::readDynamicSymbols() { + //TODO +} + +template +void LLVMDisassembler::readDynamicSymbols() { + const auto elffile = dyn_cast>(o)->getELFFile(); + for (auto it = elffile->begin_dynamic_symbols(), + end = elffile->end_dynamic_symbols(); + it != end; + ++it) { + if (it->getType() == 2) { // Function + bool is_default; + // TODO: Error handling + std::string symbolname = *(elffile->getSymbolName(it)); + std::string symbolversion = *(elffile->getSymbolVersion(nullptr, &*it, is_default)); + // TODO: actually get the symbol address from relocations + Function* f = manager->newDynamicFunction(0); + f->setName(symbolname + (is_default? "@@" : "@") + symbolversion); + manager->finishFunction(f); + + LOG4CXX_DEBUG(logger, "Adding dynamic Symbol " << symbolname << (is_default? "@@" : "@") << symbolversion); + } + } +} + template void LLVMDisassembler::readSymbols() { error_code ec; @@ -420,12 +587,95 @@ void LLVMDisassembler::readSections() { } +// template +// void LLVMDisassembler::forEachFunction(std::function callback) { +// // std::for_each(functions.begin(), functions.end(), +// // [&](std::pair x) { +// // callback(x.first, x.second); +// // }); +// } + template -void LLVMDisassembler::forEachFunction(std::function callback) { - std::for_each(functions.begin(), functions.end(), - [&](std::pair x) { - callback(x.first, x.second); - }); +std::vector LLVMDisassembler::getInstructions(const BasicBlock *block) { + std::vector result; + SectionRef text_section = getTextSection(); + uint64_t base_address; +#if defined(LLVM_35) + text_section.getAddress(base_address); +#elif defined(LLVM_36) + base_address = text_section.getAddress(); +#endif + + uint64_t current_address = block->getStartAddress() - base_address; + uint64_t end_position = block->getEndAddress() - base_address; + + StringRef bytes; + text_section.getContents(bytes); +#if defined(LLVM_35) + StringRefMemoryObject ref(bytes); +#elif defined(LLVM_36) + ArrayRef bytearray(reinterpret_cast(bytes.data()), + bytes.size()); +#endif + + + while (current_address < end_position) { + uint64_t inst_size; + MCInst inst; + std::string buf; + llvm::raw_string_ostream s(buf); + + if(llvm::MCDisassembler::Success == +#if defined(LLVM_35) + DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) { +#elif defined(LLVM_36) + DisAsm->getInstruction(inst, inst_size, + bytearray.slice(current_address), + base_address + current_address, + nulls(), nulls())) { +#endif + + uint8_t bytes[inst_size+2]; +#if defined(LLVM_35) + ref.readBytes(current_address, inst_size, bytes); +#elif defined(LLVM_36) + size_t bytesindex(0); + for (uint8_t byte : bytearray.slice(current_address, inst_size)) { + bytes[bytesindex++] = byte; + } +#endif + + uint64_t jmptarget; + std::string ref(""); + IP->printInst(&inst, s, ""); + if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) { + std::stringstream stream; + if (MIA->isCall(inst)) + stream << "function:"; + else + stream << "block:"; + + stream << std::hex << (base_address + jmptarget); + ref = stream.str(); + } + result.push_back(Instruction(current_address + base_address, boost::algorithm::trim_copy(s.str()), + std::vector(bytes, bytes+inst_size), ref)); + } else { + LOG4CXX_WARN(logger, "Invalid byte at" << std::hex << current_address + base_address); + uint8_t bytes[1]; +#if defined(LLVM_35) + ref.readBytes(current_address, 1, bytes); +#elif defined(LLVM_36) + bytes[0] = bytearray[current_address]; +#endif + result.push_back(Instruction(current_address + base_address, "Invalid Instruction", + std::vector(bytes, bytes+1), "")); + inst_size = 1; + } + + current_address += inst_size; + } + return result; } template @@ -433,14 +683,25 @@ void LLVMDisassembler::printEachInstruction(uint64_t start, uint64_t end, std::function fun) { - SectionRef text_section = sections[".text"]; + SectionRef text_section = getTextSection(); uint64_t base_address; +#if defined(LLVM_35) text_section.getAddress(base_address); +#elif defined(LLVM_36) + base_address = text_section.getAddress(); +#endif + uint64_t current_address = start - base_address; StringRef bytes; text_section.getContents(bytes); +#if defined(LLVM_35) StringRefMemoryObject ref(bytes); +#elif defined(LLVM_36) + ArrayRef bytearray(reinterpret_cast(bytes.data()), + bytes.size()); +#endif + while (current_address < end - base_address) { uint64_t inst_size; @@ -449,10 +710,24 @@ void LLVMDisassembler::printEachInstruction(uint64_t start, uint64_t end, llvm::raw_string_ostream s(buf); if(llvm::MCDisassembler::Success == - DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) { +#if defined(LLVM_35) + DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) { +#elif defined(LLVM_36) + DisAsm->getInstruction(inst, inst_size, + bytearray.slice(current_address), + base_address + current_address, + nulls(), nulls())) { +#endif uint8_t bytes[inst_size+2]; +#if defined(LLVM_35) ref.readBytes(current_address, inst_size, bytes); +#elif defined(LLVM_36) + size_t bytesindex(0); + for (uint8_t byte : bytearray.slice(current_address, inst_size)) { + bytes[bytesindex++] = byte; + } +#endif uint64_t jmptarget; std::string ref(""); @@ -479,3 +754,13 @@ void LLVMDisassembler::printEachInstruction(uint64_t start, uint64_t end, current_address += inst_size; } } + +template +SectionRef LLVMDisassembler::getTextSection() { + return sections[".text"]; +} + +template <> +SectionRef LLVMDisassembler::getTextSection() { + return sections["__text"]; +}