X-Git-Url: https://git.siccegge.de//index.cgi?p=frida%2Ffrida.git;a=blobdiff_plain;f=src%2Fdisassembler%2Fllvm%2FLLVMDisassembler.cxx;h=b40cdd4a3cd81f59dc111515ca79d169acf8fd37;hp=c73e703d29c33e4f803bdb6bd926aa4892741a3d;hb=c79a955e4fe1cad85b526bdc4af86d1226f99329;hpb=9252262693432b33dbe4ffc60d79bbdc6fbb5f66 diff --git a/src/disassembler/llvm/LLVMDisassembler.cxx b/src/disassembler/llvm/LLVMDisassembler.cxx index c73e703..b40cdd4 100644 --- a/src/disassembler/llvm/LLVMDisassembler.cxx +++ b/src/disassembler/llvm/LLVMDisassembler.cxx @@ -1,39 +1,77 @@ #include "disassembler/llvm/LLVMDisassembler.hxx" -#include "disassembler/llvm/LLVMBasicBlock.hxx" -#include "disassembler/llvm/LLVMFunction.hxx" +#include "core/InformationManager.hxx" +#include "core/Function.hxx" +#include "core/BasicBlock.hxx" #include #include +#include using namespace llvm; using namespace llvm::object; using std::error_code; +/* + * + */ +Disassembler * createLLVMDisassembler(const std::string& filename, InformationManager* manager) { + if (filename == "") + return NULL; + + std::unique_ptr o; + o.reset(createBinary(filename).get()); + Binary * op = o.release(); + + // ELFType + if (ELF32LEObjectFile * object = dyn_cast(op)) { + return new LLVMDisassembler>(filename, manager, object); + } + if (ELF64LEObjectFile * object = dyn_cast(op)) { + return new LLVMDisassembler>(filename, manager, object); + } + if (ELF32BEObjectFile * object = dyn_cast(op)) { + return new LLVMDisassembler>(filename, manager, object); + } + if (ELF64BEObjectFile * object = dyn_cast(op)) { + return new LLVMDisassembler>(filename, manager, object); + } + + return NULL; +} + /* * TODO: fallback code falls die Datei kein ELF/PE/COFF/MacO/.. binary * ist sondern z.B. einfach nur Instruktionen oder ein Bootsektor oder * foo */ -LLVMDisassembler::LLVMDisassembler(const std::string& filename, - InformationManager* manager) - : Disassembler(filename, manager) +template +LLVMDisassembler::LLVMDisassembler(const std::string& filename, + InformationManager* manager, + ELFObjectFile* file) + : Disassembler() , logger(log4cxx::Logger::getLogger("LLVMDisassembler")) , triple("unknown-unknown-unknown") , manager(manager) { LOG4CXX_DEBUG(logger, "Handling file" << filename); - auto result = createBinary(filename); - error_code ec; - if ((ec = result.getError())) { - LOG4CXX_ERROR(logger, "Failed to load Binary" << ec.message()); - binary = NULL; - return; - } + if (!file) { + auto result = createBinary(filename); - binary.reset(result.get()); + error_code ec; + if ((ec = result.getError())) { + LOG4CXX_ERROR(logger, "Failed to load Binary" << ec.message()); + binary = NULL; + return; + } - o = dyn_cast(binary.get()); + binary.reset(result.get()); + + o = dyn_cast>(binary.get()); + } else { + o = file; + binary.reset(file); + } triple.setArch(Triple::ArchType(o->getArch())); std::string tripleName(triple.getTriple()); @@ -113,26 +151,23 @@ LLVMDisassembler::LLVMDisassembler(const std::string& filename, std::unique_ptr OD( new MCObjectDisassembler(*o, *DisAsm, *MIA)); Mod.reset(OD->buildModule(false)); + + readSections(); } -void LLVMDisassembler::start() { +template +void LLVMDisassembler::start() { readSymbols(); - readSections(); disassemble(); + readDynamicSymbols(); } -LLVMDisassembler::~LLVMDisassembler() { - std::for_each(functions.begin(), functions.end(), - [](std::pair it) { - delete it.second; - }); - std::for_each(blocks.begin(), blocks.end(), - [](std::pair it) { - delete it.second; - }); -} +template +LLVMDisassembler::~LLVMDisassembler() {} -Function* LLVMDisassembler::disassembleFunctionAt(uint64_t address, const std::string& name) { +template +Function* LLVMDisassembler::disassembleFunctionAt(uint64_t address, const std::string& name) { + Function * function; SectionRef text_section = sections[".text"]; uint64_t base_address, size; text_section.getAddress(base_address); @@ -143,27 +178,32 @@ Function* LLVMDisassembler::disassembleFunctionAt(uint64_t address, const std::s return NULL; } - if (functions.find(address) != functions.end()) { - return functions[address]; - } + if (NULL == (function = manager->getFunction(address))) { - LLVMFunction * function; - if (name == "") { - std::stringstream s; - s << ""; - function = new LLVMFunction(s.str(), address); - } else { - function = new LLVMFunction(name, address); + if (name == "") { + std::stringstream s; + s << ""; + function = manager->newFunction(address); + function->setName(s.str()); + } else { + function = manager->newFunction(address); + function->setName(name); + } + disassembleFunction(function); + manager->finishFunction(function); } - functions.insert(std::make_pair(address, function)); - - disassembleFunction(function); return function; } -void LLVMDisassembler::disassembleFunction(LLVMFunction* function) { - std::stack remaining_blocks; +template +void LLVMDisassembler::disassembleFunction(Function* function) { + std::stack remaining_blocks; + /* TODO: + * Do all blocks get added properly? We should take care to remove + * the other ones at the end of the function! + */ + std::map new_blocks; SectionRef text_section = sections[".text"]; StringRef bytes; text_section.getContents(bytes); @@ -171,16 +211,17 @@ void LLVMDisassembler::disassembleFunction(LLVMFunction* function) { LOG4CXX_DEBUG(logger, "Handling function " << function->getName()); - LLVMBasicBlock * block = new LLVMBasicBlock(function->getStartAddress(), this); + BasicBlock * block = manager->newBasicBlock(function->getStartAddress()); remaining_blocks.push(block); - blocks.insert(std::make_pair(block->getStartAddress(), block)); + new_blocks.insert(std::make_pair(block->getStartAddress(), block)); function->addBasicBlock(block); while (remaining_blocks.size()) { - LLVMBasicBlock * current_block = remaining_blocks.top(); + BasicBlock * current_block = remaining_blocks.top(); remaining_blocks.pop(); - LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex << current_block->getStartAddress()); + LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex + << current_block->getStartAddress()); uint64_t inst_size; uint64_t base_address; @@ -199,25 +240,34 @@ void LLVMDisassembler::disassembleFunction(LLVMFunction* function) { jmptarget += base_address; if (!MIA->isIndirectBranch(inst)) { if (MIA->isCall(inst)) { - if (functions.find(jmptarget) == functions.end()) { + if (NULL == manager->getFunction(jmptarget)) disassembleFunctionAt(jmptarget); - } } else { current_block->setNextBlock(0, jmptarget); - if (blocks.find(jmptarget) == blocks.end()) { - LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this); - blocks.insert(std::make_pair(block->getStartAddress(), block)); + if (new_blocks.find(jmptarget) == new_blocks.end()) { + BasicBlock * block = manager->newBasicBlock(jmptarget); + assert(block); + new_blocks.insert(std::make_pair(block->getStartAddress(), block)); function->addBasicBlock(block); remaining_blocks.push(block); + } else { + LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex + << current_block->getStartAddress()); + function->addBasicBlock(new_blocks.find(jmptarget)->second); } if (MIA->isConditionalBranch(inst)) { jmptarget = base_address + current_address + inst_size; current_block->setNextBlock(1, jmptarget); - if (blocks.find(jmptarget) == blocks.end()) { - LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this); - blocks.insert(std::make_pair(block->getStartAddress(), block)); + if (new_blocks.find(jmptarget) == new_blocks.end()) { + BasicBlock * block = manager->newBasicBlock(jmptarget); + assert(block); + new_blocks.insert(std::make_pair(block->getStartAddress(), block)); function->addBasicBlock(block); remaining_blocks.push(block); + } else { + LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex + << current_block->getStartAddress()); + function->addBasicBlock(new_blocks.find(jmptarget)->second); } } } @@ -242,9 +292,10 @@ void LLVMDisassembler::disassembleFunction(LLVMFunction* function) { manager->signal_new_function(function); } -void LLVMDisassembler::disassemble() { +template +void LLVMDisassembler::disassemble() { SectionRef text_section = sections[".text"]; - std::vector remaining_functions; + std::vector remaining_functions; // Assume all function symbols actually start a real function for (auto x = symbols.begin(); x != symbols.end(); ++x) { @@ -261,37 +312,31 @@ void LLVMDisassembler::disassemble() { continue; if (!x->second.getAddress(result)) { - LLVMFunction * fun = new LLVMFunction(x->first, result); + Function * fun = manager->newFunction(result); + fun->setName(x->first); remaining_functions.push_back(fun); - functions.insert(std::make_pair(result, fun)); LOG4CXX_DEBUG(logger, "Disasembling " << x->first); } } - for (LLVMFunction* function : remaining_functions) { + for (Function* function : remaining_functions) { disassembleFunction(function); + manager->finishFunction(function); } if (binary->isELF()) { - bool is64bit = (binary->getData()[4] == 0x02); + const ELFO * elffile = o->getELFFile(); + const typename ELFO::Elf_Ehdr * header = elffile->getHeader(); - uint64_t entry(0); - for (int i(0); i < (is64bit? 8 : 4); ++i) { - if (binary->isLittleEndian()) { - entry |= (unsigned int)((unsigned char)binary->getData()[0x18 + i]) << 8*i; - } else { - entry = entry << 8; - entry |= (unsigned char)binary->getData()[0x18 + i]; - } - } - LOG4CXX_DEBUG(logger, "Adding entry at: " << std::hex << entry); + _entryAddress = header->e_entry; + LOG4CXX_DEBUG(logger, "Adding entryAddress at: " << std::hex << _entryAddress); std::stringstream s; - s << "<_start 0x" << std::hex << entry << ">"; + s << "<_start 0x" << std::hex << _entryAddress << ">"; - disassembleFunctionAt(entry, s.str()); + disassembleFunctionAt(_entryAddress, s.str()); } - if (functions.empty()) { + if (!manager->hasFunctions()) { uint64_t text_entry; text_section.getAddress(text_entry); LOG4CXX_INFO(logger, "No Symbols found, starting at the beginning of the text segment"); @@ -299,7 +344,8 @@ void LLVMDisassembler::disassemble() { } } -void LLVMDisassembler::splitBlocks(LLVMFunction* function) { +template +void LLVMDisassembler::splitBlocks(Function* function) { SectionRef text_section = sections[".text"]; StringRef bytes; text_section.getContents(bytes); @@ -322,19 +368,22 @@ void LLVMDisassembler::splitBlocks(LLVMFunction* function) { if(llvm::MCDisassembler::Success == DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) { // See if some other block starts here - auto other = blocks.find(current_address + inst_size + base_address); + BasicBlock* other = manager->getBasicBlock(current_address + + inst_size + + base_address); // Special case, other block starts here but we are at the end anyway - if (other != blocks.end()) { + if (other != NULL) { uint64_t endaddress = current_address + inst_size + base_address; if (endaddress != current_block->getEndAddress()) { LOG4CXX_DEBUG(logger, "Shortening block starting at " << std::hex << current_block->getStartAddress() << " now ending at " - << other->first); + << other->getStartAddress()); + function->addBasicBlock(other); current_block->setEndAddress(endaddress); - current_block->setNextBlock(0, other->first); + current_block->setNextBlock(0, other->getStartAddress()); current_block->setNextBlock(1, 0); } } @@ -346,7 +395,27 @@ void LLVMDisassembler::splitBlocks(LLVMFunction* function) { } } -void LLVMDisassembler::readSymbols() { +template +void LLVMDisassembler::readDynamicSymbols() { + const ELFO * elffile = o->getELFFile(); + for (typename ELFO::Elf_Sym_Iter + it = elffile->begin_dynamic_symbols(), + end = elffile->end_dynamic_symbols(); + it != end; + ++it) { + if (it->getType() == 2) { // Function + bool is_default; + // TODO: Error handling + std::string symbolname = *(elffile->getSymbolName(it)); + std::string symbolversion = *(elffile->getSymbolVersion(nullptr, &*it, is_default)); + manager->signal_new_dyn_symbol(symbolname + (is_default? "@@" : "@") + symbolversion); + LOG4CXX_DEBUG(logger, "Adding dynamic Symbol " << symbolname << (is_default? "@@" : "@") << symbolversion); + } + } +} + +template +void LLVMDisassembler::readSymbols() { error_code ec; symbol_iterator si(o->symbol_begin()), se(o->symbol_end()); for (; si != se; ++si) { @@ -360,7 +429,8 @@ void LLVMDisassembler::readSymbols() { } } -void LLVMDisassembler::readSections() { +template +void LLVMDisassembler::readSections() { error_code ec; section_iterator i(o->section_begin()), e(o->section_end()); for (; i != e; ++i) { @@ -375,16 +445,19 @@ void LLVMDisassembler::readSections() { } -void LLVMDisassembler::forEachFunction(std::function callback) { - std::for_each(functions.begin(), functions.end(), - [&](std::pair x) { - callback(x.first, x.second); - }); -} - -void LLVMDisassembler::printEachInstruction(uint64_t start, uint64_t end, - std::function fun) { +// template +// void LLVMDisassembler::forEachFunction(std::function callback) { +// // std::for_each(functions.begin(), functions.end(), +// // [&](std::pair x) { +// // callback(x.first, x.second); +// // }); +// } + +template +void LLVMDisassembler::printEachInstruction(uint64_t start, uint64_t end, + std::function fun) { SectionRef text_section = sections[".text"]; uint64_t base_address; text_section.getAddress(base_address); @@ -407,17 +480,24 @@ void LLVMDisassembler::printEachInstruction(uint64_t start, uint64_t end, ref.readBytes(current_address, inst_size, bytes); uint64_t jmptarget; + std::string ref(""); + IP->printInst(&inst, s, ""); if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) { std::stringstream stream; + if (MIA->isCall(inst)) + stream << "function:"; + else + stream << "block:"; + stream << std::hex << (base_address + jmptarget); - IP->printInst(&inst, s, stream.str()); - } else - IP->printInst(&inst, s, ""); + ref = stream.str(); + } + - fun(bytes, inst_size, s.str()); + fun(bytes, inst_size, s.str(), ref); } else { LOG4CXX_WARN(logger, "Invalid byte at" << std::hex << current_address + base_address); - fun(NULL, 0, "Invalid Byte"); + fun(NULL, 0, "Invalid Byte", ""); inst_size = 1; }