X-Git-Url: https://git.siccegge.de//index.cgi?p=frida%2Ffrida.git;a=blobdiff_plain;f=src%2Fdisassembler%2Fllvm%2FLLVMDisassembler.cxx;h=76b313de3bf996d0d355bc69e9b004c47d476efb;hp=3494b79bd8a370814d35eba28fec53db0e1543d6;hb=9244195b13b1673757b4bc77d6306e7b2f21244a;hpb=c55f9e2b5f5ff83817ab5187eb0122600a4c5cea diff --git a/src/disassembler/llvm/LLVMDisassembler.cxx b/src/disassembler/llvm/LLVMDisassembler.cxx index 3494b79..76b313d 100644 --- a/src/disassembler/llvm/LLVMDisassembler.cxx +++ b/src/disassembler/llvm/LLVMDisassembler.cxx @@ -110,35 +110,43 @@ LLVMDisassembler::LLVMDisassembler(const std::string& filename) disassemble(); } +LLVMDisassembler::~LLVMDisassembler() { + std::for_each(functions.begin(), functions.end(), + [](std::pair it) { + delete it.second; + }); + std::for_each(blocks.begin(), blocks.end(), + [](std::pair it) { + delete it.second; + }); +} void LLVMDisassembler::disassemble() { std::stack remaining_functions; std::stack remaining_blocks; SectionRef text_section = sections[".text"]; - for (auto x = symbols.begin(); x != symbols.end(); ++x) { - uint64_t result; - bool contains; - SymbolRef::Type symbol_type; + // Assume all function symbols actually start a real function + for (auto x = symbols.begin(); x != symbols.end(); ++x) { + uint64_t result; + bool contains; + SymbolRef::Type symbol_type; -/* - * TODO: If we jump into some Basic Block we need to split it there into two - */ - if (text_section.containsSymbol(x->second, contains) || !contains) - continue; + if (text_section.containsSymbol(x->second, contains) || !contains) + continue; - if (x->second.getType(symbol_type) - || SymbolRef::ST_Function != symbol_type) - continue; + if (x->second.getType(symbol_type) + || SymbolRef::ST_Function != symbol_type) + continue; - if (!x->second.getAddress(result)) { - LLVMFunction * fun = new LLVMFunction(x->first, result); - remaining_functions.push(fun); - functions.insert(std::make_pair(result, fun)); - LOG4CXX_DEBUG(logger, "Disasembling " << x->first); - } - } + if (!x->second.getAddress(result)) { + LLVMFunction * fun = new LLVMFunction(x->first, result); + remaining_functions.push(fun); + functions.insert(std::make_pair(result, fun)); + LOG4CXX_DEBUG(logger, "Disasembling " << x->first); + } + } StringRef bytes; text_section.getContents(bytes); @@ -150,12 +158,9 @@ void LLVMDisassembler::disassemble() { LOG4CXX_DEBUG(logger, "Handling function " << current_function->getName()); - // if ("_start" != current_function->getName()) - // continue; - - LLVMBasicBlock * block = new LLVMBasicBlock(current_function->getStartAddress()); + LLVMBasicBlock * block = new LLVMBasicBlock(current_function->getStartAddress(), this); remaining_blocks.push(block); - blocks.insert(std::make_pair(block->getStartAddress(), block)); + blocks.insert(std::make_pair(block->getStartAddress(), block)); while (remaining_blocks.size()) { LLVMBasicBlock * current_block = remaining_blocks.top(); @@ -175,43 +180,33 @@ void LLVMDisassembler::disassemble() { if(llvm::MCDisassembler::Success == DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) { - uint8_t bytes[inst_size+2]; - ref.readBytes(current_address, inst_size, bytes); - s << '\t'; - for(uint8_t* cur = bytes; cur < bytes + inst_size; ++cur) { - s.write_hex(*cur); - s << ' '; - } - s << '\t'; - - IP->printInst(&inst, s, ""); - - LOG4CXX_DEBUG(logger, std::hex << current_address + base_address << s.str()); - uint64_t jmptarget; if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) { - jmptarget += base_address; + jmptarget += base_address; if (!MIA->isIndirectBranch(inst)) { if (MIA->isCall(inst)) { - if (functions.find(jmptarget) == functions.end()) { - std::stringstream s; - s << ""; - LLVMFunction * fun = new LLVMFunction(s.str(), jmptarget); - functions.insert(std::make_pair(jmptarget, fun)); - remaining_functions.push(fun); - } + if (functions.find(jmptarget) == functions.end()) { + std::stringstream s; + s << ""; + LLVMFunction * fun = new LLVMFunction(s.str(), jmptarget); + functions.insert(std::make_pair(jmptarget, fun)); + remaining_functions.push(fun); + } } else { - if (blocks.find(jmptarget) == blocks.end()) { - LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget); - blocks.insert(std::make_pair(block->getStartAddress(), block)); - remaining_blocks.push(block); - } + current_block->setNextBlock(0, jmptarget); + if (blocks.find(jmptarget) == blocks.end()) { + LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this); + blocks.insert(std::make_pair(block->getStartAddress(), block)); + remaining_blocks.push(block); + } if (MIA->isConditionalBranch(inst)) { - jmptarget = base_address + current_address + inst_size; - if (blocks.find(jmptarget) == blocks.end()) { - LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget); - blocks.insert(std::make_pair(block->getStartAddress(), block)); - remaining_blocks.push(new LLVMBasicBlock(jmptarget)); + jmptarget = base_address + current_address + inst_size; + current_block->setNextBlock(1, jmptarget); + if (blocks.find(jmptarget) == blocks.end()) { + LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this); + blocks.insert(std::make_pair(block->getStartAddress(), block)); + remaining_blocks.push(block); + } } } } @@ -222,10 +217,9 @@ void LLVMDisassembler::disassemble() { if (inst_size == 0 || MIA->isTerminator(inst) || MIA->isBranch(inst)) { - current_block->setEndAddress(current_address + base_address); - LOG4CXX_DEBUG(logger, "Finished Block at " << std::hex << - current_block->getEndAddress()); - } + current_block->setEndAddress(current_address + base_address + inst_size); + LOG4CXX_DEBUG(logger, "Finished Block at " << std::hex << + current_block->getEndAddress()); break; } current_address += inst_size; @@ -233,6 +227,42 @@ void LLVMDisassembler::disassemble() { } LOG4CXX_DEBUG(logger, "Finished function " << current_function->getName()); } + + // Split blocks where jumps are going inside the block + for (auto it = blocks.begin(); it != blocks.end(); ++it) { + LLVMBasicBlock * current_block = it->second; + uint64_t inst_size; + uint64_t base_address; + text_section.getAddress(base_address); + uint64_t current_address = current_block->getStartAddress() - base_address; + while(current_block->getEndAddress() - base_address > current_address) { + MCInst inst; + std::string buf; + llvm::raw_string_ostream s(buf); + + if(llvm::MCDisassembler::Success == + DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) { + auto other = blocks.find(current_address + inst_size + base_address); + + if (other != blocks.end()) { + uint64_t endaddress = current_address + inst_size + base_address; + if (endaddress != current_block->getEndAddress()) { + LOG4CXX_DEBUG(logger, "Shortening block starting at " + << std::hex + << current_block->getStartAddress() + << " now ending at " + << other->first); + current_block->setEndAddress(endaddress); + current_block->setNextBlock(0, other->first); + current_block->setNextBlock(1, 0); + } + } + } else { + inst_size = 1; + } + current_address += inst_size; + } + } } void LLVMDisassembler::readSymbols() { @@ -265,14 +295,50 @@ void LLVMDisassembler::readSections() { } void LLVMDisassembler::forEachFunction(std::function callback) { - std::for_each(functions.begin(), functions.end(), - [&](std::pair x) { - callback(x.first, x.second); - }); + std::for_each(functions.begin(), functions.end(), + [&](std::pair x) { + callback(x.first, x.second); + }); } +void LLVMDisassembler::printEachInstruction(uint64_t start, uint64_t end, + std::function fun) { + SectionRef text_section = sections[".text"]; + uint64_t base_address; + text_section.getAddress(base_address); + uint64_t current_address = start - base_address; + StringRef bytes; + text_section.getContents(bytes); + StringRefMemoryObject ref(bytes); -void LLVMDisassembler::generateControlFlowGraph(uint64_t address) { + while (current_address < end - base_address) { + uint64_t inst_size; + MCInst inst; + std::string buf; + llvm::raw_string_ostream s(buf); + + if(llvm::MCDisassembler::Success == + DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) { + + uint8_t bytes[inst_size+2]; + ref.readBytes(current_address, inst_size, bytes); + + uint64_t jmptarget; + if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) { + std::stringstream stream; + stream << std::hex << (base_address + jmptarget); + IP->printInst(&inst, s, stream.str()); + } else + IP->printInst(&inst, s, ""); + + fun(bytes, inst_size, s.str()); + } else { + LOG4CXX_WARN(logger, "Invalid byte at" << std::hex << current_address + base_address); + fun(NULL, 0, "Invalid Byte"); + inst_size = 1; + } + current_address += inst_size; + } }