From 880ba6d085d9a6b70523328b8099679a664b8c0c Mon Sep 17 00:00:00 2001 From: Christoph Egger Date: Sat, 13 Dec 2014 00:40:37 +0100 Subject: [PATCH 1/1] Split disassemble() disassemble() still does take care of initial disassembling of the binary. However parts needed to (interactively) start disassembling further parts are now separated. --- src/disassembler/llvm/LLVMDisassembler.cxx | 267 +++++++++++---------- src/disassembler/llvm/LLVMDisassembler.hxx | 6 +- 2 files changed, 148 insertions(+), 125 deletions(-) diff --git a/src/disassembler/llvm/LLVMDisassembler.cxx b/src/disassembler/llvm/LLVMDisassembler.cxx index 1ae024a..3642c5e 100644 --- a/src/disassembler/llvm/LLVMDisassembler.cxx +++ b/src/disassembler/llvm/LLVMDisassembler.cxx @@ -128,12 +128,106 @@ LLVMDisassembler::~LLVMDisassembler() { }); } -void LLVMDisassembler::disassemble() { - std::stack remaining_functions; +Function* LLVMDisassembler::disassembleFunctionAt(uint64_t address, const std::string& name) { + if (functions.find(address) != functions.end()) { + return functions[address]; + } + + LLVMFunction * function; + if (name == "") { + std::stringstream s; + s << ""; + function = new LLVMFunction(s.str(), address); + } else { + function = new LLVMFunction(name, address); + } + functions.insert(std::make_pair(address, function)); + + disassembleFunction(function); + + return function; +} + +void LLVMDisassembler::disassembleFunction(LLVMFunction* function) { std::stack remaining_blocks; SectionRef text_section = sections[".text"]; + StringRef bytes; + text_section.getContents(bytes); + StringRefMemoryObject ref(bytes); + + LOG4CXX_DEBUG(logger, "Handling function " << function->getName()); + + LLVMBasicBlock * block = new LLVMBasicBlock(function->getStartAddress(), this); + remaining_blocks.push(block); + blocks.insert(std::make_pair(block->getStartAddress(), block)); + + while (remaining_blocks.size()) { + LLVMBasicBlock * current_block = remaining_blocks.top(); + remaining_blocks.pop(); + + LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex << current_block->getStartAddress()); + + uint64_t inst_size; + uint64_t base_address; + text_section.getAddress(base_address); + uint64_t current_address = current_block->getStartAddress() - base_address; + while(true) { + MCInst inst; + std::string buf; + llvm::raw_string_ostream s(buf); + + if(llvm::MCDisassembler::Success == + DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) { + uint64_t jmptarget; + + if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) { + jmptarget += base_address; + if (!MIA->isIndirectBranch(inst)) { + if (MIA->isCall(inst)) { + if (functions.find(jmptarget) == functions.end()) { + disassembleFunctionAt(jmptarget); + } + } else { + current_block->setNextBlock(0, jmptarget); + if (blocks.find(jmptarget) == blocks.end()) { + LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this); + blocks.insert(std::make_pair(block->getStartAddress(), block)); + remaining_blocks.push(block); + } + if (MIA->isConditionalBranch(inst)) { + jmptarget = base_address + current_address + inst_size; + current_block->setNextBlock(1, jmptarget); + if (blocks.find(jmptarget) == blocks.end()) { + LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this); + blocks.insert(std::make_pair(block->getStartAddress(), block)); + remaining_blocks.push(block); + } + } + } + } + } + } else { + inst_size = 0; + } + + + if (inst_size == 0 || MIA->isTerminator(inst) || MIA->isBranch(inst)) { + current_block->setEndAddress(current_address + base_address + inst_size); + LOG4CXX_DEBUG(logger, "Finished Block at " << std::hex << + current_block->getEndAddress()); + break; + } + current_address += inst_size; + } + } + LOG4CXX_DEBUG(logger, "Finished function " << function->getName()); +} + +void LLVMDisassembler::disassemble() { + SectionRef text_section = sections[".text"]; + std::vector remaining_functions; - // Assume all function symbols actually start a real function + // Assume all function symbols actually start a real function for (auto x = symbols.begin(); x != symbols.end(); ++x) { uint64_t result; bool contains; @@ -149,12 +243,16 @@ void LLVMDisassembler::disassemble() { if (!x->second.getAddress(result)) { LLVMFunction * fun = new LLVMFunction(x->first, result); - remaining_functions.push(fun); + remaining_functions.push_back(fun); functions.insert(std::make_pair(result, fun)); LOG4CXX_DEBUG(logger, "Disasembling " << x->first); } } + for (LLVMFunction* function : remaining_functions) { + disassembleFunction(function); + } + if (binary->isELF()) { bool is64bit = (binary->getData()[4] == 0x02); @@ -167,143 +265,64 @@ void LLVMDisassembler::disassemble() { entry |= (unsigned char)binary->getData()[0x18 + i]; } } - if (functions.find(entry) == functions.end()) { - LOG4CXX_DEBUG(logger, "Adding entry at: " << std::hex << entry); - std::stringstream s; - s << "<_start 0x" << std::hex << entry << ">"; - LLVMFunction * fun = new LLVMFunction(s.str(), entry); - functions.insert(std::make_pair(entry, fun)); - remaining_functions.push(fun); - } + LOG4CXX_DEBUG(logger, "Adding entry at: " << std::hex << entry); + std::stringstream s; + s << "<_start 0x" << std::hex << entry << ">"; + + disassembleFunctionAt(entry, s.str()); } if (functions.empty()) { uint64_t text_entry; text_section.getAddress(text_entry); LOG4CXX_INFO(logger, "No Symbols found, starting at the beginning of the text segment"); - - std::stringstream s; - s << ""; - LLVMFunction * fun = new LLVMFunction(s.str(), text_entry); - functions.insert(std::make_pair(text_entry, fun)); - remaining_functions.push(fun); + disassembleFunctionAt(text_entry); } + splitBlocks(); +} + +void LLVMDisassembler::splitBlocks() { + SectionRef text_section = sections[".text"]; StringRef bytes; text_section.getContents(bytes); StringRefMemoryObject ref(bytes); - while (remaining_functions.size()) { - LLVMFunction * current_function = remaining_functions.top(); - remaining_functions.pop(); - - LOG4CXX_DEBUG(logger, "Handling function " << current_function->getName()); - - LLVMBasicBlock * block = new LLVMBasicBlock(current_function->getStartAddress(), this); - remaining_blocks.push(block); - blocks.insert(std::make_pair(block->getStartAddress(), block)); - - while (remaining_blocks.size()) { - LLVMBasicBlock * current_block = remaining_blocks.top(); - remaining_blocks.pop(); - - LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex << current_block->getStartAddress()); - - uint64_t inst_size; - uint64_t base_address; - text_section.getAddress(base_address); - uint64_t current_address = current_block->getStartAddress() - base_address; - while(true) { - MCInst inst; - std::string buf; - llvm::raw_string_ostream s(buf); - - if(llvm::MCDisassembler::Success == - DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) { - - uint64_t jmptarget; - if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) { - jmptarget += base_address; - if (!MIA->isIndirectBranch(inst)) { - if (MIA->isCall(inst)) { - if (functions.find(jmptarget) == functions.end()) { - std::stringstream s; - s << ""; - LLVMFunction * fun = new LLVMFunction(s.str(), jmptarget); - functions.insert(std::make_pair(jmptarget, fun)); - remaining_functions.push(fun); - } - } else { - current_block->setNextBlock(0, jmptarget); - if (blocks.find(jmptarget) == blocks.end()) { - LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this); - blocks.insert(std::make_pair(block->getStartAddress(), block)); - remaining_blocks.push(block); - } - if (MIA->isConditionalBranch(inst)) { - jmptarget = base_address + current_address + inst_size; - current_block->setNextBlock(1, jmptarget); - if (blocks.find(jmptarget) == blocks.end()) { - LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this); - blocks.insert(std::make_pair(block->getStartAddress(), block)); - remaining_blocks.push(block); - } - } - } - } + // Split blocks where jumps are going inside the block + for (auto it = blocks.begin(); it != blocks.end(); ++it) { + LLVMBasicBlock * current_block = it->second; + uint64_t inst_size; + uint64_t base_address; + text_section.getAddress(base_address); + uint64_t current_address = current_block->getStartAddress() - base_address; + while(current_block->getEndAddress() - base_address > current_address) { + MCInst inst; + std::string buf; + llvm::raw_string_ostream s(buf); + + if(llvm::MCDisassembler::Success == + DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) { + auto other = blocks.find(current_address + inst_size + base_address); + + if (other != blocks.end()) { + uint64_t endaddress = current_address + inst_size + base_address; + if (endaddress != current_block->getEndAddress()) { + LOG4CXX_DEBUG(logger, "Shortening block starting at " + << std::hex + << current_block->getStartAddress() + << " now ending at " + << other->first); + current_block->setEndAddress(endaddress); + current_block->setNextBlock(0, other->first); + current_block->setNextBlock(1, 0); } - } else { - inst_size = 0; - } - - - if (inst_size == 0 || MIA->isTerminator(inst) || MIA->isBranch(inst)) { - current_block->setEndAddress(current_address + base_address + inst_size); - LOG4CXX_DEBUG(logger, "Finished Block at " << std::hex << - current_block->getEndAddress()); - break; } - current_address += inst_size; + } else { + inst_size = 1; } + current_address += inst_size; } - LOG4CXX_DEBUG(logger, "Finished function " << current_function->getName()); } - - // Split blocks where jumps are going inside the block - for (auto it = blocks.begin(); it != blocks.end(); ++it) { - LLVMBasicBlock * current_block = it->second; - uint64_t inst_size; - uint64_t base_address; - text_section.getAddress(base_address); - uint64_t current_address = current_block->getStartAddress() - base_address; - while(current_block->getEndAddress() - base_address > current_address) { - MCInst inst; - std::string buf; - llvm::raw_string_ostream s(buf); - - if(llvm::MCDisassembler::Success == - DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) { - auto other = blocks.find(current_address + inst_size + base_address); - - if (other != blocks.end()) { - uint64_t endaddress = current_address + inst_size + base_address; - if (endaddress != current_block->getEndAddress()) { - LOG4CXX_DEBUG(logger, "Shortening block starting at " - << std::hex - << current_block->getStartAddress() - << " now ending at " - << other->first); - current_block->setEndAddress(endaddress); - current_block->setNextBlock(0, other->first); - current_block->setNextBlock(1, 0); - } - } - } else { - inst_size = 1; - } - current_address += inst_size; - } - } } void LLVMDisassembler::readSymbols() { diff --git a/src/disassembler/llvm/LLVMDisassembler.hxx b/src/disassembler/llvm/LLVMDisassembler.hxx index 6ed3c8c..f2baecb 100644 --- a/src/disassembler/llvm/LLVMDisassembler.hxx +++ b/src/disassembler/llvm/LLVMDisassembler.hxx @@ -31,13 +31,17 @@ public: return blocks[address]; } + Function * disassembleFunctionAt(uint64_t address, const std::string& name = ""); + protected: bool isFunctionCall(uint64_t address) {return false;} bool isJump(uint64_t address) {return false;} private: // http://llvm.org/docs/doxygen/html/MCObjectDisassembler_8cpp_source.html +197 - void disassemble(); + void disassembleFunction(LLVMFunction* function); + void splitBlocks(); + void disassemble(); void readSymbols(); void readSections(); -- 2.39.2