* ist sondern z.B. einfach nur Instruktionen oder ein Bootsektor oder
* foo
*/
-
LLVMDisassembler::LLVMDisassembler(const std::string& filename)
: Disassembler(filename)
, logger(log4cxx::Logger::getLogger("LLVMDisassembler"))
disassemble();
}
+LLVMDisassembler::~LLVMDisassembler() { // Destructor: deletes every object pointed to by the entries of `functions` and `blocks`.
+ std::for_each(functions.begin(), functions.end(),
+ [](std::pair<uint64_t,LLVMFunction*> it) { // the entry is taken by value; only its pointer member is used
+ delete it.second; // free the LLVMFunction stored in this entry
+ });
+ std::for_each(blocks.begin(), blocks.end(),
+ [](std::pair<uint64_t, LLVMBasicBlock*> it) { // same pattern as above, for the basic blocks
+ delete it.second; // free the LLVMBasicBlock stored in this entry
+ });
+}
void LLVMDisassembler::disassemble() { // Collects function symbols of ".text", follows direct branches to create basic blocks, then shortens blocks that run into another block's start.
 std::stack<LLVMFunction*> remaining_functions;
 std::stack<LLVMBasicBlock*> remaining_blocks;
 SectionRef text_section = sections[".text"];
- std::for_each(symbols.begin(), symbols.end(),
- [&](std::pair<const std::string, SymbolRef> x) {
- uint64_t result;
- bool contains;
- SymbolRef::Type symbol_type;
+ for (auto x = symbols.begin(); x != symbols.end(); ++x) { // seed the work list from the symbol table
+ uint64_t result;
+ bool contains;
+ SymbolRef::Type symbol_type;
- if (text_section.containsSymbol(x.second, contains) || !contains)
- return;
- if (x.second.getType(symbol_type)
- || SymbolRef::ST_Function != symbol_type)
- return;
+ if (text_section.containsSymbol(x->second, contains) || !contains) // skip when the query returns a truthy value or the symbol is not inside .text
+ continue;
- if (!x.second.getAddress(result)) {
- remaining_functions.push(new LLVMFunction(x.first, result));
- LOG4CXX_DEBUG(logger, "Disasembling " << x.first);
- }
- });
+ if (x->second.getType(symbol_type) // skip when the type query returns a truthy value ...
+ || SymbolRef::ST_Function != symbol_type) // ... or the symbol is not of type ST_Function
+ continue;
+
+ if (!x->second.getAddress(result)) { // proceed only when getAddress returns a falsy value; `result` then holds the address used below
+ LLVMFunction * fun = new LLVMFunction(x->first, result);
+ remaining_functions.push(fun); // queue the function for processing
+ functions.insert(std::make_pair(result, fun)); // and record it in `functions`, keyed by its address
+ LOG4CXX_DEBUG(logger, "Disasembling " << x->first);
+ }
+ }
 StringRef bytes;
 text_section.getContents(bytes);
 // if ("_start" != current_function->getName())
 // continue;
- remaining_blocks.push(new LLVMBasicBlock(current_function->getStartAddress()));
+ LLVMBasicBlock * block = new LLVMBasicBlock(current_function->getStartAddress(), this); // first block starts at the function's start address
+ remaining_blocks.push(block);
+ blocks.insert(std::make_pair(block->getStartAddress(), block)); // blocks are now recorded at creation time, keyed by start address
 while (remaining_blocks.size()) {
 LLVMBasicBlock * current_block = remaining_blocks.top();
 uint64_t jmptarget;
 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
- jmptarget += base_address;
+ jmptarget += base_address; // jmptarget is offset by base_address before it is used as a map key
 if (!MIA->isIndirectBranch(inst)) {
 if (MIA->isCall(inst)) {
- if (blocks.find(jmptarget) == blocks.end())
- remaining_functions.push(new LLVMFunction("<Unnamed>", jmptarget));
+ if (functions.find(jmptarget) == functions.end()) { // call target not yet recorded as a function
+ std::stringstream s;
+ s << "<Unnamed 0x" << std::hex << jmptarget << ">"; // generated name embeds the target address in hex
+ LLVMFunction * fun = new LLVMFunction(s.str(), jmptarget);
+ functions.insert(std::make_pair(jmptarget, fun));
+ remaining_functions.push(fun);
+ }
 } else {
- if (blocks.find(jmptarget) == blocks.end())
- remaining_blocks.push(new LLVMBasicBlock(jmptarget));
+ if (blocks.find(jmptarget) == blocks.end()) { // jump target has no block yet
+ LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this);
+ blocks.insert(std::make_pair(block->getStartAddress(), block));
+ current_block->setNextBlock(0, block->getStartAddress()); // NOTE(review): successor 0 is set only when the target block is newly created here - confirm this is intended
+ remaining_blocks.push(block);
+ }
 if (MIA->isConditionalBranch(inst)) {
- jmptarget = base_address + current_address + inst_size;
- if (blocks.find(jmptarget) == blocks.end())
- remaining_blocks.push(new LLVMBasicBlock(jmptarget));
+ jmptarget = base_address + current_address + inst_size; // address of the instruction following the conditional branch
+ if (blocks.find(jmptarget) == blocks.end()) {
+ LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this);
+ blocks.insert(std::make_pair(block->getStartAddress(), block));
+ current_block->setNextBlock(1, block->getStartAddress()); // NOTE(review): as above, successor 1 is set only for a newly created block
+ remaining_blocks.push(block);
+ }
 }
 }
 }
 if (inst_size == 0 || MIA->isTerminator(inst) || MIA->isBranch(inst)) {
- current_block->setEndAddress(current_address + base_address);
- blocks.insert(std::make_pair(current_block->getStartAddress(), current_block));
- LOG4CXX_DEBUG(logger, "Finished Block at " << current_block->getEndAddress());
+ current_block->setEndAddress(current_address + base_address + inst_size); // end address now includes the size of the final instruction
+ LOG4CXX_DEBUG(logger, "Finished Block at " << std::hex <<
+ current_block->getEndAddress());
 break;
 }
 current_address += inst_size;
 }
 LOG4CXX_DEBUG(logger, "Finished function " << current_function->getName());
 }
+
+ // Split blocks where jumps are going inside the block
+ for (auto it = blocks.begin(); it != blocks.end(); ++it) { // second pass over every recorded block; only block fields are changed, no entries are added
+ LLVMBasicBlock * current_block = it->second;
+ uint64_t inst_size;
+ uint64_t base_address;
+ text_section.getAddress(base_address);
+ uint64_t current_address = current_block->getStartAddress() - base_address; // section-relative offset of the block start
+ while(current_block->getEndAddress() - base_address != current_address) { // NOTE(review): exits only on exact equality with the end offset; confirm decoding can never step past it
+ MCInst inst;
+ std::string buf; // NOTE(review): `buf` and `s` are not used in the visible part of this loop
+ llvm::raw_string_ostream s(buf);
+
+ if(llvm::MCDisassembler::Success ==
+ DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
+ auto other = blocks.find(current_address + inst_size + base_address); // is the address right after this instruction the start of a recorded block?
+
+ if (other != blocks.end()) {
+ LOG4CXX_DEBUG(logger, "Shortening block starting at "
+ << std::hex
+ << current_block->getStartAddress()
+ << " now ending at "
+ << other->first);
+ current_block->setEndAddress(current_address + inst_size + base_address); // block now ends where the other block starts, so the loop condition is met after the increment below
+ current_block->setNextBlock(0, other->first); // successor 0 becomes the block that follows
+ current_block->setNextBlock(1, 0); // successor 1 is set to 0 (presumably meaning "none" - confirm)
+ }
+ } else {
+ inst_size = 1; // decoding failed: advance by one byte
+ }
+ current_address += inst_size;
+ }
+ }
 }
void LLVMDisassembler::readSymbols() {
}
-BasicBlock * LLVMDisassembler::generateControlFlowGraph(uint64_t address) {
+void LLVMDisassembler::forEachFunction(std::function<void (uint64_t, Function*)> callback) { // Invokes `callback` once for every entry of `functions`.
+ std::for_each(functions.begin(), functions.end(),
+ [&](std::pair<uint64_t, LLVMFunction*> x) { // the entry is taken by value; `callback` is captured by reference
+ callback(x.first, x.second); // arguments: the entry's key and its LLVMFunction pointer (passed as Function*)
+ });
+}
+
+void LLVMDisassembler::printEachInstruction(uint64_t start, uint64_t end, // Decodes ".text" from `start` up to (not including) `end` and reports each step through `fun`.
+ std::function<void (uint8_t*, size_t, const std::string&)> fun) { // `fun` receives: instruction bytes, their count, and the printed instruction text
+ SectionRef text_section = sections[".text"];
+ uint64_t base_address;
+ text_section.getAddress(base_address);
+ uint64_t current_address = start - base_address; // section-relative offset; NOTE(review): unsigned, wraps if start < base_address
+
+ StringRef bytes;
+ text_section.getContents(bytes);
+ StringRefMemoryObject ref(bytes); // memory object over the section contents, used for decoding and for reading raw bytes
+
+ while (current_address < end - base_address) { // NOTE(review): `end - base_address` is unsigned and wraps if end < base_address
+ uint64_t inst_size;
+ MCInst inst;
+ std::string buf;
+ llvm::raw_string_ostream s(buf); // collects the text produced by printInst below
+
+ if(llvm::MCDisassembler::Success ==
+ DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
+ uint8_t bytes[inst_size+2]; // NOTE(review): runtime-sized array is a compiler extension, not standard C++; it also shadows the outer `bytes`
+ ref.readBytes(current_address, inst_size, bytes); // copy the instruction's bytes into the local buffer
+
+ IP->printInst(&inst, s, "");
+ fun(bytes, inst_size, s.str()); // the buffer is local to this iteration, so the pointer is only valid during the call
+ } else {
+ fun(NULL, 0, "Invalid Byte"); // decode failure: report a null buffer of size 0
+ inst_size = 1; // and advance by a single byte
+ }
+
+ current_address += inst_size;
+ }
 }