+#include "disassembler/Instruction.hxx"
#include "disassembler/llvm/LLVMDisassembler.hxx"
-#include "disassembler/llvm/LLVMBasicBlock.hxx"
-#include "disassembler/llvm/LLVMFunction.hxx"
+#include "core/InformationManager.hxx"
+#include "core/Function.hxx"
+#include "core/BasicBlock.hxx"
+#include <boost/algorithm/string.hpp>
#include <stack>
#include <algorithm>
+#include <cassert>
using namespace llvm;
using namespace llvm::object;
using std::error_code;
+namespace { // file-local tag types; both classes are empty and only serve as template arguments
+ class COFFT { // template argument of LLVMDisassembler used for COFF object files
+
+ };
+
+ class MACHOT { // template argument of LLVMDisassembler used for Mach-O object files
+
+ };
+}
+
/*
*
*/
Disassembler * createLLVMDisassembler(const std::string& filename, InformationManager* manager) {
- std::unique_ptr<Binary> o;
- o.reset(createBinary(filename).get());
- const Binary * op = o.get();
+ log4cxx::LoggerPtr logger(log4cxx::Logger::getLogger("disassembler.LLVMDisassembler"));
+ if (filename == "")
+ return NULL;
+
+ auto retval = createBinary(filename);
+ if (error_code ec = retval.getError()) {
+ LOG4CXX_ERROR(logger, ec.message());
+ return NULL;
+ }
+
+ Binary * op = retval.get();
+
+ if (!op) {
+ LOG4CXX_ERROR(logger, "Could not open " << filename);
+ return NULL;
+ }
// ELFType<endian, maxalign, 64bit>
- if (const ELF32LEObjectFile * _ = dyn_cast<ELF32LEObjectFile>(op)) {
-#pragma unused(_)
- return new LLVMDisassembler<ELFType<support::little, 2, false>>(filename, manager);
+ if (ELF32LEObjectFile * object = dyn_cast<ELF32LEObjectFile>(op)) {
+ return new LLVMDisassembler<ELFType<support::little, 2, false>>(filename, manager, object);
+ }
+ if (ELF64LEObjectFile * object = dyn_cast<ELF64LEObjectFile>(op)) {
+ return new LLVMDisassembler<ELFType<support::little, 2, true>>(filename, manager, object);
+ }
+ if (ELF32BEObjectFile * object = dyn_cast<ELF32BEObjectFile>(op)) {
+ return new LLVMDisassembler<ELFType<support::big, 2, false>>(filename, manager, object);
}
- if (const ELF64LEObjectFile * _ = dyn_cast<ELF64LEObjectFile>(op)) {
-#pragma unused(_)
- return new LLVMDisassembler<ELFType<support::little, 2, true>>(filename, manager);
+ if (ELF64BEObjectFile * object = dyn_cast<ELF64BEObjectFile>(op)) {
+ return new LLVMDisassembler<ELFType<support::big, 2, true>>(filename, manager, object);
}
- if (const ELF32BEObjectFile * _ = dyn_cast<ELF32BEObjectFile>(op)) {
-#pragma unused(_)
- return new LLVMDisassembler<ELFType<support::big, 2, false>>(filename, manager);
+ if (COFFObjectFile * object = dyn_cast<COFFObjectFile>(op)) {
+ return new LLVMDisassembler<COFFT>(filename, manager, object);
}
- if (const ELF64BEObjectFile * _ = dyn_cast<ELF64BEObjectFile>(op)) {
-#pragma unused(_)
- return new LLVMDisassembler<ELFType<support::big, 2, true>>(filename, manager);
+ if (MachOObjectFile * object = dyn_cast<MachOObjectFile>(op)) {
+ return new LLVMDisassembler<MACHOT>(filename, manager, object);
}
return NULL;
*/
template <typename ELFT>
LLVMDisassembler<ELFT>::LLVMDisassembler(const std::string& filename,
- InformationManager* manager)
- : Disassembler(filename, manager)
- , logger(log4cxx::Logger::getLogger("LLVMDisassembler"))
+ InformationManager* manager,
+ ObjectFile* file)
+ : Disassembler()
+ , logger(log4cxx::Logger::getLogger("disassembler.LLVMDisassembler"))
, triple("unknown-unknown-unknown")
, manager(manager)
{
- LOG4CXX_DEBUG(logger, "Handling file" << filename);
- auto result = createBinary(filename);
+ LOG4CXX_DEBUG(logger, "Handling file " << filename);
- error_code ec;
- if ((ec = result.getError())) {
- LOG4CXX_ERROR(logger, "Failed to load Binary" << ec.message());
- binary = NULL;
- return;
- }
+ if (!file) {
+ auto result = createBinary(filename);
- binary.reset(result.get());
+ error_code ec;
+ if ((ec = result.getError())) {
+ LOG4CXX_ERROR(logger, "Failed to load Binary" << ec.message());
+ binary = NULL;
+ return;
+ }
- o = dyn_cast<ObjectFile>(binary.get());
+ binary.reset(result.get());
+
+ o = dyn_cast<ObjectFile>(binary.get());
+ } else {
+ o = file;
+ binary.reset(file);
+ }
triple.setArch(Triple::ArchType(o->getArch()));
std::string tripleName(triple.getTriple());
RelInfo.reset(
target->createMCRelocationInfo(tripleName, Ctx));
if (RelInfo) {
- Symzer.reset(
- MCObjectSymbolizer::createObjectSymbolizer(Ctx, std::move(RelInfo), o));
- if (Symzer)
- DisAsm->setSymbolizer(std::move(Symzer));
+ // Symzer.reset(
+ // MCObjectSymbolizer::createObjectSymbolizer(Ctx, std::move(RelInfo), o));
+ // if (Symzer)
+ // DisAsm->setSymbolizer(std::move(Symzer));
}
RelInfo.release();
Symzer.release();
std::unique_ptr<MCObjectDisassembler> OD(
new MCObjectDisassembler(*o, *DisAsm, *MIA));
- Mod.reset(OD->buildModule(false));
+ //Mod.reset(OD->buildModule(false));
+
+ readSections();
}
template <typename ELFT>
void LLVMDisassembler<ELFT>::start() { // runs the passes below in the order listed
readSymbols();
- readSections();
disassemble();
+ readDynamicSymbols(); // dynamic symbols are registered only after disassemble() has finished
}
template <typename ELFT>
-LLVMDisassembler<ELFT>::~LLVMDisassembler() {
- std::for_each(functions.begin(), functions.end(),
- [](std::pair<uint64_t,LLVMFunction*> it) {
- delete it.second;
- });
- std::for_each(blocks.begin(), blocks.end(),
- [](std::pair<uint64_t, LLVMBasicBlock*> it) {
- delete it.second;
- });
-}
+LLVMDisassembler<ELFT>::~LLVMDisassembler() {} // empty body; NOTE(review): Function/BasicBlock objects are now obtained from the manager (newFunction/newBasicBlock) — confirm the manager releases them
template <typename ELFT>
Function* LLVMDisassembler<ELFT>::disassembleFunctionAt(uint64_t address, const std::string& name) {
- SectionRef text_section = sections[".text"];
+ Function * function;
+ SectionRef text_section = getTextSection();
uint64_t base_address, size;
text_section.getAddress(base_address);
text_section.getSize(size);
return NULL;
}
- if (functions.find(address) != functions.end()) {
- return functions[address];
- }
+ if (NULL == (function = manager->getFunction(address))) {
- LLVMFunction * function;
- if (name == "") {
- std::stringstream s;
- s << "<Unnamed 0x" << std::hex << address << ">";
- function = new LLVMFunction(s.str(), address);
- } else {
- function = new LLVMFunction(name, address);
+ if (name == "") {
+ std::stringstream s;
+ s << "<Unnamed 0x" << std::hex << address << ">";
+ function = manager->newFunction(address);
+ function->setName(s.str());
+ } else {
+ function = manager->newFunction(address);
+ function->setName(name);
+ }
+ disassembleFunction(function);
}
- functions.insert(std::make_pair(address, function));
-
- disassembleFunction(function);
return function;
}
template <typename ELFT>
-void LLVMDisassembler<ELFT>::disassembleFunction(LLVMFunction* function) {
- std::stack<LLVMBasicBlock*> remaining_blocks;
- SectionRef text_section = sections[".text"];
+void LLVMDisassembler<ELFT>::disassembleFunction(Function* function) {
+ std::vector<uint64_t> called_functions;
+ std::stack<BasicBlock*> remaining_blocks;
+ /* TODO:
+ * Do all blocks get added properly? We should take care to remove
+ * the other ones at the end of the function!
+ */
+ std::map<uint64_t, BasicBlock*> new_blocks;
+ SectionRef text_section = getTextSection();
StringRef bytes;
text_section.getContents(bytes);
StringRefMemoryObject ref(bytes);
LOG4CXX_DEBUG(logger, "Handling function " << function->getName());
- LLVMBasicBlock * block = new LLVMBasicBlock(function->getStartAddress(), this);
+ BasicBlock * block = manager->newBasicBlock(function->getStartAddress());
remaining_blocks.push(block);
- blocks.insert(std::make_pair(block->getStartAddress(), block));
+ new_blocks.insert(std::make_pair(block->getStartAddress(), block));
function->addBasicBlock(block);
+ uint64_t base_address, size;
+ text_section.getAddress(base_address);
+ text_section.getSize(size);
+ LOG4CXX_DEBUG(logger, "Text section at " << std::hex << base_address << " with size " << size);
+
while (remaining_blocks.size()) {
- LLVMBasicBlock * current_block = remaining_blocks.top();
+ BasicBlock * current_block = remaining_blocks.top();
remaining_blocks.pop();
- LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex << current_block->getStartAddress());
+ LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex
+ << current_block->getStartAddress());
uint64_t inst_size;
- uint64_t base_address;
- text_section.getAddress(base_address);
uint64_t current_address = current_block->getStartAddress() - base_address;
while(true) {
MCInst inst;
jmptarget += base_address;
if (!MIA->isIndirectBranch(inst)) {
if (MIA->isCall(inst)) {
- if (functions.find(jmptarget) == functions.end()) {
- disassembleFunctionAt(jmptarget);
- }
+ if (NULL == manager->getFunction(jmptarget))
+ called_functions.push_back(jmptarget);
} else {
current_block->setNextBlock(0, jmptarget);
- if (blocks.find(jmptarget) == blocks.end()) {
- LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this);
- blocks.insert(std::make_pair(block->getStartAddress(), block));
+ if (new_blocks.find(jmptarget) == new_blocks.end()) {
+ BasicBlock * block = manager->newBasicBlock(jmptarget);
+ assert(block);
+ new_blocks.insert(std::make_pair(block->getStartAddress(), block));
function->addBasicBlock(block);
remaining_blocks.push(block);
} else {
- LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex << current_block->getStartAddress());
- function->addBasicBlock(blocks.find(jmptarget)->second);
+ LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex
+ << current_block->getStartAddress());
+ function->addBasicBlock(new_blocks.find(jmptarget)->second);
}
if (MIA->isConditionalBranch(inst)) {
jmptarget = base_address + current_address + inst_size;
current_block->setNextBlock(1, jmptarget);
- if (blocks.find(jmptarget) == blocks.end()) {
- LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this);
- blocks.insert(std::make_pair(block->getStartAddress(), block));
+ if (new_blocks.find(jmptarget) == new_blocks.end()) {
+ BasicBlock * block = manager->newBasicBlock(jmptarget);
+ assert(block);
+ new_blocks.insert(std::make_pair(block->getStartAddress(), block));
function->addBasicBlock(block);
remaining_blocks.push(block);
} else {
- LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex << current_block->getStartAddress());
- function->addBasicBlock(blocks.find(jmptarget)->second);
+ LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex
+ << current_block->getStartAddress());
+ function->addBasicBlock(new_blocks.find(jmptarget)->second);
}
}
}
}
splitBlocks(function);
LOG4CXX_DEBUG(logger, "Finished function " << function->getName());
- manager->signal_new_function(function);
+ manager->finishFunction(function);
+ for (uint64_t address : called_functions)
+ disassembleFunctionAt(address);
}
template <typename ELFT>
void LLVMDisassembler<ELFT>::disassemble() {
- SectionRef text_section = sections[".text"];
- std::vector<LLVMFunction*> remaining_functions;
+ SectionRef text_section = getTextSection();
+ std::vector<Function*> remaining_functions;
// Assume all function symbols actually start a real function
for (auto x = symbols.begin(); x != symbols.end(); ++x) {
continue;
if (!x->second.getAddress(result)) {
- LLVMFunction * fun = new LLVMFunction(x->first, result);
- remaining_functions.push_back(fun);
- functions.insert(std::make_pair(result, fun));
- LOG4CXX_DEBUG(logger, "Disasembling " << x->first);
+ Function * fun = manager->newFunction(result);
+ if (fun) {
+ fun->setName(x->first);
+ remaining_functions.push_back(fun);
+ LOG4CXX_DEBUG(logger, "Disasembling " << x->first);
+ } else {
+ LOG4CXX_DEBUG(logger, "Function at " << std::hex << result
+ << " already disassembled as " << manager->getFunction(result)->getName());
+ }
}
}
- for (LLVMFunction* function : remaining_functions) {
+ for (Function* function : remaining_functions) {
disassembleFunction(function);
+ manager->finishFunction(function);
}
if (binary->isELF()) {
- bool is64bit = (binary->getData()[4] == 0x02);
-
- for (int i(0); i < (is64bit? 8 : 4); ++i) {
- if (binary->isLittleEndian()) {
- _entryAddress |= (unsigned int)((unsigned char)binary->getData()[0x18 + i]) << 8*i;
- } else {
- _entryAddress = _entryAddress << 8;
- _entryAddress |= (unsigned char)binary->getData()[0x18 + i];
- }
- }
+ uint64_t _entryAddress = entryAddress();
LOG4CXX_DEBUG(logger, "Adding entryAddress at: " << std::hex << _entryAddress);
std::stringstream s;
s << "<_start 0x" << std::hex << _entryAddress << ">";
disassembleFunctionAt(_entryAddress, s.str());
}
- if (functions.empty()) {
+ if (!manager->hasFunctions()) {
uint64_t text_entry;
text_section.getAddress(text_entry);
LOG4CXX_INFO(logger, "No Symbols found, starting at the beginning of the text segment");
}
}
+/*
+ * COFF specialization: returns the AddressOfEntryPoint field of the PE32+
+ * header when the file has one, otherwise that of the PE32 header.
+ *
+ * Returns 0 when the loaded object is not a COFFObjectFile or when it has
+ * neither header, so a null header pointer is never dereferenced.
+ */
+template <>
+uint64_t LLVMDisassembler<COFFT>::entryAddress() {
+    const auto coffobject = dyn_cast<COFFObjectFile>(o);
+    if (!coffobject)
+        return 0;
+
+    // Both pointers start out null so that a getter which leaves its
+    // out-parameter untouched cannot hand back an indeterminate value.
+    const pe32plus_header* plus_header = nullptr;
+    coffobject->getPE32PlusHeader(plus_header);
+    if (plus_header)
+        return plus_header->AddressOfEntryPoint;
+
+    const pe32_header* header = nullptr;
+    coffobject->getPE32Header(header);
+    if (header)
+        return header->AddressOfEntryPoint;
+
+    // Neither a PE32+ nor a PE32 header is available.
+    return 0;
+}
+
+template<>
+uint64_t LLVMDisassembler<MACHOT>::entryAddress() {
+ // TODO: look up the real entry point for Mach-O files; until then 0 is returned as a placeholder
+ return 0;
+}
+
+/*
+ * Generic (ELF) case: returns the e_entry field of the ELF file header.
+ *
+ * Returns 0 when the loaded object is not an ELFObjectFile<ELFT>, instead of
+ * dereferencing the null pointer that dyn_cast yields in that case.
+ */
+template <typename ELFT>
+uint64_t LLVMDisassembler<ELFT>::entryAddress() {
+    const auto elfobject = dyn_cast<ELFObjectFile<ELFT>>(o);
+    if (!elfobject)
+        return 0;
+
+    const auto * header = elfobject->getELFFile()->getHeader();
+    return header->e_entry;
+}
+
template <typename ELFT>
-void LLVMDisassembler<ELFT>::splitBlocks(LLVMFunction* function) {
- SectionRef text_section = sections[".text"];
+void LLVMDisassembler<ELFT>::splitBlocks(Function* function) {
+ SectionRef text_section = getTextSection();
StringRef bytes;
text_section.getContents(bytes);
StringRefMemoryObject ref(bytes);
+ LOG4CXX_DEBUG(logger, "Splitting Blocks in Function " << function->getName());
// Split blocks where jumps are going inside the block
for (auto it = function->blocks().begin();
it != function->blocks().end();
++it) {
BasicBlock * current_block = it->second;
+ if (current_block->getEndAddress() == 0) {
+ LOG4CXX_ERROR(logger, "UNFINISHED BLOCK " << std::hex << current_block->getStartAddress());
+ break;
+ }
uint64_t inst_size;
uint64_t base_address;
text_section.getAddress(base_address);
if(llvm::MCDisassembler::Success ==
DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
// See if some other block starts here
- auto other = blocks.find(current_address + inst_size + base_address);
+ BasicBlock* other = manager->getBasicBlock(current_address
+ + inst_size
+ + base_address);
// Special case, other block starts here but we are at the end anyway
- if (other != blocks.end()) {
+ if (other != NULL) {
uint64_t endaddress = current_address + inst_size + base_address;
if (endaddress != current_block->getEndAddress()) {
LOG4CXX_DEBUG(logger, "Shortening block starting at "
<< std::hex
<< current_block->getStartAddress()
<< " now ending at "
- << other->first);
- function->addBasicBlock(other->second);
+ << other->getStartAddress());
+ function->addBasicBlock(other);
current_block->setEndAddress(endaddress);
- current_block->setNextBlock(0, other->first);
+ current_block->setNextBlock(0, other->getStartAddress());
current_block->setNextBlock(1, 0);
}
}
}
}
+template<>
+void LLVMDisassembler<COFFT>::readDynamicSymbols() {
+ // TODO: not implemented for COFF; this specialization currently does nothing
+}
+
+template<>
+void LLVMDisassembler<MACHOT>::readDynamicSymbols() {
+ // TODO: not implemented for Mach-O; this specialization currently does nothing
+}
+
+/*
+ * Generic (ELF) case: walks the dynamic symbol table and registers every
+ * function symbol with the manager. The registered name is
+ * "<symbol>@@<version>" when the version is the default one and
+ * "<symbol>@<version>" otherwise.
+ *
+ * A symbol whose name or version cannot be read is logged and skipped.
+ * Nothing is done when the loaded object is not an ELFObjectFile<ELFT>.
+ */
+template <typename ELFT>
+void LLVMDisassembler<ELFT>::readDynamicSymbols() {
+    const auto elfobject = dyn_cast<ELFObjectFile<ELFT>>(o);
+    if (!elfobject)
+        return;
+    const auto elffile = elfobject->getELFFile();
+
+    // Symbol type value that marks a function entry.
+    const unsigned char function_symbol_type = 2;
+
+    for (auto it = elffile->begin_dynamic_symbols(),
+             end = elffile->end_dynamic_symbols();
+         it != end;
+         ++it) {
+        if (it->getType() != function_symbol_type)
+            continue;
+
+        auto name_or_error = elffile->getSymbolName(it);
+        if (error_code ec = name_or_error.getError()) {
+            LOG4CXX_ERROR(logger, "Could not read dynamic symbol name: " << ec.message());
+            continue;
+        }
+
+        // Initialised so the "@@"/"@" choice below never reads an
+        // indeterminate value.
+        bool is_default = false;
+        auto version_or_error = elffile->getSymbolVersion(nullptr, &*it, is_default);
+        if (error_code ec = version_or_error.getError()) {
+            LOG4CXX_ERROR(logger, "Could not read dynamic symbol version: " << ec.message());
+            continue;
+        }
+
+        const std::string symbolname = *name_or_error;
+        const std::string symbolversion = *version_or_error;
+        const std::string fullname = symbolname + (is_default? "@@" : "@") + symbolversion;
+
+        // TODO: actually get the symbol address from relocations
+        Function* f = manager->newDynamicFunction(0);
+        if (!f) {
+            // newFunction() is null-checked elsewhere in this file, so the
+            // dynamic variant is guarded the same way.
+            LOG4CXX_ERROR(logger, "Could not create dynamic function for " << fullname);
+            continue;
+        }
+        f->setName(fullname);
+        manager->finishFunction(f);
+
+        LOG4CXX_DEBUG(logger, "Adding dynamic Symbol " << fullname);
+    }
+}
+
template <typename ELFT>
void LLVMDisassembler<ELFT>::readSymbols() {
error_code ec;
}
+// template <typename ELFT>
+// void LLVMDisassembler<ELFT>::forEachFunction(std::function<void (uint64_t, Function*)> callback) {
+// // std::for_each(functions.begin(), functions.end(),
+// // [&](std::pair<uint64_t, Function*> x) {
+// // callback(x.first, x.second);
+// // });
+// }
+
template <typename ELFT>
-void LLVMDisassembler<ELFT>::forEachFunction(std::function<void (uint64_t, Function*)> callback) {
- std::for_each(functions.begin(), functions.end(),
- [&](std::pair<uint64_t, LLVMFunction*> x) {
- callback(x.first, x.second);
- });
+// Disassembles the bytes between the block's start and end address and
+// returns one Instruction per decoded position.
+//
+// Each Instruction carries its absolute address, the trimmed text printed by
+// the instruction printer, its raw bytes and a reference string. The
+// reference is "function:<hex target>" for calls and "block:<hex target>" for
+// other branches whose target can be evaluated; otherwise it is empty.
+// A position that cannot be decoded is reported as a one-byte
+// "Invalid Instruction" and decoding continues with the next byte.
+std::vector<Instruction> LLVMDisassembler<ELFT>::getInstructions(const BasicBlock *block) {
+    std::vector<Instruction> result;
+    SectionRef text_section = getTextSection();
+    uint64_t base_address;
+    text_section.getAddress(base_address);
+    // Everything below works with offsets relative to the text section start;
+    // base_address is added back when an absolute address is needed.
+    uint64_t current_address = block->getStartAddress() - base_address;
+    const uint64_t end_position = block->getEndAddress() - base_address;
+
+    StringRef section_bytes;
+    text_section.getContents(section_bytes);
+    StringRefMemoryObject memory(section_bytes);
+
+    // Scratch buffer shared by all iterations. It replaces a variable-length
+    // array, which is a compiler extension and not standard C++.
+    std::vector<uint8_t> raw;
+
+    while (current_address < end_position) {
+        uint64_t inst_size;
+        MCInst inst;
+        std::string buf;
+        llvm::raw_string_ostream s(buf);
+
+        if(llvm::MCDisassembler::Success ==
+           DisAsm->getInstruction(inst, inst_size, memory, current_address, nulls(), nulls())) {
+
+            raw.resize(inst_size);
+            memory.readBytes(current_address, inst_size, raw.data());
+
+            uint64_t jmptarget;
+            std::string reference;
+            IP->printInst(&inst, s, "");
+            if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
+                std::stringstream stream;
+                if (MIA->isCall(inst))
+                    stream << "function:";
+                else
+                    stream << "block:";
+
+                stream << std::hex << (base_address + jmptarget);
+                reference = stream.str();
+            }
+            result.push_back(Instruction(current_address + base_address, boost::algorithm::trim_copy(s.str()),
+                                         std::vector<uint8_t>(raw.begin(), raw.end()), reference));
+        } else {
+            LOG4CXX_WARN(logger, "Invalid byte at " << std::hex << current_address + base_address);
+            raw.resize(1);
+            memory.readBytes(current_address, 1, raw.data());
+            result.push_back(Instruction(current_address + base_address, "Invalid Instruction",
+                                         std::vector<uint8_t>(raw.begin(), raw.end()), ""));
+            // Step over the undecodable byte so the loop always advances.
+            inst_size = 1;
+        }
+
+        current_address += inst_size;
+    }
+    return result;
}
template <typename ELFT>
std::function<void (uint8_t*, size_t,
const std::string&,
const std::string&)> fun) {
- SectionRef text_section = sections[".text"];
+ SectionRef text_section = getTextSection();
uint64_t base_address;
text_section.getAddress(base_address);
uint64_t current_address = start - base_address;
current_address += inst_size;
}
}
+
+template <typename ELFT>
+SectionRef LLVMDisassembler<ELFT>::getTextSection() {
+ return sections[".text"]; // generic case: the section stored under ".text"; NOTE(review): if `sections` is a std::map, operator[] inserts an empty entry when the key is missing — confirm
+}
+
+template <>
+SectionRef LLVMDisassembler<MACHOT>::getTextSection() {
+ return sections["__text"]; // Mach-O specialization: the section is looked up as "__text" rather than ".text"
+}