- LOG4CXX_DEBUG(logger, "Handling file" << filename);
- auto result = createBinary(filename);
-
- error_code ec;
- if ((ec = result.getError())) {
- LOG4CXX_ERROR(logger, "Failed to load Binary" << ec.message());
- binary = NULL;
- return;
- }
-
- binary.reset(result.get());
-
- o = dyn_cast<ObjectFile>(binary.get());
-
- triple.setArch(Triple::ArchType(o->getArch()));
- std::string tripleName(triple.getTriple());
-
- LOG4CXX_INFO(logger, "Architecture " << tripleName);
-
-
- std::string es;
- target = TargetRegistry::lookupTarget("", triple, es);
- if (!target) {
- LOG4CXX_ERROR(logger, es);
- return;
- }
-
- LOG4CXX_INFO(logger, "Target " << target->getName());
-
- MRI.reset(target->createMCRegInfo(tripleName));
- if (!MRI) {
- LOG4CXX_ERROR(logger, "no register info for target " << tripleName);
- return;
- }
-
- // Set up disassembler.
- AsmInfo.reset(target->createMCAsmInfo(*MRI, tripleName));
- if (!AsmInfo) {
- LOG4CXX_ERROR(logger, "no assembly info for target " << tripleName);
- return;
- }
-
- STI.reset(target->createMCSubtargetInfo(tripleName, "", ""));
- if (!STI) {
- LOG4CXX_ERROR(logger, "no subtarget info for target " << tripleName);
- return;
- }
-
- MII.reset(target->createMCInstrInfo());
- if (!MII) {
- LOG4CXX_ERROR(logger, "no instruction info for target " << tripleName);
- return;
- }
-
- DisAsm.reset(target->createMCDisassembler(*STI));
- if (!DisAsm) {
- LOG4CXX_ERROR(logger, "no disassembler for target " << tripleName);
- return;
- }
-
- MOFI.reset(new MCObjectFileInfo);
- Ctx.reset(new MCContext(AsmInfo.get(), MRI.get(), MOFI.get()));
- RelInfo.reset(
- target->createMCRelocationInfo(tripleName, *Ctx.get()));
- if (RelInfo) {
- Symzer.reset(
- MCObjectSymbolizer::createObjectSymbolizer(*Ctx.get(), RelInfo, o));
- if (Symzer)
- DisAsm->setSymbolizer(Symzer);
- }
-
- MIA.reset(target->createMCInstrAnalysis(MII.get()));
-
- int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
- IP.reset(target->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI));
- if (!IP) {
- LOG4CXX_ERROR(logger, "no instruction printer for target " << tripleName);
- return;
- }
-
- IP->setPrintImmHex(llvm::HexStyle::C);
- IP->setPrintImmHex(true);
-
- OwningPtr<MCObjectDisassembler> OD(
- new MCObjectDisassembler(*o, *DisAsm, *MIA));
- Mod.reset(OD->buildModule(false));
-
- readSymbols();
- readSections();
- disassemble();
-}
-
-
-void LLVMDisassembler::disassemble() {
- std::stack<LLVMFunction*> remaining_functions;
- std::stack<LLVMBasicBlock*> remaining_blocks;
- SectionRef text_section = sections[".text"];
-
- std::for_each(symbols.begin(), symbols.end(),
- [&](std::pair<const std::string, SymbolRef> x) {
- uint64_t result;
- bool contains;
- SymbolRef::Type symbol_type;
-
- if (text_section.containsSymbol(x.second, contains) || !contains)
- return;
-
- if (x.second.getType(symbol_type)
- || SymbolRef::ST_Function != symbol_type)
- return;
-
- if (!x.second.getAddress(result)) {
- remaining_functions.push(new LLVMFunction(x.first, result));
- LOG4CXX_DEBUG(logger, "Disasembling " << x.first);
- }
- });
-
- StringRef bytes;
- text_section.getContents(bytes);
- StringRefMemoryObject ref(bytes);
-
- while (remaining_functions.size()) {
- LLVMFunction * current_function = remaining_functions.top();
- remaining_functions.pop();
-
- LOG4CXX_DEBUG(logger, "Handling function " << current_function->getName());
-
- // if ("_start" != current_function->getName())
- // continue;
-
- remaining_blocks.push(new LLVMBasicBlock(current_function->getStartAddress()));
-
- while (remaining_blocks.size()) {
- LLVMBasicBlock * current_block = remaining_blocks.top();
- remaining_blocks.pop();
-
- LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex << current_block->getStartAddress());
-
- uint64_t inst_size;
- uint64_t base_address;
- text_section.getAddress(base_address);
- uint64_t current_address = current_block->getStartAddress() - base_address;
- while(true) {
- MCInst inst;
- std::string buf;
- llvm::raw_string_ostream s(buf);
-
- if(llvm::MCDisassembler::Success ==
- DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
-
- uint8_t bytes[inst_size+2];
- ref.readBytes(current_address, inst_size, bytes);
- s << '\t';
- for(uint8_t* cur = bytes; cur < bytes + inst_size; ++cur) {
- s.write_hex(*cur);
- s << ' ';
- }
- s << '\t';
-
- IP->printInst(&inst, s, "");
-
- LOG4CXX_DEBUG(logger, std::hex << current_address + base_address << s.str());
-
- uint64_t jmptarget;
- if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
- jmptarget += base_address;
- if (!MIA->isIndirectBranch(inst)) {
- if (MIA->isCall(inst)) {
- if (blocks.find(jmptarget) == blocks.end())
- remaining_functions.push(new LLVMFunction("<Unnamed>", jmptarget));
- } else {
- if (blocks.find(jmptarget) == blocks.end())
- remaining_blocks.push(new LLVMBasicBlock(jmptarget));
- if (MIA->isConditionalBranch(inst)) {
- jmptarget = base_address + current_address + inst_size;
- if (blocks.find(jmptarget) == blocks.end())
- remaining_blocks.push(new LLVMBasicBlock(jmptarget));
- }
- }
- }
- }
- } else {
- inst_size = 0;
- }
-
-
- if (inst_size == 0 || MIA->isTerminator(inst) || MIA->isBranch(inst)) {
- current_block->setEndAddress(current_address + base_address);
- blocks.insert(std::make_pair(current_block->getStartAddress(), current_block));
- LOG4CXX_DEBUG(logger, "Finished Block at " << current_block->getEndAddress());
- break;
- }
- current_address += inst_size;
- }
- }
- LOG4CXX_DEBUG(logger, "Finished function " << current_function->getName());
- }
-}
-
-void LLVMDisassembler::readSymbols() {
- error_code ec;
- symbol_iterator si(o->symbol_begin()), se(o->symbol_end());
- for (; si != se; ++si) {
- StringRef name;
- if ((ec = si->getName(name))) {
- LOG4CXX_ERROR(logger, ec.message());
- break;
- }
- LOG4CXX_DEBUG(logger, "Added symbol " << name.str());
- symbols.insert(make_pair(name.str(), *si));
- }
-}
-
-void LLVMDisassembler::readSections() {
- error_code ec;
- section_iterator i(o->section_begin()), e(o->section_end());
- for (; i != e; ++i) {
- StringRef name;
- if ((ec = i->getName(name))) {
- LOG4CXX_ERROR(logger, ec.message());
- break;
- }
- LOG4CXX_DEBUG(logger, "Added section " << name.str());
- sections.insert(make_pair(name.str(), *i));
- }
-
-}
-
-BasicBlock * LLVMDisassembler::generateControlFlowGraph(uint64_t address) {
+ LOG4CXX_DEBUG(logger, "Handling file " << filename);
+
+ if (!file) {
+ auto result = createBinary(filename);
+
+ error_code ec;
+ if ((ec = result.getError())) {
+ LOG4CXX_ERROR(logger, "Failed to load Binary" << ec.message());
+ binary = NULL;
+ return;
+ }
+
+#if defined(LLVM_35)
+ binary.reset(result.get());
+#elif defined(LLVM_36)
+ OwningBinary<Binary> ob;
+ ob = std::move(result.get());
+ Binary* op = ob.getBinary();
+
+ binary.reset(op);
+#endif
+
+ o = dyn_cast<ObjectFile>(binary.get());
+ } else {
+ o = file;
+ binary.reset(file);
+ }
+
+ triple.setArch(Triple::ArchType(o->getArch()));
+ std::string tripleName(triple.getTriple());
+
+ LOG4CXX_INFO(logger, "Architecture " << tripleName);
+
+
+ std::string es;
+ target = TargetRegistry::lookupTarget("", triple, es);
+ if (!target) {
+ LOG4CXX_ERROR(logger, es);
+ BinaryNotSupported e;
+ throw e;
+ }
+
+ LOG4CXX_INFO(logger, "Target " << target->getName());
+
+ MRI.reset(target->createMCRegInfo(tripleName));
+ if (!MRI) {
+ LOG4CXX_ERROR(logger, "no register info for target " << tripleName);
+ BinaryNotSupported e;
+ throw e;
+ }
+
+ // Set up disassembler.
+ AsmInfo.reset(target->createMCAsmInfo(*MRI, tripleName));
+ if (!AsmInfo) {
+ LOG4CXX_ERROR(logger, "no assembly info for target " << tripleName);
+ BinaryNotSupported e;
+ throw e;
+ }
+
+ STI.reset(target->createMCSubtargetInfo(tripleName, "", ""));
+ if (!STI) {
+ LOG4CXX_ERROR(logger, "no subtarget info for target " << tripleName);
+ BinaryNotSupported e;
+ throw e;
+ }
+
+ MII.reset(target->createMCInstrInfo());
+ if (!MII) {
+ LOG4CXX_ERROR(logger, "no instruction info for target " << tripleName);
+ BinaryNotSupported e;
+ throw e;
+ }
+
+ MOFI.reset(new MCObjectFileInfo);
+ MCContext Ctx(AsmInfo.get(), MRI.get(), MOFI.get());
+
+ DisAsm.reset(target->createMCDisassembler(*STI, Ctx));
+ if (!DisAsm) {
+ LOG4CXX_ERROR(logger, "no disassembler for target " << tripleName);
+ BinaryNotSupported e;
+ throw e;
+ }
+ RelInfo.reset(
+ target->createMCRelocationInfo(tripleName, Ctx));
+ if (RelInfo) {
+ // Symzer.reset(
+ // MCObjectSymbolizer::createObjectSymbolizer(Ctx, std::move(RelInfo), o));
+ // if (Symzer)
+ // DisAsm->setSymbolizer(std::move(Symzer));
+ }
+ RelInfo.release();
+ Symzer.release();
+
+ MIA.reset(target->createMCInstrAnalysis(MII.get()));
+ if (!MIA) {
+ LOG4CXX_ERROR(logger, "no instruction analysis for target " << tripleName);
+ BinaryNotSupported e;
+ throw e;
+ }
+
+ int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
+ IP.reset(target->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI));
+ if (!IP) {
+ LOG4CXX_ERROR(logger, "no instruction printer for target " << tripleName);
+ BinaryNotSupported e;
+ throw e;
+ }
+
+ IP->setPrintImmHex(llvm::HexStyle::C);
+ IP->setPrintImmHex(true);
+
+// std::unique_ptr<MCObjectDisassembler> OD(
+// new MCObjectDisassembler(*o, *DisAsm, *MIA));
+ //Mod.reset(OD->buildModule(false));
+
+ readSections();
+}
+
+/// Runs the analysis steps in order: reads the symbol table, disassembles
+/// the code, then reads the dynamic symbols.
+template <typename ELFT>
+void LLVMDisassembler<ELFT>::start() {
+    this->readSymbols();
+    this->disassemble();
+    this->readDynamicSymbols();
+}
+
+/// Destructor; contains no explicit cleanup code of its own.
+template <typename ELFT>
+LLVMDisassembler<ELFT>::~LLVMDisassembler() = default;
+
+/// Returns the function starting at `address`, disassembling it first if the
+/// manager does not know it yet.
+///
+/// @param address  Address of the function's first instruction.
+/// @param name     Name for a newly created function; when empty, a
+///                 placeholder of the form "<Unnamed 0x...>" is used.
+/// @return The already known or newly created function, or nullptr when
+///         `address` lies outside the text section or the manager could not
+///         create a function for it.
+template <typename ELFT>
+Function* LLVMDisassembler<ELFT>::disassembleFunctionAt(uint64_t address, const std::string& name) {
+    SectionRef text_section = getTextSection();
+    uint64_t base_address = 0;
+    uint64_t size = 0;
+#if defined(LLVM_35)
+    text_section.getAddress(base_address);
+    text_section.getSize(size);
+#elif defined(LLVM_36)
+    base_address = text_section.getAddress();
+    size = text_section.getSize();
+#else
+#error LLVM != 3.5 | 3.6 not supported
+#endif
+    if (address < base_address ||
+        address >= base_address + size) {
+        return nullptr;
+    }
+
+    Function* function = manager->getFunction(address);
+    if (function != nullptr) {
+        // Already known: nothing to disassemble.
+        return function;
+    }
+
+    function = manager->newFunction(address);
+    if (function == nullptr) {
+        // disassemble() treats a null result from newFunction() as a
+        // possible outcome, so do not dereference it blindly here either.
+        LOG4CXX_ERROR(logger, "Could not create function at " << std::hex << address);
+        return nullptr;
+    }
+
+    if (name.empty()) {
+        std::stringstream s;
+        s << "<Unnamed 0x" << std::hex << address << ">";
+        function->setName(s.str());
+    } else {
+        function->setName(name);
+    }
+    disassembleFunction(function);
+
+    return function;
+}
+
+/// Disassembles one function by following its control flow through the text
+/// section.
+///
+/// Starting at the function's start address, each basic block is decoded
+/// until a terminator, a branch or an undecodable position is reached.
+/// Direct branch targets (and the fall-through address of conditional
+/// branches) become further blocks of this function. Direct call targets
+/// unknown to the manager are collected and disassembled after this function
+/// has been finished.
+///
+/// @param function  Function to fill with basic blocks. Nothing is done when
+///                  its start address lies outside the text section.
+template <typename ELFT>
+void LLVMDisassembler<ELFT>::disassembleFunction(Function* function) {
+    std::vector<uint64_t> called_functions;
+    std::stack<BasicBlock*> remaining_blocks;
+    /* TODO:
+     * Do all blocks get added properly? We should take care to remove
+     * the other ones at the end of the function!
+     */
+    std::map<uint64_t, BasicBlock*> new_blocks;
+    SectionRef text_section = getTextSection();
+    StringRef bytes;
+    uint64_t base_address = 0;
+    uint64_t size = 0;
+    text_section.getContents(bytes);
+#if defined(LLVM_35)
+    StringRefMemoryObject ref(bytes);
+    text_section.getAddress(base_address);
+    text_section.getSize(size);
+#elif defined(LLVM_36)
+    ArrayRef<uint8_t> bytearray(reinterpret_cast<const uint8_t *>(bytes.data()),
+                                bytes.size());
+    base_address = text_section.getAddress();
+    size = text_section.getSize();
+#else
+#error LLVM != 3.5 | 3.6 not supported
+#endif
+
+    // True when `address` lies inside [base_address, base_address + size).
+    // The end is exclusive, matching the check in disassembleFunctionAt().
+    auto in_text_section = [&](uint64_t address) {
+        return address >= base_address && address < base_address + size;
+    };
+
+    // Makes the block starting at `target` part of this function. A block not
+    // seen before is created and scheduled for disassembly; a known one is
+    // only re-added to the function.
+    auto add_block = [&](uint64_t target) {
+        auto known = new_blocks.find(target);
+        if (known == new_blocks.end()) {
+            BasicBlock * block = manager->newBasicBlock(target);
+            assert(block);
+            new_blocks.insert(std::make_pair(block->getStartAddress(), block));
+            function->addBasicBlock(block);
+            remaining_blocks.push(block);
+        } else {
+            LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex
+                          << target);
+            function->addBasicBlock(known->second);
+        }
+    };
+
+    LOG4CXX_DEBUG(logger, "Handling function " << function->getName());
+
+    if (!in_text_section(function->getStartAddress())) {
+        LOG4CXX_INFO(logger, "Trying to disassemble function " << function->getName() << " but start address " << std::hex << function->getStartAddress() << " is located outside the text segment");
+        return;
+    }
+
+    add_block(function->getStartAddress());
+
+    LOG4CXX_DEBUG(logger, "Text section at " << std::hex << base_address << " with size " << size);
+
+    while (!remaining_blocks.empty()) {
+        BasicBlock * current_block = remaining_blocks.top();
+        remaining_blocks.pop();
+
+        LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex
+                      << current_block->getStartAddress());
+
+        uint64_t inst_size = 0;
+        // Offset of the current instruction relative to the section start.
+        uint64_t current_address = current_block->getStartAddress() - base_address;
+        while (true) {
+            MCInst inst;
+            bool decoded = false;
+
+            // An offset at or past the end of the section contents cannot be
+            // decoded; treat it like a decode failure instead of handing an
+            // out-of-range position to the disassembler.
+            if (current_address < bytes.size()) {
+#if defined(LLVM_35)
+                decoded = llvm::MCDisassembler::Success ==
+                    DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls());
+#elif defined(LLVM_36)
+                decoded = llvm::MCDisassembler::Success ==
+                    DisAsm->getInstruction(inst, inst_size,
+                                           bytearray.slice(current_address),
+                                           base_address + current_address,
+                                           nulls(), nulls());
+#endif
+            }
+
+            if (decoded) {
+                uint64_t jmptarget;
+
+                if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
+                    // The branch was evaluated on a section-relative offset.
+                    jmptarget += base_address;
+                    if (!MIA->isIndirectBranch(inst)) {
+                        if (MIA->isCall(inst)) {
+                            if (nullptr == manager->getFunction(jmptarget))
+                                called_functions.push_back(jmptarget);
+                        } else {
+                            const bool conditional = MIA->isConditionalBranch(inst);
+                            if (!in_text_section(jmptarget)) {
+                                if (conditional) {
+                                    LOG4CXX_WARN(logger, "Conditional jump out of the text segment. This should never happen!");
+                                } else {
+                                    LOG4CXX_INFO(logger, "Unconditional jump to PLT. Assuming Tail-Call to some library");
+                                    // NOTE(review): decoding continues with the
+                                    // next instruction inside the same block --
+                                    // confirm this is intended after a tail call.
+                                    current_address += inst_size;
+                                    continue;
+                                }
+                            }
+                            current_block->setNextBlock(0, jmptarget);
+                            add_block(jmptarget);
+                            if (conditional) {
+                                // The not-taken path starts right after the branch.
+                                const uint64_t fallthrough = base_address + current_address + inst_size;
+                                current_block->setNextBlock(1, fallthrough);
+                                add_block(fallthrough);
+                            }
+                        }
+                    }
+                }
+            } else {
+                inst_size = 0;
+            }
+
+            if (inst_size == 0 || MIA->isTerminator(inst) || MIA->isBranch(inst)) {
+                current_block->setEndAddress(current_address + base_address + inst_size);
+                LOG4CXX_DEBUG(logger, "Finished Block at " << std::hex <<
+                              current_block->getEndAddress());
+                break;
+            }
+            current_address += inst_size;
+        }
+    }
+    splitBlocks(function);
+    LOG4CXX_DEBUG(logger, "Finished function " << function->getName());
+    manager->finishFunction(function);
+    for (uint64_t address : called_functions)
+        disassembleFunctionAt(address);
+}
+
+/// Disassembles the whole binary.
+///
+/// Every function-typed symbol located in the text section is registered and
+/// disassembled. For ELF binaries the entry address is disassembled as well.
+/// If the manager has no functions afterwards, disassembly starts at the
+/// beginning of the text section.
+template <typename ELFT>
+void LLVMDisassembler<ELFT>::disassemble() {
+    SectionRef text_section = getTextSection();
+    std::vector<Function*> remaining_functions;
+
+    // Assume all function symbols actually start a real function
+    for (auto& entry : symbols) {
+        auto& symbol_name = entry.first;
+        auto& symbol = entry.second;
+        uint64_t result;
+        bool contains;
+        SymbolRef::Type symbol_type;
+
+        // Skip symbols that are not located in the text section.
+#if defined(LLVM_35)
+        if (text_section.containsSymbol(symbol, contains) || !contains)
+#elif defined(LLVM_36)
+        if (!text_section.containsSymbol(symbol))
+#endif
+            continue;
+
+        // Skip everything that is not a function symbol.
+        if (symbol.getType(symbol_type)
+            || SymbolRef::ST_Function != symbol_type)
+            continue;
+
+        // Skip symbols whose address cannot be determined.
+        if (symbol.getAddress(result))
+            continue;
+
+        Function * fun = manager->newFunction(result);
+        if (!fun) {
+            LOG4CXX_DEBUG(logger, "Function at " << std::hex << result
+                          << " already disassembled as " << manager->getFunction(result)->getName());
+            continue;
+        }
+        fun->setName(symbol_name);
+        remaining_functions.push_back(fun);
+        LOG4CXX_DEBUG(logger, "Disasembling " << symbol_name);
+    }
+
+    // NOTE(review): disassembleFunction() also calls manager->finishFunction()
+    // itself, so each function is finished twice here -- confirm that is intended.
+    for (auto it = remaining_functions.begin(); it != remaining_functions.end(); ++it) {
+        disassembleFunction(*it);
+        manager->finishFunction(*it);
+    }
+
+    if (binary->isELF()) {
+        const uint64_t entry_point = entryAddress();
+        LOG4CXX_DEBUG(logger, "Adding entryAddress at: " << std::hex << entry_point);
+        std::stringstream label;
+        label << "<_start 0x" << std::hex << entry_point << ">";
+
+        disassembleFunctionAt(entry_point, label.str());
+    }
+
+    if (!manager->hasFunctions()) {
+        uint64_t text_entry;
+#if defined(LLVM_35)
+        text_section.getAddress(text_entry);
+#elif defined(LLVM_36)
+        text_entry = text_section.getAddress();
+#endif
+        LOG4CXX_INFO(logger, "No Symbols found, starting at the beginning of the text segment");
+        disassembleFunctionAt(text_entry);
+    }
+}
+
+/// Returns the entry point recorded in the PE header of a COFF object.
+///
+/// The PE32+ header is consulted first, then the PE32 header.
+///
+/// @return AddressOfEntryPoint of whichever header is present, or 0 when the
+///         loaded object is not a COFFObjectFile or carries neither header.
+template <>
+uint64_t LLVMDisassembler<COFFT>::entryAddress() {
+    const auto coffobject = dyn_cast<COFFObjectFile>(o);
+    if (!coffobject) {
+        return 0;
+    }
+
+    // Start from nullptr so a getter that does not write its output cannot
+    // leave a garbage pointer behind.
+    const struct pe32plus_header* header_plus = nullptr;
+    if (!coffobject->getPE32PlusHeader(header_plus) && header_plus) {
+        return header_plus->AddressOfEntryPoint;
+    }
+
+    const struct pe32_header* header = nullptr;
+    if (!coffobject->getPE32Header(header) && header) {
+        return header->AddressOfEntryPoint;
+    }
+
+    // Neither header is available.
+    return 0;
+}
+
+template<>
+uint64_t LLVMDisassembler<MACHOT>::entryAddress() {
+ // TODO: entry point lookup is not implemented for this specialization; 0 is returned as a placeholder
+ return 0;
+}
+
+/// Returns the entry point stored in the ELF file header (e_entry).
+///
+/// @return The header's e_entry value, or 0 when the loaded object is not an
+///         ELFObjectFile<ELFT>.
+template <typename ELFT>
+uint64_t LLVMDisassembler<ELFT>::entryAddress() {
+    const auto elfobject = dyn_cast<ELFObjectFile<ELFT>>(o);
+    if (!elfobject) {
+        // dyn_cast yields null on a type mismatch; do not dereference it.
+        return 0;
+    }
+    const auto * header = elfobject->getELFFile()->getHeader();
+
+    return header->e_entry;
+}
+
+/// Shortens blocks of `function` that run into the start of another block.
+///
+/// Each block is decoded again instruction by instruction. When the manager
+/// knows a block starting right after an instruction and that address is not
+/// already the current block's end, the current block is cut there and linked
+/// to that block as its only successor.
+template <typename ELFT>
+void LLVMDisassembler<ELFT>::splitBlocks(Function* function) {
+    SectionRef text_section = getTextSection();
+    StringRef bytes;
+    text_section.getContents(bytes);
+#if defined(LLVM_35)
+    StringRefMemoryObject ref(bytes);
+#elif defined(LLVM_36)
+    ArrayRef<uint8_t> bytearray(reinterpret_cast<const uint8_t *>(bytes.data()),
+                                bytes.size());
+#endif
+
+
+    LOG4CXX_DEBUG(logger, "Splitting Blocks in Function " << function->getName());
+    // Split blocks where jumps are going inside the block
+    for (auto it = function->blocks().begin();
+         it != function->blocks().end();
+         ++it) {
+        BasicBlock * current_block = it->second;
+        if (current_block->getEndAddress() == 0) {
+            LOG4CXX_ERROR(logger, "UNFINISHED BLOCK " << std::hex << current_block->getStartAddress());
+            break;
+        }
+        uint64_t inst_size;
+        uint64_t base_address;
+#if defined(LLVM_35)
+        text_section.getAddress(base_address);
+#elif defined(LLVM_36)
+        base_address = text_section.getAddress();
+#endif
+        // `offset` is relative to the section start; the end address is
+        // re-read every round because the block may be shortened below.
+        for (uint64_t offset = current_block->getStartAddress() - base_address;
+             current_block->getEndAddress() - base_address > offset;
+             offset += inst_size) {
+            MCInst inst;
+
+#if defined(LLVM_35)
+            const bool decoded = llvm::MCDisassembler::Success ==
+                DisAsm->getInstruction(inst, inst_size, ref, offset, nulls(), nulls());
+#elif defined(LLVM_36)
+            const bool decoded = llvm::MCDisassembler::Success ==
+                DisAsm->getInstruction(inst, inst_size,
+                                       bytearray.slice(offset),
+                                       base_address + offset,
+                                       nulls(), nulls());
+#endif
+            if (!decoded) {
+                // Step over a single undecodable byte.
+                inst_size = 1;
+                continue;
+            }
+
+            // See if some other block starts here
+            const uint64_t next_address = offset + inst_size + base_address;
+            BasicBlock* other = manager->getBasicBlock(next_address);
+
+            // Special case, other block starts here but we are at the end anyway
+            if (other != NULL && next_address != current_block->getEndAddress()) {
+                LOG4CXX_DEBUG(logger, "Shortening block starting at "
+                              << std::hex
+                              << current_block->getStartAddress()
+                              << " now ending at "
+                              << other->getStartAddress());
+                function->addBasicBlock(other);
+                current_block->setEndAddress(next_address);
+                current_block->setNextBlock(0, other->getStartAddress());
+                current_block->setNextBlock(1, 0);
+            }
+        }
+    }
+}
+
+template<>
+void LLVMDisassembler<COFFT>::readDynamicSymbols() {
+ //TODO: dynamic symbols are not read for the COFFT specialization yet; this is currently a no-op
+}
+
+template<>
+void LLVMDisassembler<MACHOT>::readDynamicSymbols() {
+ //TODO: dynamic symbols are not read for the MACHOT specialization yet; this is currently a no-op
+}
+
+/// Registers every function-typed entry of the ELF dynamic symbol table with
+/// the manager as a dynamic function.
+///
+/// The function is named "<name>@@<version>" when the version is the default
+/// one and "<name>@<version>" otherwise. Symbols whose name or version cannot
+/// be read are logged and skipped. Nothing is done when the loaded object is
+/// not an ELFObjectFile<ELFT>.
+template <typename ELFT>
+void LLVMDisassembler<ELFT>::readDynamicSymbols() {
+    // ELF symbol type value for functions (STT_FUNC).
+    const unsigned char kSymbolTypeFunction = 2;
+
+    const auto elfobject = dyn_cast<ELFObjectFile<ELFT>>(o);
+    if (!elfobject) {
+        return;
+    }
+    const auto elffile = elfobject->getELFFile();
+
+    for (auto it = elffile->begin_dynamic_symbols(),
+              end = elffile->end_dynamic_symbols();
+         it != end;
+         ++it) {
+        if (it->getType() != kSymbolTypeFunction)
+            continue;
+
+        // Both lookups return ErrorOr values; check them before dereferencing.
+        auto name_or_error = elffile->getSymbolName(it);
+        if (!name_or_error) {
+            LOG4CXX_ERROR(logger, name_or_error.getError().message());
+            continue;
+        }
+
+        bool is_default = false;
+        auto version_or_error = elffile->getSymbolVersion(nullptr, &*it, is_default);
+        if (!version_or_error) {
+            LOG4CXX_ERROR(logger, version_or_error.getError().message());
+            continue;
+        }
+
+        std::string symbolname = *name_or_error;
+        std::string symbolversion = *version_or_error;
+        const char* separator = is_default ? "@@" : "@";
+
+        // TODO: actually get the symbol address from relocations
+        Function* f = manager->newDynamicFunction(0);
+        f->setName(symbolname + separator + symbolversion);
+        manager->finishFunction(f);
+
+        LOG4CXX_DEBUG(logger, "Adding dynamic Symbol " << symbolname << separator << symbolversion);
+    }
+}
+
+/// Fills `symbols` with every symbol of the object file, keyed by name.
+///
+/// Stops at the first symbol whose name cannot be read, after logging the
+/// error. The symbol address is looked up for the debug log only.
+template <typename ELFT>
+void LLVMDisassembler<ELFT>::readSymbols() {
+    error_code ec;
+    symbol_iterator si(o->symbol_begin()), se(o->symbol_end());
+    for (; si != se; ++si) {
+        StringRef name;
+        if ((ec = si->getName(name))) {
+            LOG4CXX_ERROR(logger, ec.message());
+            break;
+        }
+        // Initialised so the log below never reads an indeterminate value
+        // when getAddress() does not write its output.
+        uint64_t address = 0;
+        si->getAddress(address);
+        LOG4CXX_DEBUG(logger, "Added symbol " << name.str() << " at address " << std::hex << address);
+        symbols.insert(make_pair(name.str(), *si));
+    }
+}
+
+/// Fills `sections` with every section of the object file, keyed by name.
+///
+/// Stops at the first section whose name cannot be read, after logging the
+/// error.
+template <typename ELFT>
+void LLVMDisassembler<ELFT>::readSections() {
+    section_iterator current(o->section_begin());
+    section_iterator last(o->section_end());
+    while (current != last) {
+        StringRef section_name;
+        error_code failure = current->getName(section_name);
+        if (failure) {
+            LOG4CXX_ERROR(logger, failure.message());
+            break;
+        }
+        LOG4CXX_DEBUG(logger, "Added section " << section_name.str());
+        sections.insert(make_pair(section_name.str(), *current));
+        ++current;
+    }
+}
+
+// template <typename ELFT>
+// void LLVMDisassembler<ELFT>::forEachFunction(std::function<void (uint64_t, Function*)> callback) {
+// // std::for_each(functions.begin(), functions.end(),
+// // [&](std::pair<uint64_t, Function*> x) {
+// // callback(x.first, x.second);
+// // });
+// }
+
+/// Decodes the instructions of a basic block.
+///
+/// @param block  Block whose address range [start, end) is decoded; the range
+///               is clamped to the contents of the text section.
+/// @return One Instruction per decoded instruction, in address order, with
+///         its address, trimmed text and raw bytes. When the branch target
+///         can be evaluated, the instruction also carries a reference of the
+///         form "function:<hex>" (calls) or "block:<hex>" (other branches).
+///         A position that cannot be decoded is reported as a one-byte
+///         "Invalid Instruction".
+template <typename ELFT>
+std::vector<Instruction> LLVMDisassembler<ELFT>::getInstructions(const BasicBlock *block) {
+    std::vector<Instruction> result;
+    SectionRef text_section = getTextSection();
+    uint64_t base_address = 0;
+#if defined(LLVM_35)
+    text_section.getAddress(base_address);
+#elif defined(LLVM_36)
+    base_address = text_section.getAddress();
+#endif
+
+    StringRef bytes;
+    text_section.getContents(bytes);
+#if defined(LLVM_35)
+    StringRefMemoryObject ref(bytes);
+#elif defined(LLVM_36)
+    ArrayRef<uint8_t> bytearray(reinterpret_cast<const uint8_t *>(bytes.data()),
+                                bytes.size());
+#endif
+
+    // Offsets are relative to the start of the text section.
+    uint64_t current_address = block->getStartAddress() - base_address;
+    uint64_t end_position = block->getEndAddress() - base_address;
+    // Never read past the section contents, e.g. for a block whose end
+    // address was never set.
+    if (end_position > bytes.size()) {
+        end_position = bytes.size();
+    }
+
+    while (current_address < end_position) {
+        uint64_t inst_size = 0;
+        MCInst inst;
+        std::string buf;
+        llvm::raw_string_ostream s(buf);
+
+#if defined(LLVM_35)
+        const bool decoded = llvm::MCDisassembler::Success ==
+            DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls());
+#elif defined(LLVM_36)
+        const bool decoded = llvm::MCDisassembler::Success ==
+            DisAsm->getInstruction(inst, inst_size,
+                                   bytearray.slice(current_address),
+                                   base_address + current_address,
+                                   nulls(), nulls());
+#endif
+
+        if (decoded) {
+            // Copy the encoded bytes into a vector (replaces the former
+            // variable-length stack array).
+#if defined(LLVM_35)
+            std::vector<uint8_t> raw_bytes(inst_size);
+            ref.readBytes(current_address, inst_size, raw_bytes.data());
+#elif defined(LLVM_36)
+            ArrayRef<uint8_t> encoded = bytearray.slice(current_address, inst_size);
+            std::vector<uint8_t> raw_bytes(encoded.begin(), encoded.end());
+#endif
+
+            uint64_t jmptarget;
+            std::string reference;
+            IP->printInst(&inst, s, "");
+            if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
+                std::stringstream stream;
+                if (MIA->isCall(inst))
+                    stream << "function:";
+                else
+                    stream << "block:";
+
+                // The branch was evaluated on a section-relative offset.
+                stream << std::hex << (base_address + jmptarget);
+                reference = stream.str();
+            }
+            result.push_back(Instruction(current_address + base_address, boost::algorithm::trim_copy(s.str()),
+                                         raw_bytes, reference));
+        } else {
+            LOG4CXX_WARN(logger, "Invalid byte at " << std::hex << current_address + base_address);
+            uint8_t invalid_byte = 0;
+#if defined(LLVM_35)
+            ref.readBytes(current_address, 1, &invalid_byte);
+#elif defined(LLVM_36)
+            invalid_byte = bytearray[current_address];
+#endif
+            result.push_back(Instruction(current_address + base_address, "Invalid Instruction",
+                                         std::vector<uint8_t>(1, invalid_byte), ""));
+            inst_size = 1;
+        }
+
+        current_address += inst_size;
+    }
+    return result;
+}
+
+/// Returns the section named ".text".
+///
+/// @return The stored section, or a default-constructed SectionRef when no
+///         section of that name was read. The lookup does not insert a new
+///         entry into `sections`.
+template <typename ELFT>
+SectionRef LLVMDisassembler<ELFT>::getTextSection() {
+    const auto text = sections.find(".text");
+    if (text == sections.end()) {
+        return SectionRef();
+    }
+    return text->second;
+}