#include "core/InformationManager.hxx"
#include "core/Function.hxx"
#include "core/BasicBlock.hxx"
+#include "core/Exception.hxx"
#include <boost/algorithm/string.hpp>
#include <stack>
LOG4CXX_ERROR(logger, ec.message());
return NULL;
}
-
+#if defined(LLVM_35)
Binary * op = retval.get();
-
- if (!op) {
- LOG4CXX_ERROR(logger, "Could not open " << filename);
+#elif defined(LLVM_36)
+ OwningBinary<Binary> ob;
+ ob = std::move(retval.get());
+ Binary* op = ob.getBinary();
+ auto foo = ob.takeBinary();
+ foo.first.release();
+ foo.second.release();
+#endif
+
+ try {
+ // ELFType<endian, maxalign, 64bit>
+ if (ELF32LEObjectFile * object = dyn_cast<ELF32LEObjectFile>(op)) {
+ return new LLVMDisassembler<ELFType<support::little, 2, false>>(filename, manager, object);
+ }
+ if (ELF64LEObjectFile * object = dyn_cast<ELF64LEObjectFile>(op)) {
+ return new LLVMDisassembler<ELFType<support::little, 2, true>>(filename, manager, object);
+ }
+ if (ELF32BEObjectFile * object = dyn_cast<ELF32BEObjectFile>(op)) {
+ return new LLVMDisassembler<ELFType<support::big, 2, false>>(filename, manager, object);
+ }
+ if (ELF64BEObjectFile * object = dyn_cast<ELF64BEObjectFile>(op)) {
+ return new LLVMDisassembler<ELFType<support::big, 2, true>>(filename, manager, object);
+ }
+ if (COFFObjectFile * object = dyn_cast<COFFObjectFile>(op)) {
+ return new LLVMDisassembler<COFFT>(filename, manager, object);
+ }
+ if (MachOObjectFile * object = dyn_cast<MachOObjectFile>(op)) {
+ return new LLVMDisassembler<MACHOT>(filename, manager, object);
+ }
+ } catch (BinaryNotSupported& e) {
return NULL;
}
-
- // ELFType<endian, maxalign, 64bit>
- if (ELF32LEObjectFile * object = dyn_cast<ELF32LEObjectFile>(op)) {
- return new LLVMDisassembler<ELFType<support::little, 2, false>>(filename, manager, object);
- }
- if (ELF64LEObjectFile * object = dyn_cast<ELF64LEObjectFile>(op)) {
- return new LLVMDisassembler<ELFType<support::little, 2, true>>(filename, manager, object);
- }
- if (ELF32BEObjectFile * object = dyn_cast<ELF32BEObjectFile>(op)) {
- return new LLVMDisassembler<ELFType<support::big, 2, false>>(filename, manager, object);
- }
- if (ELF64BEObjectFile * object = dyn_cast<ELF64BEObjectFile>(op)) {
- return new LLVMDisassembler<ELFType<support::big, 2, true>>(filename, manager, object);
- }
- if (COFFObjectFile * object = dyn_cast<COFFObjectFile>(op)) {
- return new LLVMDisassembler<COFFT>(filename, manager, object);
- }
- if (MachOObjectFile * object = dyn_cast<MachOObjectFile>(op)) {
- return new LLVMDisassembler<MACHOT>(filename, manager, object);
- }
-
return NULL;
}
return;
}
+#if defined(LLVM_35)
binary.reset(result.get());
+#elif defined(LLVM_36)
+ // An OwningBinary deletes both the Binary and its backing MemoryBuffer
+ // when it goes out of scope. Handing only getBinary() to `binary` while
+ // `ob` still owns the object would leave `binary` (and `o` below) dangling
+ // and cause a second delete later, so ownership is taken out of the
+ // wrapper before the pointer is stored.
+ OwningBinary<Binary> ob;
+ ob = std::move(result.get());
+ auto parts = ob.takeBinary();
+ Binary* op = parts.first.release();
+
+ binary.reset(op);
+ // The Binary keeps reading from this buffer and nothing visible here can
+ // own it, so it is released on purpose to outlive this scope.
+ // TODO: keep the MemoryBuffer in a member instead of leaking it.
+ parts.second.release();
+#endif
o = dyn_cast<ObjectFile>(binary.get());
} else {
target = TargetRegistry::lookupTarget("", triple, es);
if (!target) {
LOG4CXX_ERROR(logger, es);
- return;
+ BinaryNotSupported e;
+ throw e;
}
LOG4CXX_INFO(logger, "Target " << target->getName());
MRI.reset(target->createMCRegInfo(tripleName));
if (!MRI) {
LOG4CXX_ERROR(logger, "no register info for target " << tripleName);
- return;
+ BinaryNotSupported e;
+ throw e;
}
// Set up disassembler.
AsmInfo.reset(target->createMCAsmInfo(*MRI, tripleName));
if (!AsmInfo) {
LOG4CXX_ERROR(logger, "no assembly info for target " << tripleName);
- return;
+ BinaryNotSupported e;
+ throw e;
}
STI.reset(target->createMCSubtargetInfo(tripleName, "", ""));
if (!STI) {
LOG4CXX_ERROR(logger, "no subtarget info for target " << tripleName);
- return;
+ BinaryNotSupported e;
+ throw e;
}
MII.reset(target->createMCInstrInfo());
if (!MII) {
LOG4CXX_ERROR(logger, "no instruction info for target " << tripleName);
- return;
+ BinaryNotSupported e;
+ throw e;
}
MOFI.reset(new MCObjectFileInfo);
DisAsm.reset(target->createMCDisassembler(*STI, Ctx));
if (!DisAsm) {
LOG4CXX_ERROR(logger, "no disassembler for target " << tripleName);
- return;
+ BinaryNotSupported e;
+ throw e;
}
RelInfo.reset(
target->createMCRelocationInfo(tripleName, Ctx));
MIA.reset(target->createMCInstrAnalysis(MII.get()));
if (!MIA) {
LOG4CXX_ERROR(logger, "no instruction analysis for target " << tripleName);
- return;
+ BinaryNotSupported e;
+ throw e;
}
int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
IP.reset(target->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI));
if (!IP) {
LOG4CXX_ERROR(logger, "no instruction printer for target " << tripleName);
- return;
+ BinaryNotSupported e;
+ throw e;
}
IP->setPrintImmHex(llvm::HexStyle::C);
IP->setPrintImmHex(true);
- std::unique_ptr<MCObjectDisassembler> OD(
- new MCObjectDisassembler(*o, *DisAsm, *MIA));
- Mod.reset(OD->buildModule(false));
+// std::unique_ptr<MCObjectDisassembler> OD(
+// new MCObjectDisassembler(*o, *DisAsm, *MIA));
+ //Mod.reset(OD->buildModule(false));
readSections();
}
LLVMDisassembler<ELFT>::~LLVMDisassembler() {}
template <typename ELFT>
-Function* LLVMDisassembler<ELFT>::disassembleFunctionAt(uint64_t address, const std::string& name) {
+Function* LLVMDisassembler<ELFT>::disassembleFunctionAt(uint64_t address,
+ const std::string& name) {
Function * function;
SectionRef text_section = getTextSection();
uint64_t base_address, size;
+#if defined(LLVM_35)
text_section.getAddress(base_address);
text_section.getSize(size);
-
+#elif defined(LLVM_36)
+ base_address = text_section.getAddress();
+ size = text_section.getSize();
+#endif
if (address < base_address ||
address >= base_address + size) {
return NULL;
std::map<uint64_t, BasicBlock*> new_blocks;
SectionRef text_section = getTextSection();
StringRef bytes;
+ uint64_t base_address, size;
text_section.getContents(bytes);
+#if defined(LLVM_35)
StringRefMemoryObject ref(bytes);
+ text_section.getAddress(base_address);
+ text_section.getSize(size);
+#elif defined(LLVM_36)
+ ArrayRef<uint8_t> bytearray(reinterpret_cast<const uint8_t *>(bytes.data()),
+ bytes.size());
+ base_address = text_section.getAddress();
+ size = text_section.getSize();
+#else
+#error LLVM != 3.5 | 3.6 not supported
+#endif
LOG4CXX_DEBUG(logger, "Handling function " << function->getName());
+ // Refuse to disassemble a function whose entry point is not inside the
+ // text section. The section covers [base_address, base_address + size),
+ // so the end address itself is already outside; hence ">=", matching the
+ // range check done in disassembleFunctionAt.
+ if(function->getStartAddress() < base_address || function->getStartAddress() >= base_address + size) {
+ LOG4CXX_INFO(logger, "Trying to disassemble function " << function->getName() << " but start address " << std::hex << function->getStartAddress() << " is located outside the text segment");
+ return;
+ }
+
BasicBlock * block = manager->newBasicBlock(function->getStartAddress());
remaining_blocks.push(block);
new_blocks.insert(std::make_pair(block->getStartAddress(), block));
function->addBasicBlock(block);
- uint64_t base_address, size;
- text_section.getAddress(base_address);
- text_section.getSize(size);
LOG4CXX_DEBUG(logger, "Text section at " << std::hex << base_address << " with size " << size);
while (remaining_blocks.size()) {
llvm::raw_string_ostream s(buf);
if(llvm::MCDisassembler::Success ==
+#if defined(LLVM_35)
DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
+#elif defined(LLVM_36)
+ DisAsm->getInstruction(inst, inst_size,
+ bytearray.slice(current_address),
+ base_address + current_address,
+ nulls(), nulls())) {
+#endif
uint64_t jmptarget;
if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
if (NULL == manager->getFunction(jmptarget))
called_functions.push_back(jmptarget);
} else {
+ // A target outside [base_address, base_address + size) leaves the text
+ // section; the end address itself counts as outside, hence ">=".
+ // Unconditional jumps out of the section are logged and skipped (the
+ // instruction is stepped over and no successor block is recorded).
+ // Conditional ones only produce a warning and fall through to the
+ // regular handling below.
+ if(jmptarget < base_address || jmptarget >= base_address + size) {
+ if (MIA->isConditionalBranch(inst)) {
+ LOG4CXX_WARN(logger, "Conditional jump out of the text segment. This should never happen!");
+ } else {
+ LOG4CXX_INFO(logger, "Unconditional jump to PLT. Assuming Tail-Call to some library");
+ current_address += inst_size;
+ continue;
+ }
+ }
current_block->setNextBlock(0, jmptarget);
if (new_blocks.find(jmptarget) == new_blocks.end()) {
BasicBlock * block = manager->newBasicBlock(jmptarget);
// Assume all function symbols actually start a real function
for (auto x = symbols.begin(); x != symbols.end(); ++x) {
uint64_t result;
- bool contains;
SymbolRef::Type symbol_type;
-
+#if defined(LLVM_35)
+ bool contains;
if (text_section.containsSymbol(x->second, contains) || !contains)
+#elif defined(LLVM_36)
+ if (!text_section.containsSymbol(x->second))
+#endif
continue;
if (x->second.getType(symbol_type)
if (!manager->hasFunctions()) {
uint64_t text_entry;
+#if defined(LLVM_35)
text_section.getAddress(text_entry);
+#elif defined(LLVM_36)
+ text_entry = text_section.getAddress();
+#endif
LOG4CXX_INFO(logger, "No Symbols found, starting at the beginning of the text segment");
disassembleFunctionAt(text_entry);
}
SectionRef text_section = getTextSection();
StringRef bytes;
text_section.getContents(bytes);
+#if defined(LLVM_35)
StringRefMemoryObject ref(bytes);
+#elif defined(LLVM_36)
+ ArrayRef<uint8_t> bytearray(reinterpret_cast<const uint8_t *>(bytes.data()),
+ bytes.size());
+#endif
+
LOG4CXX_DEBUG(logger, "Splitting Blocks in Function " << function->getName());
// Split blocks where jumps are going inside the block
}
uint64_t inst_size;
uint64_t base_address;
+#if defined(LLVM_35)
text_section.getAddress(base_address);
+#elif defined(LLVM_36)
+ base_address = text_section.getAddress();
+#endif
uint64_t current_address = current_block->getStartAddress() - base_address;
while(current_block->getEndAddress() - base_address > current_address) {
MCInst inst;
llvm::raw_string_ostream s(buf);
if(llvm::MCDisassembler::Success ==
+#if defined(LLVM_35)
DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
+#elif defined(LLVM_36)
+ DisAsm->getInstruction(inst, inst_size,
+ bytearray.slice(current_address),
+ base_address + current_address,
+ nulls(), nulls())) {
+#endif
+
// See if some other block starts here
BasicBlock* other = manager->getBasicBlock(current_address
+ inst_size
it != end;
++it) {
if (it->getType() == 2) { // Function
- bool is_default;
+ bool is_default(false);
// TODO: Error handling
std::string symbolname = *(elffile->getSymbolName(it));
std::string symbolversion = *(elffile->getSymbolVersion(nullptr, &*it, is_default));
symbol_iterator si(o->symbol_begin()), se(o->symbol_end());
for (; si != se; ++si) {
StringRef name;
+ // Start from 0: the result of getAddress() is not checked, and the debug
+ // log below must never print an indeterminate value if the lookup does
+ // not fill `address` in.
+ uint64_t address(0);
+ si->getAddress(address);
if ((ec = si->getName(name))) {
LOG4CXX_ERROR(logger, ec.message());
break;
}
- LOG4CXX_DEBUG(logger, "Added symbol " << name.str());
+ LOG4CXX_DEBUG(logger, "Added symbol " << name.str() << " at address " << std::hex << address);
symbols.insert(make_pair(name.str(), *si));
}
}
std::vector<Instruction> result;
SectionRef text_section = getTextSection();
uint64_t base_address;
+#if defined(LLVM_35)
text_section.getAddress(base_address);
+#elif defined(LLVM_36)
+ base_address = text_section.getAddress();
+#endif
+
uint64_t current_address = block->getStartAddress() - base_address;
uint64_t end_position = block->getEndAddress() - base_address;
StringRef bytes;
text_section.getContents(bytes);
+#if defined(LLVM_35)
StringRefMemoryObject ref(bytes);
+#elif defined(LLVM_36)
+ ArrayRef<uint8_t> bytearray(reinterpret_cast<const uint8_t *>(bytes.data()),
+ bytes.size());
+#endif
+
while (current_address < end_position) {
uint64_t inst_size;
llvm::raw_string_ostream s(buf);
if(llvm::MCDisassembler::Success ==
- DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
+#if defined(LLVM_35)
+ DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
+#elif defined(LLVM_36)
+ DisAsm->getInstruction(inst, inst_size,
+ bytearray.slice(current_address),
+ base_address + current_address,
+ nulls(), nulls())) {
+#endif
uint8_t bytes[inst_size+2];
+#if defined(LLVM_35)
ref.readBytes(current_address, inst_size, bytes);
+#elif defined(LLVM_36)
+ size_t bytesindex(0);
+ for (uint8_t byte : bytearray.slice(current_address, inst_size)) {
+ bytes[bytesindex++] = byte;
+ }
+#endif
uint64_t jmptarget;
std::string ref("");
} else {
LOG4CXX_WARN(logger, "Invalid byte at" << std::hex << current_address + base_address);
uint8_t bytes[1];
+#if defined(LLVM_35)
ref.readBytes(current_address, 1, bytes);
+#elif defined(LLVM_36)
+ bytes[0] = bytearray[current_address];
+#endif
result.push_back(Instruction(current_address + base_address, "Invalid Instruction",
std::vector<uint8_t>(bytes, bytes+1), ""));
inst_size = 1;
return result;
}
-template <typename ELFT>
-void LLVMDisassembler<ELFT>::printEachInstruction(uint64_t start, uint64_t end,
- std::function<void (uint8_t*, size_t,
- const std::string&,
- const std::string&)> fun) {
- SectionRef text_section = getTextSection();
- uint64_t base_address;
- text_section.getAddress(base_address);
- uint64_t current_address = start - base_address;
-
- StringRef bytes;
- text_section.getContents(bytes);
- StringRefMemoryObject ref(bytes);
-
- while (current_address < end - base_address) {
- uint64_t inst_size;
- MCInst inst;
- std::string buf;
- llvm::raw_string_ostream s(buf);
-
- if(llvm::MCDisassembler::Success ==
- DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
-
- uint8_t bytes[inst_size+2];
- ref.readBytes(current_address, inst_size, bytes);
-
- uint64_t jmptarget;
- std::string ref("");
- IP->printInst(&inst, s, "");
- if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
- std::stringstream stream;
- if (MIA->isCall(inst))
- stream << "function:";
- else
- stream << "block:";
-
- stream << std::hex << (base_address + jmptarget);
- ref = stream.str();
- }
-
-
- fun(bytes, inst_size, s.str(), ref);
- } else {
- LOG4CXX_WARN(logger, "Invalid byte at" << std::hex << current_address + base_address);
- fun(NULL, 0, "Invalid Byte", "");
- inst_size = 1;
- }
-
- current_address += inst_size;
- }
-}
-
template <typename ELFT>
SectionRef LLVMDisassembler<ELFT>::getTextSection() {
return sections[".text"];