+#include "disassembler/Instruction.hxx"
#include "disassembler/llvm/LLVMDisassembler.hxx"
#include "core/InformationManager.hxx"
#include "core/Function.hxx"
#include "core/BasicBlock.hxx"
+#include <boost/algorithm/string.hpp>
#include <stack>
#include <algorithm>
// Empty tag type: used as the template argument of LLVMDisassembler when the
// input is recognised as a COFFObjectFile.
class COFFT {
};
+
+ class MACHOT {
+ // Empty tag type: used as the template argument of LLVMDisassembler when the input is recognised as a MachOObjectFile.
+ };
}
/*
*
*/
Disassembler * createLLVMDisassembler(const std::string& filename, InformationManager* manager) {
+ log4cxx::LoggerPtr logger(log4cxx::Logger::getLogger("disassembler.LLVMDisassembler"));
if (filename == "")
return NULL;
- std::unique_ptr<Binary> o;
- o.reset(createBinary(filename).get());
- Binary * op = o.release();
+ auto retval = createBinary(filename);
+ if (error_code ec = retval.getError()) {
+ LOG4CXX_ERROR(logger, ec.message());
+ return NULL;
+ }
+#if defined(LLVM_35)
+ Binary * op = retval.get();
+#elif defined(LLVM_36)
+ OwningBinary<Binary> ob;
+ ob = std::move(retval.get());
+ Binary* op = ob.getBinary();
+ auto foo = ob.takeBinary();
+ foo.first.release();
+ foo.second.release();
+#endif
// ELFType<endian, maxalign, 64bit>
if (ELF32LEObjectFile * object = dyn_cast<ELF32LEObjectFile>(op)) {
if (COFFObjectFile * object = dyn_cast<COFFObjectFile>(op)) {
return new LLVMDisassembler<COFFT>(filename, manager, object);
}
+ if (MachOObjectFile * object = dyn_cast<MachOObjectFile>(op)) {
+ return new LLVMDisassembler<MACHOT>(filename, manager, object);
+ }
return NULL;
}
return;
}
+#if defined(LLVM_35)
binary.reset(result.get());
+#elif defined(LLVM_36)
+ OwningBinary<Binary> ob;
+ ob = std::move(result.get());
+ Binary* op = ob.getBinary();
+
+ binary.reset(op);
+#endif
o = dyn_cast<ObjectFile>(binary.get());
} else {
IP->setPrintImmHex(llvm::HexStyle::C);
IP->setPrintImmHex(true);
- std::unique_ptr<MCObjectDisassembler> OD(
- new MCObjectDisassembler(*o, *DisAsm, *MIA));
- Mod.reset(OD->buildModule(false));
+// std::unique_ptr<MCObjectDisassembler> OD(
+// new MCObjectDisassembler(*o, *DisAsm, *MIA));
+ //Mod.reset(OD->buildModule(false));
readSections();
}
template <typename ELFT>
Function* LLVMDisassembler<ELFT>::disassembleFunctionAt(uint64_t address, const std::string& name) {
Function * function;
- SectionRef text_section = sections[".text"];
+ SectionRef text_section = getTextSection();
uint64_t base_address, size;
+#if defined(LLVM_35)
text_section.getAddress(base_address);
text_section.getSize(size);
-
+#elif defined(LLVM_36)
+ base_address = text_section.getAddress();
+ size = text_section.getSize();
+#endif
if (address < base_address ||
address >= base_address + size) {
return NULL;
* the other ones at the end of the function!
*/
std::map<uint64_t, BasicBlock*> new_blocks;
- SectionRef text_section = sections[".text"];
+ SectionRef text_section = getTextSection();
StringRef bytes;
text_section.getContents(bytes);
+#if defined(LLVM_35)
StringRefMemoryObject ref(bytes);
+#elif defined(LLVM_36)
+ ArrayRef<uint8_t> bytearray(reinterpret_cast<const uint8_t *>(bytes.data()),
+ bytes.size());
+#else
+#error LLVM != 3.5 | 3.6 not supported
+#endif
LOG4CXX_DEBUG(logger, "Handling function " << function->getName());
new_blocks.insert(std::make_pair(block->getStartAddress(), block));
function->addBasicBlock(block);
+ uint64_t base_address, size;
+#if defined(LLVM_35)
+ text_section.getAddress(base_address);
+ text_section.getSize(size);
+#elif defined(LLVM_36)
+ base_address = text_section.getAddress();
+ size = text_section.getSize();
+#endif
+ LOG4CXX_DEBUG(logger, "Text section at " << std::hex << base_address << " with size " << size);
+
while (remaining_blocks.size()) {
BasicBlock * current_block = remaining_blocks.top();
remaining_blocks.pop();
<< current_block->getStartAddress());
uint64_t inst_size;
- uint64_t base_address;
- text_section.getAddress(base_address);
uint64_t current_address = current_block->getStartAddress() - base_address;
while(true) {
MCInst inst;
llvm::raw_string_ostream s(buf);
if(llvm::MCDisassembler::Success ==
+#if defined(LLVM_35)
DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
+#elif defined(LLVM_36)
+ DisAsm->getInstruction(inst, inst_size,
+ bytearray.slice(current_address),
+ base_address + current_address,
+ nulls(), nulls())) {
+#endif
uint64_t jmptarget;
if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
template <typename ELFT>
void LLVMDisassembler<ELFT>::disassemble() {
- SectionRef text_section = sections[".text"];
+ SectionRef text_section = getTextSection();
std::vector<Function*> remaining_functions;
// Assume all function symbols actually start a real function
bool contains;
SymbolRef::Type symbol_type;
-
+#if defined(LLVM_35)
if (text_section.containsSymbol(x->second, contains) || !contains)
+#elif defined(LLVM_36)
+ if (text_section.containsSymbol(x->second))
+#endif
continue;
if (x->second.getType(symbol_type)
if (!x->second.getAddress(result)) {
Function * fun = manager->newFunction(result);
- fun->setName(x->first);
- remaining_functions.push_back(fun);
- LOG4CXX_DEBUG(logger, "Disasembling " << x->first);
+ if (fun) {
+ fun->setName(x->first);
+ remaining_functions.push_back(fun);
+ LOG4CXX_DEBUG(logger, "Disasembling " << x->first);
+ } else {
+ LOG4CXX_DEBUG(logger, "Function at " << std::hex << result
+ << " already disassembled as " << manager->getFunction(result)->getName());
+ }
}
}
if (!manager->hasFunctions()) {
uint64_t text_entry;
+#if defined(LLVM_35)
text_section.getAddress(text_entry);
+#elif defined(LLVM_36)
+ text_entry = text_section.getAddress();
+#endif
LOG4CXX_INFO(logger, "No Symbols found, starting at the beginning of the text segment");
disassembleFunctionAt(text_entry);
}
}
}
+template<>
+uint64_t LLVMDisassembler<MACHOT>::entryAddress() {
+ // TODO: entry point lookup is not implemented for Mach-O yet; this specialization is a stub that always returns 0.
+ return 0;
+}
+
template <typename ELFT>
uint64_t LLVMDisassembler<ELFT>::entryAddress() {
const auto elffile = dyn_cast<ELFObjectFile<ELFT>>(o)->getELFFile();
template <typename ELFT>
void LLVMDisassembler<ELFT>::splitBlocks(Function* function) {
- SectionRef text_section = sections[".text"];
+ SectionRef text_section = getTextSection();
StringRef bytes;
text_section.getContents(bytes);
+#if defined(LLVM_35)
StringRefMemoryObject ref(bytes);
+#elif defined(LLVM_36)
+ ArrayRef<uint8_t> bytearray(reinterpret_cast<const uint8_t *>(bytes.data()),
+ bytes.size());
+#endif
+
+ LOG4CXX_DEBUG(logger, "Splitting Blocks in Function " << function->getName());
// Split blocks where jumps are going inside the block
for (auto it = function->blocks().begin();
it != function->blocks().end();
++it) {
BasicBlock * current_block = it->second;
+ if (current_block->getEndAddress() == 0) {
+ LOG4CXX_ERROR(logger, "UNFINISHED BLOCK " << std::hex << current_block->getStartAddress());
+ break;
+ }
uint64_t inst_size;
uint64_t base_address;
+#if defined(LLVM_35)
text_section.getAddress(base_address);
+#elif defined(LLVM_36)
+ base_address = text_section.getAddress();
+#endif
uint64_t current_address = current_block->getStartAddress() - base_address;
while(current_block->getEndAddress() - base_address > current_address) {
MCInst inst;
llvm::raw_string_ostream s(buf);
if(llvm::MCDisassembler::Success ==
+#if defined(LLVM_35)
DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
+#elif defined(LLVM_36)
+ DisAsm->getInstruction(inst, inst_size,
+ bytearray.slice(current_address),
+ base_address + current_address,
+ nulls(), nulls())) {
+#endif
+
// See if some other block starts here
BasicBlock* other = manager->getBasicBlock(current_address
+ inst_size
//TODO
}
+template<>
+void LLVMDisassembler<MACHOT>::readDynamicSymbols() {
+ //TODO: reading dynamic symbols is not implemented for Mach-O yet; this specialization is an empty stub and registers nothing.
+}
+
template <typename ELFT>
void LLVMDisassembler<ELFT>::readDynamicSymbols() {
const auto elffile = dyn_cast<ELFObjectFile<ELFT>>(o)->getELFFile();
// TODO: Error handling
std::string symbolname = *(elffile->getSymbolName(it));
std::string symbolversion = *(elffile->getSymbolVersion(nullptr, &*it, is_default));
- manager->signal_new_dyn_symbol(symbolname + (is_default? "@@" : "@") + symbolversion);
+ // TODO: actually get the symbol address from relocations
+ Function* f = manager->newDynamicFunction(0);
+ f->setName(symbolname + (is_default? "@@" : "@") + symbolversion);
+ manager->finishFunction(f);
+
LOG4CXX_DEBUG(logger, "Adding dynamic Symbol " << symbolname << (is_default? "@@" : "@") << symbolversion);
}
}
// // });
// }
+/**
+ * Disassembles the instructions of a single basic block.
+ *
+ * The block's start/end addresses are translated into offsets into the
+ * contents of the text section (see getTextSection()) and decoded one
+ * instruction at a time.
+ *
+ * @param block basic block whose address range should be decoded
+ * @return one Instruction per decoded instruction, in address order. Each
+ *         entry carries the absolute address, the trimmed textual form, the
+ *         raw bytes and a reference string: "function:<hex>" for calls,
+ *         "block:<hex>" for other branches whose target can be evaluated,
+ *         empty otherwise. A byte that cannot be decoded is reported as a
+ *         one-byte "Invalid Instruction" entry and decoding continues with
+ *         the following byte. If the block's range does not start inside
+ *         the text section (or ends before it starts) the result is empty;
+ *         a range running past the end of the section is truncated there.
+ */
+template <typename ELFT>
+std::vector<Instruction> LLVMDisassembler<ELFT>::getInstructions(const BasicBlock *block) {
+	std::vector<Instruction> result;
+	SectionRef text_section = getTextSection();
+	uint64_t base_address;
+#if defined(LLVM_35)
+	text_section.getAddress(base_address);
+#elif defined(LLVM_36)
+	base_address = text_section.getAddress();
+#endif
+
+	StringRef bytes;
+	text_section.getContents(bytes);
+	// Raw view of the section contents; used for copying instruction bytes
+	// independently of the LLVM version.
+	const uint8_t * const section_data = reinterpret_cast<const uint8_t *>(bytes.data());
+	const uint64_t section_size = bytes.size();
+#if defined(LLVM_35)
+	StringRefMemoryObject ref(bytes);
+#elif defined(LLVM_36)
+	ArrayRef<uint8_t> bytearray(section_data, bytes.size());
+#endif
+
+	const uint64_t block_start = block->getStartAddress();
+	const uint64_t block_end = block->getEndAddress();
+	// The offsets below are unsigned: reject ranges that would underflow
+	// instead of walking far outside the section buffer.
+	if (block_start < base_address || block_end < block_start) {
+		LOG4CXX_WARN(logger, "Cannot list instructions of block at " << std::hex << block_start
+		             << ": address range is not inside the text section");
+		return result;
+	}
+
+	uint64_t current_address = block_start - base_address;
+	// Never read past the end of the section contents.
+	const uint64_t end_position = std::min<uint64_t>(block_end - base_address, section_size);
+
+	while (current_address < end_position) {
+		uint64_t inst_size;
+		MCInst inst;
+		std::string buf;
+		llvm::raw_string_ostream s(buf);
+		const uint8_t * const inst_bytes = section_data + current_address;
+
+		if(llvm::MCDisassembler::Success ==
+#if defined(LLVM_35)
+			DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
+#elif defined(LLVM_36)
+			DisAsm->getInstruction(inst, inst_size,
+			                       bytearray.slice(current_address),
+			                       base_address + current_address,
+			                       nulls(), nulls())) {
+#endif
+			uint64_t jmptarget;
+			std::string reference;
+			IP->printInst(&inst, s, "");
+			// The branch target is evaluated relative to the section
+			// start, so the base address is added back for the reference.
+			if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
+				std::stringstream stream;
+				if (MIA->isCall(inst))
+					stream << "function:";
+				else
+					stream << "block:";
+
+				stream << std::hex << (base_address + jmptarget);
+				reference = stream.str();
+			}
+			result.push_back(Instruction(current_address + base_address, boost::algorithm::trim_copy(s.str()),
+			                             std::vector<uint8_t>(inst_bytes, inst_bytes + inst_size), reference));
+		} else {
+			LOG4CXX_WARN(logger, "Invalid byte at " << std::hex << current_address + base_address);
+			// Skip exactly one byte and try to resynchronise on the next.
+			inst_size = 1;
+			result.push_back(Instruction(current_address + base_address, "Invalid Instruction",
+			                             std::vector<uint8_t>(inst_bytes, inst_bytes + 1), ""));
+		}
+
+		current_address += inst_size;
+	}
+	return result;
+}
+
+
template <typename ELFT>
void LLVMDisassembler<ELFT>::printEachInstruction(uint64_t start, uint64_t end,
std::function<void (uint8_t*, size_t,
const std::string&,
const std::string&)> fun) {
- SectionRef text_section = sections[".text"];
+ SectionRef text_section = getTextSection();
uint64_t base_address;
+#if defined(LLVM_35)
text_section.getAddress(base_address);
+#elif defined(LLVM_36)
+ base_address = text_section.getAddress();
+#endif
+
uint64_t current_address = start - base_address;
StringRef bytes;
text_section.getContents(bytes);
+#if defined(LLVM_35)
StringRefMemoryObject ref(bytes);
+#elif defined(LLVM_36)
+ ArrayRef<uint8_t> bytearray(reinterpret_cast<const uint8_t *>(bytes.data()),
+ bytes.size());
+#endif
+
while (current_address < end - base_address) {
uint64_t inst_size;
llvm::raw_string_ostream s(buf);
if(llvm::MCDisassembler::Success ==
- DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
+#if defined(LLVM_35)
+ DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
+#elif defined(LLVM_36)
+ DisAsm->getInstruction(inst, inst_size,
+ bytearray.slice(current_address),
+ base_address + current_address,
+ nulls(), nulls())) {
+#endif
uint8_t bytes[inst_size+2];
+#if defined(LLVM_35)
ref.readBytes(current_address, inst_size, bytes);
+#elif defined(LLVM_36)
+ size_t bytesindex(0);
+ for (uint8_t byte : bytearray.slice(current_address, inst_size)) {
+ bytes[bytesindex++] = byte;
+ }
+#endif
uint64_t jmptarget;
std::string ref("");
current_address += inst_size;
}
}
+
+/**
+ * Returns the entry of `sections` registered under the name ".text"; the
+ * disassembly routines in this file treat it as the text section.
+ * NOTE(review): if `sections` is a std::map, operator[] inserts a
+ * default-constructed entry when the name is missing - confirm.
+ */
+template <typename ELFT>
+SectionRef LLVMDisassembler<ELFT>::getTextSection() {
+	SectionRef text_section = sections[".text"];
+	return text_section;
+}
+
+/**
+ * Mach-O specialization: returns the entry of `sections` registered under
+ * the name "__text" instead of ".text".
+ */
+template <>
+SectionRef LLVMDisassembler<MACHOT>::getTextSection() {
+	SectionRef text_section = sections["__text"];
+	return text_section;
+}