From: Christoph Egger Date: Tue, 20 May 2014 12:23:33 +0000 (+0200) Subject: Factor out llvm disassembler X-Git-Tag: v0.1~215 X-Git-Url: https://git.siccegge.de//index.cgi?p=frida%2Ffrida.git;a=commitdiff_plain;h=0f91922e40640e00f1208aee5d8c968a698c5d31 Factor out llvm disassembler --- diff --git a/CMakeLists.txt b/CMakeLists.txt index fe8e1a3..2d9ccbf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,19 +28,20 @@ separate_arguments(LLVM_LIBS) set(CMAKE_CXX_COMPILER "clang++") SET(frida_SOURCES - src/Binary.cxx src/main.cxx src/gui/Mainwindow.cxx src/gui/widgets/BasicBlockWidget.cxx - src/disassembler/InstructionReader.cxx - src/disassembler/Target.cxx - src/disassembler/Disassembler.cxx) + src/disassembler/Disassembler.cxx + src/disassembler/llvm/LLVMDisassembler.cxx + ) SET(frida_HEADERS - src/Binary.hxx src/include_llvm.hxx - src/Section.hxx + src/gui/qt.hxx src/gui/Mainwindow.hxx - src/gui/widgets/BasicBlockWidget.hxx) + src/gui/widgets/BasicBlockWidget.hxx + src/disassembler/llvm/LLVMDisassembler.hxx + src/disassembler/Disassembler.hxx + ) ADD_EXECUTABLE(frida ${frida_SOURCES} ${frida_HEADERS_MOC}) #ADD_LIBRARY(llvm SHARED ) diff --git a/src/disassembler.old/BasicBlock.hxx b/src/disassembler.old/BasicBlock.hxx new file mode 100644 index 0000000..22a31e0 --- /dev/null +++ b/src/disassembler.old/BasicBlock.hxx @@ -0,0 +1,8 @@ + + +class BasicBlock { +public: + +private: + +}; diff --git a/src/disassembler.old/Disassembler.cxx b/src/disassembler.old/Disassembler.cxx new file mode 100644 index 0000000..7422df5 --- /dev/null +++ b/src/disassembler.old/Disassembler.cxx @@ -0,0 +1,12 @@ +#include "Disassembler.hxx" + +using namespace llvm; +using namespace llvm::object; + +Disassembler::Disassembler(const std::string& filename) + : _binary(createBinary(filename).get()) + , _target(_binary) +{ + +} + diff --git a/src/disassembler.old/Disassembler.hxx b/src/disassembler.old/Disassembler.hxx new file mode 100644 index 0000000..3effa25 --- /dev/null +++ b/src/disassembler.old/Disassembler.hxx @@ -0,0 +1,15 @@ +#ifndef INCLUDE__Disassembler_hxx +#define INCLUDE__Disassembler_hxx + +#include "Target.hxx" +#include "include_llvm.hxx" + +class Disassembler { +public: + Disassembler(const std::string& filename); +private: + llvm::object::Binary* _binary; + Target _target; +}; + +#endif diff --git a/src/disassembler.old/Instruction.hxx b/src/disassembler.old/Instruction.hxx new file mode 100644 index 0000000..afb5193 --- /dev/null +++ b/src/disassembler.old/Instruction.hxx @@ -0,0 +1,6 @@ + + +class Instruction { +public: +private: +}; diff --git a/src/disassembler.old/InstructionReader.cxx b/src/disassembler.old/InstructionReader.cxx new file mode 100644 index 0000000..8f8df91 --- /dev/null +++ b/src/disassembler.old/InstructionReader.cxx @@ -0,0 +1,27 @@ +#include "InstructionReader.hxx" + +InstructionReader::InstructionReader(const Target& target) + : _logger(log4cxx::Logger::getLogger("disassembler.Target")) + , _target(target) +{ + DisAsm.reset(_target.getTarget().createMCDisassembler(_target.getSubTargetInfo())); + if (!DisAsm) { + LOG4CXX_ERROR(_logger, "error: no disassembler for target " << _target.getTripleName()) + return; + } +} + +void InstructionReader::readInstruction(std::string& data, size_t& offset, Instruction& inst) { + llvm::MCInst instr; + llvm::StringRefMemoryObject memoryObject(data); + uint64_t size; + + if (DisAsm->getInstruction(instr, size, memoryObject, offset, + llvm::nulls(), llvm::nulls())) { + + } else { + LOG4CXX_WARN(_logger, "warning: invalid instruction encoding"); + if (size == 0) + size = 1; // skip illegible bytes + } +} diff --git a/src/disassembler.old/InstructionReader.hxx b/src/disassembler.old/InstructionReader.hxx new file mode 100644 index 0000000..092e73c --- /dev/null +++ b/src/disassembler.old/InstructionReader.hxx @@ -0,0 +1,18 @@ +#include "include_llvm.hxx" +#include "Instruction.hxx" +#include "Target.hxx" + +#include + +#include + + +class InstructionReader { +public: + InstructionReader(const Target& target); + void readInstruction(std::string& data, size_t& offset, Instruction& inst); +private: + log4cxx::LoggerPtr _logger; + const Target& _target; + llvm::OwningPtr DisAsm; +}; diff --git a/src/disassembler.old/Target.cxx b/src/disassembler.old/Target.cxx new file mode 100644 index 0000000..79d590a --- /dev/null +++ b/src/disassembler.old/Target.cxx @@ -0,0 +1,28 @@ +#include "Target.hxx" + +#include + +Target::Target(llvm::object::Binary* binary) + : _logger(log4cxx::Logger::getLogger("disassembler.Target")) + , triple("unknown-unknown-unknown") +{ + std::string error; + llvm::object::ObjectFile * o = llvm::dyn_cast(binary); + + triple.setArch(llvm::Triple::ArchType(o->getArch())); + std::string tripleName(triple.getTriple()); + LOG4CXX_INFO(_logger, "Detected triple " << tripleName); + + target = llvm::TargetRegistry::lookupTarget("", triple, error); + if (!target) { + LOG4CXX_ERROR(_logger, "Couldn't create Target: " << error); + return; + } + LOG4CXX_INFO(_logger, "Target: " << target->getName()); + + STI.reset(target->createMCSubtargetInfo(tripleName, "", "")); + if (!STI) { + LOG4CXX_ERROR(_logger, "No subtarget for target " << tripleName); + return; + } +} diff --git a/src/disassembler.old/Target.hxx b/src/disassembler.old/Target.hxx new file mode 100644 index 0000000..f72af7c --- /dev/null +++ b/src/disassembler.old/Target.hxx @@ -0,0 +1,31 @@ +#ifndef INCLUDE__Target_hxx +#define INCLUDE__Target_hxx + +#include + +#include "include_llvm.hxx" + +class Target { +public: + Target(llvm::object::Binary* binary); + + const llvm::Target& getTarget() const { + return *target; + } + + const llvm::MCSubtargetInfo& getSubTargetInfo() const { + return *STI; + } + + std::string getTripleName() const { + return triple.getTriple(); + } +private: + log4cxx::LoggerPtr _logger; + llvm::Triple triple; + + const llvm::Target * target; + llvm::OwningPtr STI; +}; + +#endif diff --git a/src/disassembler/BasicBlock.hxx b/src/disassembler/BasicBlock.hxx deleted file mode 100644 index 22a31e0..0000000 --- a/src/disassembler/BasicBlock.hxx +++ /dev/null @@ -1,8 +0,0 @@ - - -class BasicBlock { -public: - -private: - -}; diff --git a/src/disassembler/Disassembler.cxx b/src/disassembler/Disassembler.cxx index 7422df5..968ebe2 100644 --- a/src/disassembler/Disassembler.cxx +++ b/src/disassembler/Disassembler.cxx @@ -1,12 +1,3 @@ -#include "Disassembler.hxx" +#include "disassembler/Disassembler.hxx" -using namespace llvm; -using namespace llvm::object; - -Disassembler::Disassembler(const std::string& filename) - : _binary(createBinary(filename).get()) - , _target(_binary) -{ - -} diff --git a/src/disassembler/Disassembler.hxx b/src/disassembler/Disassembler.hxx index 3effa25..9986ac8 100644 --- a/src/disassembler/Disassembler.hxx +++ b/src/disassembler/Disassembler.hxx @@ -1,15 +1,28 @@ #ifndef INCLUDE__Disassembler_hxx #define INCLUDE__Disassembler_hxx -#include "Target.hxx" -#include "include_llvm.hxx" +#include +#include + +class BasicBlock {}; class Disassembler { public: - Disassembler(const std::string& filename); -private: - llvm::object::Binary* _binary; - Target _target; + Disassembler(const std::string& filename) {} + virtual ~Disassembler() {} + + void getSymbols(); + uint64_t entryAddress(); + + virtual void forEachInstruction(const std::string& name, std::function callback) = 0; + + // http://llvm.org/docs/doxygen/html/MCObjectDisassembler_8cpp_source.html +197 + BasicBlock * generateControlFlowGraph(const std::string& name); + BasicBlock * generateControlFlowGraph(uint64_t address); + +protected: + virtual bool isFunctionCall(uint64_t address) = 0; + virtual bool isJump(uint64_t address) = 0; }; #endif diff --git a/src/disassembler/Instruction.hxx b/src/disassembler/Instruction.hxx deleted file mode 100644 index afb5193..0000000 --- a/src/disassembler/Instruction.hxx +++ /dev/null @@ -1,6 +0,0 @@ - - -class Instruction { -public: -private: -}; diff --git a/src/disassembler/InstructionReader.cxx b/src/disassembler/InstructionReader.cxx deleted file mode 100644 index 8f8df91..0000000 --- a/src/disassembler/InstructionReader.cxx +++ /dev/null @@ -1,27 +0,0 @@ -#include "InstructionReader.hxx" - -InstructionReader::InstructionReader(const Target& target) - : _logger(log4cxx::Logger::getLogger("disassembler.Target")) - , _target(target) -{ - DisAsm.reset(_target.getTarget().createMCDisassembler(_target.getSubTargetInfo())); - if (!DisAsm) { - LOG4CXX_ERROR(_logger, "error: no disassembler for target " << _target.getTripleName()) - return; - } -} - -void InstructionReader::readInstruction(std::string& data, size_t& offset, Instruction& inst) { - llvm::MCInst instr; - llvm::StringRefMemoryObject memoryObject(data); - uint64_t size; - - if (DisAsm->getInstruction(instr, size, memoryObject, offset, - llvm::nulls(), llvm::nulls())) { - - } else { - LOG4CXX_WARN(_logger, "warning: invalid instruction encoding"); - if (size == 0) - size = 1; // skip illegible bytes - } -} diff --git a/src/disassembler/InstructionReader.hxx b/src/disassembler/InstructionReader.hxx deleted file mode 100644 index 092e73c..0000000 --- a/src/disassembler/InstructionReader.hxx +++ /dev/null @@ -1,18 +0,0 @@ -#include "include_llvm.hxx" -#include "Instruction.hxx" -#include "Target.hxx" - -#include - -#include - - -class InstructionReader { -public: - InstructionReader(const Target& target); - void readInstruction(std::string& data, size_t& offset, Instruction& inst); -private: - log4cxx::LoggerPtr _logger; - const Target& _target; - llvm::OwningPtr DisAsm; -}; diff --git a/src/disassembler/Target.cxx b/src/disassembler/Target.cxx deleted file mode 100644 index 79d590a..0000000 --- a/src/disassembler/Target.cxx +++ /dev/null @@ -1,28 +0,0 @@ -#include "Target.hxx" - -#include - -Target::Target(llvm::object::Binary* binary) - : _logger(log4cxx::Logger::getLogger("disassembler.Target")) - , triple("unknown-unknown-unknown") -{ - std::string error; - llvm::object::ObjectFile * o = llvm::dyn_cast(binary); - - triple.setArch(llvm::Triple::ArchType(o->getArch())); - std::string tripleName(triple.getTriple()); - LOG4CXX_INFO(_logger, "Detected triple " << tripleName); - - target = llvm::TargetRegistry::lookupTarget("", triple, error); - if (!target) { - LOG4CXX_ERROR(_logger, "Couldn't create Target: " << error); - return; - } - LOG4CXX_INFO(_logger, "Target: " << target->getName()); - - STI.reset(target->createMCSubtargetInfo(tripleName, "", "")); - if (!STI) { - LOG4CXX_ERROR(_logger, "No subtarget for target " << tripleName); - return; - } -} diff --git a/src/disassembler/Target.hxx b/src/disassembler/Target.hxx deleted file mode 100644 index f72af7c..0000000 --- a/src/disassembler/Target.hxx +++ /dev/null @@ -1,31 +0,0 @@ -#ifndef INCLUDE__Target_hxx -#define INCLUDE__Target_hxx - -#include - -#include "include_llvm.hxx" - -class Target { -public: - Target(llvm::object::Binary* binary); - - const llvm::Target& getTarget() const { - return *target; - } - - const llvm::MCSubtargetInfo& getSubTargetInfo() const { - return *STI; - } - - std::string getTripleName() const { - return triple.getTriple(); - } -private: - log4cxx::LoggerPtr _logger; - llvm::Triple triple; - - const llvm::Target * target; - llvm::OwningPtr STI; -}; - -#endif diff --git a/src/disassembler/llvm/LLVMDisassembler.cxx b/src/disassembler/llvm/LLVMDisassembler.cxx new file mode 100644 index 0000000..881c662 --- /dev/null +++ b/src/disassembler/llvm/LLVMDisassembler.cxx @@ -0,0 +1,100 @@ +#include "disassembler/llvm/LLVMDisassembler.hxx" + +using namespace llvm; +using namespace llvm::object; + +/* + * TODO: fallback code falls die Datei kein ELF/PE/COFF/MacO/.. binary + * ist sondern z.B. einfach nur Instruktionen oder ein Bootsektor oder + * foo + */ + +LLVMDisassembler::LLVMDisassembler(const std::string& filename) + : Disassembler(filename) + , logger(log4cxx::Logger::getLogger("LLVMDisassembler")) + , triple("unknown-unknown-unknown") +{ + LOG4CXX_DEBUG(logger, "Handling file" << filename); + auto result = createBinary(filename); + + error_code ec; + if ((ec = result.getError())) { + LOG4CXX_ERROR(logger, "Failed to load Binary" << ec.message()); + binary = NULL; + return; + } + + binary.reset(result.get()); + + o = dyn_cast(binary.get()); + + triple.setArch(Triple::ArchType(o->getArch())); + std::string tripleName(triple.getTriple()); + + LOG4CXX_INFO(logger, "Architecture " << tripleName); + + + std::string es; + target = TargetRegistry::lookupTarget("", triple, es); + if (!target) { + LOG4CXX_ERROR(logger, es); + return; + } + + LOG4CXX_INFO(logger, "Target " << target->getName()); + + MRI.reset(target->createMCRegInfo(tripleName)); + if (!MRI) { + LOG4CXX_ERROR(logger, "no register info for target " << tripleName); + return; + } + + // Set up disassembler. + AsmInfo.reset(target->createMCAsmInfo(*MRI, tripleName)); + if (!AsmInfo) { + LOG4CXX_ERROR(logger, "no assembly info for target " << tripleName); + return; + } + + STI.reset(target->createMCSubtargetInfo(tripleName, "", "")); + if (!STI) { + LOG4CXX_ERROR(logger, "no subtarget info for target " << tripleName); + return; + } + + MII.reset(target->createMCInstrInfo()); + if (!MII) { + LOG4CXX_ERROR(logger, "no instruction info for target " << tripleName); + return; + } + + DisAsm.reset(target->createMCDisassembler(*STI)); + if (!DisAsm) { + LOG4CXX_ERROR(logger, "no disassembler for target " << tripleName); + return; + } + + MOFI.reset(new MCObjectFileInfo); + Ctx.reset(new MCContext(AsmInfo.get(), MRI.get(), MOFI.get())); + RelInfo.reset( + target->createMCRelocationInfo(tripleName, *Ctx.get())); + if (RelInfo) { + Symzer.reset( + MCObjectSymbolizer::createObjectSymbolizer(*Ctx.get(), RelInfo, o)); + if (Symzer) + DisAsm->setSymbolizer(Symzer); + } + + MIA.reset(target->createMCInstrAnalysis(MII.get())); + + int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); + IP.reset(target->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI)); + if (!IP) { + LOG4CXX_ERROR(logger, "no instruction printer for target " << tripleName); + return; + } + + OwningPtr OD( + new MCObjectDisassembler(*o, *DisAsm, *MIA)); + Mod.reset(OD->buildModule(false)); +} diff --git a/src/disassembler/llvm/LLVMDisassembler.hxx b/src/disassembler/llvm/LLVMDisassembler.hxx new file mode 100644 index 0000000..ff81be5 --- /dev/null +++ b/src/disassembler/llvm/LLVMDisassembler.hxx @@ -0,0 +1,57 @@ +#ifndef INCLUDE__LLVMDisassembler_hxx +#define INCLUDE__LLVMDisassembler_hxx + +#include +#include + +#include "include_llvm.hxx" + +#include "disassembler/Disassembler.hxx" + + +class LLVMDisassembler + : public Disassembler { +public: + LLVMDisassembler(const std::string& filename); + virtual ~LLVMDisassembler() {}; + + void getSymbols(); + uint64_t entryAddress(); + + void forEachInstruction(const std::string& name, std::function callback) {} + + BasicBlock * generateControlFlowGraph(const std::string& name); + BasicBlock * generateControlFlowGraph(uint64_t address); + +protected: + bool isFunctionCall(uint64_t address) {return false;} + bool isJump(uint64_t address) {return false;} + +private: + log4cxx::LoggerPtr logger; + + llvm::Triple triple; + std::shared_ptr binary; + + + const llvm::Target * target; + llvm::object::ObjectFile * o; + + std::unique_ptr MRI; + std::unique_ptr AsmInfo; + std::unique_ptr Mod; + std::unique_ptr IP; + std::unique_ptr DisAsm; + std::unique_ptr MOFI; + std::unique_ptr Ctx; + std::unique_ptr MIA; + std::unique_ptr STI; + std::unique_ptr MII; + llvm::OwningPtr RelInfo; + llvm::OwningPtr Symzer; + + std::map sections; + std::map symbols; +}; + +#endif diff --git a/src/gui/Mainwindow.cxx b/src/gui/Mainwindow.cxx index be86cac..061734a 100644 --- a/src/gui/Mainwindow.cxx +++ b/src/gui/Mainwindow.cxx @@ -1,6 +1,7 @@ -#include "Mainwindow.hxx" + #include "Mainwindow.hxx" #include "widgets/BasicBlockWidget.hxx" #include "qt.hxx" +#include "disassembler/llvm/LLVMDisassembler.hxx" #include #include @@ -54,14 +55,15 @@ void Mainwindow::open() { tr("Binaries (*)")); if (fileName != "") { - curBin = new qtlldb::Binary(fileName.toStdString()); - - std::vector symbols = curBin->getSymbols(); - if (0 == symbols.size()) - populateSymbolInformation(".text"); - for (auto it = symbols.begin(); it != symbols.end(); ++it) { - populateSymbolInformation(*it); - } + disassembler.reset(new LLVMDisassembler(fileName.toStdString())); + // curBin = new Binary(fileName.toStdString()); + + // std::vector symbols = curBin->getSymbols(); + // if (0 == symbols.size()) + // populateSymbolInformation(".text"); + // for (auto it = symbols.begin(); it != symbols.end(); ++it) { + // populateSymbolInformation(*it); + // } } } @@ -72,18 +74,18 @@ void Mainwindow::populateSymbolInformation(const std::string& sym) { QTableWidget * t = new QTableWidget(); t->setColumnCount(3); t->horizontalHeader()->setSectionResizeMode(QHeaderView::ResizeToContents); - curBin->for_each_instruction(sym, [&t](long add, std::string bytes, std::string mnemonic) { - int row = t->rowCount(); - std::stringstream s; - t->setRowCount(t->rowCount() + 1); - s << std::hex << add; - t->setItem(row,0,new QTableWidgetItem(s.str().c_str())); - s.str(""); - s << std::hex; - for_each(bytes.begin(), bytes.end(), [&s](char c){s << (unsigned int)((unsigned char)c) << ' ';}); - t->setItem(row,1,new QTableWidgetItem(s.str().c_str())); - t->setItem(row,2,new QTableWidgetItem(mnemonic.c_str() + 1)); - }); + // curBin->for_each_instruction(sym, [&t](long add, std::string bytes, std::string mnemonic) { + // int row = t->rowCount(); + // std::stringstream s; + // t->setRowCount(t->rowCount() + 1); + // s << std::hex << add; + // t->setItem(row,0,new QTableWidgetItem(s.str().c_str())); + // s.str(""); + // s << std::hex; + // for_each(bytes.begin(), bytes.end(), [&s](char c){s << (unsigned int)((unsigned char)c) << ' ';}); + // t->setItem(row,1,new QTableWidgetItem(s.str().c_str())); + // t->setItem(row,2,new QTableWidgetItem(mnemonic.c_str() + 1)); + // }); w->addTab(t, "Listing"); // CFG diff --git a/src/gui/Mainwindow.hxx b/src/gui/Mainwindow.hxx index 34abc3a..adfe71e 100644 --- a/src/gui/Mainwindow.hxx +++ b/src/gui/Mainwindow.hxx @@ -1,10 +1,11 @@ +#include #include #include #include #include #include -#include "Binary.hxx" +#include "disassembler/Disassembler.hxx" class Mainwindow : public QMainWindow { Q_OBJECT @@ -25,7 +26,7 @@ private: QAction *exitAction; QAction *openAction; - qtlldb::Binary * curBin; + std::shared_ptr disassembler; private slots: void quit(); void open();