Initial commit
authorChristoph Egger <christoph@christoph-egger.org>
Fri, 8 Nov 2013 20:53:00 +0000 (21:53 +0100)
committerChristoph Egger <christoph@christoph-egger.org>
Fri, 8 Nov 2013 20:53:00 +0000 (21:53 +0100)
.gitignore [new file with mode: 0644]
Makefile [new file with mode: 0644]
src/Binary.cxx [new file with mode: 0644]
src/Binary.hxx [new file with mode: 0644]
src/include.hxx [new file with mode: 0644]
src/main.cxx [new file with mode: 0644]

diff --git a/.gitignore b/.gitignore
new file mode 100644 (file)
index 0000000..0ddae6b
--- /dev/null
@@ -0,0 +1,4 @@
+*~
+*.o
+*.make
+qtlldb
diff --git a/Makefile b/Makefile
new file mode 100644 (file)
index 0000000..3037d61
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,24 @@
+#!/usr/bin/make -f
+
+# Builds the qtlldb executable from every .cxx file found below src/,
+# compiling and linking with clang++ against LLVM 3.4.
+
+CXX = clang++
+LD  = clang++
+
+# The sources are C++, so ask llvm-config for the C++ flags rather than the
+# C ones.
+CXXFLAGS += `llvm-config-3.4 --cxxflags`
+LDFLAGS  ?= `llvm-config-3.4 --ldflags`
+LIBS     ?= `llvm-config-3.4 --libs`
+
+# Evaluate the find(1) call once instead of on every expansion.
+objects := $(patsubst %.cxx,%.o,$(shell find src -name \*.cxx))
+# One generated dependency fragment per object file (src/Foo.make).
+depends := $(objects:.o=.make)
+
+qtlldb: $(objects)
+	$(LD) -o qtlldb $(LDFLAGS) $^ $(LIBS)
+
+# -MT $@ makes the generated rule name the real object path (src/Foo.o);
+# without it -MM emits "Foo.o:", which never matches a target of this file.
+.cxx.o:
+	$(CXX) -MM -MT $@ -o $*.make $(CXXFLAGS) $<
+	$(CXX) -o $@ -c $(CXXFLAGS) $<
+
+.PHONY: clean
+clean:
+	rm -f $(objects) $(depends) qtlldb
+
+# The fragments do not exist before the first build; "-include" skips the
+# missing ones instead of aborting.
+-include $(depends)
+
+.SUFFIXES: .cxx .o
diff --git a/src/Binary.cxx b/src/Binary.cxx
new file mode 100644 (file)
index 0000000..696947d
--- /dev/null
@@ -0,0 +1,296 @@
+#include "Binary.hxx"
+
+#include <iostream>
+#include <string>
+#include <algorithm>
+
+using namespace llvm;
+using namespace llvm::object;
+
+static bool error(error_code ec) {
+  if (!ec) return false;
+
+  outs() << "error reading file: " << ec.message() << ".\n";
+  outs().flush();
+  return true;
+}
+
+/// Ordering predicate for relocations: \p a sorts before \p b when its offset
+/// is smaller. If either offset cannot be read, the failure is reported via
+/// error() and the pair is treated as "not less".
+static bool RelocAddressLess(RelocationRef a, RelocationRef b) {
+  uint64_t lhsOffset, rhsOffset;
+  // Short-circuit keeps the original order: b is only queried if a succeeded.
+  if (error(a.getOffset(lhsOffset)) || error(b.getOffset(rhsOffset)))
+    return false;
+  return lhsOffset < rhsOffset;
+}
+
+static void DumpBytes(StringRef bytes) {
+  static const char hex_rep[] = "0123456789abcdef";
+  // FIXME: The real way to do this is to figure out the longest instruction
+  //        and align to that size before printing. I'll fix this when I get
+  //        around to outputting relocations.
+  // 15 is the longest x86 instruction
+  // 3 is for the hex rep of a byte + a space.
+  // 1 is for the null terminator.
+  enum { OutputSize = (15 * 3) + 1 };
+  char output[OutputSize];
+
+  assert(bytes.size() <= 15
+    && "DumpBytes only supports instructions of up to 15 bytes");
+  memset(output, ' ', sizeof(output));
+  unsigned index = 0;
+  for (StringRef::iterator i = bytes.begin(),
+                           e = bytes.end(); i != e; ++i) {
+    output[index] = hex_rep[(*i & 0xF0) >> 4];
+    output[index + 1] = hex_rep[*i & 0xF];
+    index += 3;
+  }
+
+  output[sizeof(output) - 1] = 0;
+  outs() << output;
+}
+
+::Binary::Binary(const std::string& filename)
+    : triple("unkown-unknown-unknown")
+{
+    std::string error;
+
+    createBinary(filename, binary);
+    if (Archive *a = dyn_cast<Archive>(binary.get())) {
+        std::cerr << "Got an archive!" << std::endl;
+        return;
+    }
+
+    o = dyn_cast<ObjectFile>(binary.get());
+
+    triple.setArch(Triple::ArchType(o->getArch()));
+    std::string tripleName(triple.getTriple());
+
+    target = TargetRegistry::lookupTarget("", triple, error);
+    if (!target) {
+        std::cerr << error;
+        return;
+    }
+
+    MRI.reset(target->createMCRegInfo(tripleName));
+    if (!MRI) {
+        std::cerr << "error: no register info for target " << tripleName << "\n";
+        return;
+    }
+
+    // Set up disassembler.
+    AsmInfo.reset(target->createMCAsmInfo(*MRI, tripleName));
+    if (!AsmInfo) {
+        std::cerr << "error: no assembly info for target " << tripleName << "\n";
+        return;
+    }
+
+    STI.reset(target->createMCSubtargetInfo(tripleName, "", ""));
+    if (!STI) {
+        errs() << "error: no subtarget info for target " << tripleName << "\n";
+        return;
+    }
+
+    MII.reset(target->createMCInstrInfo());
+    if (!MII) {
+        std::cerr << "error: no instruction info for target " << tripleName << "\n";
+        return;
+    }
+
+    DisAsm.reset(target->createMCDisassembler(*STI));
+    if (!DisAsm) {
+        std::cerr << "error: no disassembler for target " << tripleName << "\n";
+        return;
+    }
+
+    MOFI.reset(new MCObjectFileInfo);
+    Ctx.reset(new MCContext(AsmInfo.get(), MRI.get(), MOFI.get()));
+    RelInfo.reset(
+        target->createMCRelocationInfo(tripleName, *Ctx.get()));
+    if (RelInfo) {
+        Symzer.reset(
+            MCObjectSymbolizer::createObjectSymbolizer(*Ctx.get(), RelInfo, o));
+        if (Symzer)
+            DisAsm->setSymbolizer(Symzer);
+    }
+
+    MIA.reset(target->createMCInstrAnalysis(MII.get()));
+
+    int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
+    IP.reset(target->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI));
+    if (!IP) {
+        std::cerr << "error: no instruction printer for target " << tripleName
+                  << '\n';
+        return;
+    }
+
+    OwningPtr<MCObjectDisassembler> OD(
+        new MCObjectDisassembler(*o, *DisAsm, *MIA));
+    Mod.reset(OD->buildModule(/* withCFG */ true));
+}
+
+void ::Binary::disassemble() {
+    for (MCModule::const_atom_iterator AI = Mod->atom_begin(),
+             AE = Mod->atom_end();
+         AI != AE; ++AI) {
+
+        if ((*AI)->getKind() != llvm::MCAtom::TextAtom)
+            continue;
+
+        outs() << "\n\nAtom " << (*AI)->getName() << ": \n";
+        if (const MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI)) {
+            for (MCTextAtom::const_iterator II = TA->begin(), IE = TA->end();
+                 II != IE;
+                 ++II) {
+//                II->Inst.dump();
+                IP->printInst(&II->Inst, outs(), "");
+                outs() << "\n";
+            }
+        }
+    }
+
+    outs() << "binary " << triple.getArchName() << "\n";
+}
+
+void ::Binary::disassemble_functions() {
+    error_code ec;
+    for (section_iterator i = o->begin_sections(),
+             e = o->end_sections();
+         i != e; i.increment(ec)) {
+        if (error(ec)) break;
+        bool text;
+        if (error(i->isText(text))) break;
+        if (!text) continue;
+
+        uint64_t SectionAddr;
+        if (error(i->getAddress(SectionAddr))) break;
+
+        // Make a list of all the symbols in this section.
+        std::vector<std::pair<uint64_t, StringRef> > Symbols;
+        for (symbol_iterator si = o->begin_symbols(),
+                 se = o->end_symbols();
+             si != se; si.increment(ec)) {
+            bool contains;
+            if (!error(i->containsSymbol(*si, contains)) && contains) {
+                uint64_t Address;
+                if (error(si->getAddress(Address))) break;
+                if (Address == UnknownAddressOrSize) continue;
+                Address -= SectionAddr;
+
+                StringRef Name;
+                if (error(si->getName(Name))) break;
+                Symbols.push_back(std::make_pair(Address, Name));
+            }
+        }
+
+        // Sort the symbols by address, just in case they didn't come in that way.
+        array_pod_sort(Symbols.begin(), Symbols.end());
+
+        // Make a list of all the relocations for this section.
+        std::vector<RelocationRef> Rels;
+        // if (InlineRelocs) {
+        //     for (relocation_iterator ri = i->begin_relocations(),
+        //              re = i->end_relocations();
+        //          ri != re; ri.increment(ec)) {
+        //         if (error(ec)) break;
+        //         Rels.push_back(*ri);
+        //     }
+        // }
+
+        // Sort relocations by address.
+        std::sort(Rels.begin(), Rels.end(), RelocAddressLess);
+
+        StringRef SegmentName = "";
+        // if (const MachOObjectFile *MachO =
+        //     dyn_cast<const MachOObjectFile>(o)) {
+        //     DataRefImpl DR = i->getRawDataRefImpl();
+        //     SegmentName = MachO->getSectionFinalSegmentName(DR);
+        // }
+        StringRef name;
+        if (error(i->getName(name))) break;
+        outs() << "Disassembly of section ";
+        if (!SegmentName.empty())
+            outs() << SegmentName << ",";
+        outs() << name << ':';
+
+        // If the section has no symbols just insert a dummy one and disassemble
+        // the whole section.
+        if (Symbols.empty())
+            Symbols.push_back(std::make_pair(0, name));
+
+
+        StringRef Bytes;
+        if (error(i->getContents(Bytes))) break;
+        StringRefMemoryObject memoryObject(Bytes);
+        uint64_t Size;
+        uint64_t Index;
+        uint64_t SectSize;
+        if (error(i->getSize(SectSize))) break;
+
+        std::vector<RelocationRef>::const_iterator rel_cur = Rels.begin();
+        std::vector<RelocationRef>::const_iterator rel_end = Rels.end();
+        // Disassemble symbol by symbol.
+        for (unsigned si = 0, se = Symbols.size(); si != se; ++si) {
+            uint64_t Start = Symbols[si].first;
+            uint64_t End;
+            // The end is either the size of the section or the beginning of the next
+            // symbol.
+            if (si == se - 1)
+                End = SectSize;
+            // Make sure this symbol takes up space.
+            else if (Symbols[si + 1].first != Start)
+                End = Symbols[si + 1].first - 1;
+            else
+                // This symbol has the same address as the next symbol. Skip it.
+                continue;
+
+            outs() << '\n' << Symbols[si].second << ":\n";
+
+#ifndef NDEBUG
+            raw_ostream &DebugOut = DebugFlag ? dbgs() : nulls();
+#else
+            raw_ostream &DebugOut = nulls();
+#endif
+
+            for (Index = Start; Index < End; Index += Size) {
+                MCInst Inst;
+
+                if (DisAsm->getInstruction(Inst, Size, memoryObject, Index,
+                                           DebugOut, nulls())) {
+                    outs() << format("%8" PRIx64 ":", SectionAddr + Index);
+                    outs() << "\t";
+                    DumpBytes(StringRef(Bytes.data() + Index, Size));
+
+                    IP->printInst(&Inst, outs(), "");
+                    outs() << "\n";
+                } else {
+                    errs() << "warning: invalid instruction encoding\n";
+                    if (Size == 0)
+                        Size = 1; // skip illegible bytes
+                }
+
+                // Print relocation for instruction.
+                while (rel_cur != rel_end) {
+                    bool hidden = false;
+                    uint64_t addr;
+                    SmallString<16> name;
+                    SmallString<32> val;
+
+                    // If this relocation is hidden, skip it.
+                    if (error(rel_cur->getHidden(hidden))) goto skip_print_rel;
+                    if (hidden) goto skip_print_rel;
+
+                    if (error(rel_cur->getOffset(addr))) goto skip_print_rel;
+                    // Stop when rel_cur's address is past the current instruction.
+                    if (addr >= Index + Size) break;
+                    if (error(rel_cur->getTypeName(name))) goto skip_print_rel;
+                    if (error(rel_cur->getValueString(val))) goto skip_print_rel;
+
+                    outs() << format("\t\t\t%8" PRIx64 ": ", SectionAddr + addr) << name
+                           << "\t" << val << "\n";
+
+                  skip_print_rel:
+                    ++rel_cur;
+                }
+            }
+        }
+    }
+}
diff --git a/src/Binary.hxx b/src/Binary.hxx
new file mode 100644 (file)
index 0000000..aa5460e
--- /dev/null
@@ -0,0 +1,32 @@
+#include "include.hxx"
+
+#include <string>
+
+using llvm::OwningPtr;
+
+class Binary {
+private:
+    llvm::Triple triple;
+    const llvm::Target * target;
+    llvm::object::ObjectFile * o;
+
+    OwningPtr<llvm::object::Binary> binary;
+    OwningPtr<const llvm::MCRegisterInfo> MRI;
+    OwningPtr<const llvm::MCAsmInfo> AsmInfo;
+    OwningPtr<llvm::MCModule> Mod;
+    OwningPtr<llvm::MCInstPrinter> IP;
+    OwningPtr<llvm::MCDisassembler> DisAsm;
+    OwningPtr<const llvm::MCObjectFileInfo> MOFI;
+    OwningPtr<llvm::MCContext> Ctx;
+    OwningPtr<const llvm::MCInstrAnalysis> MIA;
+    OwningPtr<const llvm::MCSubtargetInfo> STI;
+    OwningPtr<const llvm::MCInstrInfo> MII;
+    OwningPtr<llvm::MCRelocationInfo> RelInfo;
+    OwningPtr<llvm::MCSymbolizer> Symzer;
+public:
+    Binary(const std::string& filename);
+
+    void disassemble();
+
+    void disassemble_functions();
+};
diff --git a/src/include.hxx b/src/include.hxx
new file mode 100644 (file)
index 0000000..afb4787
--- /dev/null
@@ -0,0 +1,30 @@
+#ifndef INCLUDE__include_hxx
+#define INCLUDE__include_hxx
+
+#include "llvm/ADT/OwningPtr.h"
+#include <llvm/ADT/Triple.h>
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/Archive.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAtom.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCFunction.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrAnalysis.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCModule.h"
+#include "llvm/MC/MCObjectDisassembler.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCObjectSymbolizer.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCRelocationInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/StringRefMemoryObject.h"
+
+#endif
diff --git a/src/main.cxx b/src/main.cxx
new file mode 100644 (file)
index 0000000..ec76891
--- /dev/null
@@ -0,0 +1,23 @@
+#include "include.hxx"
+
+#include <iostream>
+#include <climits>
+
+#include "Binary.hxx"
+
+using std::cout;
+using std::cin;
+using std::cerr;
+
+/// Entry point: disassembles the binary named by the first command-line
+/// argument, first atom by atom and then symbol by symbol.
+///
+/// \returns 1 if no input file was given, 0 otherwise.
+int main(int argc, char** argv)
+{
+    // argv[1] is a null pointer when no argument is given; constructing a
+    // std::string from it is undefined behaviour.
+    if (argc < 2) {
+        std::cerr << "usage: " << (argc > 0 ? argv[0] : "qtlldb")
+                  << " <binary>\n";
+        return 1;
+    }
+
+    llvm::InitializeAllTargetInfos();
+    llvm::InitializeAllTargetMCs();
+    llvm::InitializeAllAsmParsers();
+    llvm::InitializeAllDisassemblers();
+
+    Binary bin(argv[1]);
+    bin.disassemble();
+
+    bin.disassemble_functions();
+
+    return 0;
+}