--- /dev/null
+#include "Binary.hxx"
+
+#include <iostream>
+#include <string>
+#include <algorithm>
+
+using namespace llvm;
+using namespace llvm::object;
+
+// Reports a failed error_code.
+//
+// Returns false without printing anything when `ec` signals success.
+// Otherwise writes "error reading file: <message>." to outs(), flushes the
+// stream and returns true so callers can bail out with `if (error(...))`.
+static bool error(error_code ec) {
+  if (ec) {
+    outs() << "error reading file: " << ec.message() << ".\n";
+    outs().flush();
+    return true;
+  }
+
+  return false;
+}
+
+// Strict-weak-ordering predicate used to sort relocations by offset.
+//
+// If either offset cannot be read, the failure is reported through error()
+// and the comparison yields false. The offset of `b` is only queried when
+// the offset of `a` was read successfully.
+static bool RelocAddressLess(RelocationRef a, RelocationRef b) {
+  uint64_t lhs_offset;
+  uint64_t rhs_offset;
+  return !error(a.getOffset(lhs_offset)) &&
+         !error(b.getOffset(rhs_offset)) &&
+         lhs_offset < rhs_offset;
+}
+
+// Prints `bytes` to outs() as lowercase hex, two digits plus one space per
+// byte.
+//
+// The output is padded with spaces to the width of a 15-byte instruction
+// (15 is the longest x86 instruction, 3 characters per byte) so that the text
+// printed afterwards lines up in a column. Inputs longer than 15 bytes are
+// printed in full without padding; the previous fixed-size buffer relied on
+// an assert for that case and would overflow when asserts are compiled out.
+//
+// FIXME: The real way to do this is to figure out the longest instruction
+//        and align to that size before printing.
+static void DumpBytes(StringRef bytes) {
+  static const char hex_rep[] = "0123456789abcdef";
+  static const size_t CharsPerByte = 3;  // two hex digits + a space
+  static const size_t PaddedWidth = 15 * CharsPerByte;
+
+  std::string output;
+  output.reserve(std::max(bytes.size() * CharsPerByte, PaddedWidth));
+
+  for (StringRef::iterator i = bytes.begin(), e = bytes.end(); i != e; ++i) {
+    // Go through unsigned char so that bytes >= 0x80 index hex_rep correctly
+    // regardless of the signedness of plain char.
+    const unsigned char byte = static_cast<unsigned char>(*i);
+    output += hex_rep[byte >> 4];
+    output += hex_rep[byte & 0xF];
+    output += ' ';
+  }
+
+  // Pad short encodings out to the fixed column width.
+  if (output.size() < PaddedWidth)
+    output.resize(PaddedWidth, ' ');
+
+  outs() << output;
+}
+
+// Opens `filename` and sets up everything needed to disassemble it: target
+// lookup, register/asm/subtarget/instruction info, the disassembler, an
+// optional symbolizer, the instruction printer and finally the MCModule
+// (with CFG) stored in `Mod`.
+//
+// Every failure is reported on stderr and ends construction early, leaving
+// the members that were not reached yet unset. Archives are recognised but
+// not processed.
+::Binary::Binary(const std::string& filename)
+  : triple("unknown-unknown-unknown")
+{
+  std::string error;
+
+  // Give the raw pointers a defined value so that an early return below
+  // leaves the object in a detectable "not loaded" state.
+  o = 0;
+  target = 0;
+
+  // Note: the local string `error` shadows the file-level error() helper,
+  // so the error_code is reported by hand here.
+  error_code ec = createBinary(filename, binary);
+  if (ec || !binary) {
+    std::cerr << "error: unable to read '" << filename << "'";
+    if (ec)
+      std::cerr << ": " << ec.message();
+    std::cerr << "\n";
+    return;
+  }
+
+  if (isa<Archive>(binary.get())) {
+    std::cerr << "Got an archive!" << std::endl;
+    return;
+  }
+
+  o = dyn_cast<ObjectFile>(binary.get());
+  if (!o) {
+    std::cerr << "error: '" << filename << "' is not an object file\n";
+    return;
+  }
+
+  triple.setArch(Triple::ArchType(o->getArch()));
+  std::string tripleName(triple.getTriple());
+
+  target = TargetRegistry::lookupTarget("", triple, error);
+  if (!target) {
+    std::cerr << error << "\n";
+    return;
+  }
+
+  MRI.reset(target->createMCRegInfo(tripleName));
+  if (!MRI) {
+    std::cerr << "error: no register info for target " << tripleName << "\n";
+    return;
+  }
+
+  // Set up disassembler.
+  AsmInfo.reset(target->createMCAsmInfo(*MRI, tripleName));
+  if (!AsmInfo) {
+    std::cerr << "error: no assembly info for target " << tripleName << "\n";
+    return;
+  }
+
+  STI.reset(target->createMCSubtargetInfo(tripleName, "", ""));
+  if (!STI) {
+    errs() << "error: no subtarget info for target " << tripleName << "\n";
+    return;
+  }
+
+  MII.reset(target->createMCInstrInfo());
+  if (!MII) {
+    std::cerr << "error: no instruction info for target " << tripleName << "\n";
+    return;
+  }
+
+  DisAsm.reset(target->createMCDisassembler(*STI));
+  if (!DisAsm) {
+    std::cerr << "error: no disassembler for target " << tripleName << "\n";
+    return;
+  }
+
+  MOFI.reset(new MCObjectFileInfo);
+  Ctx.reset(new MCContext(AsmInfo.get(), MRI.get(), MOFI.get()));
+
+  // The symbolizer is optional: it is only attached when the target provides
+  // relocation info and a symbolizer could be created for this object.
+  RelInfo.reset(
+    target->createMCRelocationInfo(tripleName, *Ctx.get()));
+  if (RelInfo) {
+    Symzer.reset(
+      MCObjectSymbolizer::createObjectSymbolizer(*Ctx.get(), RelInfo, o));
+    if (Symzer)
+      DisAsm->setSymbolizer(Symzer);
+  }
+
+  MIA.reset(target->createMCInstrAnalysis(MII.get()));
+
+  int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
+  IP.reset(target->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI));
+  if (!IP) {
+    std::cerr << "error: no instruction printer for target " << tripleName
+              << '\n';
+    return;
+  }
+
+  // The module builder dereferences the instruction analysis, so it must
+  // exist. This is checked only now so that the printer set up above stays
+  // available even when no module can be built.
+  if (!MIA) {
+    std::cerr << "error: no instruction analysis for target " << tripleName
+              << "\n";
+    return;
+  }
+
+  OwningPtr<MCObjectDisassembler> OD(
+    new MCObjectDisassembler(*o, *DisAsm, *MIA));
+  Mod.reset(OD->buildModule(/* withCFG */ true));
+}
+
+// Prints every instruction of every text atom in the module built by the
+// constructor, one "Atom <name>:" section per atom, followed by a final
+// "binary <arch>" line.
+//
+// The constructor can return early and leave `Mod` (and `IP`) unset; in that
+// case nothing is disassembled, a diagnostic goes to errs() and only the
+// trailing "binary <arch>" line is printed.
+void ::Binary::disassemble() {
+  if (!Mod || !IP) {
+    errs() << "error: no disassembled module available\n";
+  } else {
+    for (MCModule::const_atom_iterator AI = Mod->atom_begin(),
+                                       AE = Mod->atom_end();
+         AI != AE; ++AI) {
+
+      // Only text atoms carry instructions.
+      if ((*AI)->getKind() != llvm::MCAtom::TextAtom)
+        continue;
+
+      outs() << "\n\nAtom " << (*AI)->getName() << ": \n";
+      if (const MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI)) {
+        for (MCTextAtom::const_iterator II = TA->begin(), IE = TA->end();
+             II != IE;
+             ++II) {
+//        II->Inst.dump();
+          IP->printInst(&II->Inst, outs(), "");
+          outs() << "\n";
+        }
+      }
+    }
+  }
+
+  outs() << "binary " << triple.getArchName() << "\n";
+}
+
+// Linearly disassembles every text section of the object file, symbol by
+// symbol, printing for each instruction its address, raw bytes and textual
+// form to outs().
+//
+// For each text section:
+//   1. collect the symbols contained in the section (as section-relative
+//      offsets) and sort them by offset;
+//   2. disassemble the half-open range [symbol, next symbol) for each symbol,
+//      with the last symbol extending to the end of the section;
+//   3. after each instruction, print any relocation that falls inside it
+//      (relocation collection is currently disabled, so `Rels` stays empty).
+//
+// Errors from the object-file API are reported through error() and stop the
+// loop they occur in. Undecodable bytes produce a warning on errs() and are
+// skipped.
+void ::Binary::disassemble_functions() {
+  // The constructor returns early on any setup failure. Both members below
+  // are only set once the object file has been opened successfully, so they
+  // double as a "fully initialized" check.
+  if (!DisAsm || !IP) {
+    errs() << "error: disassembler is not initialized\n";
+    return;
+  }
+
+  error_code ec;
+  for (section_iterator i = o->begin_sections(),
+                        e = o->end_sections();
+       i != e; i.increment(ec)) {
+    if (error(ec)) break;
+    bool text;
+    if (error(i->isText(text))) break;
+    if (!text) continue;
+
+    uint64_t SectionAddr;
+    if (error(i->getAddress(SectionAddr))) break;
+
+    // Make a list of all the symbols in this section.
+    std::vector<std::pair<uint64_t, StringRef> > Symbols;
+    for (symbol_iterator si = o->begin_symbols(),
+                         se = o->end_symbols();
+         si != se; si.increment(ec)) {
+      // Stop if advancing the symbol iterator failed.
+      if (error(ec)) break;
+      bool contains;
+      if (!error(i->containsSymbol(*si, contains)) && contains) {
+        uint64_t Address;
+        if (error(si->getAddress(Address))) break;
+        if (Address == UnknownAddressOrSize) continue;
+        // Store offsets relative to the start of the section.
+        Address -= SectionAddr;
+
+        StringRef Name;
+        if (error(si->getName(Name))) break;
+        Symbols.push_back(std::make_pair(Address, Name));
+      }
+    }
+
+    // Sort the symbols by address, just in case they didn't come in that way.
+    array_pod_sort(Symbols.begin(), Symbols.end());
+
+    // Make a list of all the relocations for this section.
+    std::vector<RelocationRef> Rels;
+    // if (InlineRelocs) {
+    //   for (relocation_iterator ri = i->begin_relocations(),
+    //                            re = i->end_relocations();
+    //        ri != re; ri.increment(ec)) {
+    //     if (error(ec)) break;
+    //     Rels.push_back(*ri);
+    //   }
+    // }
+
+    // Sort relocations by address.
+    std::sort(Rels.begin(), Rels.end(), RelocAddressLess);
+
+    StringRef SegmentName = "";
+    // if (const MachOObjectFile *MachO =
+    //     dyn_cast<const MachOObjectFile>(o)) {
+    //   DataRefImpl DR = i->getRawDataRefImpl();
+    //   SegmentName = MachO->getSectionFinalSegmentName(DR);
+    // }
+    StringRef name;
+    if (error(i->getName(name))) break;
+    outs() << "Disassembly of section ";
+    if (!SegmentName.empty())
+      outs() << SegmentName << ",";
+    outs() << name << ':';
+
+    // If the section has no symbols just insert a dummy one and disassemble
+    // the whole section.
+    if (Symbols.empty())
+      Symbols.push_back(std::make_pair(0, name));
+
+
+    StringRef Bytes;
+    if (error(i->getContents(Bytes))) break;
+    StringRefMemoryObject memoryObject(Bytes);
+    uint64_t Size = 0;
+    uint64_t Index;
+    uint64_t SectSize;
+    if (error(i->getSize(SectSize))) break;
+
+    std::vector<RelocationRef>::const_iterator rel_cur = Rels.begin();
+    std::vector<RelocationRef>::const_iterator rel_end = Rels.end();
+    // Disassemble symbol by symbol.
+    for (unsigned si = 0, se = Symbols.size(); si != se; ++si) {
+      uint64_t Start = Symbols[si].first;
+      uint64_t End;
+      // The end is either the size of the section or the beginning of the
+      // next symbol. `End` is exclusive (the loop below runs while
+      // Index < End), so the next symbol's offset is used as-is; subtracting
+      // one would drop a one-byte instruction sitting right before it.
+      if (si == se - 1)
+        End = SectSize;
+      // Make sure this symbol takes up space.
+      else if (Symbols[si + 1].first != Start)
+        End = Symbols[si + 1].first;
+      else
+        // This symbol has the same address as the next symbol. Skip it.
+        continue;
+
+      outs() << '\n' << Symbols[si].second << ":\n";
+
+#ifndef NDEBUG
+      raw_ostream &DebugOut = DebugFlag ? dbgs() : nulls();
+#else
+      raw_ostream &DebugOut = nulls();
+#endif
+
+      for (Index = Start; Index < End; Index += Size) {
+        MCInst Inst;
+
+        if (DisAsm->getInstruction(Inst, Size, memoryObject, Index,
+                                   DebugOut, nulls())) {
+          outs() << format("%8" PRIx64 ":", SectionAddr + Index);
+          outs() << "\t";
+          DumpBytes(StringRef(Bytes.data() + Index, Size));
+
+          IP->printInst(&Inst, outs(), "");
+          outs() << "\n";
+        } else {
+          errs() << "warning: invalid instruction encoding\n";
+          // Always make forward progress, even if the disassembler reported
+          // a zero-length instruction.
+          if (Size == 0)
+            Size = 1; // skip illegible bytes
+        }
+
+        // Print relocation for instruction.
+        while (rel_cur != rel_end) {
+          bool hidden = false;
+          uint64_t addr;
+          SmallString<16> name;
+          SmallString<32> val;
+
+          // If this relocation is hidden, skip it.
+          if (error(rel_cur->getHidden(hidden))) goto skip_print_rel;
+          if (hidden) goto skip_print_rel;
+
+          if (error(rel_cur->getOffset(addr))) goto skip_print_rel;
+          // Stop when rel_cur's address is past the current instruction.
+          if (addr >= Index + Size) break;
+          if (error(rel_cur->getTypeName(name))) goto skip_print_rel;
+          if (error(rel_cur->getValueString(val))) goto skip_print_rel;
+
+          outs() << format("\t\t\t%8" PRIx64 ": ", SectionAddr + addr) << name
+                 << "\t" << val << "\n";
+
+        skip_print_rel:
+          ++rel_cur;
+        }
+      }
+    }
+  }
+}