-#include "Binary.hxx"
-#include "disassembler/Disassembler.hxx"
-#include <iostream>
-#include <string>
-#include <algorithm>
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-using namespace llvm::object;
-namespace {
- // Reports a failed error_code.
- // Returns false when `ec` signals success. Otherwise writes
- // "error reading file: <message>." to outs(), flushes the stream and
- // returns true, so callers can write `if (error(...)) return;`.
- bool error(error_code ec) {
-   if (ec) {
-     outs() << "error reading file: " << ec.message() << ".\n";
-     outs().flush();
-     return true;
-   }
-   return false;
- }
- // Comparison predicate that orders relocations by their offset.
- // When an offset cannot be read the failure is reported through error()
- // and the pair compares as "not less"; `b` is not queried if `a` failed.
- bool RelocAddressLess(RelocationRef a, RelocationRef b) {
-   uint64_t lhs_offset, rhs_offset;
-   const bool unreadable =
-       error(a.getOffset(lhs_offset)) || error(b.getOffset(rhs_offset));
-   return !unreadable && lhs_offset < rhs_offset;
- }
- // Writes `bytes` to outs() as lowercase two-digit hex values separated by
- // single spaces, padded with spaces to a fixed width of 15 byte columns.
- // Input longer than 15 bytes trips the assert in debug builds and is
- // truncated to the first 15 bytes otherwise.
- //
- // FIXME: The real way to do this is to figure out the longest instruction
- // and align to that size before printing, instead of hard-coding the x86
- // maximum.
- void DumpBytes(StringRef bytes) {
-   static const char hex_rep[] = "0123456789abcdef";
-   // 15 is the longest x86 instruction.
-   // 3 is for the hex representation of a byte plus a space.
-   // 1 is for the null terminator.
-   enum { OutputSize = (15 * 3) + 1 };
-   char output[OutputSize];
-   assert(bytes.size() <= 15
-          && "DumpBytes only supports instructions of up to 15 bytes");
-   memset(output, ' ', sizeof(output));
-   unsigned index = 0;
-   // The assert disappears under NDEBUG, so the loop is also bounded by the
-   // buffer itself: both hex digits must land before the terminator slot.
-   for (StringRef::iterator i = bytes.begin(), e = bytes.end();
-        i != e && index + 1 < sizeof(output) - 1; ++i) {
-     output[index] = hex_rep[(*i & 0xF0) >> 4];
-     output[index + 1] = hex_rep[*i & 0xF];
-     index += 3;
-   }
-   output[sizeof(output) - 1] = 0;
-   outs() << output;
- }
- // Builds a name -> SectionRef map for the sections of `o`.
- // Iteration stops at the first section whose name cannot be read (the
- // failure is reported by error()); the sections collected so far are
- // returned. When two sections share a name, the first one seen is kept,
- // because map::insert does not overwrite.
- std::map<std::string, SectionRef> readSections(const ObjectFile& o) {
-   std::map<std::string, SectionRef> result;
-   for (section_iterator i(o.section_begin()), e(o.section_end()); i != e; ++i) {
-     StringRef name;
-     if (error(i->getName(name))) break;
-     result.insert(std::make_pair(name.str(), *i));
-   }
-   return result;
- }
- // Builds a name -> SymbolRef map for the symbols of `o`.
- // Iteration stops at the first symbol whose name cannot be read (the
- // failure is reported by error()); the symbols collected so far are
- // returned. When two symbols share a name, the first one seen is kept,
- // because map::insert does not overwrite.
- std::map<std::string, SymbolRef> readSymbols(const ObjectFile& o) {
-   std::map<std::string, SymbolRef> result;
-   for (symbol_iterator si(o.symbol_begin()), se(o.symbol_end()); si != se; ++si) {
-     StringRef name;
-     if (error(si->getName(name))) break;
-     result.insert(std::make_pair(name.str(), *si));
-   }
-   return result;
- }
-::Binary::Binary(const std::string& filename) // Opens `filename` and creates the MC objects the other members use; on any failure it prints a message and returns with the remaining members unset.
- : triple(llvm::Twine("unkown-unknown-unknown")) // NOTE(review): "unkown" looks like a typo for "unknown" - confirm; the architecture is replaced via setArch() below.
- ::Disassembler d(filename); // NOTE(review): `d` is not referenced again in the visible body.
- std::string error; // Receives the lookupTarget() failure text; hides the file-local error() helper inside this constructor.
- binary = createBinary(filename).get(); // NOTE(review): no failure check is visible before the result is used - confirm.
- if (Archive *a = dyn_cast<Archive>(binary)) { // Archives are only reported; nothing else is set up for them.
- std::cerr << "Got an archive!" << std::endl;
- return;
- }
- o = dyn_cast<ObjectFile>(binary); // NOTE(review): `o` is dereferenced on the next line without a null check.
- triple.setArch(Triple::ArchType(o->getArch())); // Take the architecture from the object file.
- std::string tripleName(triple.getTriple());
- outs() << tripleName << "\n"; // Echo the resulting triple.
- target = TargetRegistry::lookupTarget("", triple, error); // On failure `error` holds the text printed below.
- if (!target) {
- std::cerr << error;
- return;
- }
- outs() << target->getName() << "\n"; // Echo the selected target's name.
- MRI.reset(target->createMCRegInfo(tripleName)); // From here on each component is created through `target`; a null result aborts construction.
- if (!MRI) {
- std::cerr << "error: no register info for target " << tripleName << "\n";
- return;
- }
- // Set up disassembler.
- AsmInfo.reset(target->createMCAsmInfo(*MRI, tripleName));
- if (!AsmInfo) {
- std::cerr << "error: no assembly info for target " << tripleName << "\n";
- return;
- }
- STI.reset(target->createMCSubtargetInfo(tripleName, "", ""));
- if (!STI) {
- errs() << "error: no subtarget info for target " << tripleName << "\n"; // This one message goes to errs(); the others use std::cerr.
- return;
- }
- MII.reset(target->createMCInstrInfo());
- if (!MII) {
- std::cerr << "error: no instruction info for target " << tripleName << "\n";
- return;
- }
- DisAsm.reset(target->createMCDisassembler(*STI));
- if (!DisAsm) {
- std::cerr << "error: no disassembler for target " << tripleName << "\n";
- return;
- }
- MOFI.reset(new MCObjectFileInfo);
- Ctx.reset(new MCContext(AsmInfo.get(), MRI.get(), MOFI.get()));
- RelInfo.reset(
- target->createMCRelocationInfo(tripleName, *Ctx.get()));
- if (RelInfo) { // The symbolizer is optional: it is installed only if both RelInfo and Symzer were created.
- Symzer.reset(
- MCObjectSymbolizer::createObjectSymbolizer(*Ctx.get(), RelInfo, o));
- if (Symzer)
- DisAsm->setSymbolizer(Symzer);
- }
- MIA.reset(target->createMCInstrAnalysis(MII.get())); // NOTE(review): MIA is not null-checked before *MIA is used below.
- int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
- IP.reset(target->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI));
- if (!IP) {
- std::cerr << "error: no instruction printer for target " << tripleName
- << '\n';
- return;
- }
- OwningPtr<MCObjectDisassembler> OD( // Temporary object disassembler, used only to build `Mod`.
- new MCObjectDisassembler(*o, *DisAsm, *MIA));
- Mod.reset(OD->buildModule(/* withCFG */ false));
- symbols = readSymbols(*o); // Cache name -> symbol and name -> section maps for later lookups.
- sections = readSections(*o);
- // for_each(sections.begin(), sections.end(), [](const std::pair<std::string, SectionRef>& i){
- // std::cout << "Section: " << i.first << std::endl;
- // });
-::Binary::getSymbols() {
-  // Returns the names of all symbols that the ".text" section reports as
-  // contained and whose type is SymbolRef::ST_Function. The result is empty
-  // when no section named ".text" was recorded.
-  std::vector<std::string> result;
-  // find() rather than operator[]: indexing would insert a default-constructed
-  // SectionRef for a missing ".text" and then query that empty reference.
-  auto text = sections.find(".text");
-  if (text == sections.end())
-    return result;
-  SectionRef r = text->second;
-  // The parameter type matches the map's value_type exactly, so no temporary
-  // pair is built for every element.
-  for_each(symbols.begin(), symbols.end(), [&](const std::pair<const std::string, SymbolRef>& i) {
-    bool contains;
-    if (error(r.containsSymbol(i.second, contains)) || !contains)
-      return;
-    SymbolRef::Type t;
-    // Skip symbols whose type cannot be read instead of testing an
-    // uninitialized `t`.
-    if (error(i.second.getType(t)))
-      return;
-    if (SymbolRef::ST_Function == t)
-      result.push_back(i.first);
-  });
-  return result;
-// Disassembles the symbol or section named `function` and calls `callback`
-// once per decoded instruction with (address, raw instruction bytes, printed
-// instruction text).
-//
-// `function` is looked up among the symbols first, then among the sections.
-// The call does nothing when the name is unknown, when an address, size or
-// the contents cannot be read, or when the address is UnknownAddressOrSize.
-// Bytes that do not decode produce a warning on errs() and are skipped.
-void ::Binary::for_each_instruction(const std::string& function,
-    std::function<void (long, std::string, std::string)> callback) {
-  StringRef bytes;
-  uint64_t base_address, address, ssize, size(0), index, end;
-  StringRefMemoryObject memoryObject("");
-  auto symbol = symbols.find(function);
-  auto section = sections.find(function);
-  if (symbols.end() != symbol) {
-    SymbolRef ref = symbol->second;
-    section_iterator sec(o->section_begin());
-    if (error(ref.getSection(sec))) return;
-    if (error(ref.getAddress(address))) return;
-    if (address == UnknownAddressOrSize) return;
-    if (error(ref.getSize(ssize))) return;
-    if (error(sec->getAddress(base_address))) return;
-    if (error(sec->getContents(bytes))) return;
-    memoryObject = bytes;
-  }
-  else if (sections.end() != section) {
-    SectionRef sref = section->second;
-    if (error(sref.getAddress(address))) return;
-    if (address == UnknownAddressOrSize) return;
-    if (error(sref.getSize(ssize))) return;
-    if (error(sref.getContents(bytes))) return;
-    base_address = address;
-    memoryObject = bytes;
-  }
-  else {
-    // Neither a symbol nor a section: without this return the loop below
-    // would run on uninitialized addresses.
-    return;
-  }
-  // `index` and `end` are offsets into `bytes`, i.e. relative to base_address.
-  for (end = address + ssize - base_address, index = address - base_address; index < end; index += size) {
-    MCInst Inst;
-    if (DisAsm->getInstruction(Inst, size, memoryObject, index,
-                               nulls(), nulls())) {
-      std::string buf;
-      llvm::raw_string_ostream s(buf);
-      IP->printInst(&Inst, s, "");
-      // `<=` so that an instruction ending exactly at the end of the contents
-      // is reported too; only the instruction's own bytes are copied.
-      if (index + size <= bytes.size())
-        callback(base_address + index, bytes.substr(index, size).str(), s.str());
-    } else {
-      errs() << "warning: invalid instruction encoding\n";
-      if (size == 0)
-        size = 1; // skip illegible bytes
-    }
-  }
-// Prints every text atom of the module to outs(): an "Atom <name>:" header
-// followed by one printed instruction per line. Atoms of any other kind are
-// skipped. A final line names the architecture stored in `triple`.
-void ::Binary::disassemble() {
-  MCModule::const_atom_iterator atom = Mod->atom_begin();
-  const MCModule::const_atom_iterator last_atom = Mod->atom_end();
-  for (; atom != last_atom; ++atom) {
-    if ((*atom)->getKind() == llvm::MCAtom::TextAtom) {
-      outs() << "\n\nAtom " << (*atom)->getName() << ": \n";
-      const MCTextAtom *text = dyn_cast<MCTextAtom>(*atom);
-      if (!text)
-        continue;
-      for (MCTextAtom::const_iterator inst = text->begin(), inst_end = text->end();
-           inst != inst_end; ++inst) {
-        IP->printInst(&inst->Inst, outs(), "");
-        outs() << "\n";
-      }
-    }
-  }
-  outs() << "binary " << triple.getArchName() << "\n";
-void ::Binary::disassemble_functions() { // Prints a disassembly of every text section of `o` to outs(), one symbol at a time.
- error_code ec; // NOTE(review): never assigned in the visible code, so the error(ec) test below cannot fire.
- for (section_iterator i = o->section_begin(),
- e = o->section_end();
- i != e; ++i) {
- if (error(ec)) break;
- bool text;
- if (error(i->isText(text))) break;
- if (!text) continue; // Only text sections are disassembled.
- uint64_t SectionAddr;
- if (error(i->getAddress(SectionAddr))) break;
- // Make a list of all the symbols in this section.
- std::vector<std::pair<uint64_t, StringRef> > Symbols; // (offset from the section start, name) pairs.
- for (symbol_iterator si = o->symbol_begin(),
- se = o->symbol_end();
- si != se; ++si) {
- bool contains;
- if (!error(i->containsSymbol(*si, contains)) && contains) {
- uint64_t Address;
- if (error(si->getAddress(Address))) break; // This break leaves only the symbol loop, not the section loop.
- if (Address == UnknownAddressOrSize) continue;
- Address -= SectionAddr; // Make the address section-relative.
- StringRef Name;
- if (error(si->getName(Name))) break;
- outs() << "\nXXX " << Name << "\n"; // NOTE(review): looks like leftover debug output - confirm.
- Symbols.push_back(std::make_pair(Address, Name));
- }
- }
- // Sort the symbols by address, just in case they didn't come in that way.
- array_pod_sort(Symbols.begin(), Symbols.end());
- // Make a list of all the relocations for this section.
- std::vector<RelocationRef> Rels; // Stays empty: the code that would fill it is commented out below.
- // if (InlineRelocs) {
- // for (relocation_iterator ri = i->begin_relocations(),
- // re = i->end_relocations();
- // ri != re; ri.increment(ec)) {
- // if (error(ec)) break;
- // Rels.push_back(*ri);
- // }
- // }
- // Sort relocations by address.
- std::sort(Rels.begin(), Rels.end(), RelocAddressLess);
- StringRef SegmentName = ""; // Stays empty: the Mach-O lookup below is commented out.
- // if (const MachOObjectFile *MachO =
- // dyn_cast<const MachOObjectFile>(o)) {
- // DataRefImpl DR = i->getRawDataRefImpl();
- // SegmentName = MachO->getSectionFinalSegmentName(DR);
- // }
- StringRef name;
- if (error(i->getName(name))) break;
- outs() << "Disassembly of section ";
- if (!SegmentName.empty())
- outs() << SegmentName << ",";
- outs() << name << ':';
- // If the section has no symbols just insert a dummy one and disassemble
- // the whole section.
- if (Symbols.empty())
- Symbols.push_back(std::make_pair(0, name));
- StringRef Bytes;
- if (error(i->getContents(Bytes))) break;
- StringRefMemoryObject memoryObject(Bytes);
- uint64_t Size; // Written by getInstruction() before it is used as the loop increment.
- uint64_t Index;
- uint64_t SectSize;
- if (error(i->getSize(SectSize))) break;
- std::vector<RelocationRef>::const_iterator rel_cur = Rels.begin();
- std::vector<RelocationRef>::const_iterator rel_end = Rels.end();
- // Disassemble symbol by symbol.
- for (unsigned si = 0, se = Symbols.size(); si != se; ++si) {
- uint64_t Start = Symbols[si].first;
- uint64_t End;
- // The end is either the size of the section or the beginning of the next
- // symbol.
- if (si == se - 1)
- End = SectSize;
- // Make sure this symbol takes up space.
- else if (Symbols[si + 1].first != Start)
- End = Symbols[si + 1].first - 1; // NOTE(review): the loop below tests Index < End, so the byte just before the next symbol is excluded - confirm the -1 is intended.
- else
- // This symbol has the same address as the next symbol. Skip it.
- continue;
- outs() << '\n' << Symbols[si].second << ":\n";
-#ifndef NDEBUG
- raw_ostream &DebugOut = nulls(); //DebugFlag ? dbgs() : nulls();
- raw_ostream &DebugOut = nulls(); // NOTE(review): second declaration; a matching #else/#endif is not visible in this view.
- for (Index = Start; Index < End; Index += Size) {
- MCInst Inst;
- if (DisAsm->getInstruction(Inst, Size, memoryObject, Index,
- DebugOut, nulls())) {
- outs() << format("%8" PRIx64 ":", SectionAddr + Index); // Absolute address, then the raw bytes, then the printed instruction.
- outs() << "\t";
- DumpBytes(StringRef(Bytes.data() + Index, Size));
- IP->printInst(&Inst, outs(), "");
- outs() << "\n";
- } else {
- errs() << "warning: invalid instruction encoding\n";
- if (Size == 0)
- Size = 1; // skip illegible bytes
- }
- // Print relocation for instruction.
- while (rel_cur != rel_end) { // Never entered while Rels stays empty.
- bool hidden = false;
- uint64_t addr;
- SmallString<16> name;
- SmallString<32> val;
- // If this relocation is hidden, skip it.
- if (error(rel_cur->getHidden(hidden))) goto skip_print_rel;
- if (hidden) goto skip_print_rel;
- if (error(rel_cur->getOffset(addr))) goto skip_print_rel;
- // Stop when rel_cur's address is past the current instruction.
- if (addr >= Index + Size) break;
- if (error(rel_cur->getTypeName(name))) goto skip_print_rel;
- if (error(rel_cur->getValueString(val))) goto skip_print_rel;
- outs() << format("\t\t\t%8" PRIx64 ": ", SectionAddr + addr) << name
- << "\t" << val << "\n";
- skip_print_rel: // Reached for printed and skipped relocations alike: advance to the next one.
- ++rel_cur;
- }
- }
- }
- }
-// Writes one Graphviz file per function of the module, named
-// "dot/<function>_<n>.dot". Each basic block becomes a record node labelled
-// with its instructions and each successor becomes an edge. The generated
-// file name is echoed on std::cerr. If a file cannot be opened a warning is
-// printed and the remaining functions are not processed.
-void ::Binary::disassemble_cfg() {
-  MCModule::const_func_iterator func = Mod->func_begin();
-  const MCModule::const_func_iterator func_end = Mod->func_end();
-  for (; func != func_end; ++func) {
-    // Function-local static: the numbering continues across calls.
-    static int filenum = 0;
-    std::string FileName = std::string("dot/") + (Twine((*func)->getName()) + "_" + utostr(filenum) + ".dot").str();
-    std::cerr << FileName << std::endl;
-
-    // Start a new dot file.
-    std::string open_error;
-    raw_fd_ostream Out(FileName.c_str(), open_error);
-    if (!open_error.empty()) {
-      errs() << "llvm-objdump: warning: " << open_error << '\n';
-      return;
-    }
-
-    Out << "digraph \"" << (*func)->getName() << "\" {\n";
-    Out << "graph [ rankdir = \"LR\" ];\n";
-    for (MCFunction::const_iterator block = (*func)->begin(), block_end = (*func)->end();
-         block != block_end; ++block) {
-      // Print a block only if it has predecessors or is the first block.
-      const bool hasPreds = (*block)->pred_begin() != (*block)->pred_end();
-      const bool isFirst = block == (*func)->begin();
-      if (!(hasPreds || isFirst))
-        continue;
-
-      Out << '"' << (*block)->getInsts()->getBeginAddr() << "\" [ label=\"<a>";
-      // One record row per instruction.
-      const unsigned inst_count = (*block)->getInsts()->size();
-      for (unsigned n = 0; n != inst_count; ++n) {
-        if (n != 0) // Not the first row: add a row separator.
-          Out << '|';
-        if (n + 1 == inst_count) // Last row: add the port that edges start from.
-          Out << "<o>";
-        // Escape special chars and print the instruction in mnemonic form.
-        std::string text;
-        raw_string_ostream text_stream(text);
-        IP->printInst(&(*block)->getInsts()->at(n).Inst, text_stream, "");
-        Out << DOT::EscapeString(text_stream.str());
-      }
-      Out << "\" shape=\"record\" ];\n";
-
-      // Add edges from this block's last row to each successor's first row.
-      for (MCBasicBlock::succ_const_iterator succ = (*block)->succ_begin(),
-           succ_end = (*block)->succ_end(); succ != succ_end; ++succ)
-        Out << (*block)->getInsts()->getBeginAddr() << ":o -> "
-            << (*succ)->getInsts()->getBeginAddr() << ":a\n";
-    }
-    Out << "}\n";
-    ++filenum;
-  }