From c100e37a2dfbe6dd221e867559b473a4a5097570 Mon Sep 17 00:00:00 2001 From: Christoph Egger Date: Mon, 5 Jan 2015 17:19:13 +0100 Subject: [PATCH] Cleanup sourcetree --- src/Binary.cxx | 473 ------------------- src/Binary.hxx | 50 -- src/{ => disassembler/llvm}/include_llvm.hxx | 0 src/main.cxx | 2 +- 4 files changed, 1 insertion(+), 524 deletions(-) delete mode 100644 src/Binary.cxx delete mode 100644 src/Binary.hxx rename src/{ => disassembler/llvm}/include_llvm.hxx (100%) diff --git a/src/Binary.cxx b/src/Binary.cxx deleted file mode 100644 index f877685..0000000 --- a/src/Binary.cxx +++ /dev/null @@ -1,473 +0,0 @@ -#include "Binary.hxx" - -#include "disassembler/Disassembler.hxx" - -#include -#include -#include - -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; -using namespace llvm::object; - -namespace { - bool error(error_code ec) { - if (!ec) return false; - - outs() << "error reading file: " << ec.message() << ".\n"; - outs().flush(); - return true; - } - - bool RelocAddressLess(RelocationRef a, RelocationRef b) { - uint64_t a_addr, b_addr; - if (error(a.getOffset(a_addr))) return false; - if (error(b.getOffset(b_addr))) return false; - return a_addr < b_addr; - } - - void DumpBytes(StringRef bytes) { - static const char hex_rep[] = "0123456789abcdef"; - // FIXME: The real way to do this is to figure out the longest instruction - // and align to that size before printing. I'll fix this when I get - // around to outputting relocations. - // 15 is the longest x86 instruction - // 3 is for the hex rep of a byte + a space. - // 1 is for the null terminator. - enum { OutputSize = (15 * 3) + 1 }; - char output[OutputSize]; - - assert(bytes.size() <= 15 - && "DumpBytes only supports instructions of up to 15 bytes"); - memset(output, ' ', sizeof(output)); - unsigned index = 0; - for (StringRef::iterator i = bytes.begin(), - e = bytes.end(); i != e; ++i) { - output[index] = hex_rep[(*i & 0xF0) >> 4]; - output[index + 1] = hex_rep[*i & 0xF]; - index += 3; - } - - output[sizeof(output) - 1] = 0; - outs() << output; - } - - std::map readSections(const ObjectFile& o) { - error_code ec; - std::map result; - section_iterator i(o.section_begin()), e(o.section_end()); - for (; i != e; ++i) { - StringRef name; - if (error(i->getName(name))) break; - - result.insert(make_pair(name.str(), *i)); - } - return result; - } - - std::map readSymbols(const ObjectFile& o) { - error_code ec; - std::map result; - symbol_iterator si(o.symbol_begin()), se(o.symbol_end()); - for (; si != se; ++si) { - StringRef name; - if (error(si->getName(name))) break; - - result.insert(make_pair(name.str(), *si)); - } - return result; - } -} - -::Binary::Binary(const std::string& filename) - : triple(llvm::Twine("unkown-unknown-unknown")) -{ - ::Disassembler d(filename); - std::string error; - - binary = createBinary(filename).get(); - if (Archive *a = dyn_cast(binary)) { - std::cerr << "Got an archive!" << std::endl; - return; - } - - o = dyn_cast(binary); - - triple.setArch(Triple::ArchType(o->getArch())); - std::string tripleName(triple.getTriple()); - - outs() << tripleName << "\n"; - - target = TargetRegistry::lookupTarget("", triple, error); - if (!target) { - std::cerr << error; - return; - } - - outs() << target->getName() << "\n"; - - MRI.reset(target->createMCRegInfo(tripleName)); - if (!MRI) { - std::cerr << "error: no register info for target " << tripleName << "\n"; - return; - } - - // Set up disassembler. - AsmInfo.reset(target->createMCAsmInfo(*MRI, tripleName)); - if (!AsmInfo) { - std::cerr << "error: no assembly info for target " << tripleName << "\n"; - return; - } - - STI.reset(target->createMCSubtargetInfo(tripleName, "", "")); - if (!STI) { - errs() << "error: no subtarget info for target " << tripleName << "\n"; - return; - } - - MII.reset(target->createMCInstrInfo()); - if (!MII) { - std::cerr << "error: no instruction info for target " << tripleName << "\n"; - return; - } - - DisAsm.reset(target->createMCDisassembler(*STI)); - if (!DisAsm) { - std::cerr << "error: no disassembler for target " << tripleName << "\n"; - return; - } - - MOFI.reset(new MCObjectFileInfo); - Ctx.reset(new MCContext(AsmInfo.get(), MRI.get(), MOFI.get())); - RelInfo.reset( - target->createMCRelocationInfo(tripleName, *Ctx.get())); - if (RelInfo) { - Symzer.reset( - MCObjectSymbolizer::createObjectSymbolizer(*Ctx.get(), RelInfo, o)); - if (Symzer) - DisAsm->setSymbolizer(Symzer); - } - - MIA.reset(target->createMCInstrAnalysis(MII.get())); - - int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); - IP.reset(target->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI)); - if (!IP) { - std::cerr << "error: no instruction printer for target " << tripleName - << '\n'; - return; - } - - OwningPtr OD( - new MCObjectDisassembler(*o, *DisAsm, *MIA)); - Mod.reset(OD->buildModule(/* withCFG */ false)); - - symbols = readSymbols(*o); - sections = readSections(*o); - - // for_each(sections.begin(), sections.end(), [](const std::pair& i){ - // std::cout << "Section: " << i.first << std::endl; - // }); -} - - - -std::vector -::Binary::getSymbols() { - error_code ec; - SectionRef r = sections[".text"]; - std::vector result; - for_each(symbols.begin(), symbols.end(), [&](const std::pair& i) { - bool contains; - SymbolRef::Type t; - if (!error(r.containsSymbol(i.second, contains)) && contains) { - i.second.getType(t); - if (SymbolRef::ST_Function == t) - result.push_back(i.first); - } - }); - return result; -} - -void ::Binary::for_each_instruction(const std::string& function, - std::function callback) { - StringRef bytes; - uint64_t base_address, address, ssize, size(0), index, end; - StringRefMemoryObject memoryObject(""); - - if (symbols.end() != symbols.find(function)) { - SymbolRef ref; - section_iterator sec(o->section_begin()); - - ref = symbols.at(function); - if (error(ref.getSection(sec))) return; - if (error(ref.getAddress(address))) return; - if (address == UnknownAddressOrSize) return; - if (error(ref.getSize(ssize))) return; - if (error(sec->getAddress(base_address))) return; - if (error(sec->getContents(bytes))) return; - memoryObject = bytes; - - } - else if (sections.end() != sections.find(function)) { - SectionRef sref = sections.at(function); - if (error(sref.getAddress(address))) return; - if (address == UnknownAddressOrSize) return; - if (error(sref.getSize(ssize))) return; - if (error(sref.getContents(bytes))) return; - base_address = address; - memoryObject = bytes; - } - - - // outs() << "Start for_each_instruction " << function << "\n"; - - - for (end = address + ssize - base_address, index = address - base_address; index < end; index += size) { - MCInst Inst; - - if (DisAsm->getInstruction(Inst, size, memoryObject, index, - nulls(), nulls())) { - std::string buf; - llvm::raw_string_ostream s(buf); - IP->printInst(&Inst, s, ""); - - if (index + size < bytes.str().length()) - callback(base_address + index, bytes.str().substr(index, size), s.str()); - - } else { - errs() << "warning: invalid instruction encoding\n"; - if (size == 0) - size = 1; // skip illegible bytes - } - } - // outs() << "End for_each_instruction\n"; - -} - -void ::Binary::disassemble() { - for (MCModule::const_atom_iterator AI = Mod->atom_begin(), - AE = Mod->atom_end(); - AI != AE; ++AI) { - - if ((*AI)->getKind() != llvm::MCAtom::TextAtom) - continue; - - outs() << "\n\nAtom " << (*AI)->getName() << ": \n"; - if (const MCTextAtom *TA = dyn_cast(*AI)) { - for (MCTextAtom::const_iterator II = TA->begin(), IE = TA->end(); - II != IE; - ++II) { -// II->Inst.dump(); - IP->printInst(&II->Inst, outs(), ""); - outs() << "\n"; - } - } - } - - outs() << "binary " << triple.getArchName() << "\n"; -} - -void ::Binary::disassemble_functions() { - error_code ec; - for (section_iterator i = o->section_begin(), - e = o->section_end(); - i != e; ++i) { - if (error(ec)) break; - bool text; - if (error(i->isText(text))) break; - if (!text) continue; - - uint64_t SectionAddr; - if (error(i->getAddress(SectionAddr))) break; - - // Make a list of all the symbols in this section. - std::vector > Symbols; - for (symbol_iterator si = o->symbol_begin(), - se = o->symbol_end(); - si != se; ++si) { - bool contains; - if (!error(i->containsSymbol(*si, contains)) && contains) { - uint64_t Address; - if (error(si->getAddress(Address))) break; - if (Address == UnknownAddressOrSize) continue; - Address -= SectionAddr; - - StringRef Name; - if (error(si->getName(Name))) break; - - outs() << "\nXXX " << Name << "\n"; - - Symbols.push_back(std::make_pair(Address, Name)); - } - } - - // Sort the symbols by address, just in case they didn't come in that way. - array_pod_sort(Symbols.begin(), Symbols.end()); - - // Make a list of all the relocations for this section. - std::vector Rels; - // if (InlineRelocs) { - // for (relocation_iterator ri = i->begin_relocations(), - // re = i->end_relocations(); - // ri != re; ri.increment(ec)) { - // if (error(ec)) break; - // Rels.push_back(*ri); - // } - // } - - // Sort relocations by address. - std::sort(Rels.begin(), Rels.end(), RelocAddressLess); - - StringRef SegmentName = ""; - // if (const MachOObjectFile *MachO = - // dyn_cast(o)) { - // DataRefImpl DR = i->getRawDataRefImpl(); - // SegmentName = MachO->getSectionFinalSegmentName(DR); - // } - StringRef name; - if (error(i->getName(name))) break; - outs() << "Disassembly of section "; - if (!SegmentName.empty()) - outs() << SegmentName << ","; - outs() << name << ':'; - - // If the section has no symbols just insert a dummy one and disassemble - // the whole section. - if (Symbols.empty()) - Symbols.push_back(std::make_pair(0, name)); - - - StringRef Bytes; - if (error(i->getContents(Bytes))) break; - StringRefMemoryObject memoryObject(Bytes); - uint64_t Size; - uint64_t Index; - uint64_t SectSize; - if (error(i->getSize(SectSize))) break; - - std::vector::const_iterator rel_cur = Rels.begin(); - std::vector::const_iterator rel_end = Rels.end(); - // Disassemble symbol by symbol. - for (unsigned si = 0, se = Symbols.size(); si != se; ++si) { - uint64_t Start = Symbols[si].first; - uint64_t End; - // The end is either the size of the section or the beginning of the next - // symbol. - if (si == se - 1) - End = SectSize; - // Make sure this symbol takes up space. - else if (Symbols[si + 1].first != Start) - End = Symbols[si + 1].first - 1; - else - // This symbol has the same address as the next symbol. Skip it. - continue; - - outs() << '\n' << Symbols[si].second << ":\n"; - -#ifndef NDEBUG - raw_ostream &DebugOut = nulls(); //DebugFlag ? dbgs() : nulls(); -#else - raw_ostream &DebugOut = nulls(); -#endif - - for (Index = Start; Index < End; Index += Size) { - MCInst Inst; - - if (DisAsm->getInstruction(Inst, Size, memoryObject, Index, - DebugOut, nulls())) { - outs() << format("%8" PRIx64 ":", SectionAddr + Index); - outs() << "\t"; - DumpBytes(StringRef(Bytes.data() + Index, Size)); - - IP->printInst(&Inst, outs(), ""); - outs() << "\n"; - } else { - errs() << "warning: invalid instruction encoding\n"; - if (Size == 0) - Size = 1; // skip illegible bytes - } - - // Print relocation for instruction. - while (rel_cur != rel_end) { - bool hidden = false; - uint64_t addr; - SmallString<16> name; - SmallString<32> val; - - // If this relocation is hidden, skip it. - if (error(rel_cur->getHidden(hidden))) goto skip_print_rel; - if (hidden) goto skip_print_rel; - - if (error(rel_cur->getOffset(addr))) goto skip_print_rel; - // Stop when rel_cur's address is past the current instruction. - if (addr >= Index + Size) break; - if (error(rel_cur->getTypeName(name))) goto skip_print_rel; - if (error(rel_cur->getValueString(val))) goto skip_print_rel; - - outs() << format("\t\t\t%8" PRIx64 ": ", SectionAddr + addr) << name - << "\t" << val << "\n"; - - skip_print_rel: - ++rel_cur; - } - } - } - } -} - -void ::Binary::disassemble_cfg() { - for (MCModule::const_func_iterator FI = Mod->func_begin(), - FE = Mod->func_end(); - FI != FE; ++FI) { - static int filenum = 0; - std::string FileName = std::string("dot/") + (Twine((*FI)->getName()) + "_" + utostr(filenum) + ".dot").str(); - - std::cerr << FileName << std::endl; - - // Start a new dot file. - std::string Error; - raw_fd_ostream Out(FileName.c_str(), Error); - if (!Error.empty()) { - errs() << "llvm-objdump: warning: " << Error << '\n'; - return; - } - - Out << "digraph \"" << (*FI)->getName() << "\" {\n"; - Out << "graph [ rankdir = \"LR\" ];\n"; - for (MCFunction::const_iterator i = (*FI)->begin(), e = (*FI)->end(); i != e; ++i) { - // Only print blocks that have predecessors. - bool hasPreds = (*i)->pred_begin() != (*i)->pred_end(); - - if (!hasPreds && i != (*FI)->begin()) - continue; - - Out << '"' << (*i)->getInsts()->getBeginAddr() << "\" [ label=\""; - // Print instructions. - for (unsigned ii = 0, ie = (*i)->getInsts()->size(); ii != ie; - ++ii) { - if (ii != 0) // Not the first line, start a new row. - Out << '|'; - if (ii + 1 == ie) // Last line, add an end id. - Out << ""; - - // Escape special chars and print the instruction in mnemonic form. - std::string Str; - raw_string_ostream OS(Str); - IP->printInst(&(*i)->getInsts()->at(ii).Inst, OS, ""); - Out << DOT::EscapeString(OS.str()); - } - Out << "\" shape=\"record\" ];\n"; - - // Add edges. - for (MCBasicBlock::succ_const_iterator si = (*i)->succ_begin(), - se = (*i)->succ_end(); si != se; ++si) - Out << (*i)->getInsts()->getBeginAddr() << ":o -> " - << (*si)->getInsts()->getBeginAddr() << ":a\n"; - } - Out << "}\n"; - - ++filenum; - } -} diff --git a/src/Binary.hxx b/src/Binary.hxx deleted file mode 100644 index 1a86578..0000000 --- a/src/Binary.hxx +++ /dev/null @@ -1,50 +0,0 @@ -#ifndef INCLUDE__Binary_hxx -#define INCLUDE__Binary_hxx - -#include "include_llvm.hxx" -#include "Section.hxx" - -#include -#include -#include - -using llvm::OwningPtr; - -class Binary { -public: - Binary(const std::string& filename); - - void disassemble(); - - void disassemble_functions(); - - void disassemble_cfg(); - - std::vector getSymbols(); - - std::vector getFunctions(); - void for_each_instruction(const std::string& function, std::function callback); - -private: - llvm::Triple triple; - const llvm::Target * target; - llvm::object::ObjectFile * o; - - llvm::object::Binary* binary; - OwningPtr MRI; - OwningPtr AsmInfo; - OwningPtr Mod; - OwningPtr IP; - OwningPtr DisAsm; - OwningPtr MOFI; - OwningPtr Ctx; - OwningPtr MIA; - OwningPtr STI; - OwningPtr MII; - OwningPtr RelInfo; - OwningPtr Symzer; - - std::map sections; - std::map symbols; -}; -#endif diff --git a/src/include_llvm.hxx b/src/disassembler/llvm/include_llvm.hxx similarity index 100% rename from src/include_llvm.hxx rename to src/disassembler/llvm/include_llvm.hxx diff --git a/src/main.cxx b/src/main.cxx index ebb85e8..c33d6c4 100644 --- a/src/main.cxx +++ b/src/main.cxx @@ -1,4 +1,4 @@ -#include "include_llvm.hxx" +#include "disassembler/llvm/include_llvm.hxx" #include #include -- 2.39.2