#include "Binary.hxx"
+#include "disassembler/Disassembler.hxx"
+
#include <iostream>
#include <string>
#include <algorithm>
+#include "llvm/Support/raw_ostream.h"
+
using namespace llvm;
using namespace llvm::object;
-static bool error(error_code ec) {
- if (!ec) return false;
+namespace {
+ bool error(error_code ec) { // Reports a failed error_code on outs(); returns true when ec signals an error, false otherwise.
+ if (!ec) return false; // no error: nothing is printed
- outs() << "error reading file: " << ec.message() << ".\n";
- outs().flush();
- return true;
-}
+ outs() << "error reading file: " << ec.message() << ".\n"; // message text comes from ec.message()
+ outs().flush();
+ return true; // callers use this to abandon the current operation
+ }
-static bool RelocAddressLess(RelocationRef a, RelocationRef b) {
- uint64_t a_addr, b_addr;
- if (error(a.getOffset(a_addr))) return false;
- if (error(b.getOffset(b_addr))) return false;
- return a_addr < b_addr;
-}
+ // Comparator for relocations: true when the offset of `a` is lower than the
+ // offset of `b`. If either offset cannot be read, the failure is reported
+ // through error() and the result is false. The offset of `b` is only
+ // queried once the offset of `a` has been read successfully.
+ bool RelocAddressLess(RelocationRef a, RelocationRef b) {
+   uint64_t lhs_offset;
+   const bool lhs_failed = error(a.getOffset(lhs_offset));
+   if (lhs_failed)
+     return false;
+
+   uint64_t rhs_offset;
+   const bool rhs_failed = error(b.getOffset(rhs_offset));
+   if (rhs_failed)
+     return false;
+
+   return lhs_offset < rhs_offset;
+ }
+
+ // Prints `bytes` to outs() as lowercase hex: two digits per byte, each pair
+ // followed by a space, padded with spaces to a fixed 45-character field.
+ // Passing more than 15 bytes is a caller error. It trips the assert in debug
+ // builds; in NDEBUG builds, where the assert is compiled out, only the first
+ // 15 bytes are printed instead of writing past the end of the local buffer.
+ void DumpBytes(StringRef bytes) {
+   static const char hex_rep[] = "0123456789abcdef";
+   // FIXME: The real way to do this is to figure out the longest instruction
+   // and align to that size before printing. I'll fix this when I get
+   // around to outputting relocations.
+   // 15 is the longest x86 instruction
+   // 3 is for the hex rep of a byte + a space.
+   // 1 is for the null terminator.
+   static const size_t MaxBytes = 15;
+   enum { OutputSize = (MaxBytes * 3) + 1 };
+   char output[OutputSize];
+
+   assert(bytes.size() <= MaxBytes
+          && "DumpBytes only supports instructions of up to 15 bytes");
+   memset(output, ' ', sizeof(output));
+
+   // Clamp explicitly: the assert above offers no protection in release builds.
+   const size_t available = bytes.size();
+   const size_t count = available < MaxBytes ? available : MaxBytes;
+   for (size_t n = 0; n != count; ++n) {
+     // Go through unsigned char so the nibble extraction never sees a
+     // negative value on platforms where plain char is signed.
+     const unsigned char byte = static_cast<unsigned char>(bytes[n]);
+     output[n * 3] = hex_rep[byte >> 4];
+     output[n * 3 + 1] = hex_rep[byte & 0xF];
+   }
+
+   output[sizeof(output) - 1] = 0;
+   outs() << output;
+ }
+
+ // Builds a name -> SectionRef index covering the sections of `o`.
+ // Iteration stops at the first section whose name cannot be read (error()
+ // reports the failure); whatever was collected up to that point is returned.
+ // map::insert is used, so the first section seen under a given name is kept.
+ std::map<std::string, SectionRef> readSections(const ObjectFile& o) {
+   std::map<std::string, SectionRef> by_name;
+
+   section_iterator cur(o.section_begin());
+   section_iterator last(o.section_end());
+   while (cur != last) {
+     StringRef section_name;
+     const bool failed = error(cur->getName(section_name));
+     if (failed)
+       break;
+
+     by_name.insert(std::make_pair(section_name.str(), *cur));
+     ++cur;
+   }
+
+   return by_name;
+ }
+
+ std::map<std::string, SymbolRef> readSymbols(const ObjectFile& o) { // Builds a name -> SymbolRef index of the symbols in o.
+ error_code ec; // NOTE(review): ec is never used in this function
+ std::map<std::string, SymbolRef> result;
+ symbol_iterator si(o.symbol_begin()), se(o.symbol_end());
+ for (; si != se; ++si) {
+ StringRef name;
+ if (error(si->getName(name))) break; // stop at the first unreadable name (error() reports it); entries collected so far are still returned
-static void DumpBytes(StringRef bytes) {
- static const char hex_rep[] = "0123456789abcdef";
- // FIXME: The real way to do this is to figure out the longest instruction
- // and align to that size before printing. I'll fix this when I get
- // around to outputting relocations.
- // 15 is the longest x86 instruction
- // 3 is for the hex rep of a byte + a space.
- // 1 is for the null terminator.
- enum { OutputSize = (15 * 3) + 1 };
- char output[OutputSize];
-
- assert(bytes.size() <= 15
- && "DumpBytes only supports instructions of up to 15 bytes");
- memset(output, ' ', sizeof(output));
- unsigned index = 0;
- for (StringRef::iterator i = bytes.begin(),
- e = bytes.end(); i != e; ++i) {
- output[index] = hex_rep[(*i & 0xF0) >> 4];
- output[index + 1] = hex_rep[*i & 0xF];
- index += 3;
- }
-
- output[sizeof(output) - 1] = 0;
- outs() << output;
+ result.insert(make_pair(name.str(), *si)); // insert() keeps the first symbol seen for a duplicate name
+ }
+ return result;
+ }
}
::Binary::Binary(const std::string& filename)
- : triple("unkown-unknown-unknown")
+ : triple(llvm::Twine("unkown-unknown-unknown"))
{
+ ::Disassembler d(filename);
std::string error;
- createBinary(filename, binary);
- if (Archive *a = dyn_cast<Archive>(binary.get())) {
+ binary = createBinary(filename).get();
+ if (Archive *a = dyn_cast<Archive>(binary)) {
std::cerr << "Got an archive!" << std::endl;
return;
}
- o = dyn_cast<ObjectFile>(binary.get());
+ o = dyn_cast<ObjectFile>(binary);
triple.setArch(Triple::ArchType(o->getArch()));
std::string tripleName(triple.getTriple());
+ outs() << tripleName << "\n";
+
target = TargetRegistry::lookupTarget("", triple, error);
if (!target) {
std::cerr << error;
return;
}
+ outs() << target->getName() << "\n";
+
MRI.reset(target->createMCRegInfo(tripleName));
if (!MRI) {
std::cerr << "error: no register info for target " << tripleName << "\n";
OwningPtr<MCObjectDisassembler> OD(
new MCObjectDisassembler(*o, *DisAsm, *MIA));
- Mod.reset(OD->buildModule(/* withCFG */ true));
+ Mod.reset(OD->buildModule(/* withCFG */ false));
+
+ symbols = readSymbols(*o);
+ sections = readSections(*o);
+
+ // for_each(sections.begin(), sections.end(), [](const std::pair<std::string, SectionRef>& i){
+ // std::cout << "Section: " << i.first << std::endl;
+ // });
+}
+
+
+
+// Returns the names of the symbols that the ".text" section reports as
+// contained in it and whose type is SymbolRef::ST_Function, in the iteration
+// order of the `symbols` map. Returns an empty vector when `sections` has no
+// ".text" entry. A symbol whose containment or type query fails is reported
+// through error() and left out of the result.
+std::vector<std::string>
+::Binary::getSymbols() {
+  std::vector<std::string> result;
+
+  // Use find() rather than operator[]: operator[] would insert a
+  // default-constructed SectionRef into `sections` when there is no ".text"
+  // entry, and every containment query below would run against that
+  // placeholder.
+  auto text = sections.find(".text");
+  if (text == sections.end())
+    return result;
+  SectionRef text_section = text->second;
+
+  // Binding by const auto& avoids copying each (name, symbol) pair.
+  for (const auto& entry : symbols) {
+    bool contains = false;
+    if (error(text_section.containsSymbol(entry.second, contains)) || !contains)
+      continue;
+
+    // Check the result of getType() so `type` is never read uninitialized.
+    SymbolRef::Type type;
+    if (error(entry.second.getType(type)))
+      continue;
+
+    if (SymbolRef::ST_Function == type)
+      result.push_back(entry.first);
+  }
+  return result;
+}
+
+// Disassembles the code identified by `function` and invokes `callback` once
+// per decoded instruction.
+//
+// `function` is looked up first in `symbols`, then in `sections`. For a symbol
+// the range is [symbol address, symbol address + symbol size) inside its
+// containing section; for a section it is the whole section. If the name
+// matches neither, or any query on the matched entry fails (failures are
+// reported through error()), the function returns without calling `callback`.
+//
+// `callback` receives, in order: the address of the instruction, its raw
+// bytes, and its text as produced by IP->printInst. Bytes that cannot be
+// decoded produce a warning on errs() and are skipped.
+void ::Binary::for_each_instruction(const std::string& function,
+    std::function<void (long, std::string, std::string)> callback) {
+  StringRef bytes;
+  uint64_t base_address = 0, address = 0, ssize = 0;
+
+  auto symbol = symbols.find(function);
+  if (symbol != symbols.end()) {
+    SymbolRef ref = symbol->second;
+    section_iterator sec(o->section_begin());
+
+    if (error(ref.getSection(sec))) return;
+    if (error(ref.getAddress(address))) return;
+    if (address == UnknownAddressOrSize) return;
+    if (error(ref.getSize(ssize))) return;
+    if (error(sec->getAddress(base_address))) return;
+    if (error(sec->getContents(bytes))) return;
+  } else {
+    auto section = sections.find(function);
+    // Neither a symbol nor a section: there is no range to walk. Without this
+    // return the loop below would run on uninitialized bounds.
+    if (section == sections.end())
+      return;
+
+    SectionRef sref = section->second;
+    if (error(sref.getAddress(address))) return;
+    if (address == UnknownAddressOrSize) return;
+    if (error(sref.getSize(ssize))) return;
+    if (error(sref.getContents(bytes))) return;
+    base_address = address;
+  }
+
+  StringRefMemoryObject memoryObject(bytes);
+
+  // `index` and `end` are offsets into `bytes`, i.e. relative to base_address.
+  const uint64_t end = address + ssize - base_address;
+  uint64_t size = 0;
+  for (uint64_t index = address - base_address; index < end; index += size) {
+    MCInst Inst;
+
+    if (DisAsm->getInstruction(Inst, size, memoryObject, index,
+                               nulls(), nulls())) {
+      std::string buf;
+      llvm::raw_string_ostream s(buf);
+      IP->printInst(&Inst, s, "");
+
+      // <= so that an instruction ending exactly at the end of the contents
+      // is still delivered. Slice the StringRef before converting, so only
+      // the instruction's bytes are copied rather than the whole section.
+      if (index + size <= bytes.size())
+        callback(base_address + index, bytes.substr(index, size).str(), s.str());
+    } else {
+      errs() << "warning: invalid instruction encoding\n";
+    }
+
+    // Always advance by at least one byte so the walk cannot stall.
+    if (size == 0)
+      size = 1;
+  }
}
void ::Binary::disassemble() {
void ::Binary::disassemble_functions() {
error_code ec;
- for (section_iterator i = o->begin_sections(),
- e = o->end_sections();
- i != e; i.increment(ec)) {
+ for (section_iterator i = o->section_begin(),
+ e = o->section_end();
+ i != e; ++i) {
if (error(ec)) break;
bool text;
if (error(i->isText(text))) break;
// Make a list of all the symbols in this section.
std::vector<std::pair<uint64_t, StringRef> > Symbols;
- for (symbol_iterator si = o->begin_symbols(),
- se = o->end_symbols();
- si != se; si.increment(ec)) {
+ for (symbol_iterator si = o->symbol_begin(),
+ se = o->symbol_end();
+ si != se; ++si) {
bool contains;
if (!error(i->containsSymbol(*si, contains)) && contains) {
uint64_t Address;
StringRef Name;
if (error(si->getName(Name))) break;
+
+ outs() << "\nXXX " << Name << "\n";
+
Symbols.push_back(std::make_pair(Address, Name));
}
}
outs() << '\n' << Symbols[si].second << ":\n";
#ifndef NDEBUG
- raw_ostream &DebugOut = DebugFlag ? dbgs() : nulls();
+ raw_ostream &DebugOut = nulls(); //DebugFlag ? dbgs() : nulls();
#else
raw_ostream &DebugOut = nulls();
#endif
outs() << format("\t\t\t%8" PRIx64 ": ", SectionAddr + addr) << name
<< "\t" << val << "\n";
- skip_print_rel:
+ skip_print_rel:
++rel_cur;
}
}
}
}
}
+
+// Writes one Graphviz file per function in Mod, named
+// "dot/<function name>_<counter>.dot". Each file holds a left-to-right digraph
+// with one record node per basic block (labelled with the block's printed
+// instructions) and one edge per successor. Blocks without predecessors are
+// omitted unless they are the function's first block.
+//
+// If a file cannot be opened, a warning is written to errs() and that function
+// is skipped; the remaining functions are still processed.
+void ::Binary::disassemble_cfg() {
+  // Counter appended to every file name. It is static, so it keeps counting
+  // across calls; it advances only after a file has been written.
+  static int filenum = 0;
+
+  for (MCModule::const_func_iterator FI = Mod->func_begin(),
+                                     FE = Mod->func_end();
+       FI != FE; ++FI) {
+    const auto &func = *FI;
+    std::string FileName = std::string("dot/") + (Twine(func->getName()) + "_" + utostr(filenum) + ".dot").str();
+
+    std::cerr << FileName << std::endl;
+
+    // Start a new dot file.
+    std::string Error;
+    raw_fd_ostream Out(FileName.c_str(), Error);
+    if (!Error.empty()) {
+      errs() << "llvm-objdump: warning: " << Error << '\n';
+      // This is only a warning: give up on this function, not on all of them.
+      continue;
+    }
+
+    Out << "digraph \"" << func->getName() << "\" {\n";
+    Out << "graph [ rankdir = \"LR\" ];\n";
+    for (MCFunction::const_iterator i = func->begin(), e = func->end(); i != e; ++i) {
+      const auto &block = *i;
+
+      // Only print blocks that have predecessors; the first block is always
+      // printed.
+      const bool hasPreds = block->pred_begin() != block->pred_end();
+      if (!hasPreds && i != func->begin())
+        continue;
+
+      Out << '"' << block->getInsts()->getBeginAddr() << "\" [ label=\"<a>";
+      // Print instructions.
+      for (unsigned ii = 0, ie = block->getInsts()->size(); ii != ie; ++ii) {
+        if (ii != 0) // Not the first line, start a new row.
+          Out << '|';
+        if (ii + 1 == ie) // Last line, add an end id.
+          Out << "<o>";
+
+        // Escape special chars and print the instruction in mnemonic form.
+        std::string Str;
+        raw_string_ostream OS(Str);
+        IP->printInst(&block->getInsts()->at(ii).Inst, OS, "");
+        Out << DOT::EscapeString(OS.str());
+      }
+      Out << "\" shape=\"record\" ];\n";
+
+      // Add edges from this block's end port to each successor's start port.
+      for (MCBasicBlock::succ_const_iterator si = block->succ_begin(),
+                                             se = block->succ_end();
+           si != se; ++si)
+        Out << block->getInsts()->getBeginAddr() << ":o -> "
+            << (*si)->getInsts()->getBeginAddr() << ":a\n";
+    }
+    Out << "}\n";
+
+    ++filenum;
+  }
+}