#include "Binary.hxx"
+#include "disassembler/Disassembler.hxx"
+
#include <iostream>
#include <string>
#include <algorithm>
+#include "llvm/Support/raw_ostream.h"
+
using namespace llvm;
using namespace llvm::object;
-static bool error(error_code ec) {
- if (!ec) return false;
+namespace {
+ // Reports a failed error_code on outs() as "error reading file: <message>."
+ // and flushes the stream. Returns true when ec holds an error (and was
+ // reported), false when ec signals success.
+ bool error(error_code ec) {
+   if (!ec)
+     return false;
- outs() << "error reading file: " << ec.message() << ".\n";
- outs().flush();
- return true;
-}
+
+   raw_ostream &out = outs();
+   out << "error reading file: " << ec.message() << ".\n";
+   out.flush();
+   return true;
+ }
-static bool RelocAddressLess(RelocationRef a, RelocationRef b) {
- uint64_t a_addr, b_addr;
- if (error(a.getOffset(a_addr))) return false;
- if (error(b.getOffset(b_addr))) return false;
- return a_addr < b_addr;
-}
+ // Comparison helper for relocations: true when the offset reported by
+ // a.getOffset() is smaller than the one reported by b.getOffset().
+ // Returns false if either offset cannot be read (error() reports the failure).
+ bool RelocAddressLess(RelocationRef a, RelocationRef b) {
+   uint64_t lhs_offset;
+   uint64_t rhs_offset;
+   // Short-circuit evaluation: b is only queried once a's offset was read.
+   const bool failed = error(a.getOffset(lhs_offset))
+                       || error(b.getOffset(rhs_offset));
+   return failed ? false : lhs_offset < rhs_offset;
+ }
-static void DumpBytes(StringRef bytes) {
- static const char hex_rep[] = "0123456789abcdef";
- // FIXME: The real way to do this is to figure out the longest instruction
- // and align to that size before printing. I'll fix this when I get
- // around to outputting relocations.
- // 15 is the longest x86 instruction
- // 3 is for the hex rep of a byte + a space.
- // 1 is for the null terminator.
- enum { OutputSize = (15 * 3) + 1 };
- char output[OutputSize];
-
- assert(bytes.size() <= 15
- && "DumpBytes only supports instructions of up to 15 bytes");
- memset(output, ' ', sizeof(output));
- unsigned index = 0;
- for (StringRef::iterator i = bytes.begin(),
- e = bytes.end(); i != e; ++i) {
- output[index] = hex_rep[(*i & 0xF0) >> 4];
- output[index + 1] = hex_rep[*i & 0xF];
- index += 3;
+ // Writes the bytes of one instruction to outs() as lowercase hex pairs, each
+ // followed by a space, padded with spaces to a fixed width of 45 characters.
+ // Callers must pass at most 15 bytes (checked by assert); any bytes beyond
+ // that are not printed.
+ void DumpBytes(StringRef bytes) {
+   static const char hex_rep[] = "0123456789abcdef";
+   // FIXME: The real way to do this is to figure out the longest instruction
+   // and align to that size before printing. I'll fix this when I get
+   // around to outputting relocations.
+   // 15 is the longest x86 instruction
+   // 3 is for the hex rep of a byte + a space.
+   // 1 is for the null terminator.
+   enum { MaxBytes = 15, OutputSize = (MaxBytes * 3) + 1 };
+   char output[OutputSize];
+
+   assert(bytes.size() <= 15
+          && "DumpBytes only supports instructions of up to 15 bytes");
+   memset(output, ' ', sizeof(output));
+   output[sizeof(output) - 1] = 0;
+
+   // The assert above compiles away under NDEBUG, so the loop is bounded as
+   // well: an over-long input must never write past the end of `output`.
+   const unsigned limit = MaxBytes * 3;
+   unsigned index = 0;
+   for (StringRef::iterator i = bytes.begin(), e = bytes.end();
+        i != e && index < limit; ++i, index += 3) {
+     output[index] = hex_rep[(*i & 0xF0) >> 4];
+     output[index + 1] = hex_rep[*i & 0xF];
+   }
+
+   outs() << output;
}
- output[sizeof(output) - 1] = 0;
- outs() << output;
+ // Builds a map from section name to SectionRef for the sections of `o`.
+ // If a section's name cannot be read, error() reports it and the sections
+ // collected up to that point are returned. When two sections share a name,
+ // the first one encountered is kept (map::insert does not overwrite).
+ std::map<std::string, SectionRef> readSections(const ObjectFile& o) {
+   std::map<std::string, SectionRef> result;
+   for (section_iterator i(o.section_begin()), e(o.section_end()); i != e; ++i) {
+     StringRef name;
+     if (error(i->getName(name))) break;
+
+     result.insert(std::make_pair(name.str(), *i));
+   }
+   return result;
+ }
+
+ // Builds a map from symbol name to SymbolRef for the symbols of `o`.
+ // If a symbol's name cannot be read, error() reports it and the symbols
+ // collected up to that point are returned. When two symbols share a name,
+ // the first one encountered is kept (map::insert does not overwrite).
+ std::map<std::string, SymbolRef> readSymbols(const ObjectFile& o) {
+   std::map<std::string, SymbolRef> result;
+   for (symbol_iterator si(o.symbol_begin()), se(o.symbol_end()); si != se; ++si) {
+     StringRef name;
+     if (error(si->getName(name))) break;
+
+     result.insert(std::make_pair(name.str(), *si));
+   }
+   return result;
+ }
}
::Binary::Binary(const std::string& filename)
- : triple("unkown-unknown-unknown")
+ : triple(llvm::Twine("unkown-unknown-unknown"))
{
+ ::Disassembler d(filename);
std::string error;
- createBinary(filename, binary);
- if (Archive *a = dyn_cast<Archive>(binary.get())) {
+ binary = createBinary(filename).get();
+ if (Archive *a = dyn_cast<Archive>(binary)) {
std::cerr << "Got an archive!" << std::endl;
return;
}
- o = dyn_cast<ObjectFile>(binary.get());
+ o = dyn_cast<ObjectFile>(binary);
triple.setArch(Triple::ArchType(o->getArch()));
std::string tripleName(triple.getTriple());
+ outs() << tripleName << "\n";
+
target = TargetRegistry::lookupTarget("", triple, error);
if (!target) {
std::cerr << error;
return;
}
+ outs() << target->getName() << "\n";
+
MRI.reset(target->createMCRegInfo(tripleName));
if (!MRI) {
std::cerr << "error: no register info for target " << tripleName << "\n";
OwningPtr<MCObjectDisassembler> OD(
new MCObjectDisassembler(*o, *DisAsm, *MIA));
- Mod.reset(OD->buildModule(/* withCFG */ true));
+ Mod.reset(OD->buildModule(/* withCFG */ false));
+
+ symbols = readSymbols(*o);
+ sections = readSections(*o);
+
+ // for_each(sections.begin(), sections.end(), [](const std::pair<std::string, SectionRef>& i){
+ // std::cout << "Section: " << i.first << std::endl;
+ // });
+}
+
+
+
+// Returns the names of all known symbols that the ".text" section reports as
+// contained in it and whose type is SymbolRef::ST_Function. Returns an empty
+// vector when there is no ".text" entry in `sections`.
+//
+// Written with a trailing return type: in the form
+// `std::vector<std::string> ::Binary::getSymbols()` the leading `::` is
+// parsed as a continuation of the return type's qualified name.
+auto ::Binary::getSymbols() -> std::vector<std::string> {
+  std::vector<std::string> result;
+
+  // Use find() rather than operator[]: operator[] would insert a
+  // default-constructed SectionRef when ".text" is missing and then query it.
+  const auto text = sections.find(".text");
+  if (text == sections.end())
+    return result;
+
+  for (const auto &entry : symbols) {
+    bool contains = false;
+    if (error(text->second.containsSymbol(entry.second, contains)) || !contains)
+      continue;
+
+    // Only trust `type` when getType() succeeded; it is left unset otherwise.
+    SymbolRef::Type type;
+    if (error(entry.second.getType(type)))
+      continue;
+
+    if (SymbolRef::ST_Function == type)
+      result.push_back(entry.first);
+  }
+  return result;
+}
+
+// Disassembles the byte range belonging to `function` and invokes `callback`
+// once per successfully decoded instruction.
+//
+// `function` is looked up first among the symbols and then among the
+// sections. For a symbol, the range is [address, address + size) inside the
+// section that holds it; for a section, it is the whole section.
+//
+// `callback` receives, in order: the instruction's address (section base
+// address + offset), the raw instruction bytes, and the text produced by
+// IP->printInst().
+//
+// Returns without calling `callback` when the name is unknown, when any
+// object-file query fails (error() reports those), or when the symbol has no
+// containing section. Undecodable bytes produce a warning on errs() and are
+// skipped.
+void ::Binary::for_each_instruction(const std::string& function,
+    std::function<void (long, std::string, std::string)> callback) {
+  StringRef bytes;
+  uint64_t base_address = 0, address = 0, ssize = 0;
+
+  const auto symbol = symbols.find(function);
+  if (symbol != symbols.end()) {
+    SymbolRef ref = symbol->second;
+    section_iterator sec(o->section_end());
+
+    if (error(ref.getSection(sec))) return;
+    // A symbol without a containing section leaves the iterator at the end;
+    // dereferencing it below would be invalid.
+    if (sec == o->section_end()) return;
+    if (error(ref.getAddress(address))) return;
+    if (address == UnknownAddressOrSize) return;
+    if (error(ref.getSize(ssize))) return;
+    if (error(sec->getAddress(base_address))) return;
+    if (error(sec->getContents(bytes))) return;
+  }
+  else {
+    const auto section = sections.find(function);
+    // Neither a symbol nor a section: there is no range to disassemble.
+    if (section == sections.end()) return;
+
+    SectionRef sref = section->second;
+    if (error(sref.getAddress(address))) return;
+    if (address == UnknownAddressOrSize) return;
+    if (error(sref.getSize(ssize))) return;
+    if (error(sref.getContents(bytes))) return;
+    base_address = address;
+  }
+
+  // Offsets below are relative to the section start; an address in front of
+  // the section would make the unsigned subtraction wrap around.
+  if (address < base_address) return;
+
+  StringRefMemoryObject memoryObject(bytes);
+  const uint64_t begin = address - base_address;
+  // Never walk past the bytes that are actually available.
+  const uint64_t end = std::min<uint64_t>(begin + ssize, bytes.size());
+
+  uint64_t size = 0;
+  for (uint64_t index = begin; index < end; index += size) {
+    MCInst Inst;
+
+    if (DisAsm->getInstruction(Inst, size, memoryObject, index,
+                               nulls(), nulls())) {
+      std::string buf;
+      llvm::raw_string_ostream s(buf);
+      IP->printInst(&Inst, s, "");
+
+      // `<=` so that an instruction ending exactly at the end of the
+      // contents is reported too. Slice the StringRef before converting, so
+      // the whole section is not copied for every instruction.
+      if (index + size <= bytes.size())
+        callback(base_address + index, bytes.substr(index, size).str(), s.str());
+
+    } else {
+      errs() << "warning: invalid instruction encoding\n";
+    }
+
+    // Always make progress, even if the disassembler reports a zero size.
+    if (size == 0)
+      size = 1; // skip illegible bytes
+  }
+
}
void ::Binary::disassemble() {
void ::Binary::disassemble_functions() {
error_code ec;
- for (section_iterator i = o->begin_sections(),
- e = o->end_sections();
- i != e; i.increment(ec)) {
+ for (section_iterator i = o->section_begin(),
+ e = o->section_end();
+ i != e; ++i) {
if (error(ec)) break;
bool text;
if (error(i->isText(text))) break;
// Make a list of all the symbols in this section.
std::vector<std::pair<uint64_t, StringRef> > Symbols;
- for (symbol_iterator si = o->begin_symbols(),
- se = o->end_symbols();
- si != se; si.increment(ec)) {
+ for (symbol_iterator si = o->symbol_begin(),
+ se = o->symbol_end();
+ si != se; ++si) {
bool contains;
if (!error(i->containsSymbol(*si, contains)) && contains) {
uint64_t Address;
outs() << '\n' << Symbols[si].second << ":\n";
#ifndef NDEBUG
- raw_ostream &DebugOut = DebugFlag ? dbgs() : nulls();
+ raw_ostream &DebugOut = nulls(); //DebugFlag ? dbgs() : nulls();
#else
raw_ostream &DebugOut = nulls();
#endif
outs() << format("\t\t\t%8" PRIx64 ": ", SectionAddr + addr) << name
<< "\t" << val << "\n";
- skip_print_rel:
+ skip_print_rel:
++rel_cur;
}
}
std::cerr << FileName << std::endl;
- // Start a new dot file.
+ // Start a new dot file.
std::string Error;
raw_fd_ostream Out(FileName.c_str(), Error);
if (!Error.empty()) {