X-Git-Url: https://git.siccegge.de//index.cgi?p=frida%2Ffrida.git;a=blobdiff_plain;f=src%2FBinary.cxx;h=f8776858c42d9374625dcd0f99cf5ebbac778bc4;hp=696947d0262abd1d0ecc992ebc9918120d200283;hb=59181afa72eb08a7716d5bebd83b0be6a972e5f1;hpb=988726af7276c31122cd2e27adae87aa8b4049ad diff --git a/src/Binary.cxx b/src/Binary.cxx index 696947d..f877685 100644 --- a/src/Binary.cxx +++ b/src/Binary.cxx @@ -1,75 +1,112 @@ #include "Binary.hxx" +#include "disassembler/Disassembler.hxx" + #include #include #include +#include "llvm/Support/raw_ostream.h" + using namespace llvm; using namespace llvm::object; -static bool error(error_code ec) { - if (!ec) return false; +namespace { + bool error(error_code ec) { + if (!ec) return false; - outs() << "error reading file: " << ec.message() << ".\n"; - outs().flush(); - return true; -} + outs() << "error reading file: " << ec.message() << ".\n"; + outs().flush(); + return true; + } -static bool RelocAddressLess(RelocationRef a, RelocationRef b) { - uint64_t a_addr, b_addr; - if (error(a.getOffset(a_addr))) return false; - if (error(b.getOffset(b_addr))) return false; - return a_addr < b_addr; -} + bool RelocAddressLess(RelocationRef a, RelocationRef b) { + uint64_t a_addr, b_addr; + if (error(a.getOffset(a_addr))) return false; + if (error(b.getOffset(b_addr))) return false; + return a_addr < b_addr; + } + + void DumpBytes(StringRef bytes) { + static const char hex_rep[] = "0123456789abcdef"; + // FIXME: The real way to do this is to figure out the longest instruction + // and align to that size before printing. I'll fix this when I get + // around to outputting relocations. + // 15 is the longest x86 instruction + // 3 is for the hex rep of a byte + a space. + // 1 is for the null terminator. + enum { OutputSize = (15 * 3) + 1 }; + char output[OutputSize]; + + assert(bytes.size() <= 15 + && "DumpBytes only supports instructions of up to 15 bytes"); + memset(output, ' ', sizeof(output)); + unsigned index = 0; + for (StringRef::iterator i = bytes.begin(), + e = bytes.end(); i != e; ++i) { + output[index] = hex_rep[(*i & 0xF0) >> 4]; + output[index + 1] = hex_rep[*i & 0xF]; + index += 3; + } + + output[sizeof(output) - 1] = 0; + outs() << output; + } + + std::map readSections(const ObjectFile& o) { + error_code ec; + std::map result; + section_iterator i(o.section_begin()), e(o.section_end()); + for (; i != e; ++i) { + StringRef name; + if (error(i->getName(name))) break; + + result.insert(make_pair(name.str(), *i)); + } + return result; + } + + std::map readSymbols(const ObjectFile& o) { + error_code ec; + std::map result; + symbol_iterator si(o.symbol_begin()), se(o.symbol_end()); + for (; si != se; ++si) { + StringRef name; + if (error(si->getName(name))) break; -static void DumpBytes(StringRef bytes) { - static const char hex_rep[] = "0123456789abcdef"; - // FIXME: The real way to do this is to figure out the longest instruction - // and align to that size before printing. I'll fix this when I get - // around to outputting relocations. - // 15 is the longest x86 instruction - // 3 is for the hex rep of a byte + a space. - // 1 is for the null terminator. - enum { OutputSize = (15 * 3) + 1 }; - char output[OutputSize]; - - assert(bytes.size() <= 15 - && "DumpBytes only supports instructions of up to 15 bytes"); - memset(output, ' ', sizeof(output)); - unsigned index = 0; - for (StringRef::iterator i = bytes.begin(), - e = bytes.end(); i != e; ++i) { - output[index] = hex_rep[(*i & 0xF0) >> 4]; - output[index + 1] = hex_rep[*i & 0xF]; - index += 3; - } - - output[sizeof(output) - 1] = 0; - outs() << output; + result.insert(make_pair(name.str(), *si)); + } + return result; + } } ::Binary::Binary(const std::string& filename) - : triple("unkown-unknown-unknown") + : triple(llvm::Twine("unkown-unknown-unknown")) { + ::Disassembler d(filename); std::string error; - createBinary(filename, binary); - if (Archive *a = dyn_cast(binary.get())) { + binary = createBinary(filename).get(); + if (Archive *a = dyn_cast(binary)) { std::cerr << "Got an archive!" << std::endl; return; } - o = dyn_cast(binary.get()); + o = dyn_cast(binary); triple.setArch(Triple::ArchType(o->getArch())); std::string tripleName(triple.getTriple()); + outs() << tripleName << "\n"; + target = TargetRegistry::lookupTarget("", triple, error); if (!target) { std::cerr << error; return; } + outs() << target->getName() << "\n"; + MRI.reset(target->createMCRegInfo(tripleName)); if (!MRI) { std::cerr << "error: no register info for target " << tripleName << "\n"; @@ -124,7 +161,89 @@ static void DumpBytes(StringRef bytes) { OwningPtr OD( new MCObjectDisassembler(*o, *DisAsm, *MIA)); - Mod.reset(OD->buildModule(/* withCFG */ true)); + Mod.reset(OD->buildModule(/* withCFG */ false)); + + symbols = readSymbols(*o); + sections = readSections(*o); + + // for_each(sections.begin(), sections.end(), [](const std::pair& i){ + // std::cout << "Section: " << i.first << std::endl; + // }); +} + + + +std::vector +::Binary::getSymbols() { + error_code ec; + SectionRef r = sections[".text"]; + std::vector result; + for_each(symbols.begin(), symbols.end(), [&](const std::pair& i) { + bool contains; + SymbolRef::Type t; + if (!error(r.containsSymbol(i.second, contains)) && contains) { + i.second.getType(t); + if (SymbolRef::ST_Function == t) + result.push_back(i.first); + } + }); + return result; +} + +void ::Binary::for_each_instruction(const std::string& function, + std::function callback) { + StringRef bytes; + uint64_t base_address, address, ssize, size(0), index, end; + StringRefMemoryObject memoryObject(""); + + if (symbols.end() != symbols.find(function)) { + SymbolRef ref; + section_iterator sec(o->section_begin()); + + ref = symbols.at(function); + if (error(ref.getSection(sec))) return; + if (error(ref.getAddress(address))) return; + if (address == UnknownAddressOrSize) return; + if (error(ref.getSize(ssize))) return; + if (error(sec->getAddress(base_address))) return; + if (error(sec->getContents(bytes))) return; + memoryObject = bytes; + + } + else if (sections.end() != sections.find(function)) { + SectionRef sref = sections.at(function); + if (error(sref.getAddress(address))) return; + if (address == UnknownAddressOrSize) return; + if (error(sref.getSize(ssize))) return; + if (error(sref.getContents(bytes))) return; + base_address = address; + memoryObject = bytes; + } + + + // outs() << "Start for_each_instruction " << function << "\n"; + + + for (end = address + ssize - base_address, index = address - base_address; index < end; index += size) { + MCInst Inst; + + if (DisAsm->getInstruction(Inst, size, memoryObject, index, + nulls(), nulls())) { + std::string buf; + llvm::raw_string_ostream s(buf); + IP->printInst(&Inst, s, ""); + + if (index + size < bytes.str().length()) + callback(base_address + index, bytes.str().substr(index, size), s.str()); + + } else { + errs() << "warning: invalid instruction encoding\n"; + if (size == 0) + size = 1; // skip illegible bytes + } + } + // outs() << "End for_each_instruction\n"; + } void ::Binary::disassemble() { @@ -152,9 +271,9 @@ void ::Binary::disassemble() { void ::Binary::disassemble_functions() { error_code ec; - for (section_iterator i = o->begin_sections(), - e = o->end_sections(); - i != e; i.increment(ec)) { + for (section_iterator i = o->section_begin(), + e = o->section_end(); + i != e; ++i) { if (error(ec)) break; bool text; if (error(i->isText(text))) break; @@ -165,9 +284,9 @@ void ::Binary::disassemble_functions() { // Make a list of all the symbols in this section. std::vector > Symbols; - for (symbol_iterator si = o->begin_symbols(), - se = o->end_symbols(); - si != se; si.increment(ec)) { + for (symbol_iterator si = o->symbol_begin(), + se = o->symbol_end(); + si != se; ++si) { bool contains; if (!error(i->containsSymbol(*si, contains)) && contains) { uint64_t Address; @@ -177,6 +296,9 @@ void ::Binary::disassemble_functions() { StringRef Name; if (error(si->getName(Name))) break; + + outs() << "\nXXX " << Name << "\n"; + Symbols.push_back(std::make_pair(Address, Name)); } } @@ -245,7 +367,7 @@ void ::Binary::disassemble_functions() { outs() << '\n' << Symbols[si].second << ":\n"; #ifndef NDEBUG - raw_ostream &DebugOut = DebugFlag ? dbgs() : nulls(); + raw_ostream &DebugOut = nulls(); //DebugFlag ? dbgs() : nulls(); #else raw_ostream &DebugOut = nulls(); #endif @@ -287,10 +409,65 @@ void ::Binary::disassemble_functions() { outs() << format("\t\t\t%8" PRIx64 ": ", SectionAddr + addr) << name << "\t" << val << "\n"; - skip_print_rel: + skip_print_rel: ++rel_cur; } } } } } + +void ::Binary::disassemble_cfg() { + for (MCModule::const_func_iterator FI = Mod->func_begin(), + FE = Mod->func_end(); + FI != FE; ++FI) { + static int filenum = 0; + std::string FileName = std::string("dot/") + (Twine((*FI)->getName()) + "_" + utostr(filenum) + ".dot").str(); + + std::cerr << FileName << std::endl; + + // Start a new dot file. + std::string Error; + raw_fd_ostream Out(FileName.c_str(), Error); + if (!Error.empty()) { + errs() << "llvm-objdump: warning: " << Error << '\n'; + return; + } + + Out << "digraph \"" << (*FI)->getName() << "\" {\n"; + Out << "graph [ rankdir = \"LR\" ];\n"; + for (MCFunction::const_iterator i = (*FI)->begin(), e = (*FI)->end(); i != e; ++i) { + // Only print blocks that have predecessors. + bool hasPreds = (*i)->pred_begin() != (*i)->pred_end(); + + if (!hasPreds && i != (*FI)->begin()) + continue; + + Out << '"' << (*i)->getInsts()->getBeginAddr() << "\" [ label=\""; + // Print instructions. + for (unsigned ii = 0, ie = (*i)->getInsts()->size(); ii != ie; + ++ii) { + if (ii != 0) // Not the first line, start a new row. + Out << '|'; + if (ii + 1 == ie) // Last line, add an end id. + Out << ""; + + // Escape special chars and print the instruction in mnemonic form. + std::string Str; + raw_string_ostream OS(Str); + IP->printInst(&(*i)->getInsts()->at(ii).Inst, OS, ""); + Out << DOT::EscapeString(OS.str()); + } + Out << "\" shape=\"record\" ];\n"; + + // Add edges. + for (MCBasicBlock::succ_const_iterator si = (*i)->succ_begin(), + se = (*i)->succ_end(); si != se; ++si) + Out << (*i)->getInsts()->getBeginAddr() << ":o -> " + << (*si)->getInsts()->getBeginAddr() << ":a\n"; + } + Out << "}\n"; + + ++filenum; + } +}