]> git.siccegge.de Git - frida/frida.git/blob - src/Binary.cxx
Consistently use include-guards
[frida/frida.git] / src / Binary.cxx
1 #include "Binary.hxx"
2
3 #include "disassembler/Disassembler.hxx"
4
5 #include <iostream>
6 #include <string>
7 #include <algorithm>
8
9 #include "llvm/Support/raw_ostream.h"
10
11 using namespace llvm;
12 using namespace llvm::object;
13
14 namespace {
15 bool error(error_code ec) {
16 if (!ec) return false;
17
18 outs() << "error reading file: " << ec.message() << ".\n";
19 outs().flush();
20 return true;
21 }
22
23 bool RelocAddressLess(RelocationRef a, RelocationRef b) {
24 uint64_t a_addr, b_addr;
25 if (error(a.getOffset(a_addr))) return false;
26 if (error(b.getOffset(b_addr))) return false;
27 return a_addr < b_addr;
28 }
29
30 void DumpBytes(StringRef bytes) {
31 static const char hex_rep[] = "0123456789abcdef";
32 // FIXME: The real way to do this is to figure out the longest instruction
33 // and align to that size before printing. I'll fix this when I get
34 // around to outputting relocations.
35 // 15 is the longest x86 instruction
36 // 3 is for the hex rep of a byte + a space.
37 // 1 is for the null terminator.
38 enum { OutputSize = (15 * 3) + 1 };
39 char output[OutputSize];
40
41 assert(bytes.size() <= 15
42 && "DumpBytes only supports instructions of up to 15 bytes");
43 memset(output, ' ', sizeof(output));
44 unsigned index = 0;
45 for (StringRef::iterator i = bytes.begin(),
46 e = bytes.end(); i != e; ++i) {
47 output[index] = hex_rep[(*i & 0xF0) >> 4];
48 output[index + 1] = hex_rep[*i & 0xF];
49 index += 3;
50 }
51
52 output[sizeof(output) - 1] = 0;
53 outs() << output;
54 }
55
56 std::map<std::string, SectionRef> readSections(const ObjectFile& o) {
57 error_code ec;
58 std::map<std::string, SectionRef> result;
59 section_iterator i(o.section_begin()), e(o.section_end());
60 for (; i != e; ++i) {
61 StringRef name;
62 if (error(i->getName(name))) break;
63
64 result.insert(make_pair(name.str(), *i));
65 }
66 return result;
67 }
68
69 std::map<std::string, SymbolRef> readSymbols(const ObjectFile& o) {
70 error_code ec;
71 std::map<std::string, SymbolRef> result;
72 symbol_iterator si(o.symbol_begin()), se(o.symbol_end());
73 for (; si != se; ++si) {
74 StringRef name;
75 if (error(si->getName(name))) break;
76
77 result.insert(make_pair(name.str(), *si));
78 }
79 return result;
80 }
81 }
82
83 ::Binary::Binary(const std::string& filename)
84 : triple(llvm::Twine("unkown-unknown-unknown"))
85 {
86 ::Disassembler d(filename);
87 std::string error;
88
89 binary = createBinary(filename).get();
90 if (Archive *a = dyn_cast<Archive>(binary)) {
91 std::cerr << "Got an archive!" << std::endl;
92 return;
93 }
94
95 o = dyn_cast<ObjectFile>(binary);
96
97 triple.setArch(Triple::ArchType(o->getArch()));
98 std::string tripleName(triple.getTriple());
99
100 outs() << tripleName << "\n";
101
102 target = TargetRegistry::lookupTarget("", triple, error);
103 if (!target) {
104 std::cerr << error;
105 return;
106 }
107
108 outs() << target->getName() << "\n";
109
110 MRI.reset(target->createMCRegInfo(tripleName));
111 if (!MRI) {
112 std::cerr << "error: no register info for target " << tripleName << "\n";
113 return;
114 }
115
116 // Set up disassembler.
117 AsmInfo.reset(target->createMCAsmInfo(*MRI, tripleName));
118 if (!AsmInfo) {
119 std::cerr << "error: no assembly info for target " << tripleName << "\n";
120 return;
121 }
122
123 STI.reset(target->createMCSubtargetInfo(tripleName, "", ""));
124 if (!STI) {
125 errs() << "error: no subtarget info for target " << tripleName << "\n";
126 return;
127 }
128
129 MII.reset(target->createMCInstrInfo());
130 if (!MII) {
131 std::cerr << "error: no instruction info for target " << tripleName << "\n";
132 return;
133 }
134
135 DisAsm.reset(target->createMCDisassembler(*STI));
136 if (!DisAsm) {
137 std::cerr << "error: no disassembler for target " << tripleName << "\n";
138 return;
139 }
140
141 MOFI.reset(new MCObjectFileInfo);
142 Ctx.reset(new MCContext(AsmInfo.get(), MRI.get(), MOFI.get()));
143 RelInfo.reset(
144 target->createMCRelocationInfo(tripleName, *Ctx.get()));
145 if (RelInfo) {
146 Symzer.reset(
147 MCObjectSymbolizer::createObjectSymbolizer(*Ctx.get(), RelInfo, o));
148 if (Symzer)
149 DisAsm->setSymbolizer(Symzer);
150 }
151
152 MIA.reset(target->createMCInstrAnalysis(MII.get()));
153
154 int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
155 IP.reset(target->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI));
156 if (!IP) {
157 std::cerr << "error: no instruction printer for target " << tripleName
158 << '\n';
159 return;
160 }
161
162 OwningPtr<MCObjectDisassembler> OD(
163 new MCObjectDisassembler(*o, *DisAsm, *MIA));
164 Mod.reset(OD->buildModule(/* withCFG */ false));
165
166 symbols = readSymbols(*o);
167 sections = readSections(*o);
168
169 // for_each(sections.begin(), sections.end(), [](const std::pair<std::string, SectionRef>& i){
170 // std::cout << "Section: " << i.first << std::endl;
171 // });
172 }
173
174
175
176 std::vector<std::string>
177 ::Binary::getSymbols() {
178 error_code ec;
179 SectionRef r = sections[".text"];
180 std::vector<std::string> result;
181 for_each(symbols.begin(), symbols.end(), [&](const std::pair<std::string, SymbolRef>& i) {
182 bool contains;
183 SymbolRef::Type t;
184 if (!error(r.containsSymbol(i.second, contains)) && contains) {
185 i.second.getType(t);
186 if (SymbolRef::ST_Function == t)
187 result.push_back(i.first);
188 }
189 });
190 return result;
191 }
192
193 void ::Binary::for_each_instruction(const std::string& function,
194 std::function<void (long, std::string, std::string)> callback) {
195 StringRef bytes;
196 uint64_t base_address, address, ssize, size(0), index, end;
197 StringRefMemoryObject memoryObject("");
198
199 if (symbols.end() != symbols.find(function)) {
200 SymbolRef ref;
201 section_iterator sec(o->section_begin());
202
203 ref = symbols.at(function);
204 if (error(ref.getSection(sec))) return;
205 if (error(ref.getAddress(address))) return;
206 if (address == UnknownAddressOrSize) return;
207 if (error(ref.getSize(ssize))) return;
208 if (error(sec->getAddress(base_address))) return;
209 if (error(sec->getContents(bytes))) return;
210 memoryObject = bytes;
211
212 }
213 else if (sections.end() != sections.find(function)) {
214 SectionRef sref = sections.at(function);
215 if (error(sref.getAddress(address))) return;
216 if (address == UnknownAddressOrSize) return;
217 if (error(sref.getSize(ssize))) return;
218 if (error(sref.getContents(bytes))) return;
219 base_address = address;
220 memoryObject = bytes;
221 }
222
223
224 // outs() << "Start for_each_instruction " << function << "\n";
225
226
227 for (end = address + ssize - base_address, index = address - base_address; index < end; index += size) {
228 MCInst Inst;
229
230 if (DisAsm->getInstruction(Inst, size, memoryObject, index,
231 nulls(), nulls())) {
232 std::string buf;
233 llvm::raw_string_ostream s(buf);
234 IP->printInst(&Inst, s, "");
235
236 if (index + size < bytes.str().length())
237 callback(base_address + index, bytes.str().substr(index, size), s.str());
238
239 } else {
240 errs() << "warning: invalid instruction encoding\n";
241 if (size == 0)
242 size = 1; // skip illegible bytes
243 }
244 }
245 // outs() << "End for_each_instruction\n";
246
247 }
248
249 void ::Binary::disassemble() {
250 for (MCModule::const_atom_iterator AI = Mod->atom_begin(),
251 AE = Mod->atom_end();
252 AI != AE; ++AI) {
253
254 if ((*AI)->getKind() != llvm::MCAtom::TextAtom)
255 continue;
256
257 outs() << "\n\nAtom " << (*AI)->getName() << ": \n";
258 if (const MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI)) {
259 for (MCTextAtom::const_iterator II = TA->begin(), IE = TA->end();
260 II != IE;
261 ++II) {
262 // II->Inst.dump();
263 IP->printInst(&II->Inst, outs(), "");
264 outs() << "\n";
265 }
266 }
267 }
268
269 outs() << "binary " << triple.getArchName() << "\n";
270 }
271
272 void ::Binary::disassemble_functions() {
273 error_code ec;
274 for (section_iterator i = o->section_begin(),
275 e = o->section_end();
276 i != e; ++i) {
277 if (error(ec)) break;
278 bool text;
279 if (error(i->isText(text))) break;
280 if (!text) continue;
281
282 uint64_t SectionAddr;
283 if (error(i->getAddress(SectionAddr))) break;
284
285 // Make a list of all the symbols in this section.
286 std::vector<std::pair<uint64_t, StringRef> > Symbols;
287 for (symbol_iterator si = o->symbol_begin(),
288 se = o->symbol_end();
289 si != se; ++si) {
290 bool contains;
291 if (!error(i->containsSymbol(*si, contains)) && contains) {
292 uint64_t Address;
293 if (error(si->getAddress(Address))) break;
294 if (Address == UnknownAddressOrSize) continue;
295 Address -= SectionAddr;
296
297 StringRef Name;
298 if (error(si->getName(Name))) break;
299
300 outs() << "\nXXX " << Name << "\n";
301
302 Symbols.push_back(std::make_pair(Address, Name));
303 }
304 }
305
306 // Sort the symbols by address, just in case they didn't come in that way.
307 array_pod_sort(Symbols.begin(), Symbols.end());
308
309 // Make a list of all the relocations for this section.
310 std::vector<RelocationRef> Rels;
311 // if (InlineRelocs) {
312 // for (relocation_iterator ri = i->begin_relocations(),
313 // re = i->end_relocations();
314 // ri != re; ri.increment(ec)) {
315 // if (error(ec)) break;
316 // Rels.push_back(*ri);
317 // }
318 // }
319
320 // Sort relocations by address.
321 std::sort(Rels.begin(), Rels.end(), RelocAddressLess);
322
323 StringRef SegmentName = "";
324 // if (const MachOObjectFile *MachO =
325 // dyn_cast<const MachOObjectFile>(o)) {
326 // DataRefImpl DR = i->getRawDataRefImpl();
327 // SegmentName = MachO->getSectionFinalSegmentName(DR);
328 // }
329 StringRef name;
330 if (error(i->getName(name))) break;
331 outs() << "Disassembly of section ";
332 if (!SegmentName.empty())
333 outs() << SegmentName << ",";
334 outs() << name << ':';
335
336 // If the section has no symbols just insert a dummy one and disassemble
337 // the whole section.
338 if (Symbols.empty())
339 Symbols.push_back(std::make_pair(0, name));
340
341
342 StringRef Bytes;
343 if (error(i->getContents(Bytes))) break;
344 StringRefMemoryObject memoryObject(Bytes);
345 uint64_t Size;
346 uint64_t Index;
347 uint64_t SectSize;
348 if (error(i->getSize(SectSize))) break;
349
350 std::vector<RelocationRef>::const_iterator rel_cur = Rels.begin();
351 std::vector<RelocationRef>::const_iterator rel_end = Rels.end();
352 // Disassemble symbol by symbol.
353 for (unsigned si = 0, se = Symbols.size(); si != se; ++si) {
354 uint64_t Start = Symbols[si].first;
355 uint64_t End;
356 // The end is either the size of the section or the beginning of the next
357 // symbol.
358 if (si == se - 1)
359 End = SectSize;
360 // Make sure this symbol takes up space.
361 else if (Symbols[si + 1].first != Start)
362 End = Symbols[si + 1].first - 1;
363 else
364 // This symbol has the same address as the next symbol. Skip it.
365 continue;
366
367 outs() << '\n' << Symbols[si].second << ":\n";
368
369 #ifndef NDEBUG
370 raw_ostream &DebugOut = nulls(); //DebugFlag ? dbgs() : nulls();
371 #else
372 raw_ostream &DebugOut = nulls();
373 #endif
374
375 for (Index = Start; Index < End; Index += Size) {
376 MCInst Inst;
377
378 if (DisAsm->getInstruction(Inst, Size, memoryObject, Index,
379 DebugOut, nulls())) {
380 outs() << format("%8" PRIx64 ":", SectionAddr + Index);
381 outs() << "\t";
382 DumpBytes(StringRef(Bytes.data() + Index, Size));
383
384 IP->printInst(&Inst, outs(), "");
385 outs() << "\n";
386 } else {
387 errs() << "warning: invalid instruction encoding\n";
388 if (Size == 0)
389 Size = 1; // skip illegible bytes
390 }
391
392 // Print relocation for instruction.
393 while (rel_cur != rel_end) {
394 bool hidden = false;
395 uint64_t addr;
396 SmallString<16> name;
397 SmallString<32> val;
398
399 // If this relocation is hidden, skip it.
400 if (error(rel_cur->getHidden(hidden))) goto skip_print_rel;
401 if (hidden) goto skip_print_rel;
402
403 if (error(rel_cur->getOffset(addr))) goto skip_print_rel;
404 // Stop when rel_cur's address is past the current instruction.
405 if (addr >= Index + Size) break;
406 if (error(rel_cur->getTypeName(name))) goto skip_print_rel;
407 if (error(rel_cur->getValueString(val))) goto skip_print_rel;
408
409 outs() << format("\t\t\t%8" PRIx64 ": ", SectionAddr + addr) << name
410 << "\t" << val << "\n";
411
412 skip_print_rel:
413 ++rel_cur;
414 }
415 }
416 }
417 }
418 }
419
420 void ::Binary::disassemble_cfg() {
421 for (MCModule::const_func_iterator FI = Mod->func_begin(),
422 FE = Mod->func_end();
423 FI != FE; ++FI) {
424 static int filenum = 0;
425 std::string FileName = std::string("dot/") + (Twine((*FI)->getName()) + "_" + utostr(filenum) + ".dot").str();
426
427 std::cerr << FileName << std::endl;
428
429 // Start a new dot file.
430 std::string Error;
431 raw_fd_ostream Out(FileName.c_str(), Error);
432 if (!Error.empty()) {
433 errs() << "llvm-objdump: warning: " << Error << '\n';
434 return;
435 }
436
437 Out << "digraph \"" << (*FI)->getName() << "\" {\n";
438 Out << "graph [ rankdir = \"LR\" ];\n";
439 for (MCFunction::const_iterator i = (*FI)->begin(), e = (*FI)->end(); i != e; ++i) {
440 // Only print blocks that have predecessors.
441 bool hasPreds = (*i)->pred_begin() != (*i)->pred_end();
442
443 if (!hasPreds && i != (*FI)->begin())
444 continue;
445
446 Out << '"' << (*i)->getInsts()->getBeginAddr() << "\" [ label=\"<a>";
447 // Print instructions.
448 for (unsigned ii = 0, ie = (*i)->getInsts()->size(); ii != ie;
449 ++ii) {
450 if (ii != 0) // Not the first line, start a new row.
451 Out << '|';
452 if (ii + 1 == ie) // Last line, add an end id.
453 Out << "<o>";
454
455 // Escape special chars and print the instruction in mnemonic form.
456 std::string Str;
457 raw_string_ostream OS(Str);
458 IP->printInst(&(*i)->getInsts()->at(ii).Inst, OS, "");
459 Out << DOT::EscapeString(OS.str());
460 }
461 Out << "\" shape=\"record\" ];\n";
462
463 // Add edges.
464 for (MCBasicBlock::succ_const_iterator si = (*i)->succ_begin(),
465 se = (*i)->succ_end(); si != se; ++si)
466 Out << (*i)->getInsts()->getBeginAddr() << ":o -> "
467 << (*si)->getInsts()->getBeginAddr() << ":a\n";
468 }
469 Out << "}\n";
470
471 ++filenum;
472 }
473 }