]> git.siccegge.de Git - frida/frida.git/blob - src/Binary.cxx
Handle case with no known functions properly
[frida/frida.git] / src / Binary.cxx
1 #include "Binary.hxx"
2
3 #include <iostream>
4 #include <string>
5 #include <algorithm>
6
7 #include "llvm/Support/raw_ostream.h"
8
9 using namespace llvm;
10 using namespace llvm::object;
11
12 namespace {
13 bool error(error_code ec) {
14 if (!ec) return false;
15
16 outs() << "error reading file: " << ec.message() << ".\n";
17 outs().flush();
18 return true;
19 }
20
21 bool RelocAddressLess(RelocationRef a, RelocationRef b) {
22 uint64_t a_addr, b_addr;
23 if (error(a.getOffset(a_addr))) return false;
24 if (error(b.getOffset(b_addr))) return false;
25 return a_addr < b_addr;
26 }
27
28 void DumpBytes(StringRef bytes) {
29 static const char hex_rep[] = "0123456789abcdef";
30 // FIXME: The real way to do this is to figure out the longest instruction
31 // and align to that size before printing. I'll fix this when I get
32 // around to outputting relocations.
33 // 15 is the longest x86 instruction
34 // 3 is for the hex rep of a byte + a space.
35 // 1 is for the null terminator.
36 enum { OutputSize = (15 * 3) + 1 };
37 char output[OutputSize];
38
39 assert(bytes.size() <= 15
40 && "DumpBytes only supports instructions of up to 15 bytes");
41 memset(output, ' ', sizeof(output));
42 unsigned index = 0;
43 for (StringRef::iterator i = bytes.begin(),
44 e = bytes.end(); i != e; ++i) {
45 output[index] = hex_rep[(*i & 0xF0) >> 4];
46 output[index + 1] = hex_rep[*i & 0xF];
47 index += 3;
48 }
49
50 output[sizeof(output) - 1] = 0;
51 outs() << output;
52 }
53
54 std::map<std::string, SectionRef> readSections(const ObjectFile& o) {
55 error_code ec;
56 std::map<std::string, SectionRef> result;
57 section_iterator i(o.begin_sections()), e(o.end_sections());
58 for (; i != e; i.increment(ec)) {
59 StringRef name;
60 if (error(i->getName(name))) break;
61
62 result.insert(make_pair(name.str(), *i));
63 }
64 return result;
65 }
66
67 std::map<std::string, SymbolRef> readSymbols(const ObjectFile& o) {
68 error_code ec;
69 std::map<std::string, SymbolRef> result;
70 symbol_iterator si(o.begin_symbols()), se(o.end_symbols());
71 for (; si != se; si.increment(ec)) {
72 StringRef name;
73 if (error(si->getName(name))) break;
74
75 result.insert(make_pair(name.str(), *si));
76 }
77 return result;
78 }
79 }
80
81 namespace qtlldb {
82
83 Binary::Binary(const std::string& filename)
84 : triple("unkown-unknown-unknown")
85 {
86 std::string error;
87
88 createBinary(filename, binary);
89 if (Archive *a = dyn_cast<Archive>(binary.get())) {
90 std::cerr << "Got an archive!" << std::endl;
91 return;
92 }
93
94 o = dyn_cast<ObjectFile>(binary.get());
95
96 triple.setArch(Triple::ArchType(o->getArch()));
97 std::string tripleName(triple.getTriple());
98
99 outs() << tripleName << "\n";
100
101 target = TargetRegistry::lookupTarget("", triple, error);
102 if (!target) {
103 std::cerr << error;
104 return;
105 }
106
107 outs() << target->getName() << "\n";
108
109 MRI.reset(target->createMCRegInfo(tripleName));
110 if (!MRI) {
111 std::cerr << "error: no register info for target " << tripleName << "\n";
112 return;
113 }
114
115 // Set up disassembler.
116 AsmInfo.reset(target->createMCAsmInfo(*MRI, tripleName));
117 if (!AsmInfo) {
118 std::cerr << "error: no assembly info for target " << tripleName << "\n";
119 return;
120 }
121
122 STI.reset(target->createMCSubtargetInfo(tripleName, "", ""));
123 if (!STI) {
124 errs() << "error: no subtarget info for target " << tripleName << "\n";
125 return;
126 }
127
128 MII.reset(target->createMCInstrInfo());
129 if (!MII) {
130 std::cerr << "error: no instruction info for target " << tripleName << "\n";
131 return;
132 }
133
134 DisAsm.reset(target->createMCDisassembler(*STI));
135 if (!DisAsm) {
136 std::cerr << "error: no disassembler for target " << tripleName << "\n";
137 return;
138 }
139
140 MOFI.reset(new MCObjectFileInfo);
141 Ctx.reset(new MCContext(AsmInfo.get(), MRI.get(), MOFI.get()));
142 RelInfo.reset(
143 target->createMCRelocationInfo(tripleName, *Ctx.get()));
144 if (RelInfo) {
145 Symzer.reset(
146 MCObjectSymbolizer::createObjectSymbolizer(*Ctx.get(), RelInfo, o));
147 if (Symzer)
148 DisAsm->setSymbolizer(Symzer);
149 }
150
151 MIA.reset(target->createMCInstrAnalysis(MII.get()));
152
153 int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
154 IP.reset(target->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI));
155 if (!IP) {
156 std::cerr << "error: no instruction printer for target " << tripleName
157 << '\n';
158 return;
159 }
160
161 OwningPtr<MCObjectDisassembler> OD(
162 new MCObjectDisassembler(*o, *DisAsm, *MIA));
163 Mod.reset(OD->buildModule(/* withCFG */ false));
164
165 symbols = readSymbols(*o);
166 sections = readSections(*o);
167
168 // for_each(sections.begin(), sections.end(), [](const std::pair<std::string, SectionRef>& i){
169 // std::cout << "Section: " << i.first << std::endl;
170 // });
171 }
172
173
174
175 std::vector<std::string> Binary::getSymbols() {
176 error_code ec;
177 SectionRef r = sections[".text"];
178 std::vector<std::string> result;
179 for_each(symbols.begin(), symbols.end(), [&](const std::pair<std::string, SymbolRef>& i) {
180 bool contains;
181 if (!error(r.containsSymbol(i.second, contains)) && contains)
182 result.push_back(i.first);
183 });
184 return result;
185 }
186
187 void Binary::for_each_instruction(const std::string& function,
188 std::function<void (long, std::string, std::string)> callback) {
189 StringRef bytes;
190 uint64_t base_address, address, ssize, size(0), index, end;
191 StringRefMemoryObject memoryObject("");
192
193 if (symbols.end() != symbols.find(function)) {
194 SymbolRef ref;
195 section_iterator sec(o->begin_sections());
196
197 ref = symbols.at(function);
198 if (error(ref.getSection(sec))) return;
199 if (error(ref.getAddress(address))) return;
200 if (address == UnknownAddressOrSize) return;
201 if (error(ref.getSize(ssize))) return;
202 if (error(sec->getAddress(base_address))) return;
203 if (error(sec->getContents(bytes))) return;
204 memoryObject = bytes;
205
206 }
207 else if (sections.end() != sections.find(function)) {
208 SectionRef sref = sections.at(function);
209 if (error(sref.getAddress(address))) return;
210 if (address == UnknownAddressOrSize) return;
211 if (error(sref.getSize(ssize))) return;
212 if (error(sref.getContents(bytes))) return;
213 base_address = address;
214 memoryObject = bytes;
215 }
216
217
218 // outs() << "Start for_each_instruction " << function << "\n";
219
220
221 for (end = address + ssize - base_address, index = address - base_address; index < end; index += size) {
222 MCInst Inst;
223
224 if (DisAsm->getInstruction(Inst, size, memoryObject, index,
225 nulls(), nulls())) {
226 std::string buf;
227 llvm::raw_string_ostream s(buf);
228 IP->printInst(&Inst, s, "");
229
230 if (index + size < bytes.str().length())
231 callback(base_address + index, bytes.str().substr(index, size), s.str());
232
233 } else {
234 errs() << "warning: invalid instruction encoding\n";
235 if (size == 0)
236 size = 1; // skip illegible bytes
237 }
238 }
239 // outs() << "End for_each_instruction\n";
240
241 }
242
243 void Binary::disassemble() {
244 for (MCModule::const_atom_iterator AI = Mod->atom_begin(),
245 AE = Mod->atom_end();
246 AI != AE; ++AI) {
247
248 if ((*AI)->getKind() != llvm::MCAtom::TextAtom)
249 continue;
250
251 outs() << "\n\nAtom " << (*AI)->getName() << ": \n";
252 if (const MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI)) {
253 for (MCTextAtom::const_iterator II = TA->begin(), IE = TA->end();
254 II != IE;
255 ++II) {
256 // II->Inst.dump();
257 IP->printInst(&II->Inst, outs(), "");
258 outs() << "\n";
259 }
260 }
261 }
262
263 outs() << "binary " << triple.getArchName() << "\n";
264 }
265
266 void Binary::disassemble_functions() {
267 error_code ec;
268 for (section_iterator i = o->begin_sections(),
269 e = o->end_sections();
270 i != e; i.increment(ec)) {
271 if (error(ec)) break;
272 bool text;
273 if (error(i->isText(text))) break;
274 if (!text) continue;
275
276 uint64_t SectionAddr;
277 if (error(i->getAddress(SectionAddr))) break;
278
279 // Make a list of all the symbols in this section.
280 std::vector<std::pair<uint64_t, StringRef> > Symbols;
281 for (symbol_iterator si = o->begin_symbols(),
282 se = o->end_symbols();
283 si != se; si.increment(ec)) {
284 bool contains;
285 if (!error(i->containsSymbol(*si, contains)) && contains) {
286 uint64_t Address;
287 if (error(si->getAddress(Address))) break;
288 if (Address == UnknownAddressOrSize) continue;
289 Address -= SectionAddr;
290
291 StringRef Name;
292 if (error(si->getName(Name))) break;
293
294 outs() << "\nXXX " << Name << "\n";
295
296 Symbols.push_back(std::make_pair(Address, Name));
297 }
298 }
299
300 // Sort the symbols by address, just in case they didn't come in that way.
301 array_pod_sort(Symbols.begin(), Symbols.end());
302
303 // Make a list of all the relocations for this section.
304 std::vector<RelocationRef> Rels;
305 // if (InlineRelocs) {
306 // for (relocation_iterator ri = i->begin_relocations(),
307 // re = i->end_relocations();
308 // ri != re; ri.increment(ec)) {
309 // if (error(ec)) break;
310 // Rels.push_back(*ri);
311 // }
312 // }
313
314 // Sort relocations by address.
315 std::sort(Rels.begin(), Rels.end(), RelocAddressLess);
316
317 StringRef SegmentName = "";
318 // if (const MachOObjectFile *MachO =
319 // dyn_cast<const MachOObjectFile>(o)) {
320 // DataRefImpl DR = i->getRawDataRefImpl();
321 // SegmentName = MachO->getSectionFinalSegmentName(DR);
322 // }
323 StringRef name;
324 if (error(i->getName(name))) break;
325 outs() << "Disassembly of section ";
326 if (!SegmentName.empty())
327 outs() << SegmentName << ",";
328 outs() << name << ':';
329
330 // If the section has no symbols just insert a dummy one and disassemble
331 // the whole section.
332 if (Symbols.empty())
333 Symbols.push_back(std::make_pair(0, name));
334
335
336 StringRef Bytes;
337 if (error(i->getContents(Bytes))) break;
338 StringRefMemoryObject memoryObject(Bytes);
339 uint64_t Size;
340 uint64_t Index;
341 uint64_t SectSize;
342 if (error(i->getSize(SectSize))) break;
343
344 std::vector<RelocationRef>::const_iterator rel_cur = Rels.begin();
345 std::vector<RelocationRef>::const_iterator rel_end = Rels.end();
346 // Disassemble symbol by symbol.
347 for (unsigned si = 0, se = Symbols.size(); si != se; ++si) {
348 uint64_t Start = Symbols[si].first;
349 uint64_t End;
350 // The end is either the size of the section or the beginning of the next
351 // symbol.
352 if (si == se - 1)
353 End = SectSize;
354 // Make sure this symbol takes up space.
355 else if (Symbols[si + 1].first != Start)
356 End = Symbols[si + 1].first - 1;
357 else
358 // This symbol has the same address as the next symbol. Skip it.
359 continue;
360
361 outs() << '\n' << Symbols[si].second << ":\n";
362
363 #ifndef NDEBUG
364 raw_ostream &DebugOut = DebugFlag ? dbgs() : nulls();
365 #else
366 raw_ostream &DebugOut = nulls();
367 #endif
368
369 for (Index = Start; Index < End; Index += Size) {
370 MCInst Inst;
371
372 if (DisAsm->getInstruction(Inst, Size, memoryObject, Index,
373 DebugOut, nulls())) {
374 outs() << format("%8" PRIx64 ":", SectionAddr + Index);
375 outs() << "\t";
376 DumpBytes(StringRef(Bytes.data() + Index, Size));
377
378 IP->printInst(&Inst, outs(), "");
379 outs() << "\n";
380 } else {
381 errs() << "warning: invalid instruction encoding\n";
382 if (Size == 0)
383 Size = 1; // skip illegible bytes
384 }
385
386 // Print relocation for instruction.
387 while (rel_cur != rel_end) {
388 bool hidden = false;
389 uint64_t addr;
390 SmallString<16> name;
391 SmallString<32> val;
392
393 // If this relocation is hidden, skip it.
394 if (error(rel_cur->getHidden(hidden))) goto skip_print_rel;
395 if (hidden) goto skip_print_rel;
396
397 if (error(rel_cur->getOffset(addr))) goto skip_print_rel;
398 // Stop when rel_cur's address is past the current instruction.
399 if (addr >= Index + Size) break;
400 if (error(rel_cur->getTypeName(name))) goto skip_print_rel;
401 if (error(rel_cur->getValueString(val))) goto skip_print_rel;
402
403 outs() << format("\t\t\t%8" PRIx64 ": ", SectionAddr + addr) << name
404 << "\t" << val << "\n";
405
406 skip_print_rel:
407 ++rel_cur;
408 }
409 }
410 }
411 }
412 }
413
414 void Binary::disassemble_cfg() {
415 for (MCModule::const_func_iterator FI = Mod->func_begin(),
416 FE = Mod->func_end();
417 FI != FE; ++FI) {
418 static int filenum = 0;
419 std::string FileName = std::string("dot/") + (Twine((*FI)->getName()) + "_" + utostr(filenum) + ".dot").str();
420
421 std::cerr << FileName << std::endl;
422
423 // Start a new dot file.
424 std::string Error;
425 raw_fd_ostream Out(FileName.c_str(), Error);
426 if (!Error.empty()) {
427 errs() << "llvm-objdump: warning: " << Error << '\n';
428 return;
429 }
430
431 Out << "digraph \"" << (*FI)->getName() << "\" {\n";
432 Out << "graph [ rankdir = \"LR\" ];\n";
433 for (MCFunction::const_iterator i = (*FI)->begin(), e = (*FI)->end(); i != e; ++i) {
434 // Only print blocks that have predecessors.
435 bool hasPreds = (*i)->pred_begin() != (*i)->pred_end();
436
437 if (!hasPreds && i != (*FI)->begin())
438 continue;
439
440 Out << '"' << (*i)->getInsts()->getBeginAddr() << "\" [ label=\"<a>";
441 // Print instructions.
442 for (unsigned ii = 0, ie = (*i)->getInsts()->size(); ii != ie;
443 ++ii) {
444 if (ii != 0) // Not the first line, start a new row.
445 Out << '|';
446 if (ii + 1 == ie) // Last line, add an end id.
447 Out << "<o>";
448
449 // Escape special chars and print the instruction in mnemonic form.
450 std::string Str;
451 raw_string_ostream OS(Str);
452 IP->printInst(&(*i)->getInsts()->at(ii).Inst, OS, "");
453 Out << DOT::EscapeString(OS.str());
454 }
455 Out << "\" shape=\"record\" ];\n";
456
457 // Add edges.
458 for (MCBasicBlock::succ_const_iterator si = (*i)->succ_begin(),
459 se = (*i)->succ_end(); si != se; ++si)
460 Out << (*i)->getInsts()->getBeginAddr() << ":o -> "
461 << (*si)->getInsts()->getBeginAddr() << ":a\n";
462 }
463 Out << "}\n";
464
465 ++filenum;
466 }
467 }
468 }