]> git.siccegge.de Git - frida/frida.git/blob - src/Binary.cxx
Update
[frida/frida.git] / src / Binary.cxx
1 #include "Binary.hxx"
2
3 #include <iostream>
4 #include <string>
5 #include <algorithm>
6
7 using namespace llvm;
8 using namespace llvm::object;
9
10 static bool error(error_code ec) {
11 if (!ec) return false;
12
13 outs() << "error reading file: " << ec.message() << ".\n";
14 outs().flush();
15 return true;
16 }
17
18 static bool RelocAddressLess(RelocationRef a, RelocationRef b) {
19 uint64_t a_addr, b_addr;
20 if (error(a.getOffset(a_addr))) return false;
21 if (error(b.getOffset(b_addr))) return false;
22 return a_addr < b_addr;
23 }
24
25 static void DumpBytes(StringRef bytes) {
26 static const char hex_rep[] = "0123456789abcdef";
27 // FIXME: The real way to do this is to figure out the longest instruction
28 // and align to that size before printing. I'll fix this when I get
29 // around to outputting relocations.
30 // 15 is the longest x86 instruction
31 // 3 is for the hex rep of a byte + a space.
32 // 1 is for the null terminator.
33 enum { OutputSize = (15 * 3) + 1 };
34 char output[OutputSize];
35
36 assert(bytes.size() <= 15
37 && "DumpBytes only supports instructions of up to 15 bytes");
38 memset(output, ' ', sizeof(output));
39 unsigned index = 0;
40 for (StringRef::iterator i = bytes.begin(),
41 e = bytes.end(); i != e; ++i) {
42 output[index] = hex_rep[(*i & 0xF0) >> 4];
43 output[index + 1] = hex_rep[*i & 0xF];
44 index += 3;
45 }
46
47 output[sizeof(output) - 1] = 0;
48 outs() << output;
49 }
50
51 ::Binary::Binary(const std::string& filename)
52 : triple("unkown-unknown-unknown")
53 {
54 std::string error;
55
56 createBinary(filename, binary);
57 if (Archive *a = dyn_cast<Archive>(binary.get())) {
58 std::cerr << "Got an archive!" << std::endl;
59 return;
60 }
61
62 o = dyn_cast<ObjectFile>(binary.get());
63
64 triple.setArch(Triple::ArchType(o->getArch()));
65 std::string tripleName(triple.getTriple());
66
67 target = TargetRegistry::lookupTarget("", triple, error);
68 if (!target) {
69 std::cerr << error;
70 return;
71 }
72
73 MRI.reset(target->createMCRegInfo(tripleName));
74 if (!MRI) {
75 std::cerr << "error: no register info for target " << tripleName << "\n";
76 return;
77 }
78
79 // Set up disassembler.
80 AsmInfo.reset(target->createMCAsmInfo(*MRI, tripleName));
81 if (!AsmInfo) {
82 std::cerr << "error: no assembly info for target " << tripleName << "\n";
83 return;
84 }
85
86 STI.reset(target->createMCSubtargetInfo(tripleName, "", ""));
87 if (!STI) {
88 errs() << "error: no subtarget info for target " << tripleName << "\n";
89 return;
90 }
91
92 MII.reset(target->createMCInstrInfo());
93 if (!MII) {
94 std::cerr << "error: no instruction info for target " << tripleName << "\n";
95 return;
96 }
97
98 DisAsm.reset(target->createMCDisassembler(*STI));
99 if (!DisAsm) {
100 std::cerr << "error: no disassembler for target " << tripleName << "\n";
101 return;
102 }
103
104 MOFI.reset(new MCObjectFileInfo);
105 Ctx.reset(new MCContext(AsmInfo.get(), MRI.get(), MOFI.get()));
106 RelInfo.reset(
107 target->createMCRelocationInfo(tripleName, *Ctx.get()));
108 if (RelInfo) {
109 Symzer.reset(
110 MCObjectSymbolizer::createObjectSymbolizer(*Ctx.get(), RelInfo, o));
111 if (Symzer)
112 DisAsm->setSymbolizer(Symzer);
113 }
114
115 MIA.reset(target->createMCInstrAnalysis(MII.get()));
116
117 int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
118 IP.reset(target->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI));
119 if (!IP) {
120 std::cerr << "error: no instruction printer for target " << tripleName
121 << '\n';
122 return;
123 }
124
125 OwningPtr<MCObjectDisassembler> OD(
126 new MCObjectDisassembler(*o, *DisAsm, *MIA));
127 Mod.reset(OD->buildModule(/* withCFG */ true));
128 }
129
130 void ::Binary::disassemble() {
131 for (MCModule::const_atom_iterator AI = Mod->atom_begin(),
132 AE = Mod->atom_end();
133 AI != AE; ++AI) {
134
135 if ((*AI)->getKind() != llvm::MCAtom::TextAtom)
136 continue;
137
138 outs() << "\n\nAtom " << (*AI)->getName() << ": \n";
139 if (const MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI)) {
140 for (MCTextAtom::const_iterator II = TA->begin(), IE = TA->end();
141 II != IE;
142 ++II) {
143 // II->Inst.dump();
144 IP->printInst(&II->Inst, outs(), "");
145 outs() << "\n";
146 }
147 }
148 }
149
150 outs() << "binary " << triple.getArchName() << "\n";
151 }
152
153 void ::Binary::disassemble_functions() {
154 error_code ec;
155 for (section_iterator i = o->begin_sections(),
156 e = o->end_sections();
157 i != e; i.increment(ec)) {
158 if (error(ec)) break;
159 bool text;
160 if (error(i->isText(text))) break;
161 if (!text) continue;
162
163 uint64_t SectionAddr;
164 if (error(i->getAddress(SectionAddr))) break;
165
166 // Make a list of all the symbols in this section.
167 std::vector<std::pair<uint64_t, StringRef> > Symbols;
168 for (symbol_iterator si = o->begin_symbols(),
169 se = o->end_symbols();
170 si != se; si.increment(ec)) {
171 bool contains;
172 if (!error(i->containsSymbol(*si, contains)) && contains) {
173 uint64_t Address;
174 if (error(si->getAddress(Address))) break;
175 if (Address == UnknownAddressOrSize) continue;
176 Address -= SectionAddr;
177
178 StringRef Name;
179 if (error(si->getName(Name))) break;
180
181 outs() << "\nXXX " << Name << "\n";
182
183 Symbols.push_back(std::make_pair(Address, Name));
184 }
185 }
186
187 // Sort the symbols by address, just in case they didn't come in that way.
188 array_pod_sort(Symbols.begin(), Symbols.end());
189
190 // Make a list of all the relocations for this section.
191 std::vector<RelocationRef> Rels;
192 // if (InlineRelocs) {
193 // for (relocation_iterator ri = i->begin_relocations(),
194 // re = i->end_relocations();
195 // ri != re; ri.increment(ec)) {
196 // if (error(ec)) break;
197 // Rels.push_back(*ri);
198 // }
199 // }
200
201 // Sort relocations by address.
202 std::sort(Rels.begin(), Rels.end(), RelocAddressLess);
203
204 StringRef SegmentName = "";
205 // if (const MachOObjectFile *MachO =
206 // dyn_cast<const MachOObjectFile>(o)) {
207 // DataRefImpl DR = i->getRawDataRefImpl();
208 // SegmentName = MachO->getSectionFinalSegmentName(DR);
209 // }
210 StringRef name;
211 if (error(i->getName(name))) break;
212 outs() << "Disassembly of section ";
213 if (!SegmentName.empty())
214 outs() << SegmentName << ",";
215 outs() << name << ':';
216
217 // If the section has no symbols just insert a dummy one and disassemble
218 // the whole section.
219 if (Symbols.empty())
220 Symbols.push_back(std::make_pair(0, name));
221
222
223 StringRef Bytes;
224 if (error(i->getContents(Bytes))) break;
225 StringRefMemoryObject memoryObject(Bytes);
226 uint64_t Size;
227 uint64_t Index;
228 uint64_t SectSize;
229 if (error(i->getSize(SectSize))) break;
230
231 std::vector<RelocationRef>::const_iterator rel_cur = Rels.begin();
232 std::vector<RelocationRef>::const_iterator rel_end = Rels.end();
233 // Disassemble symbol by symbol.
234 for (unsigned si = 0, se = Symbols.size(); si != se; ++si) {
235 uint64_t Start = Symbols[si].first;
236 uint64_t End;
237 // The end is either the size of the section or the beginning of the next
238 // symbol.
239 if (si == se - 1)
240 End = SectSize;
241 // Make sure this symbol takes up space.
242 else if (Symbols[si + 1].first != Start)
243 End = Symbols[si + 1].first - 1;
244 else
245 // This symbol has the same address as the next symbol. Skip it.
246 continue;
247
248 outs() << '\n' << Symbols[si].second << ":\n";
249
250 #ifndef NDEBUG
251 raw_ostream &DebugOut = DebugFlag ? dbgs() : nulls();
252 #else
253 raw_ostream &DebugOut = nulls();
254 #endif
255
256 for (Index = Start; Index < End; Index += Size) {
257 MCInst Inst;
258
259 if (DisAsm->getInstruction(Inst, Size, memoryObject, Index,
260 DebugOut, nulls())) {
261 outs() << format("%8" PRIx64 ":", SectionAddr + Index);
262 outs() << "\t";
263 DumpBytes(StringRef(Bytes.data() + Index, Size));
264
265 IP->printInst(&Inst, outs(), "");
266 outs() << "\n";
267 } else {
268 errs() << "warning: invalid instruction encoding\n";
269 if (Size == 0)
270 Size = 1; // skip illegible bytes
271 }
272
273 // Print relocation for instruction.
274 while (rel_cur != rel_end) {
275 bool hidden = false;
276 uint64_t addr;
277 SmallString<16> name;
278 SmallString<32> val;
279
280 // If this relocation is hidden, skip it.
281 if (error(rel_cur->getHidden(hidden))) goto skip_print_rel;
282 if (hidden) goto skip_print_rel;
283
284 if (error(rel_cur->getOffset(addr))) goto skip_print_rel;
285 // Stop when rel_cur's address is past the current instruction.
286 if (addr >= Index + Size) break;
287 if (error(rel_cur->getTypeName(name))) goto skip_print_rel;
288 if (error(rel_cur->getValueString(val))) goto skip_print_rel;
289
290 outs() << format("\t\t\t%8" PRIx64 ": ", SectionAddr + addr) << name
291 << "\t" << val << "\n";
292
293 skip_print_rel:
294 ++rel_cur;
295 }
296 }
297 }
298 }
299 }
300
301 void ::Binary::disassemble_cfg() {
302 for (MCModule::const_func_iterator FI = Mod->func_begin(),
303 FE = Mod->func_end();
304 FI != FE; ++FI) {
305 static int filenum = 0;
306 std::string FileName = std::string("dot/") + (Twine((*FI)->getName()) + "_" + utostr(filenum) + ".dot").str();
307
308 std::cerr << FileName << std::endl;
309
310 // Start a new dot file.
311 std::string Error;
312 raw_fd_ostream Out(FileName.c_str(), Error);
313 if (!Error.empty()) {
314 errs() << "llvm-objdump: warning: " << Error << '\n';
315 return;
316 }
317
318 Out << "digraph \"" << (*FI)->getName() << "\" {\n";
319 Out << "graph [ rankdir = \"LR\" ];\n";
320 for (MCFunction::const_iterator i = (*FI)->begin(), e = (*FI)->end(); i != e; ++i) {
321 // Only print blocks that have predecessors.
322 bool hasPreds = (*i)->pred_begin() != (*i)->pred_end();
323
324 if (!hasPreds && i != (*FI)->begin())
325 continue;
326
327 Out << '"' << (*i)->getInsts()->getBeginAddr() << "\" [ label=\"<a>";
328 // Print instructions.
329 for (unsigned ii = 0, ie = (*i)->getInsts()->size(); ii != ie;
330 ++ii) {
331 if (ii != 0) // Not the first line, start a new row.
332 Out << '|';
333 if (ii + 1 == ie) // Last line, add an end id.
334 Out << "<o>";
335
336 // Escape special chars and print the instruction in mnemonic form.
337 std::string Str;
338 raw_string_ostream OS(Str);
339 IP->printInst(&(*i)->getInsts()->at(ii).Inst, OS, "");
340 Out << DOT::EscapeString(OS.str());
341 }
342 Out << "\" shape=\"record\" ];\n";
343
344 // Add edges.
345 for (MCBasicBlock::succ_const_iterator si = (*i)->succ_begin(),
346 se = (*i)->succ_end(); si != se; ++si)
347 Out << (*i)->getInsts()->getBeginAddr() << ":o -> "
348 << (*si)->getInsts()->getBeginAddr() << ":a\n";
349 }
350 Out << "}\n";
351
352 ++filenum;
353 }
354 }