]> git.siccegge.de Git - frida/frida.git/blob - src/disassembler/llvm/LLVMDisassembler.cxx
Make function/jump targets clickable
[frida/frida.git] / src / disassembler / llvm / LLVMDisassembler.cxx
1 #include "disassembler/llvm/LLVMDisassembler.hxx"
2 #include "disassembler/llvm/LLVMBasicBlock.hxx"
3 #include "disassembler/llvm/LLVMFunction.hxx"
4
5 #include <stack>
6 #include <algorithm>
7
8 using namespace llvm;
9 using namespace llvm::object;
10 using std::error_code;
11
12 /*
13 *
14 */
15 Disassembler * createLLVMDisassembler(const std::string& filename, InformationManager* manager) {
16 std::unique_ptr<Binary> o;
17 o.reset(createBinary(filename).get());
18 const Binary * op = o.get();
19
20 // ELFType<endian, maxalign, 64bit>
21 if (const ELF32LEObjectFile * _ = dyn_cast<ELF32LEObjectFile>(op)) {
22 #pragma unused(_)
23 return new LLVMDisassembler<ELFType<support::little, 2, false>>(filename, manager);
24 }
25 if (const ELF64LEObjectFile * _ = dyn_cast<ELF64LEObjectFile>(op)) {
26 #pragma unused(_)
27 return new LLVMDisassembler<ELFType<support::little, 2, true>>(filename, manager);
28 }
29 if (const ELF32BEObjectFile * _ = dyn_cast<ELF32BEObjectFile>(op)) {
30 #pragma unused(_)
31 return new LLVMDisassembler<ELFType<support::big, 2, false>>(filename, manager);
32 }
33 if (const ELF64BEObjectFile * _ = dyn_cast<ELF64BEObjectFile>(op)) {
34 #pragma unused(_)
35 return new LLVMDisassembler<ELFType<support::big, 2, true>>(filename, manager);
36 }
37
38 return NULL;
39 }
40
41 /*
42 * TODO: fallback code falls die Datei kein ELF/PE/COFF/MacO/.. binary
43 * ist sondern z.B. einfach nur Instruktionen oder ein Bootsektor oder
44 * foo
45 */
46 template <typename ELFT>
47 LLVMDisassembler<ELFT>::LLVMDisassembler(const std::string& filename,
48 InformationManager* manager)
49 : Disassembler(filename, manager)
50 , logger(log4cxx::Logger::getLogger("LLVMDisassembler"))
51 , triple("unknown-unknown-unknown")
52 , manager(manager)
53 {
54 LOG4CXX_DEBUG(logger, "Handling file" << filename);
55 auto result = createBinary(filename);
56
57 error_code ec;
58 if ((ec = result.getError())) {
59 LOG4CXX_ERROR(logger, "Failed to load Binary" << ec.message());
60 binary = NULL;
61 return;
62 }
63
64 binary.reset(result.get());
65
66 o = dyn_cast<ObjectFile>(binary.get());
67
68 triple.setArch(Triple::ArchType(o->getArch()));
69 std::string tripleName(triple.getTriple());
70
71 LOG4CXX_INFO(logger, "Architecture " << tripleName);
72
73
74 std::string es;
75 target = TargetRegistry::lookupTarget("", triple, es);
76 if (!target) {
77 LOG4CXX_ERROR(logger, es);
78 return;
79 }
80
81 LOG4CXX_INFO(logger, "Target " << target->getName());
82
83 MRI.reset(target->createMCRegInfo(tripleName));
84 if (!MRI) {
85 LOG4CXX_ERROR(logger, "no register info for target " << tripleName);
86 return;
87 }
88
89 // Set up disassembler.
90 AsmInfo.reset(target->createMCAsmInfo(*MRI, tripleName));
91 if (!AsmInfo) {
92 LOG4CXX_ERROR(logger, "no assembly info for target " << tripleName);
93 return;
94 }
95
96 STI.reset(target->createMCSubtargetInfo(tripleName, "", ""));
97 if (!STI) {
98 LOG4CXX_ERROR(logger, "no subtarget info for target " << tripleName);
99 return;
100 }
101
102 MII.reset(target->createMCInstrInfo());
103 if (!MII) {
104 LOG4CXX_ERROR(logger, "no instruction info for target " << tripleName);
105 return;
106 }
107
108 MOFI.reset(new MCObjectFileInfo);
109 MCContext Ctx(AsmInfo.get(), MRI.get(), MOFI.get());
110
111 DisAsm.reset(target->createMCDisassembler(*STI, Ctx));
112 if (!DisAsm) {
113 LOG4CXX_ERROR(logger, "no disassembler for target " << tripleName);
114 return;
115 }
116 RelInfo.reset(
117 target->createMCRelocationInfo(tripleName, Ctx));
118 if (RelInfo) {
119 Symzer.reset(
120 MCObjectSymbolizer::createObjectSymbolizer(Ctx, std::move(RelInfo), o));
121 if (Symzer)
122 DisAsm->setSymbolizer(std::move(Symzer));
123 }
124 RelInfo.release();
125 Symzer.release();
126
127 MIA.reset(target->createMCInstrAnalysis(MII.get()));
128 if (!MIA) {
129 LOG4CXX_ERROR(logger, "no instruction analysis for target " << tripleName);
130 return;
131 }
132
133 int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
134 IP.reset(target->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI));
135 if (!IP) {
136 LOG4CXX_ERROR(logger, "no instruction printer for target " << tripleName);
137 return;
138 }
139
140 IP->setPrintImmHex(llvm::HexStyle::C);
141 IP->setPrintImmHex(true);
142
143 std::unique_ptr<MCObjectDisassembler> OD(
144 new MCObjectDisassembler(*o, *DisAsm, *MIA));
145 Mod.reset(OD->buildModule(false));
146 }
147
148 template <typename ELFT>
149 void LLVMDisassembler<ELFT>::start() {
150 readSymbols();
151 readSections();
152 disassemble();
153 }
154
155 template <typename ELFT>
156 LLVMDisassembler<ELFT>::~LLVMDisassembler() {
157 std::for_each(functions.begin(), functions.end(),
158 [](std::pair<uint64_t,LLVMFunction*> it) {
159 delete it.second;
160 });
161 std::for_each(blocks.begin(), blocks.end(),
162 [](std::pair<uint64_t, LLVMBasicBlock*> it) {
163 delete it.second;
164 });
165 }
166
167 template <typename ELFT>
168 Function* LLVMDisassembler<ELFT>::disassembleFunctionAt(uint64_t address, const std::string& name) {
169 SectionRef text_section = sections[".text"];
170 uint64_t base_address, size;
171 text_section.getAddress(base_address);
172 text_section.getSize(size);
173
174 if (address < base_address ||
175 address >= base_address + size) {
176 return NULL;
177 }
178
179 if (functions.find(address) != functions.end()) {
180 return functions[address];
181 }
182
183 LLVMFunction * function;
184 if (name == "") {
185 std::stringstream s;
186 s << "<Unnamed 0x" << std::hex << address << ">";
187 function = new LLVMFunction(s.str(), address);
188 } else {
189 function = new LLVMFunction(name, address);
190 }
191 functions.insert(std::make_pair(address, function));
192
193 disassembleFunction(function);
194
195 return function;
196 }
197
198 template <typename ELFT>
199 void LLVMDisassembler<ELFT>::disassembleFunction(LLVMFunction* function) {
200 std::stack<LLVMBasicBlock*> remaining_blocks;
201 SectionRef text_section = sections[".text"];
202 StringRef bytes;
203 text_section.getContents(bytes);
204 StringRefMemoryObject ref(bytes);
205
206 LOG4CXX_DEBUG(logger, "Handling function " << function->getName());
207
208 LLVMBasicBlock * block = new LLVMBasicBlock(function->getStartAddress(), this);
209 remaining_blocks.push(block);
210 blocks.insert(std::make_pair(block->getStartAddress(), block));
211 function->addBasicBlock(block);
212
213 while (remaining_blocks.size()) {
214 LLVMBasicBlock * current_block = remaining_blocks.top();
215 remaining_blocks.pop();
216
217 LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex << current_block->getStartAddress());
218
219 uint64_t inst_size;
220 uint64_t base_address;
221 text_section.getAddress(base_address);
222 uint64_t current_address = current_block->getStartAddress() - base_address;
223 while(true) {
224 MCInst inst;
225 std::string buf;
226 llvm::raw_string_ostream s(buf);
227
228 if(llvm::MCDisassembler::Success ==
229 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
230 uint64_t jmptarget;
231
232 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
233 jmptarget += base_address;
234 if (!MIA->isIndirectBranch(inst)) {
235 if (MIA->isCall(inst)) {
236 if (functions.find(jmptarget) == functions.end()) {
237 disassembleFunctionAt(jmptarget);
238 }
239 } else {
240 current_block->setNextBlock(0, jmptarget);
241 if (blocks.find(jmptarget) == blocks.end()) {
242 LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this);
243 blocks.insert(std::make_pair(block->getStartAddress(), block));
244 function->addBasicBlock(block);
245 remaining_blocks.push(block);
246 } else {
247 LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex << current_block->getStartAddress());
248 function->addBasicBlock(blocks.find(jmptarget)->second);
249 }
250 if (MIA->isConditionalBranch(inst)) {
251 jmptarget = base_address + current_address + inst_size;
252 current_block->setNextBlock(1, jmptarget);
253 if (blocks.find(jmptarget) == blocks.end()) {
254 LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this);
255 blocks.insert(std::make_pair(block->getStartAddress(), block));
256 function->addBasicBlock(block);
257 remaining_blocks.push(block);
258 } else {
259 LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex << current_block->getStartAddress());
260 function->addBasicBlock(blocks.find(jmptarget)->second);
261 }
262 }
263 }
264 }
265 }
266 } else {
267 inst_size = 0;
268 }
269
270
271 if (inst_size == 0 || MIA->isTerminator(inst) || MIA->isBranch(inst)) {
272 current_block->setEndAddress(current_address + base_address + inst_size);
273 LOG4CXX_DEBUG(logger, "Finished Block at " << std::hex <<
274 current_block->getEndAddress());
275 break;
276 }
277 current_address += inst_size;
278 }
279 }
280 splitBlocks(function);
281 LOG4CXX_DEBUG(logger, "Finished function " << function->getName());
282 manager->signal_new_function(function);
283 }
284
285 template <typename ELFT>
286 void LLVMDisassembler<ELFT>::disassemble() {
287 SectionRef text_section = sections[".text"];
288 std::vector<LLVMFunction*> remaining_functions;
289
290 // Assume all function symbols actually start a real function
291 for (auto x = symbols.begin(); x != symbols.end(); ++x) {
292 uint64_t result;
293 bool contains;
294 SymbolRef::Type symbol_type;
295
296
297 if (text_section.containsSymbol(x->second, contains) || !contains)
298 continue;
299
300 if (x->second.getType(symbol_type)
301 || SymbolRef::ST_Function != symbol_type)
302 continue;
303
304 if (!x->second.getAddress(result)) {
305 LLVMFunction * fun = new LLVMFunction(x->first, result);
306 remaining_functions.push_back(fun);
307 functions.insert(std::make_pair(result, fun));
308 LOG4CXX_DEBUG(logger, "Disasembling " << x->first);
309 }
310 }
311
312 for (LLVMFunction* function : remaining_functions) {
313 disassembleFunction(function);
314 }
315
316 if (binary->isELF()) {
317 bool is64bit = (binary->getData()[4] == 0x02);
318
319 for (int i(0); i < (is64bit? 8 : 4); ++i) {
320 if (binary->isLittleEndian()) {
321 _entryAddress |= (unsigned int)((unsigned char)binary->getData()[0x18 + i]) << 8*i;
322 } else {
323 _entryAddress = _entryAddress << 8;
324 _entryAddress |= (unsigned char)binary->getData()[0x18 + i];
325 }
326 }
327 LOG4CXX_DEBUG(logger, "Adding entryAddress at: " << std::hex << _entryAddress);
328 std::stringstream s;
329 s << "<_start 0x" << std::hex << _entryAddress << ">";
330
331 disassembleFunctionAt(_entryAddress, s.str());
332 }
333
334 if (functions.empty()) {
335 uint64_t text_entry;
336 text_section.getAddress(text_entry);
337 LOG4CXX_INFO(logger, "No Symbols found, starting at the beginning of the text segment");
338 disassembleFunctionAt(text_entry);
339 }
340 }
341
342 template <typename ELFT>
343 void LLVMDisassembler<ELFT>::splitBlocks(LLVMFunction* function) {
344 SectionRef text_section = sections[".text"];
345 StringRef bytes;
346 text_section.getContents(bytes);
347 StringRefMemoryObject ref(bytes);
348
349 // Split blocks where jumps are going inside the block
350 for (auto it = function->blocks().begin();
351 it != function->blocks().end();
352 ++it) {
353 BasicBlock * current_block = it->second;
354 uint64_t inst_size;
355 uint64_t base_address;
356 text_section.getAddress(base_address);
357 uint64_t current_address = current_block->getStartAddress() - base_address;
358 while(current_block->getEndAddress() - base_address > current_address) {
359 MCInst inst;
360 std::string buf;
361 llvm::raw_string_ostream s(buf);
362
363 if(llvm::MCDisassembler::Success ==
364 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
365 // See if some other block starts here
366 auto other = blocks.find(current_address + inst_size + base_address);
367
368 // Special case, other block starts here but we are at the end anyway
369 if (other != blocks.end()) {
370 uint64_t endaddress = current_address + inst_size + base_address;
371 if (endaddress != current_block->getEndAddress()) {
372 LOG4CXX_DEBUG(logger, "Shortening block starting at "
373 << std::hex
374 << current_block->getStartAddress()
375 << " now ending at "
376 << other->first);
377 function->addBasicBlock(other->second);
378 current_block->setEndAddress(endaddress);
379 current_block->setNextBlock(0, other->first);
380 current_block->setNextBlock(1, 0);
381 }
382 }
383 } else {
384 inst_size = 1;
385 }
386 current_address += inst_size;
387 }
388 }
389 }
390
391 template <typename ELFT>
392 void LLVMDisassembler<ELFT>::readSymbols() {
393 error_code ec;
394 symbol_iterator si(o->symbol_begin()), se(o->symbol_end());
395 for (; si != se; ++si) {
396 StringRef name;
397 if ((ec = si->getName(name))) {
398 LOG4CXX_ERROR(logger, ec.message());
399 break;
400 }
401 LOG4CXX_DEBUG(logger, "Added symbol " << name.str());
402 symbols.insert(make_pair(name.str(), *si));
403 }
404 }
405
406 template <typename ELFT>
407 void LLVMDisassembler<ELFT>::readSections() {
408 error_code ec;
409 section_iterator i(o->section_begin()), e(o->section_end());
410 for (; i != e; ++i) {
411 StringRef name;
412 if ((ec = i->getName(name))) {
413 LOG4CXX_ERROR(logger, ec.message());
414 break;
415 }
416 LOG4CXX_DEBUG(logger, "Added section " << name.str());
417 sections.insert(make_pair(name.str(), *i));
418 }
419
420 }
421
422 template <typename ELFT>
423 void LLVMDisassembler<ELFT>::forEachFunction(std::function<void (uint64_t, Function*)> callback) {
424 std::for_each(functions.begin(), functions.end(),
425 [&](std::pair<uint64_t, LLVMFunction*> x) {
426 callback(x.first, x.second);
427 });
428 }
429
430 template <typename ELFT>
431 void LLVMDisassembler<ELFT>::printEachInstruction(uint64_t start, uint64_t end,
432 std::function<void (uint8_t*, size_t,
433 const std::string&,
434 const std::string&)> fun) {
435 SectionRef text_section = sections[".text"];
436 uint64_t base_address;
437 text_section.getAddress(base_address);
438 uint64_t current_address = start - base_address;
439
440 StringRef bytes;
441 text_section.getContents(bytes);
442 StringRefMemoryObject ref(bytes);
443
444 while (current_address < end - base_address) {
445 uint64_t inst_size;
446 MCInst inst;
447 std::string buf;
448 llvm::raw_string_ostream s(buf);
449
450 if(llvm::MCDisassembler::Success ==
451 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
452
453 uint8_t bytes[inst_size+2];
454 ref.readBytes(current_address, inst_size, bytes);
455
456 uint64_t jmptarget;
457 std::string ref("");
458 IP->printInst(&inst, s, "");
459 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
460 std::stringstream stream;
461 if (MIA->isCall(inst))
462 stream << "function:";
463 else
464 stream << "block:";
465
466 stream << std::hex << (base_address + jmptarget);
467 ref = stream.str();
468 }
469
470
471 fun(bytes, inst_size, s.str(), ref);
472 } else {
473 LOG4CXX_WARN(logger, "Invalid byte at" << std::hex << current_address + base_address);
474 fun(NULL, 0, "Invalid Byte", "");
475 inst_size = 1;
476 }
477
478 current_address += inst_size;
479 }
480 }