More logging in LLVMDisassembler
[frida/frida.git] / src / disassembler / llvm / LLVMDisassembler.cxx
1 #include "disassembler/llvm/LLVMDisassembler.hxx"
2 #include "core/InformationManager.hxx"
3 #include "core/Function.hxx"
4 #include "core/BasicBlock.hxx"
5
6 #include <stack>
7 #include <algorithm>
8 #include <cassert>
9
10 using namespace llvm;
11 using namespace llvm::object;
12 using std::error_code;
13
namespace {
	/**
	 * Empty tag type. It is passed as the template argument of
	 * LLVMDisassembler when the input is a COFF object file, in the place
	 * where ELF inputs pass an ELFType<...>.
	 */
	class COFFT {};
}
19
20 /*
21 *
22 */
23 Disassembler * createLLVMDisassembler(const std::string& filename, InformationManager* manager) {
24 if (filename == "")
25 return NULL;
26
27 std::unique_ptr<Binary> o;
28 o.reset(createBinary(filename).get());
29 Binary * op = o.release();
30
31 // ELFType<endian, maxalign, 64bit>
32 if (ELF32LEObjectFile * object = dyn_cast<ELF32LEObjectFile>(op)) {
33 return new LLVMDisassembler<ELFType<support::little, 2, false>>(filename, manager, object);
34 }
35 if (ELF64LEObjectFile * object = dyn_cast<ELF64LEObjectFile>(op)) {
36 return new LLVMDisassembler<ELFType<support::little, 2, true>>(filename, manager, object);
37 }
38 if (ELF32BEObjectFile * object = dyn_cast<ELF32BEObjectFile>(op)) {
39 return new LLVMDisassembler<ELFType<support::big, 2, false>>(filename, manager, object);
40 }
41 if (ELF64BEObjectFile * object = dyn_cast<ELF64BEObjectFile>(op)) {
42 return new LLVMDisassembler<ELFType<support::big, 2, true>>(filename, manager, object);
43 }
44 if (COFFObjectFile * object = dyn_cast<COFFObjectFile>(op)) {
45 return new LLVMDisassembler<COFFT>(filename, manager, object);
46 }
47
48 return NULL;
49 }
50
51 /*
52 * TODO: fallback code falls die Datei kein ELF/PE/COFF/MacO/.. binary
53 * ist sondern z.B. einfach nur Instruktionen oder ein Bootsektor oder
54 * foo
55 */
56 template <typename ELFT>
57 LLVMDisassembler<ELFT>::LLVMDisassembler(const std::string& filename,
58 InformationManager* manager,
59 ObjectFile* file)
60 : Disassembler()
61 , logger(log4cxx::Logger::getLogger("disassembler.LLVMDisassembler"))
62 , triple("unknown-unknown-unknown")
63 , manager(manager)
64 {
65 LOG4CXX_DEBUG(logger, "Handling file " << filename);
66
67 if (!file) {
68 auto result = createBinary(filename);
69
70 error_code ec;
71 if ((ec = result.getError())) {
72 LOG4CXX_ERROR(logger, "Failed to load Binary" << ec.message());
73 binary = NULL;
74 return;
75 }
76
77 binary.reset(result.get());
78
79 o = dyn_cast<ObjectFile>(binary.get());
80 } else {
81 o = file;
82 binary.reset(file);
83 }
84
85 triple.setArch(Triple::ArchType(o->getArch()));
86 std::string tripleName(triple.getTriple());
87
88 LOG4CXX_INFO(logger, "Architecture " << tripleName);
89
90
91 std::string es;
92 target = TargetRegistry::lookupTarget("", triple, es);
93 if (!target) {
94 LOG4CXX_ERROR(logger, es);
95 return;
96 }
97
98 LOG4CXX_INFO(logger, "Target " << target->getName());
99
100 MRI.reset(target->createMCRegInfo(tripleName));
101 if (!MRI) {
102 LOG4CXX_ERROR(logger, "no register info for target " << tripleName);
103 return;
104 }
105
106 // Set up disassembler.
107 AsmInfo.reset(target->createMCAsmInfo(*MRI, tripleName));
108 if (!AsmInfo) {
109 LOG4CXX_ERROR(logger, "no assembly info for target " << tripleName);
110 return;
111 }
112
113 STI.reset(target->createMCSubtargetInfo(tripleName, "", ""));
114 if (!STI) {
115 LOG4CXX_ERROR(logger, "no subtarget info for target " << tripleName);
116 return;
117 }
118
119 MII.reset(target->createMCInstrInfo());
120 if (!MII) {
121 LOG4CXX_ERROR(logger, "no instruction info for target " << tripleName);
122 return;
123 }
124
125 MOFI.reset(new MCObjectFileInfo);
126 MCContext Ctx(AsmInfo.get(), MRI.get(), MOFI.get());
127
128 DisAsm.reset(target->createMCDisassembler(*STI, Ctx));
129 if (!DisAsm) {
130 LOG4CXX_ERROR(logger, "no disassembler for target " << tripleName);
131 return;
132 }
133 RelInfo.reset(
134 target->createMCRelocationInfo(tripleName, Ctx));
135 if (RelInfo) {
136 // Symzer.reset(
137 // MCObjectSymbolizer::createObjectSymbolizer(Ctx, std::move(RelInfo), o));
138 // if (Symzer)
139 // DisAsm->setSymbolizer(std::move(Symzer));
140 }
141 RelInfo.release();
142 Symzer.release();
143
144 MIA.reset(target->createMCInstrAnalysis(MII.get()));
145 if (!MIA) {
146 LOG4CXX_ERROR(logger, "no instruction analysis for target " << tripleName);
147 return;
148 }
149
150 int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
151 IP.reset(target->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI));
152 if (!IP) {
153 LOG4CXX_ERROR(logger, "no instruction printer for target " << tripleName);
154 return;
155 }
156
157 IP->setPrintImmHex(llvm::HexStyle::C);
158 IP->setPrintImmHex(true);
159
160 std::unique_ptr<MCObjectDisassembler> OD(
161 new MCObjectDisassembler(*o, *DisAsm, *MIA));
162 Mod.reset(OD->buildModule(false));
163
164 readSections();
165 }
166
167 template <typename ELFT>
168 void LLVMDisassembler<ELFT>::start() {
169 readSymbols();
170 disassemble();
171 readDynamicSymbols();
172 }
173
174 template <typename ELFT>
175 LLVMDisassembler<ELFT>::~LLVMDisassembler() {}
176
177 template <typename ELFT>
178 Function* LLVMDisassembler<ELFT>::disassembleFunctionAt(uint64_t address, const std::string& name) {
179 Function * function;
180 SectionRef text_section = sections[".text"];
181 uint64_t base_address, size;
182 text_section.getAddress(base_address);
183 text_section.getSize(size);
184
185 if (address < base_address ||
186 address >= base_address + size) {
187 return NULL;
188 }
189
190 if (NULL == (function = manager->getFunction(address))) {
191
192 if (name == "") {
193 std::stringstream s;
194 s << "<Unnamed 0x" << std::hex << address << ">";
195 function = manager->newFunction(address);
196 function->setName(s.str());
197 } else {
198 function = manager->newFunction(address);
199 function->setName(name);
200 }
201 disassembleFunction(function);
202 }
203
204 return function;
205 }
206
207 template <typename ELFT>
208 void LLVMDisassembler<ELFT>::disassembleFunction(Function* function) {
209 std::vector<uint64_t> called_functions;
210 std::stack<BasicBlock*> remaining_blocks;
211 /* TODO:
212 * Do all blocks get added properly? We should take care to remove
213 * the other ones at the end of the function!
214 */
215 std::map<uint64_t, BasicBlock*> new_blocks;
216 SectionRef text_section = sections[".text"];
217 StringRef bytes;
218 text_section.getContents(bytes);
219 StringRefMemoryObject ref(bytes);
220
221 LOG4CXX_DEBUG(logger, "Handling function " << function->getName());
222
223 BasicBlock * block = manager->newBasicBlock(function->getStartAddress());
224 remaining_blocks.push(block);
225 new_blocks.insert(std::make_pair(block->getStartAddress(), block));
226 function->addBasicBlock(block);
227
228 uint64_t base_address, size;
229 text_section.getAddress(base_address);
230 text_section.getSize(size);
231 LOG4CXX_DEBUG(logger, "Text section at " << std::hex << base_address << " with size " << size);
232
233 while (remaining_blocks.size()) {
234 BasicBlock * current_block = remaining_blocks.top();
235 remaining_blocks.pop();
236
237 LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex
238 << current_block->getStartAddress());
239
240 uint64_t inst_size;
241 uint64_t current_address = current_block->getStartAddress() - base_address;
242 while(true) {
243 MCInst inst;
244 std::string buf;
245 llvm::raw_string_ostream s(buf);
246
247 if(llvm::MCDisassembler::Success ==
248 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
249 uint64_t jmptarget;
250
251 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
252 jmptarget += base_address;
253 if (!MIA->isIndirectBranch(inst)) {
254 if (MIA->isCall(inst)) {
255 if (NULL == manager->getFunction(jmptarget))
256 called_functions.push_back(jmptarget);
257 } else {
258 current_block->setNextBlock(0, jmptarget);
259 if (new_blocks.find(jmptarget) == new_blocks.end()) {
260 BasicBlock * block = manager->newBasicBlock(jmptarget);
261 assert(block);
262 new_blocks.insert(std::make_pair(block->getStartAddress(), block));
263 function->addBasicBlock(block);
264 remaining_blocks.push(block);
265 } else {
266 LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex
267 << current_block->getStartAddress());
268 function->addBasicBlock(new_blocks.find(jmptarget)->second);
269 }
270 if (MIA->isConditionalBranch(inst)) {
271 jmptarget = base_address + current_address + inst_size;
272 current_block->setNextBlock(1, jmptarget);
273 if (new_blocks.find(jmptarget) == new_blocks.end()) {
274 BasicBlock * block = manager->newBasicBlock(jmptarget);
275 assert(block);
276 new_blocks.insert(std::make_pair(block->getStartAddress(), block));
277 function->addBasicBlock(block);
278 remaining_blocks.push(block);
279 } else {
280 LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex
281 << current_block->getStartAddress());
282 function->addBasicBlock(new_blocks.find(jmptarget)->second);
283 }
284 }
285 }
286 }
287 }
288 } else {
289 inst_size = 0;
290 }
291
292
293 if (inst_size == 0 || MIA->isTerminator(inst) || MIA->isBranch(inst)) {
294 current_block->setEndAddress(current_address + base_address + inst_size);
295 LOG4CXX_DEBUG(logger, "Finished Block at " << std::hex <<
296 current_block->getEndAddress());
297 break;
298 }
299 current_address += inst_size;
300 }
301 }
302 splitBlocks(function);
303 LOG4CXX_DEBUG(logger, "Finished function " << function->getName());
304 manager->finishFunction(function);
305 for (uint64_t address : called_functions)
306 disassembleFunctionAt(address);
307 }
308
309 template <typename ELFT>
310 void LLVMDisassembler<ELFT>::disassemble() {
311 SectionRef text_section = sections[".text"];
312 std::vector<Function*> remaining_functions;
313
314 // Assume all function symbols actually start a real function
315 for (auto x = symbols.begin(); x != symbols.end(); ++x) {
316 uint64_t result;
317 bool contains;
318 SymbolRef::Type symbol_type;
319
320
321 if (text_section.containsSymbol(x->second, contains) || !contains)
322 continue;
323
324 if (x->second.getType(symbol_type)
325 || SymbolRef::ST_Function != symbol_type)
326 continue;
327
328 if (!x->second.getAddress(result)) {
329 Function * fun = manager->newFunction(result);
330 fun->setName(x->first);
331 remaining_functions.push_back(fun);
332 LOG4CXX_DEBUG(logger, "Disasembling " << x->first);
333 }
334 }
335
336 for (Function* function : remaining_functions) {
337 disassembleFunction(function);
338 manager->finishFunction(function);
339 }
340
341 if (binary->isELF()) {
342 uint64_t _entryAddress = entryAddress();
343 LOG4CXX_DEBUG(logger, "Adding entryAddress at: " << std::hex << _entryAddress);
344 std::stringstream s;
345 s << "<_start 0x" << std::hex << _entryAddress << ">";
346
347 disassembleFunctionAt(_entryAddress, s.str());
348 }
349
350 if (!manager->hasFunctions()) {
351 uint64_t text_entry;
352 text_section.getAddress(text_entry);
353 LOG4CXX_INFO(logger, "No Symbols found, starting at the beginning of the text segment");
354 disassembleFunctionAt(text_entry);
355 }
356 }
357
358 template <>
359 uint64_t LLVMDisassembler<COFFT>::entryAddress() {
360 const auto coffobject = dyn_cast<COFFObjectFile>(o);
361 const struct pe32_header* pe32_header;
362 const struct pe32plus_header* pe32plus_header;
363
364 coffobject->getPE32PlusHeader(pe32plus_header);
365
366 if (pe32plus_header) {
367 return pe32plus_header->AddressOfEntryPoint;
368 } else {
369 coffobject->getPE32Header(pe32_header);
370 return pe32_header->AddressOfEntryPoint;
371 }
372 }
373
374 template <typename ELFT>
375 uint64_t LLVMDisassembler<ELFT>::entryAddress() {
376 const auto elffile = dyn_cast<ELFObjectFile<ELFT>>(o)->getELFFile();
377 const auto * header = elffile->getHeader();
378
379 return header->e_entry;
380 }
381
382 template <typename ELFT>
383 void LLVMDisassembler<ELFT>::splitBlocks(Function* function) {
384 SectionRef text_section = sections[".text"];
385 StringRef bytes;
386 text_section.getContents(bytes);
387 StringRefMemoryObject ref(bytes);
388
389 LOG4CXX_DEBUG(logger, "Splitting Blocks in Function " << function->getName());
390 // Split blocks where jumps are going inside the block
391 for (auto it = function->blocks().begin();
392 it != function->blocks().end();
393 ++it) {
394 BasicBlock * current_block = it->second;
395 if (current_block->getEndAddress() == 0) {
396 LOG4CXX_ERROR(logger, "UNFINISHED BLOCK " << std::hex << current_block->getStartAddress());
397 break;
398 }
399 uint64_t inst_size;
400 uint64_t base_address;
401 text_section.getAddress(base_address);
402 uint64_t current_address = current_block->getStartAddress() - base_address;
403 while(current_block->getEndAddress() - base_address > current_address) {
404 MCInst inst;
405 std::string buf;
406 llvm::raw_string_ostream s(buf);
407
408 if(llvm::MCDisassembler::Success ==
409 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
410 // See if some other block starts here
411 BasicBlock* other = manager->getBasicBlock(current_address
412 + inst_size
413 + base_address);
414
415 // Special case, other block starts here but we are at the end anyway
416 if (other != NULL) {
417 uint64_t endaddress = current_address + inst_size + base_address;
418 if (endaddress != current_block->getEndAddress()) {
419 LOG4CXX_DEBUG(logger, "Shortening block starting at "
420 << std::hex
421 << current_block->getStartAddress()
422 << " now ending at "
423 << other->getStartAddress());
424 function->addBasicBlock(other);
425 current_block->setEndAddress(endaddress);
426 current_block->setNextBlock(0, other->getStartAddress());
427 current_block->setNextBlock(1, 0);
428 }
429 }
430 } else {
431 inst_size = 1;
432 }
433 current_address += inst_size;
434 }
435 }
436 }
437
438 template<>
439 void LLVMDisassembler<COFFT>::readDynamicSymbols() {
440 //TODO
441 }
442
443 template <typename ELFT>
444 void LLVMDisassembler<ELFT>::readDynamicSymbols() {
445 const auto elffile = dyn_cast<ELFObjectFile<ELFT>>(o)->getELFFile();
446 for (auto it = elffile->begin_dynamic_symbols(),
447 end = elffile->end_dynamic_symbols();
448 it != end;
449 ++it) {
450 if (it->getType() == 2) { // Function
451 bool is_default;
452 // TODO: Error handling
453 std::string symbolname = *(elffile->getSymbolName(it));
454 std::string symbolversion = *(elffile->getSymbolVersion(nullptr, &*it, is_default));
455 manager->signal_new_dyn_symbol(symbolname + (is_default? "@@" : "@") + symbolversion);
456 LOG4CXX_DEBUG(logger, "Adding dynamic Symbol " << symbolname << (is_default? "@@" : "@") << symbolversion);
457 }
458 }
459 }
460
461 template <typename ELFT>
462 void LLVMDisassembler<ELFT>::readSymbols() {
463 error_code ec;
464 symbol_iterator si(o->symbol_begin()), se(o->symbol_end());
465 for (; si != se; ++si) {
466 StringRef name;
467 if ((ec = si->getName(name))) {
468 LOG4CXX_ERROR(logger, ec.message());
469 break;
470 }
471 LOG4CXX_DEBUG(logger, "Added symbol " << name.str());
472 symbols.insert(make_pair(name.str(), *si));
473 }
474 }
475
476 template <typename ELFT>
477 void LLVMDisassembler<ELFT>::readSections() {
478 error_code ec;
479 section_iterator i(o->section_begin()), e(o->section_end());
480 for (; i != e; ++i) {
481 StringRef name;
482 if ((ec = i->getName(name))) {
483 LOG4CXX_ERROR(logger, ec.message());
484 break;
485 }
486 LOG4CXX_DEBUG(logger, "Added section " << name.str());
487 sections.insert(make_pair(name.str(), *i));
488 }
489
490 }
491
492 // template <typename ELFT>
493 // void LLVMDisassembler<ELFT>::forEachFunction(std::function<void (uint64_t, Function*)> callback) {
494 // // std::for_each(functions.begin(), functions.end(),
495 // // [&](std::pair<uint64_t, Function*> x) {
496 // // callback(x.first, x.second);
497 // // });
498 // }
499
500 template <typename ELFT>
501 void LLVMDisassembler<ELFT>::printEachInstruction(uint64_t start, uint64_t end,
502 std::function<void (uint8_t*, size_t,
503 const std::string&,
504 const std::string&)> fun) {
505 SectionRef text_section = sections[".text"];
506 uint64_t base_address;
507 text_section.getAddress(base_address);
508 uint64_t current_address = start - base_address;
509
510 StringRef bytes;
511 text_section.getContents(bytes);
512 StringRefMemoryObject ref(bytes);
513
514 while (current_address < end - base_address) {
515 uint64_t inst_size;
516 MCInst inst;
517 std::string buf;
518 llvm::raw_string_ostream s(buf);
519
520 if(llvm::MCDisassembler::Success ==
521 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
522
523 uint8_t bytes[inst_size+2];
524 ref.readBytes(current_address, inst_size, bytes);
525
526 uint64_t jmptarget;
527 std::string ref("");
528 IP->printInst(&inst, s, "");
529 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
530 std::stringstream stream;
531 if (MIA->isCall(inst))
532 stream << "function:";
533 else
534 stream << "block:";
535
536 stream << std::hex << (base_address + jmptarget);
537 ref = stream.str();
538 }
539
540
541 fun(bytes, inst_size, s.str(), ref);
542 } else {
543 LOG4CXX_WARN(logger, "Invalid byte at" << std::hex << current_address + base_address);
544 fun(NULL, 0, "Invalid Byte", "");
545 inst_size = 1;
546 }
547
548 current_address += inst_size;
549 }
550 }