]> git.siccegge.de Git - frida/frida.git/blob - src/disassembler/llvm/LLVMDisassembler.cxx
Disable buildModule
[frida/frida.git] / src / disassembler / llvm / LLVMDisassembler.cxx
1 #include "disassembler/Instruction.hxx"
2 #include "disassembler/llvm/LLVMDisassembler.hxx"
3 #include "core/InformationManager.hxx"
4 #include "core/Function.hxx"
5 #include "core/BasicBlock.hxx"
6 #include <boost/algorithm/string.hpp>
7
8 #include <stack>
9 #include <algorithm>
10 #include <cassert>
11
12 using namespace llvm;
13 using namespace llvm::object;
14 using std::error_code;
15
namespace {
	/// Empty tag type: selects the COFF instantiation and specialisations
	/// of LLVMDisassembler in this file.
	class COFFT {};

	/// Empty tag type: selects the Mach-O instantiation and specialisations
	/// of LLVMDisassembler in this file.
	class MACHOT {};
}
25
26 /*
27 *
28 */
29 Disassembler * createLLVMDisassembler(const std::string& filename, InformationManager* manager) {
30 log4cxx::LoggerPtr logger(log4cxx::Logger::getLogger("disassembler.LLVMDisassembler"));
31 if (filename == "")
32 return NULL;
33
34 auto retval = createBinary(filename);
35 if (error_code ec = retval.getError()) {
36 LOG4CXX_ERROR(logger, ec.message());
37 return NULL;
38 }
39
40 Binary * op = retval.get();
41
42 if (!op) {
43 LOG4CXX_ERROR(logger, "Could not open " << filename);
44 return NULL;
45 }
46
47 // ELFType<endian, maxalign, 64bit>
48 if (ELF32LEObjectFile * object = dyn_cast<ELF32LEObjectFile>(op)) {
49 return new LLVMDisassembler<ELFType<support::little, 2, false>>(filename, manager, object);
50 }
51 if (ELF64LEObjectFile * object = dyn_cast<ELF64LEObjectFile>(op)) {
52 return new LLVMDisassembler<ELFType<support::little, 2, true>>(filename, manager, object);
53 }
54 if (ELF32BEObjectFile * object = dyn_cast<ELF32BEObjectFile>(op)) {
55 return new LLVMDisassembler<ELFType<support::big, 2, false>>(filename, manager, object);
56 }
57 if (ELF64BEObjectFile * object = dyn_cast<ELF64BEObjectFile>(op)) {
58 return new LLVMDisassembler<ELFType<support::big, 2, true>>(filename, manager, object);
59 }
60 if (COFFObjectFile * object = dyn_cast<COFFObjectFile>(op)) {
61 return new LLVMDisassembler<COFFT>(filename, manager, object);
62 }
63 if (MachOObjectFile * object = dyn_cast<MachOObjectFile>(op)) {
64 return new LLVMDisassembler<MACHOT>(filename, manager, object);
65 }
66
67 return NULL;
68 }
69
70 /*
71 * TODO: fallback code falls die Datei kein ELF/PE/COFF/MacO/.. binary
72 * ist sondern z.B. einfach nur Instruktionen oder ein Bootsektor oder
73 * foo
74 */
75 template <typename ELFT>
76 LLVMDisassembler<ELFT>::LLVMDisassembler(const std::string& filename,
77 InformationManager* manager,
78 ObjectFile* file)
79 : Disassembler()
80 , logger(log4cxx::Logger::getLogger("disassembler.LLVMDisassembler"))
81 , triple("unknown-unknown-unknown")
82 , manager(manager)
83 {
84 LOG4CXX_DEBUG(logger, "Handling file " << filename);
85
86 if (!file) {
87 auto result = createBinary(filename);
88
89 error_code ec;
90 if ((ec = result.getError())) {
91 LOG4CXX_ERROR(logger, "Failed to load Binary" << ec.message());
92 binary = NULL;
93 return;
94 }
95
96 binary.reset(result.get());
97
98 o = dyn_cast<ObjectFile>(binary.get());
99 } else {
100 o = file;
101 binary.reset(file);
102 }
103
104 triple.setArch(Triple::ArchType(o->getArch()));
105 std::string tripleName(triple.getTriple());
106
107 LOG4CXX_INFO(logger, "Architecture " << tripleName);
108
109
110 std::string es;
111 target = TargetRegistry::lookupTarget("", triple, es);
112 if (!target) {
113 LOG4CXX_ERROR(logger, es);
114 return;
115 }
116
117 LOG4CXX_INFO(logger, "Target " << target->getName());
118
119 MRI.reset(target->createMCRegInfo(tripleName));
120 if (!MRI) {
121 LOG4CXX_ERROR(logger, "no register info for target " << tripleName);
122 return;
123 }
124
125 // Set up disassembler.
126 AsmInfo.reset(target->createMCAsmInfo(*MRI, tripleName));
127 if (!AsmInfo) {
128 LOG4CXX_ERROR(logger, "no assembly info for target " << tripleName);
129 return;
130 }
131
132 STI.reset(target->createMCSubtargetInfo(tripleName, "", ""));
133 if (!STI) {
134 LOG4CXX_ERROR(logger, "no subtarget info for target " << tripleName);
135 return;
136 }
137
138 MII.reset(target->createMCInstrInfo());
139 if (!MII) {
140 LOG4CXX_ERROR(logger, "no instruction info for target " << tripleName);
141 return;
142 }
143
144 MOFI.reset(new MCObjectFileInfo);
145 MCContext Ctx(AsmInfo.get(), MRI.get(), MOFI.get());
146
147 DisAsm.reset(target->createMCDisassembler(*STI, Ctx));
148 if (!DisAsm) {
149 LOG4CXX_ERROR(logger, "no disassembler for target " << tripleName);
150 return;
151 }
152 RelInfo.reset(
153 target->createMCRelocationInfo(tripleName, Ctx));
154 if (RelInfo) {
155 // Symzer.reset(
156 // MCObjectSymbolizer::createObjectSymbolizer(Ctx, std::move(RelInfo), o));
157 // if (Symzer)
158 // DisAsm->setSymbolizer(std::move(Symzer));
159 }
160 RelInfo.release();
161 Symzer.release();
162
163 MIA.reset(target->createMCInstrAnalysis(MII.get()));
164 if (!MIA) {
165 LOG4CXX_ERROR(logger, "no instruction analysis for target " << tripleName);
166 return;
167 }
168
169 int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
170 IP.reset(target->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI));
171 if (!IP) {
172 LOG4CXX_ERROR(logger, "no instruction printer for target " << tripleName);
173 return;
174 }
175
176 IP->setPrintImmHex(llvm::HexStyle::C);
177 IP->setPrintImmHex(true);
178
179 std::unique_ptr<MCObjectDisassembler> OD(
180 new MCObjectDisassembler(*o, *DisAsm, *MIA));
181 //Mod.reset(OD->buildModule(false));
182
183 readSections();
184 }
185
186 template <typename ELFT>
187 void LLVMDisassembler<ELFT>::start() {
188 readSymbols();
189 disassemble();
190 readDynamicSymbols();
191 }
192
193 template <typename ELFT>
194 LLVMDisassembler<ELFT>::~LLVMDisassembler() {}
195
196 template <typename ELFT>
197 Function* LLVMDisassembler<ELFT>::disassembleFunctionAt(uint64_t address, const std::string& name) {
198 Function * function;
199 SectionRef text_section = getTextSection();
200 uint64_t base_address, size;
201 text_section.getAddress(base_address);
202 text_section.getSize(size);
203
204 if (address < base_address ||
205 address >= base_address + size) {
206 return NULL;
207 }
208
209 if (NULL == (function = manager->getFunction(address))) {
210
211 if (name == "") {
212 std::stringstream s;
213 s << "<Unnamed 0x" << std::hex << address << ">";
214 function = manager->newFunction(address);
215 function->setName(s.str());
216 } else {
217 function = manager->newFunction(address);
218 function->setName(name);
219 }
220 disassembleFunction(function);
221 }
222
223 return function;
224 }
225
226 template <typename ELFT>
227 void LLVMDisassembler<ELFT>::disassembleFunction(Function* function) {
228 std::vector<uint64_t> called_functions;
229 std::stack<BasicBlock*> remaining_blocks;
230 /* TODO:
231 * Do all blocks get added properly? We should take care to remove
232 * the other ones at the end of the function!
233 */
234 std::map<uint64_t, BasicBlock*> new_blocks;
235 SectionRef text_section = getTextSection();
236 StringRef bytes;
237 text_section.getContents(bytes);
238 StringRefMemoryObject ref(bytes);
239
240 LOG4CXX_DEBUG(logger, "Handling function " << function->getName());
241
242 BasicBlock * block = manager->newBasicBlock(function->getStartAddress());
243 remaining_blocks.push(block);
244 new_blocks.insert(std::make_pair(block->getStartAddress(), block));
245 function->addBasicBlock(block);
246
247 uint64_t base_address, size;
248 text_section.getAddress(base_address);
249 text_section.getSize(size);
250 LOG4CXX_DEBUG(logger, "Text section at " << std::hex << base_address << " with size " << size);
251
252 while (remaining_blocks.size()) {
253 BasicBlock * current_block = remaining_blocks.top();
254 remaining_blocks.pop();
255
256 LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex
257 << current_block->getStartAddress());
258
259 uint64_t inst_size;
260 uint64_t current_address = current_block->getStartAddress() - base_address;
261 while(true) {
262 MCInst inst;
263 std::string buf;
264 llvm::raw_string_ostream s(buf);
265
266 if(llvm::MCDisassembler::Success ==
267 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
268 uint64_t jmptarget;
269
270 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
271 jmptarget += base_address;
272 if (!MIA->isIndirectBranch(inst)) {
273 if (MIA->isCall(inst)) {
274 if (NULL == manager->getFunction(jmptarget))
275 called_functions.push_back(jmptarget);
276 } else {
277 current_block->setNextBlock(0, jmptarget);
278 if (new_blocks.find(jmptarget) == new_blocks.end()) {
279 BasicBlock * block = manager->newBasicBlock(jmptarget);
280 assert(block);
281 new_blocks.insert(std::make_pair(block->getStartAddress(), block));
282 function->addBasicBlock(block);
283 remaining_blocks.push(block);
284 } else {
285 LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex
286 << current_block->getStartAddress());
287 function->addBasicBlock(new_blocks.find(jmptarget)->second);
288 }
289 if (MIA->isConditionalBranch(inst)) {
290 jmptarget = base_address + current_address + inst_size;
291 current_block->setNextBlock(1, jmptarget);
292 if (new_blocks.find(jmptarget) == new_blocks.end()) {
293 BasicBlock * block = manager->newBasicBlock(jmptarget);
294 assert(block);
295 new_blocks.insert(std::make_pair(block->getStartAddress(), block));
296 function->addBasicBlock(block);
297 remaining_blocks.push(block);
298 } else {
299 LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex
300 << current_block->getStartAddress());
301 function->addBasicBlock(new_blocks.find(jmptarget)->second);
302 }
303 }
304 }
305 }
306 }
307 } else {
308 inst_size = 0;
309 }
310
311
312 if (inst_size == 0 || MIA->isTerminator(inst) || MIA->isBranch(inst)) {
313 current_block->setEndAddress(current_address + base_address + inst_size);
314 LOG4CXX_DEBUG(logger, "Finished Block at " << std::hex <<
315 current_block->getEndAddress());
316 break;
317 }
318 current_address += inst_size;
319 }
320 }
321 splitBlocks(function);
322 LOG4CXX_DEBUG(logger, "Finished function " << function->getName());
323 manager->finishFunction(function);
324 for (uint64_t address : called_functions)
325 disassembleFunctionAt(address);
326 }
327
328 template <typename ELFT>
329 void LLVMDisassembler<ELFT>::disassemble() {
330 SectionRef text_section = getTextSection();
331 std::vector<Function*> remaining_functions;
332
333 // Assume all function symbols actually start a real function
334 for (auto x = symbols.begin(); x != symbols.end(); ++x) {
335 uint64_t result;
336 bool contains;
337 SymbolRef::Type symbol_type;
338
339
340 if (text_section.containsSymbol(x->second, contains) || !contains)
341 continue;
342
343 if (x->second.getType(symbol_type)
344 || SymbolRef::ST_Function != symbol_type)
345 continue;
346
347 if (!x->second.getAddress(result)) {
348 Function * fun = manager->newFunction(result);
349 if (fun) {
350 fun->setName(x->first);
351 remaining_functions.push_back(fun);
352 LOG4CXX_DEBUG(logger, "Disasembling " << x->first);
353 } else {
354 LOG4CXX_DEBUG(logger, "Function at " << std::hex << result
355 << " already disassembled as " << manager->getFunction(result)->getName());
356 }
357 }
358 }
359
360 for (Function* function : remaining_functions) {
361 disassembleFunction(function);
362 manager->finishFunction(function);
363 }
364
365 if (binary->isELF()) {
366 uint64_t _entryAddress = entryAddress();
367 LOG4CXX_DEBUG(logger, "Adding entryAddress at: " << std::hex << _entryAddress);
368 std::stringstream s;
369 s << "<_start 0x" << std::hex << _entryAddress << ">";
370
371 disassembleFunctionAt(_entryAddress, s.str());
372 }
373
374 if (!manager->hasFunctions()) {
375 uint64_t text_entry;
376 text_section.getAddress(text_entry);
377 LOG4CXX_INFO(logger, "No Symbols found, starting at the beginning of the text segment");
378 disassembleFunctionAt(text_entry);
379 }
380 }
381
382 template <>
383 uint64_t LLVMDisassembler<COFFT>::entryAddress() {
384 const auto coffobject = dyn_cast<COFFObjectFile>(o);
385 const struct pe32_header* pe32_header;
386 const struct pe32plus_header* pe32plus_header;
387
388 coffobject->getPE32PlusHeader(pe32plus_header);
389
390 if (pe32plus_header) {
391 return pe32plus_header->AddressOfEntryPoint;
392 } else {
393 coffobject->getPE32Header(pe32_header);
394 return pe32_header->AddressOfEntryPoint;
395 }
396 }
397
398 template<>
399 uint64_t LLVMDisassembler<MACHOT>::entryAddress() {
400 // TODO
401 return 0;
402 }
403
404 template <typename ELFT>
405 uint64_t LLVMDisassembler<ELFT>::entryAddress() {
406 const auto elffile = dyn_cast<ELFObjectFile<ELFT>>(o)->getELFFile();
407 const auto * header = elffile->getHeader();
408
409 return header->e_entry;
410 }
411
412 template <typename ELFT>
413 void LLVMDisassembler<ELFT>::splitBlocks(Function* function) {
414 SectionRef text_section = getTextSection();
415 StringRef bytes;
416 text_section.getContents(bytes);
417 StringRefMemoryObject ref(bytes);
418
419 LOG4CXX_DEBUG(logger, "Splitting Blocks in Function " << function->getName());
420 // Split blocks where jumps are going inside the block
421 for (auto it = function->blocks().begin();
422 it != function->blocks().end();
423 ++it) {
424 BasicBlock * current_block = it->second;
425 if (current_block->getEndAddress() == 0) {
426 LOG4CXX_ERROR(logger, "UNFINISHED BLOCK " << std::hex << current_block->getStartAddress());
427 break;
428 }
429 uint64_t inst_size;
430 uint64_t base_address;
431 text_section.getAddress(base_address);
432 uint64_t current_address = current_block->getStartAddress() - base_address;
433 while(current_block->getEndAddress() - base_address > current_address) {
434 MCInst inst;
435 std::string buf;
436 llvm::raw_string_ostream s(buf);
437
438 if(llvm::MCDisassembler::Success ==
439 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
440 // See if some other block starts here
441 BasicBlock* other = manager->getBasicBlock(current_address
442 + inst_size
443 + base_address);
444
445 // Special case, other block starts here but we are at the end anyway
446 if (other != NULL) {
447 uint64_t endaddress = current_address + inst_size + base_address;
448 if (endaddress != current_block->getEndAddress()) {
449 LOG4CXX_DEBUG(logger, "Shortening block starting at "
450 << std::hex
451 << current_block->getStartAddress()
452 << " now ending at "
453 << other->getStartAddress());
454 function->addBasicBlock(other);
455 current_block->setEndAddress(endaddress);
456 current_block->setNextBlock(0, other->getStartAddress());
457 current_block->setNextBlock(1, 0);
458 }
459 }
460 } else {
461 inst_size = 1;
462 }
463 current_address += inst_size;
464 }
465 }
466 }
467
468 template<>
469 void LLVMDisassembler<COFFT>::readDynamicSymbols() {
470 //TODO
471 }
472
473 template<>
474 void LLVMDisassembler<MACHOT>::readDynamicSymbols() {
475 //TODO
476 }
477
478 template <typename ELFT>
479 void LLVMDisassembler<ELFT>::readDynamicSymbols() {
480 const auto elffile = dyn_cast<ELFObjectFile<ELFT>>(o)->getELFFile();
481 for (auto it = elffile->begin_dynamic_symbols(),
482 end = elffile->end_dynamic_symbols();
483 it != end;
484 ++it) {
485 if (it->getType() == 2) { // Function
486 bool is_default;
487 // TODO: Error handling
488 std::string symbolname = *(elffile->getSymbolName(it));
489 std::string symbolversion = *(elffile->getSymbolVersion(nullptr, &*it, is_default));
490 // TODO: actually get the symbol address from relocations
491 Function* f = manager->newDynamicFunction(0);
492 f->setName(symbolname + (is_default? "@@" : "@") + symbolversion);
493 manager->finishFunction(f);
494
495 LOG4CXX_DEBUG(logger, "Adding dynamic Symbol " << symbolname << (is_default? "@@" : "@") << symbolversion);
496 }
497 }
498 }
499
500 template <typename ELFT>
501 void LLVMDisassembler<ELFT>::readSymbols() {
502 error_code ec;
503 symbol_iterator si(o->symbol_begin()), se(o->symbol_end());
504 for (; si != se; ++si) {
505 StringRef name;
506 if ((ec = si->getName(name))) {
507 LOG4CXX_ERROR(logger, ec.message());
508 break;
509 }
510 LOG4CXX_DEBUG(logger, "Added symbol " << name.str());
511 symbols.insert(make_pair(name.str(), *si));
512 }
513 }
514
515 template <typename ELFT>
516 void LLVMDisassembler<ELFT>::readSections() {
517 error_code ec;
518 section_iterator i(o->section_begin()), e(o->section_end());
519 for (; i != e; ++i) {
520 StringRef name;
521 if ((ec = i->getName(name))) {
522 LOG4CXX_ERROR(logger, ec.message());
523 break;
524 }
525 LOG4CXX_DEBUG(logger, "Added section " << name.str());
526 sections.insert(make_pair(name.str(), *i));
527 }
528
529 }
530
531 // template <typename ELFT>
532 // void LLVMDisassembler<ELFT>::forEachFunction(std::function<void (uint64_t, Function*)> callback) {
533 // // std::for_each(functions.begin(), functions.end(),
534 // // [&](std::pair<uint64_t, Function*> x) {
535 // // callback(x.first, x.second);
536 // // });
537 // }
538
539 template <typename ELFT>
540 std::vector<Instruction> LLVMDisassembler<ELFT>::getInstructions(const BasicBlock *block) {
541 std::vector<Instruction> result;
542 SectionRef text_section = getTextSection();
543 uint64_t base_address;
544 text_section.getAddress(base_address);
545 uint64_t current_address = block->getStartAddress() - base_address;
546 uint64_t end_position = block->getEndAddress() - base_address;
547
548 StringRef bytes;
549 text_section.getContents(bytes);
550 StringRefMemoryObject ref(bytes);
551
552 while (current_address < end_position) {
553 uint64_t inst_size;
554 MCInst inst;
555 std::string buf;
556 llvm::raw_string_ostream s(buf);
557
558 if(llvm::MCDisassembler::Success ==
559 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
560
561 uint8_t bytes[inst_size+2];
562 ref.readBytes(current_address, inst_size, bytes);
563
564 uint64_t jmptarget;
565 std::string ref("");
566 IP->printInst(&inst, s, "");
567 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
568 std::stringstream stream;
569 if (MIA->isCall(inst))
570 stream << "function:";
571 else
572 stream << "block:";
573
574 stream << std::hex << (base_address + jmptarget);
575 ref = stream.str();
576 }
577 result.push_back(Instruction(current_address + base_address, boost::algorithm::trim_copy(s.str()),
578 std::vector<uint8_t>(bytes, bytes+inst_size), ref));
579 } else {
580 LOG4CXX_WARN(logger, "Invalid byte at" << std::hex << current_address + base_address);
581 uint8_t bytes[1];
582 ref.readBytes(current_address, 1, bytes);
583 result.push_back(Instruction(current_address + base_address, "Invalid Instruction",
584 std::vector<uint8_t>(bytes, bytes+1), ""));
585 inst_size = 1;
586 }
587
588 current_address += inst_size;
589 }
590 return result;
591 }
592
593 template <typename ELFT>
594 void LLVMDisassembler<ELFT>::printEachInstruction(uint64_t start, uint64_t end,
595 std::function<void (uint8_t*, size_t,
596 const std::string&,
597 const std::string&)> fun) {
598 SectionRef text_section = getTextSection();
599 uint64_t base_address;
600 text_section.getAddress(base_address);
601 uint64_t current_address = start - base_address;
602
603 StringRef bytes;
604 text_section.getContents(bytes);
605 StringRefMemoryObject ref(bytes);
606
607 while (current_address < end - base_address) {
608 uint64_t inst_size;
609 MCInst inst;
610 std::string buf;
611 llvm::raw_string_ostream s(buf);
612
613 if(llvm::MCDisassembler::Success ==
614 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
615
616 uint8_t bytes[inst_size+2];
617 ref.readBytes(current_address, inst_size, bytes);
618
619 uint64_t jmptarget;
620 std::string ref("");
621 IP->printInst(&inst, s, "");
622 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
623 std::stringstream stream;
624 if (MIA->isCall(inst))
625 stream << "function:";
626 else
627 stream << "block:";
628
629 stream << std::hex << (base_address + jmptarget);
630 ref = stream.str();
631 }
632
633
634 fun(bytes, inst_size, s.str(), ref);
635 } else {
636 LOG4CXX_WARN(logger, "Invalid byte at" << std::hex << current_address + base_address);
637 fun(NULL, 0, "Invalid Byte", "");
638 inst_size = 1;
639 }
640
641 current_address += inst_size;
642 }
643 }
644
645 template <typename ELFT>
646 SectionRef LLVMDisassembler<ELFT>::getTextSection() {
647 return sections[".text"];
648 }
649
650 template <>
651 SectionRef LLVMDisassembler<MACHOT>::getTextSection() {
652 return sections["__text"];
653 }