]> git.siccegge.de Git - frida/frida.git/blob - src/disassembler/llvm/LLVMDisassembler.cxx
Basic MachO Support
[frida/frida.git] / src / disassembler / llvm / LLVMDisassembler.cxx
1 #include "disassembler/llvm/LLVMDisassembler.hxx"
2 #include "core/InformationManager.hxx"
3 #include "core/Function.hxx"
4 #include "core/BasicBlock.hxx"
5
6 #include <stack>
7 #include <algorithm>
8 #include <cassert>
9
10 using namespace llvm;
11 using namespace llvm::object;
12 using std::error_code;
13
namespace {
    /* Empty tag type: selects the COFF specialisations of LLVMDisassembler. */
    class COFFT {};

    /* Empty tag type: selects the Mach-O specialisations of LLVMDisassembler. */
    class MACHOT {};
}
23
24 /*
25 *
26 */
27 Disassembler * createLLVMDisassembler(const std::string& filename, InformationManager* manager) {
28 if (filename == "")
29 return NULL;
30
31 std::unique_ptr<Binary> o;
32 o.reset(createBinary(filename).get());
33 Binary * op = o.release();
34
35 // ELFType<endian, maxalign, 64bit>
36 if (ELF32LEObjectFile * object = dyn_cast<ELF32LEObjectFile>(op)) {
37 return new LLVMDisassembler<ELFType<support::little, 2, false>>(filename, manager, object);
38 }
39 if (ELF64LEObjectFile * object = dyn_cast<ELF64LEObjectFile>(op)) {
40 return new LLVMDisassembler<ELFType<support::little, 2, true>>(filename, manager, object);
41 }
42 if (ELF32BEObjectFile * object = dyn_cast<ELF32BEObjectFile>(op)) {
43 return new LLVMDisassembler<ELFType<support::big, 2, false>>(filename, manager, object);
44 }
45 if (ELF64BEObjectFile * object = dyn_cast<ELF64BEObjectFile>(op)) {
46 return new LLVMDisassembler<ELFType<support::big, 2, true>>(filename, manager, object);
47 }
48 if (COFFObjectFile * object = dyn_cast<COFFObjectFile>(op)) {
49 return new LLVMDisassembler<COFFT>(filename, manager, object);
50 }
51 if (MachOObjectFile * object = dyn_cast<MachOObjectFile>(op)) {
52 return new LLVMDisassembler<MACHOT>(filename, manager, object);
53 }
54
55 return NULL;
56 }
57
58 /*
59 * TODO: fallback code falls die Datei kein ELF/PE/COFF/MacO/.. binary
60 * ist sondern z.B. einfach nur Instruktionen oder ein Bootsektor oder
61 * foo
62 */
63 template <typename ELFT>
64 LLVMDisassembler<ELFT>::LLVMDisassembler(const std::string& filename,
65 InformationManager* manager,
66 ObjectFile* file)
67 : Disassembler()
68 , logger(log4cxx::Logger::getLogger("disassembler.LLVMDisassembler"))
69 , triple("unknown-unknown-unknown")
70 , manager(manager)
71 {
72 LOG4CXX_DEBUG(logger, "Handling file " << filename);
73
74 if (!file) {
75 auto result = createBinary(filename);
76
77 error_code ec;
78 if ((ec = result.getError())) {
79 LOG4CXX_ERROR(logger, "Failed to load Binary" << ec.message());
80 binary = NULL;
81 return;
82 }
83
84 binary.reset(result.get());
85
86 o = dyn_cast<ObjectFile>(binary.get());
87 } else {
88 o = file;
89 binary.reset(file);
90 }
91
92 triple.setArch(Triple::ArchType(o->getArch()));
93 std::string tripleName(triple.getTriple());
94
95 LOG4CXX_INFO(logger, "Architecture " << tripleName);
96
97
98 std::string es;
99 target = TargetRegistry::lookupTarget("", triple, es);
100 if (!target) {
101 LOG4CXX_ERROR(logger, es);
102 return;
103 }
104
105 LOG4CXX_INFO(logger, "Target " << target->getName());
106
107 MRI.reset(target->createMCRegInfo(tripleName));
108 if (!MRI) {
109 LOG4CXX_ERROR(logger, "no register info for target " << tripleName);
110 return;
111 }
112
113 // Set up disassembler.
114 AsmInfo.reset(target->createMCAsmInfo(*MRI, tripleName));
115 if (!AsmInfo) {
116 LOG4CXX_ERROR(logger, "no assembly info for target " << tripleName);
117 return;
118 }
119
120 STI.reset(target->createMCSubtargetInfo(tripleName, "", ""));
121 if (!STI) {
122 LOG4CXX_ERROR(logger, "no subtarget info for target " << tripleName);
123 return;
124 }
125
126 MII.reset(target->createMCInstrInfo());
127 if (!MII) {
128 LOG4CXX_ERROR(logger, "no instruction info for target " << tripleName);
129 return;
130 }
131
132 MOFI.reset(new MCObjectFileInfo);
133 MCContext Ctx(AsmInfo.get(), MRI.get(), MOFI.get());
134
135 DisAsm.reset(target->createMCDisassembler(*STI, Ctx));
136 if (!DisAsm) {
137 LOG4CXX_ERROR(logger, "no disassembler for target " << tripleName);
138 return;
139 }
140 RelInfo.reset(
141 target->createMCRelocationInfo(tripleName, Ctx));
142 if (RelInfo) {
143 // Symzer.reset(
144 // MCObjectSymbolizer::createObjectSymbolizer(Ctx, std::move(RelInfo), o));
145 // if (Symzer)
146 // DisAsm->setSymbolizer(std::move(Symzer));
147 }
148 RelInfo.release();
149 Symzer.release();
150
151 MIA.reset(target->createMCInstrAnalysis(MII.get()));
152 if (!MIA) {
153 LOG4CXX_ERROR(logger, "no instruction analysis for target " << tripleName);
154 return;
155 }
156
157 int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
158 IP.reset(target->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI));
159 if (!IP) {
160 LOG4CXX_ERROR(logger, "no instruction printer for target " << tripleName);
161 return;
162 }
163
164 IP->setPrintImmHex(llvm::HexStyle::C);
165 IP->setPrintImmHex(true);
166
167 std::unique_ptr<MCObjectDisassembler> OD(
168 new MCObjectDisassembler(*o, *DisAsm, *MIA));
169 Mod.reset(OD->buildModule(false));
170
171 readSections();
172 }
173
174 template <typename ELFT>
175 void LLVMDisassembler<ELFT>::start() {
176 readSymbols();
177 disassemble();
178 readDynamicSymbols();
179 }
180
181 template <typename ELFT>
182 LLVMDisassembler<ELFT>::~LLVMDisassembler() {}
183
184 template <typename ELFT>
185 Function* LLVMDisassembler<ELFT>::disassembleFunctionAt(uint64_t address, const std::string& name) {
186 Function * function;
187 SectionRef text_section = getTextSection();
188 uint64_t base_address, size;
189 text_section.getAddress(base_address);
190 text_section.getSize(size);
191
192 if (address < base_address ||
193 address >= base_address + size) {
194 return NULL;
195 }
196
197 if (NULL == (function = manager->getFunction(address))) {
198
199 if (name == "") {
200 std::stringstream s;
201 s << "<Unnamed 0x" << std::hex << address << ">";
202 function = manager->newFunction(address);
203 function->setName(s.str());
204 } else {
205 function = manager->newFunction(address);
206 function->setName(name);
207 }
208 disassembleFunction(function);
209 }
210
211 return function;
212 }
213
214 template <typename ELFT>
215 void LLVMDisassembler<ELFT>::disassembleFunction(Function* function) {
216 std::vector<uint64_t> called_functions;
217 std::stack<BasicBlock*> remaining_blocks;
218 /* TODO:
219 * Do all blocks get added properly? We should take care to remove
220 * the other ones at the end of the function!
221 */
222 std::map<uint64_t, BasicBlock*> new_blocks;
223 SectionRef text_section = getTextSection();
224 StringRef bytes;
225 text_section.getContents(bytes);
226 StringRefMemoryObject ref(bytes);
227
228 LOG4CXX_DEBUG(logger, "Handling function " << function->getName());
229
230 BasicBlock * block = manager->newBasicBlock(function->getStartAddress());
231 remaining_blocks.push(block);
232 new_blocks.insert(std::make_pair(block->getStartAddress(), block));
233 function->addBasicBlock(block);
234
235 uint64_t base_address, size;
236 text_section.getAddress(base_address);
237 text_section.getSize(size);
238 LOG4CXX_DEBUG(logger, "Text section at " << std::hex << base_address << " with size " << size);
239
240 while (remaining_blocks.size()) {
241 BasicBlock * current_block = remaining_blocks.top();
242 remaining_blocks.pop();
243
244 LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex
245 << current_block->getStartAddress());
246
247 uint64_t inst_size;
248 uint64_t current_address = current_block->getStartAddress() - base_address;
249 while(true) {
250 MCInst inst;
251 std::string buf;
252 llvm::raw_string_ostream s(buf);
253
254 if(llvm::MCDisassembler::Success ==
255 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
256 uint64_t jmptarget;
257
258 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
259 jmptarget += base_address;
260 if (!MIA->isIndirectBranch(inst)) {
261 if (MIA->isCall(inst)) {
262 if (NULL == manager->getFunction(jmptarget))
263 called_functions.push_back(jmptarget);
264 } else {
265 current_block->setNextBlock(0, jmptarget);
266 if (new_blocks.find(jmptarget) == new_blocks.end()) {
267 BasicBlock * block = manager->newBasicBlock(jmptarget);
268 assert(block);
269 new_blocks.insert(std::make_pair(block->getStartAddress(), block));
270 function->addBasicBlock(block);
271 remaining_blocks.push(block);
272 } else {
273 LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex
274 << current_block->getStartAddress());
275 function->addBasicBlock(new_blocks.find(jmptarget)->second);
276 }
277 if (MIA->isConditionalBranch(inst)) {
278 jmptarget = base_address + current_address + inst_size;
279 current_block->setNextBlock(1, jmptarget);
280 if (new_blocks.find(jmptarget) == new_blocks.end()) {
281 BasicBlock * block = manager->newBasicBlock(jmptarget);
282 assert(block);
283 new_blocks.insert(std::make_pair(block->getStartAddress(), block));
284 function->addBasicBlock(block);
285 remaining_blocks.push(block);
286 } else {
287 LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex
288 << current_block->getStartAddress());
289 function->addBasicBlock(new_blocks.find(jmptarget)->second);
290 }
291 }
292 }
293 }
294 }
295 } else {
296 inst_size = 0;
297 }
298
299
300 if (inst_size == 0 || MIA->isTerminator(inst) || MIA->isBranch(inst)) {
301 current_block->setEndAddress(current_address + base_address + inst_size);
302 LOG4CXX_DEBUG(logger, "Finished Block at " << std::hex <<
303 current_block->getEndAddress());
304 break;
305 }
306 current_address += inst_size;
307 }
308 }
309 splitBlocks(function);
310 LOG4CXX_DEBUG(logger, "Finished function " << function->getName());
311 manager->finishFunction(function);
312 for (uint64_t address : called_functions)
313 disassembleFunctionAt(address);
314 }
315
316 template <typename ELFT>
317 void LLVMDisassembler<ELFT>::disassemble() {
318 SectionRef text_section = getTextSection();
319 std::vector<Function*> remaining_functions;
320
321 // Assume all function symbols actually start a real function
322 for (auto x = symbols.begin(); x != symbols.end(); ++x) {
323 uint64_t result;
324 bool contains;
325 SymbolRef::Type symbol_type;
326
327
328 if (text_section.containsSymbol(x->second, contains) || !contains)
329 continue;
330
331 if (x->second.getType(symbol_type)
332 || SymbolRef::ST_Function != symbol_type)
333 continue;
334
335 if (!x->second.getAddress(result)) {
336 Function * fun = manager->newFunction(result);
337 if (fun) {
338 fun->setName(x->first);
339 remaining_functions.push_back(fun);
340 LOG4CXX_DEBUG(logger, "Disasembling " << x->first);
341 } else {
342 LOG4CXX_DEBUG(logger, "Function at " << std::hex << result
343 << " already disassembled as " << manager->getFunction(result)->getName());
344 }
345 }
346 }
347
348 for (Function* function : remaining_functions) {
349 disassembleFunction(function);
350 manager->finishFunction(function);
351 }
352
353 if (binary->isELF()) {
354 uint64_t _entryAddress = entryAddress();
355 LOG4CXX_DEBUG(logger, "Adding entryAddress at: " << std::hex << _entryAddress);
356 std::stringstream s;
357 s << "<_start 0x" << std::hex << _entryAddress << ">";
358
359 disassembleFunctionAt(_entryAddress, s.str());
360 }
361
362 if (!manager->hasFunctions()) {
363 uint64_t text_entry;
364 text_section.getAddress(text_entry);
365 LOG4CXX_INFO(logger, "No Symbols found, starting at the beginning of the text segment");
366 disassembleFunctionAt(text_entry);
367 }
368 }
369
370 template <>
371 uint64_t LLVMDisassembler<COFFT>::entryAddress() {
372 const auto coffobject = dyn_cast<COFFObjectFile>(o);
373 const struct pe32_header* pe32_header;
374 const struct pe32plus_header* pe32plus_header;
375
376 coffobject->getPE32PlusHeader(pe32plus_header);
377
378 if (pe32plus_header) {
379 return pe32plus_header->AddressOfEntryPoint;
380 } else {
381 coffobject->getPE32Header(pe32_header);
382 return pe32_header->AddressOfEntryPoint;
383 }
384 }
385
386 template<>
387 uint64_t LLVMDisassembler<MACHOT>::entryAddress() {
388 // TODO
389 return 0;
390 }
391
392 template <typename ELFT>
393 uint64_t LLVMDisassembler<ELFT>::entryAddress() {
394 const auto elffile = dyn_cast<ELFObjectFile<ELFT>>(o)->getELFFile();
395 const auto * header = elffile->getHeader();
396
397 return header->e_entry;
398 }
399
400 template <typename ELFT>
401 void LLVMDisassembler<ELFT>::splitBlocks(Function* function) {
402 SectionRef text_section = getTextSection();
403 StringRef bytes;
404 text_section.getContents(bytes);
405 StringRefMemoryObject ref(bytes);
406
407 LOG4CXX_DEBUG(logger, "Splitting Blocks in Function " << function->getName());
408 // Split blocks where jumps are going inside the block
409 for (auto it = function->blocks().begin();
410 it != function->blocks().end();
411 ++it) {
412 BasicBlock * current_block = it->second;
413 if (current_block->getEndAddress() == 0) {
414 LOG4CXX_ERROR(logger, "UNFINISHED BLOCK " << std::hex << current_block->getStartAddress());
415 break;
416 }
417 uint64_t inst_size;
418 uint64_t base_address;
419 text_section.getAddress(base_address);
420 uint64_t current_address = current_block->getStartAddress() - base_address;
421 while(current_block->getEndAddress() - base_address > current_address) {
422 MCInst inst;
423 std::string buf;
424 llvm::raw_string_ostream s(buf);
425
426 if(llvm::MCDisassembler::Success ==
427 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
428 // See if some other block starts here
429 BasicBlock* other = manager->getBasicBlock(current_address
430 + inst_size
431 + base_address);
432
433 // Special case, other block starts here but we are at the end anyway
434 if (other != NULL) {
435 uint64_t endaddress = current_address + inst_size + base_address;
436 if (endaddress != current_block->getEndAddress()) {
437 LOG4CXX_DEBUG(logger, "Shortening block starting at "
438 << std::hex
439 << current_block->getStartAddress()
440 << " now ending at "
441 << other->getStartAddress());
442 function->addBasicBlock(other);
443 current_block->setEndAddress(endaddress);
444 current_block->setNextBlock(0, other->getStartAddress());
445 current_block->setNextBlock(1, 0);
446 }
447 }
448 } else {
449 inst_size = 1;
450 }
451 current_address += inst_size;
452 }
453 }
454 }
455
456 template<>
457 void LLVMDisassembler<COFFT>::readDynamicSymbols() {
458 //TODO
459 }
460
461 template<>
462 void LLVMDisassembler<MACHOT>::readDynamicSymbols() {
463 //TODO
464 }
465
466 template <typename ELFT>
467 void LLVMDisassembler<ELFT>::readDynamicSymbols() {
468 const auto elffile = dyn_cast<ELFObjectFile<ELFT>>(o)->getELFFile();
469 for (auto it = elffile->begin_dynamic_symbols(),
470 end = elffile->end_dynamic_symbols();
471 it != end;
472 ++it) {
473 if (it->getType() == 2) { // Function
474 bool is_default;
475 // TODO: Error handling
476 std::string symbolname = *(elffile->getSymbolName(it));
477 std::string symbolversion = *(elffile->getSymbolVersion(nullptr, &*it, is_default));
478 manager->signal_new_dyn_symbol(symbolname + (is_default? "@@" : "@") + symbolversion);
479 LOG4CXX_DEBUG(logger, "Adding dynamic Symbol " << symbolname << (is_default? "@@" : "@") << symbolversion);
480 }
481 }
482 }
483
484 template <typename ELFT>
485 void LLVMDisassembler<ELFT>::readSymbols() {
486 error_code ec;
487 symbol_iterator si(o->symbol_begin()), se(o->symbol_end());
488 for (; si != se; ++si) {
489 StringRef name;
490 if ((ec = si->getName(name))) {
491 LOG4CXX_ERROR(logger, ec.message());
492 break;
493 }
494 LOG4CXX_DEBUG(logger, "Added symbol " << name.str());
495 symbols.insert(make_pair(name.str(), *si));
496 }
497 }
498
499 template <typename ELFT>
500 void LLVMDisassembler<ELFT>::readSections() {
501 error_code ec;
502 section_iterator i(o->section_begin()), e(o->section_end());
503 for (; i != e; ++i) {
504 StringRef name;
505 if ((ec = i->getName(name))) {
506 LOG4CXX_ERROR(logger, ec.message());
507 break;
508 }
509 LOG4CXX_DEBUG(logger, "Added section " << name.str());
510 sections.insert(make_pair(name.str(), *i));
511 }
512
513 }
514
515 // template <typename ELFT>
516 // void LLVMDisassembler<ELFT>::forEachFunction(std::function<void (uint64_t, Function*)> callback) {
517 // // std::for_each(functions.begin(), functions.end(),
518 // // [&](std::pair<uint64_t, Function*> x) {
519 // // callback(x.first, x.second);
520 // // });
521 // }
522
523 template <typename ELFT>
524 void LLVMDisassembler<ELFT>::printEachInstruction(uint64_t start, uint64_t end,
525 std::function<void (uint8_t*, size_t,
526 const std::string&,
527 const std::string&)> fun) {
528 SectionRef text_section = getTextSection();
529 uint64_t base_address;
530 text_section.getAddress(base_address);
531 uint64_t current_address = start - base_address;
532
533 StringRef bytes;
534 text_section.getContents(bytes);
535 StringRefMemoryObject ref(bytes);
536
537 while (current_address < end - base_address) {
538 uint64_t inst_size;
539 MCInst inst;
540 std::string buf;
541 llvm::raw_string_ostream s(buf);
542
543 if(llvm::MCDisassembler::Success ==
544 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
545
546 uint8_t bytes[inst_size+2];
547 ref.readBytes(current_address, inst_size, bytes);
548
549 uint64_t jmptarget;
550 std::string ref("");
551 IP->printInst(&inst, s, "");
552 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
553 std::stringstream stream;
554 if (MIA->isCall(inst))
555 stream << "function:";
556 else
557 stream << "block:";
558
559 stream << std::hex << (base_address + jmptarget);
560 ref = stream.str();
561 }
562
563
564 fun(bytes, inst_size, s.str(), ref);
565 } else {
566 LOG4CXX_WARN(logger, "Invalid byte at" << std::hex << current_address + base_address);
567 fun(NULL, 0, "Invalid Byte", "");
568 inst_size = 1;
569 }
570
571 current_address += inst_size;
572 }
573 }
574
575 template <typename ELFT>
576 SectionRef LLVMDisassembler<ELFT>::getTextSection() {
577 return sections[".text"];
578 }
579
580 template <>
581 SectionRef LLVMDisassembler<MACHOT>::getTextSection() {
582 return sections["__text"];
583 }