]> git.siccegge.de Git - frida/frida.git/blob - src/disassembler/llvm/LLVMDisassembler.cxx
d9d615ab1ff2c90302c7fbe4106d918805d3e4d5
[frida/frida.git] / src / disassembler / llvm / LLVMDisassembler.cxx
1 #include "disassembler/Instruction.hxx"
2 #include "disassembler/llvm/LLVMDisassembler.hxx"
3 #include "core/InformationManager.hxx"
4 #include "core/Function.hxx"
5 #include "core/BasicBlock.hxx"
6 #include "core/Exception.hxx"
7 #include <boost/algorithm/string.hpp>
8
9 #include <stack>
10 #include <algorithm>
11 #include <cassert>
12
13 using namespace llvm;
14 using namespace llvm::object;
15 using std::error_code;
16
namespace {
    /// Empty tag type: the template argument used for COFF object files.
    class COFFT {};

    /// Empty tag type: the template argument used for Mach-O object files.
    class MACHOT {};
}
26
27 /*
28 *
29 */
30 Disassembler * createLLVMDisassembler(const std::string& filename, InformationManager* manager) {
31 log4cxx::LoggerPtr logger(log4cxx::Logger::getLogger("disassembler.LLVMDisassembler"));
32 if (filename == "")
33 return NULL;
34
35 auto retval = createBinary(filename);
36 if (error_code ec = retval.getError()) {
37 LOG4CXX_ERROR(logger, ec.message());
38 return NULL;
39 }
40 #if defined(LLVM_35)
41 Binary * op = retval.get();
42 #elif defined(LLVM_36)
43 OwningBinary<Binary> ob;
44 ob = std::move(retval.get());
45 Binary* op = ob.getBinary();
46 auto foo = ob.takeBinary();
47 foo.first.release();
48 foo.second.release();
49 #endif
50
51 try {
52 // ELFType<endian, maxalign, 64bit>
53 if (ELF32LEObjectFile * object = dyn_cast<ELF32LEObjectFile>(op)) {
54 return new LLVMDisassembler<ELFType<support::little, 2, false>>(filename, manager, object);
55 }
56 if (ELF64LEObjectFile * object = dyn_cast<ELF64LEObjectFile>(op)) {
57 return new LLVMDisassembler<ELFType<support::little, 2, true>>(filename, manager, object);
58 }
59 if (ELF32BEObjectFile * object = dyn_cast<ELF32BEObjectFile>(op)) {
60 return new LLVMDisassembler<ELFType<support::big, 2, false>>(filename, manager, object);
61 }
62 if (ELF64BEObjectFile * object = dyn_cast<ELF64BEObjectFile>(op)) {
63 return new LLVMDisassembler<ELFType<support::big, 2, true>>(filename, manager, object);
64 }
65 if (COFFObjectFile * object = dyn_cast<COFFObjectFile>(op)) {
66 return new LLVMDisassembler<COFFT>(filename, manager, object);
67 }
68 if (MachOObjectFile * object = dyn_cast<MachOObjectFile>(op)) {
69 return new LLVMDisassembler<MACHOT>(filename, manager, object);
70 }
71 } catch (BinaryNotSupported& e) {
72 return NULL;
73 }
74 return NULL;
75 }
76
77 /*
78 * TODO: fallback code falls die Datei kein ELF/PE/COFF/MacO/.. binary
79 * ist sondern z.B. einfach nur Instruktionen oder ein Bootsektor oder
80 * foo
81 */
82 template <typename ELFT>
83 LLVMDisassembler<ELFT>::LLVMDisassembler(const std::string& filename,
84 InformationManager* manager,
85 ObjectFile* file)
86 : Disassembler()
87 , logger(log4cxx::Logger::getLogger("disassembler.LLVMDisassembler"))
88 , triple("unknown-unknown-unknown")
89 , manager(manager)
90 {
91 LOG4CXX_DEBUG(logger, "Handling file " << filename);
92
93 if (!file) {
94 auto result = createBinary(filename);
95
96 error_code ec;
97 if ((ec = result.getError())) {
98 LOG4CXX_ERROR(logger, "Failed to load Binary" << ec.message());
99 binary = NULL;
100 return;
101 }
102
103 #if defined(LLVM_35)
104 binary.reset(result.get());
105 #elif defined(LLVM_36)
106 OwningBinary<Binary> ob;
107 ob = std::move(result.get());
108 Binary* op = ob.getBinary();
109
110 binary.reset(op);
111 #endif
112
113 o = dyn_cast<ObjectFile>(binary.get());
114 } else {
115 o = file;
116 binary.reset(file);
117 }
118
119 triple.setArch(Triple::ArchType(o->getArch()));
120 std::string tripleName(triple.getTriple());
121
122 LOG4CXX_INFO(logger, "Architecture " << tripleName);
123
124
125 std::string es;
126 target = TargetRegistry::lookupTarget("", triple, es);
127 if (!target) {
128 LOG4CXX_ERROR(logger, es);
129 BinaryNotSupported e;
130 throw e;
131 }
132
133 LOG4CXX_INFO(logger, "Target " << target->getName());
134
135 MRI.reset(target->createMCRegInfo(tripleName));
136 if (!MRI) {
137 LOG4CXX_ERROR(logger, "no register info for target " << tripleName);
138 BinaryNotSupported e;
139 throw e;
140 }
141
142 // Set up disassembler.
143 AsmInfo.reset(target->createMCAsmInfo(*MRI, tripleName));
144 if (!AsmInfo) {
145 LOG4CXX_ERROR(logger, "no assembly info for target " << tripleName);
146 BinaryNotSupported e;
147 throw e;
148 }
149
150 STI.reset(target->createMCSubtargetInfo(tripleName, "", ""));
151 if (!STI) {
152 LOG4CXX_ERROR(logger, "no subtarget info for target " << tripleName);
153 BinaryNotSupported e;
154 throw e;
155 }
156
157 MII.reset(target->createMCInstrInfo());
158 if (!MII) {
159 LOG4CXX_ERROR(logger, "no instruction info for target " << tripleName);
160 BinaryNotSupported e;
161 throw e;
162 }
163
164 MOFI.reset(new MCObjectFileInfo);
165 MCContext Ctx(AsmInfo.get(), MRI.get(), MOFI.get());
166
167 DisAsm.reset(target->createMCDisassembler(*STI, Ctx));
168 if (!DisAsm) {
169 LOG4CXX_ERROR(logger, "no disassembler for target " << tripleName);
170 BinaryNotSupported e;
171 throw e;
172 }
173 RelInfo.reset(
174 target->createMCRelocationInfo(tripleName, Ctx));
175 if (RelInfo) {
176 // Symzer.reset(
177 // MCObjectSymbolizer::createObjectSymbolizer(Ctx, std::move(RelInfo), o));
178 // if (Symzer)
179 // DisAsm->setSymbolizer(std::move(Symzer));
180 }
181 RelInfo.release();
182 Symzer.release();
183
184 MIA.reset(target->createMCInstrAnalysis(MII.get()));
185 if (!MIA) {
186 LOG4CXX_ERROR(logger, "no instruction analysis for target " << tripleName);
187 BinaryNotSupported e;
188 throw e;
189 }
190
191 int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
192 IP.reset(target->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI));
193 if (!IP) {
194 LOG4CXX_ERROR(logger, "no instruction printer for target " << tripleName);
195 BinaryNotSupported e;
196 throw e;
197 }
198
199 IP->setPrintImmHex(llvm::HexStyle::C);
200 IP->setPrintImmHex(true);
201
202 // std::unique_ptr<MCObjectDisassembler> OD(
203 // new MCObjectDisassembler(*o, *DisAsm, *MIA));
204 //Mod.reset(OD->buildModule(false));
205
206 readSections();
207 }
208
209 template <typename ELFT>
210 void LLVMDisassembler<ELFT>::start() {
211 readSymbols();
212 disassemble();
213 readDynamicSymbols();
214 }
215
216 template <typename ELFT>
217 LLVMDisassembler<ELFT>::~LLVMDisassembler() {}
218
219 template <typename ELFT>
220 Function* LLVMDisassembler<ELFT>::disassembleFunctionAt(uint64_t address, const std::string& name) {
221 Function * function;
222 SectionRef text_section = getTextSection();
223 uint64_t base_address, size;
224 #if defined(LLVM_35)
225 text_section.getAddress(base_address);
226 text_section.getSize(size);
227 #elif defined(LLVM_36)
228 base_address = text_section.getAddress();
229 size = text_section.getSize();
230 #endif
231 if (address < base_address ||
232 address >= base_address + size) {
233 return NULL;
234 }
235
236 if (NULL == (function = manager->getFunction(address))) {
237
238 if (name == "") {
239 std::stringstream s;
240 s << "<Unnamed 0x" << std::hex << address << ">";
241 function = manager->newFunction(address);
242 function->setName(s.str());
243 } else {
244 function = manager->newFunction(address);
245 function->setName(name);
246 }
247 disassembleFunction(function);
248 }
249
250 return function;
251 }
252
253 template <typename ELFT>
254 void LLVMDisassembler<ELFT>::disassembleFunction(Function* function) {
255 std::vector<uint64_t> called_functions;
256 std::stack<BasicBlock*> remaining_blocks;
257 /* TODO:
258 * Do all blocks get added properly? We should take care to remove
259 * the other ones at the end of the function!
260 */
261 std::map<uint64_t, BasicBlock*> new_blocks;
262 SectionRef text_section = getTextSection();
263 StringRef bytes;
264 text_section.getContents(bytes);
265 #if defined(LLVM_35)
266 StringRefMemoryObject ref(bytes);
267 #elif defined(LLVM_36)
268 ArrayRef<uint8_t> bytearray(reinterpret_cast<const uint8_t *>(bytes.data()),
269 bytes.size());
270 #else
271 #error LLVM != 3.5 | 3.6 not supported
272 #endif
273
274 LOG4CXX_DEBUG(logger, "Handling function " << function->getName());
275
276 BasicBlock * block = manager->newBasicBlock(function->getStartAddress());
277 remaining_blocks.push(block);
278 new_blocks.insert(std::make_pair(block->getStartAddress(), block));
279 function->addBasicBlock(block);
280
281 uint64_t base_address, size;
282 #if defined(LLVM_35)
283 text_section.getAddress(base_address);
284 text_section.getSize(size);
285 #elif defined(LLVM_36)
286 base_address = text_section.getAddress();
287 size = text_section.getSize();
288 #endif
289 LOG4CXX_DEBUG(logger, "Text section at " << std::hex << base_address << " with size " << size);
290
291 while (remaining_blocks.size()) {
292 BasicBlock * current_block = remaining_blocks.top();
293 remaining_blocks.pop();
294
295 LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex
296 << current_block->getStartAddress());
297
298 uint64_t inst_size;
299 uint64_t current_address = current_block->getStartAddress() - base_address;
300 while(true) {
301 MCInst inst;
302 std::string buf;
303 llvm::raw_string_ostream s(buf);
304
305 if(llvm::MCDisassembler::Success ==
306 #if defined(LLVM_35)
307 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
308 #elif defined(LLVM_36)
309 DisAsm->getInstruction(inst, inst_size,
310 bytearray.slice(current_address),
311 base_address + current_address,
312 nulls(), nulls())) {
313 #endif
314 uint64_t jmptarget;
315
316 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
317 jmptarget += base_address;
318 if (!MIA->isIndirectBranch(inst)) {
319 if (MIA->isCall(inst)) {
320 if (NULL == manager->getFunction(jmptarget))
321 called_functions.push_back(jmptarget);
322 } else {
323 current_block->setNextBlock(0, jmptarget);
324 if (new_blocks.find(jmptarget) == new_blocks.end()) {
325 BasicBlock * block = manager->newBasicBlock(jmptarget);
326 assert(block);
327 new_blocks.insert(std::make_pair(block->getStartAddress(), block));
328 function->addBasicBlock(block);
329 remaining_blocks.push(block);
330 } else {
331 LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex
332 << current_block->getStartAddress());
333 function->addBasicBlock(new_blocks.find(jmptarget)->second);
334 }
335 if (MIA->isConditionalBranch(inst)) {
336 jmptarget = base_address + current_address + inst_size;
337 current_block->setNextBlock(1, jmptarget);
338 if (new_blocks.find(jmptarget) == new_blocks.end()) {
339 BasicBlock * block = manager->newBasicBlock(jmptarget);
340 assert(block);
341 new_blocks.insert(std::make_pair(block->getStartAddress(), block));
342 function->addBasicBlock(block);
343 remaining_blocks.push(block);
344 } else {
345 LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex
346 << current_block->getStartAddress());
347 function->addBasicBlock(new_blocks.find(jmptarget)->second);
348 }
349 }
350 }
351 }
352 }
353 } else {
354 inst_size = 0;
355 }
356
357
358 if (inst_size == 0 || MIA->isTerminator(inst) || MIA->isBranch(inst)) {
359 current_block->setEndAddress(current_address + base_address + inst_size);
360 LOG4CXX_DEBUG(logger, "Finished Block at " << std::hex <<
361 current_block->getEndAddress());
362 break;
363 }
364 current_address += inst_size;
365 }
366 }
367 splitBlocks(function);
368 LOG4CXX_DEBUG(logger, "Finished function " << function->getName());
369 manager->finishFunction(function);
370 for (uint64_t address : called_functions)
371 disassembleFunctionAt(address);
372 }
373
374 template <typename ELFT>
375 void LLVMDisassembler<ELFT>::disassemble() {
376 SectionRef text_section = getTextSection();
377 std::vector<Function*> remaining_functions;
378
379 // Assume all function symbols actually start a real function
380 for (auto x = symbols.begin(); x != symbols.end(); ++x) {
381 uint64_t result;
382 bool contains;
383 SymbolRef::Type symbol_type;
384
385 #if defined(LLVM_35)
386 if (text_section.containsSymbol(x->second, contains) || !contains)
387 #elif defined(LLVM_36)
388 if (!text_section.containsSymbol(x->second))
389 #endif
390 continue;
391
392 if (x->second.getType(symbol_type)
393 || SymbolRef::ST_Function != symbol_type)
394 continue;
395
396 if (!x->second.getAddress(result)) {
397 Function * fun = manager->newFunction(result);
398 if (fun) {
399 fun->setName(x->first);
400 remaining_functions.push_back(fun);
401 LOG4CXX_DEBUG(logger, "Disasembling " << x->first);
402 } else {
403 LOG4CXX_DEBUG(logger, "Function at " << std::hex << result
404 << " already disassembled as " << manager->getFunction(result)->getName());
405 }
406 }
407 }
408
409 for (Function* function : remaining_functions) {
410 disassembleFunction(function);
411 manager->finishFunction(function);
412 }
413
414 if (binary->isELF()) {
415 uint64_t _entryAddress = entryAddress();
416 LOG4CXX_DEBUG(logger, "Adding entryAddress at: " << std::hex << _entryAddress);
417 std::stringstream s;
418 s << "<_start 0x" << std::hex << _entryAddress << ">";
419
420 disassembleFunctionAt(_entryAddress, s.str());
421 }
422
423 if (!manager->hasFunctions()) {
424 uint64_t text_entry;
425 #if defined(LLVM_35)
426 text_section.getAddress(text_entry);
427 #elif defined(LLVM_36)
428 text_entry = text_section.getAddress();
429 #endif
430 LOG4CXX_INFO(logger, "No Symbols found, starting at the beginning of the text segment");
431 disassembleFunctionAt(text_entry);
432 }
433 }
434
435 template <>
436 uint64_t LLVMDisassembler<COFFT>::entryAddress() {
437 const auto coffobject = dyn_cast<COFFObjectFile>(o);
438 const struct pe32_header* pe32_header;
439 const struct pe32plus_header* pe32plus_header;
440
441 coffobject->getPE32PlusHeader(pe32plus_header);
442
443 if (pe32plus_header) {
444 return pe32plus_header->AddressOfEntryPoint;
445 } else {
446 coffobject->getPE32Header(pe32_header);
447 return pe32_header->AddressOfEntryPoint;
448 }
449 }
450
451 template<>
452 uint64_t LLVMDisassembler<MACHOT>::entryAddress() {
453 // TODO
454 return 0;
455 }
456
457 template <typename ELFT>
458 uint64_t LLVMDisassembler<ELFT>::entryAddress() {
459 const auto elffile = dyn_cast<ELFObjectFile<ELFT>>(o)->getELFFile();
460 const auto * header = elffile->getHeader();
461
462 return header->e_entry;
463 }
464
465 template <typename ELFT>
466 void LLVMDisassembler<ELFT>::splitBlocks(Function* function) {
467 SectionRef text_section = getTextSection();
468 StringRef bytes;
469 text_section.getContents(bytes);
470 #if defined(LLVM_35)
471 StringRefMemoryObject ref(bytes);
472 #elif defined(LLVM_36)
473 ArrayRef<uint8_t> bytearray(reinterpret_cast<const uint8_t *>(bytes.data()),
474 bytes.size());
475 #endif
476
477
478 LOG4CXX_DEBUG(logger, "Splitting Blocks in Function " << function->getName());
479 // Split blocks where jumps are going inside the block
480 for (auto it = function->blocks().begin();
481 it != function->blocks().end();
482 ++it) {
483 BasicBlock * current_block = it->second;
484 if (current_block->getEndAddress() == 0) {
485 LOG4CXX_ERROR(logger, "UNFINISHED BLOCK " << std::hex << current_block->getStartAddress());
486 break;
487 }
488 uint64_t inst_size;
489 uint64_t base_address;
490 #if defined(LLVM_35)
491 text_section.getAddress(base_address);
492 #elif defined(LLVM_36)
493 base_address = text_section.getAddress();
494 #endif
495 uint64_t current_address = current_block->getStartAddress() - base_address;
496 while(current_block->getEndAddress() - base_address > current_address) {
497 MCInst inst;
498 std::string buf;
499 llvm::raw_string_ostream s(buf);
500
501 if(llvm::MCDisassembler::Success ==
502 #if defined(LLVM_35)
503 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
504 #elif defined(LLVM_36)
505 DisAsm->getInstruction(inst, inst_size,
506 bytearray.slice(current_address),
507 base_address + current_address,
508 nulls(), nulls())) {
509 #endif
510
511 // See if some other block starts here
512 BasicBlock* other = manager->getBasicBlock(current_address
513 + inst_size
514 + base_address);
515
516 // Special case, other block starts here but we are at the end anyway
517 if (other != NULL) {
518 uint64_t endaddress = current_address + inst_size + base_address;
519 if (endaddress != current_block->getEndAddress()) {
520 LOG4CXX_DEBUG(logger, "Shortening block starting at "
521 << std::hex
522 << current_block->getStartAddress()
523 << " now ending at "
524 << other->getStartAddress());
525 function->addBasicBlock(other);
526 current_block->setEndAddress(endaddress);
527 current_block->setNextBlock(0, other->getStartAddress());
528 current_block->setNextBlock(1, 0);
529 }
530 }
531 } else {
532 inst_size = 1;
533 }
534 current_address += inst_size;
535 }
536 }
537 }
538
539 template<>
540 void LLVMDisassembler<COFFT>::readDynamicSymbols() {
541 //TODO
542 }
543
544 template<>
545 void LLVMDisassembler<MACHOT>::readDynamicSymbols() {
546 //TODO
547 }
548
549 template <typename ELFT>
550 void LLVMDisassembler<ELFT>::readDynamicSymbols() {
551 const auto elffile = dyn_cast<ELFObjectFile<ELFT>>(o)->getELFFile();
552 for (auto it = elffile->begin_dynamic_symbols(),
553 end = elffile->end_dynamic_symbols();
554 it != end;
555 ++it) {
556 if (it->getType() == 2) { // Function
557 bool is_default;
558 // TODO: Error handling
559 std::string symbolname = *(elffile->getSymbolName(it));
560 std::string symbolversion = *(elffile->getSymbolVersion(nullptr, &*it, is_default));
561 // TODO: actually get the symbol address from relocations
562 Function* f = manager->newDynamicFunction(0);
563 f->setName(symbolname + (is_default? "@@" : "@") + symbolversion);
564 manager->finishFunction(f);
565
566 LOG4CXX_DEBUG(logger, "Adding dynamic Symbol " << symbolname << (is_default? "@@" : "@") << symbolversion);
567 }
568 }
569 }
570
571 template <typename ELFT>
572 void LLVMDisassembler<ELFT>::readSymbols() {
573 error_code ec;
574 symbol_iterator si(o->symbol_begin()), se(o->symbol_end());
575 for (; si != se; ++si) {
576 StringRef name;
577 if ((ec = si->getName(name))) {
578 LOG4CXX_ERROR(logger, ec.message());
579 break;
580 }
581 LOG4CXX_DEBUG(logger, "Added symbol " << name.str());
582 symbols.insert(make_pair(name.str(), *si));
583 }
584 }
585
586 template <typename ELFT>
587 void LLVMDisassembler<ELFT>::readSections() {
588 error_code ec;
589 section_iterator i(o->section_begin()), e(o->section_end());
590 for (; i != e; ++i) {
591 StringRef name;
592 if ((ec = i->getName(name))) {
593 LOG4CXX_ERROR(logger, ec.message());
594 break;
595 }
596 LOG4CXX_DEBUG(logger, "Added section " << name.str());
597 sections.insert(make_pair(name.str(), *i));
598 }
599
600 }
601
602 // template <typename ELFT>
603 // void LLVMDisassembler<ELFT>::forEachFunction(std::function<void (uint64_t, Function*)> callback) {
604 // // std::for_each(functions.begin(), functions.end(),
605 // // [&](std::pair<uint64_t, Function*> x) {
606 // // callback(x.first, x.second);
607 // // });
608 // }
609
610 template <typename ELFT>
611 std::vector<Instruction> LLVMDisassembler<ELFT>::getInstructions(const BasicBlock *block) {
612 std::vector<Instruction> result;
613 SectionRef text_section = getTextSection();
614 uint64_t base_address;
615 #if defined(LLVM_35)
616 text_section.getAddress(base_address);
617 #elif defined(LLVM_36)
618 base_address = text_section.getAddress();
619 #endif
620
621 uint64_t current_address = block->getStartAddress() - base_address;
622 uint64_t end_position = block->getEndAddress() - base_address;
623
624 StringRef bytes;
625 text_section.getContents(bytes);
626 #if defined(LLVM_35)
627 StringRefMemoryObject ref(bytes);
628 #elif defined(LLVM_36)
629 ArrayRef<uint8_t> bytearray(reinterpret_cast<const uint8_t *>(bytes.data()),
630 bytes.size());
631 #endif
632
633
634 while (current_address < end_position) {
635 uint64_t inst_size;
636 MCInst inst;
637 std::string buf;
638 llvm::raw_string_ostream s(buf);
639
640 if(llvm::MCDisassembler::Success ==
641 #if defined(LLVM_35)
642 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
643 #elif defined(LLVM_36)
644 DisAsm->getInstruction(inst, inst_size,
645 bytearray.slice(current_address),
646 base_address + current_address,
647 nulls(), nulls())) {
648 #endif
649
650 uint8_t bytes[inst_size+2];
651 #if defined(LLVM_35)
652 ref.readBytes(current_address, inst_size, bytes);
653 #elif defined(LLVM_36)
654 size_t bytesindex(0);
655 for (uint8_t byte : bytearray.slice(current_address, inst_size)) {
656 bytes[bytesindex++] = byte;
657 }
658 #endif
659
660 uint64_t jmptarget;
661 std::string ref("");
662 IP->printInst(&inst, s, "");
663 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
664 std::stringstream stream;
665 if (MIA->isCall(inst))
666 stream << "function:";
667 else
668 stream << "block:";
669
670 stream << std::hex << (base_address + jmptarget);
671 ref = stream.str();
672 }
673 result.push_back(Instruction(current_address + base_address, boost::algorithm::trim_copy(s.str()),
674 std::vector<uint8_t>(bytes, bytes+inst_size), ref));
675 } else {
676 LOG4CXX_WARN(logger, "Invalid byte at" << std::hex << current_address + base_address);
677 uint8_t bytes[1];
678 #if defined(LLVM_35)
679 ref.readBytes(current_address, 1, bytes);
680 #elif defined(LLVM_36)
681 bytes[0] = bytearray[current_address];
682 #endif
683 result.push_back(Instruction(current_address + base_address, "Invalid Instruction",
684 std::vector<uint8_t>(bytes, bytes+1), ""));
685 inst_size = 1;
686 }
687
688 current_address += inst_size;
689 }
690 return result;
691 }
692
693 template <typename ELFT>
694 void LLVMDisassembler<ELFT>::printEachInstruction(uint64_t start, uint64_t end,
695 std::function<void (uint8_t*, size_t,
696 const std::string&,
697 const std::string&)> fun) {
698 SectionRef text_section = getTextSection();
699 uint64_t base_address;
700 #if defined(LLVM_35)
701 text_section.getAddress(base_address);
702 #elif defined(LLVM_36)
703 base_address = text_section.getAddress();
704 #endif
705
706 uint64_t current_address = start - base_address;
707
708 StringRef bytes;
709 text_section.getContents(bytes);
710 #if defined(LLVM_35)
711 StringRefMemoryObject ref(bytes);
712 #elif defined(LLVM_36)
713 ArrayRef<uint8_t> bytearray(reinterpret_cast<const uint8_t *>(bytes.data()),
714 bytes.size());
715 #endif
716
717
718 while (current_address < end - base_address) {
719 uint64_t inst_size;
720 MCInst inst;
721 std::string buf;
722 llvm::raw_string_ostream s(buf);
723
724 if(llvm::MCDisassembler::Success ==
725 #if defined(LLVM_35)
726 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
727 #elif defined(LLVM_36)
728 DisAsm->getInstruction(inst, inst_size,
729 bytearray.slice(current_address),
730 base_address + current_address,
731 nulls(), nulls())) {
732 #endif
733
734 uint8_t bytes[inst_size+2];
735 #if defined(LLVM_35)
736 ref.readBytes(current_address, inst_size, bytes);
737 #elif defined(LLVM_36)
738 size_t bytesindex(0);
739 for (uint8_t byte : bytearray.slice(current_address, inst_size)) {
740 bytes[bytesindex++] = byte;
741 }
742 #endif
743
744 uint64_t jmptarget;
745 std::string ref("");
746 IP->printInst(&inst, s, "");
747 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
748 std::stringstream stream;
749 if (MIA->isCall(inst))
750 stream << "function:";
751 else
752 stream << "block:";
753
754 stream << std::hex << (base_address + jmptarget);
755 ref = stream.str();
756 }
757
758
759 fun(bytes, inst_size, s.str(), ref);
760 } else {
761 LOG4CXX_WARN(logger, "Invalid byte at" << std::hex << current_address + base_address);
762 fun(NULL, 0, "Invalid Byte", "");
763 inst_size = 1;
764 }
765
766 current_address += inst_size;
767 }
768 }
769
770 template <typename ELFT>
771 SectionRef LLVMDisassembler<ELFT>::getTextSection() {
772 return sections[".text"];
773 }
774
775 template <>
776 SectionRef LLVMDisassembler<MACHOT>::getTextSection() {
777 return sections["__text"];
778 }