]> git.siccegge.de Git - frida/frida.git/blob - src/disassembler/llvm/LLVMDisassembler.cxx
Preliminary LLVM-3.6 support
[frida/frida.git] / src / disassembler / llvm / LLVMDisassembler.cxx
1 #include "disassembler/Instruction.hxx"
2 #include "disassembler/llvm/LLVMDisassembler.hxx"
3 #include "core/InformationManager.hxx"
4 #include "core/Function.hxx"
5 #include "core/BasicBlock.hxx"
6 #include <boost/algorithm/string.hpp>
7
8 #include <stack>
9 #include <algorithm>
10 #include <cassert>
11
12 using namespace llvm;
13 using namespace llvm::object;
14 using std::error_code;
15
namespace {
/// Tag type used to select the COFF/PE specializations of LLVMDisassembler.
class COFFT {};

/// Tag type used to select the Mach-O specializations of LLVMDisassembler.
class MACHOT {};
}  // namespace
25
26 /*
27 *
28 */
29 Disassembler * createLLVMDisassembler(const std::string& filename, InformationManager* manager) {
30 log4cxx::LoggerPtr logger(log4cxx::Logger::getLogger("disassembler.LLVMDisassembler"));
31 if (filename == "")
32 return NULL;
33
34 auto retval = createBinary(filename);
35 if (error_code ec = retval.getError()) {
36 LOG4CXX_ERROR(logger, ec.message());
37 return NULL;
38 }
39 #if defined(LLVM_35)
40 Binary * op = retval.get();
41 #elif defined(LLVM_36)
42 OwningBinary<Binary> ob;
43 ob = std::move(retval.get());
44 Binary* op = ob.getBinary();
45 auto foo = ob.takeBinary();
46 foo.first.release();
47 foo.second.release();
48 #endif
49
50 // ELFType<endian, maxalign, 64bit>
51 if (ELF32LEObjectFile * object = dyn_cast<ELF32LEObjectFile>(op)) {
52 return new LLVMDisassembler<ELFType<support::little, 2, false>>(filename, manager, object);
53 }
54 if (ELF64LEObjectFile * object = dyn_cast<ELF64LEObjectFile>(op)) {
55 return new LLVMDisassembler<ELFType<support::little, 2, true>>(filename, manager, object);
56 }
57 if (ELF32BEObjectFile * object = dyn_cast<ELF32BEObjectFile>(op)) {
58 return new LLVMDisassembler<ELFType<support::big, 2, false>>(filename, manager, object);
59 }
60 if (ELF64BEObjectFile * object = dyn_cast<ELF64BEObjectFile>(op)) {
61 return new LLVMDisassembler<ELFType<support::big, 2, true>>(filename, manager, object);
62 }
63 if (COFFObjectFile * object = dyn_cast<COFFObjectFile>(op)) {
64 return new LLVMDisassembler<COFFT>(filename, manager, object);
65 }
66 if (MachOObjectFile * object = dyn_cast<MachOObjectFile>(op)) {
67 return new LLVMDisassembler<MACHOT>(filename, manager, object);
68 }
69
70 return NULL;
71 }
72
73 /*
74 * TODO: fallback code falls die Datei kein ELF/PE/COFF/MacO/.. binary
75 * ist sondern z.B. einfach nur Instruktionen oder ein Bootsektor oder
76 * foo
77 */
78 template <typename ELFT>
79 LLVMDisassembler<ELFT>::LLVMDisassembler(const std::string& filename,
80 InformationManager* manager,
81 ObjectFile* file)
82 : Disassembler()
83 , logger(log4cxx::Logger::getLogger("disassembler.LLVMDisassembler"))
84 , triple("unknown-unknown-unknown")
85 , manager(manager)
86 {
87 LOG4CXX_DEBUG(logger, "Handling file " << filename);
88
89 if (!file) {
90 auto result = createBinary(filename);
91
92 error_code ec;
93 if ((ec = result.getError())) {
94 LOG4CXX_ERROR(logger, "Failed to load Binary" << ec.message());
95 binary = NULL;
96 return;
97 }
98
99 #if defined(LLVM_35)
100 binary.reset(result.get());
101 #elif defined(LLVM_36)
102 OwningBinary<Binary> ob;
103 ob = std::move(result.get());
104 Binary* op = ob.getBinary();
105
106 binary.reset(op);
107 #endif
108
109 o = dyn_cast<ObjectFile>(binary.get());
110 } else {
111 o = file;
112 binary.reset(file);
113 }
114
115 triple.setArch(Triple::ArchType(o->getArch()));
116 std::string tripleName(triple.getTriple());
117
118 LOG4CXX_INFO(logger, "Architecture " << tripleName);
119
120
121 std::string es;
122 target = TargetRegistry::lookupTarget("", triple, es);
123 if (!target) {
124 LOG4CXX_ERROR(logger, es);
125 return;
126 }
127
128 LOG4CXX_INFO(logger, "Target " << target->getName());
129
130 MRI.reset(target->createMCRegInfo(tripleName));
131 if (!MRI) {
132 LOG4CXX_ERROR(logger, "no register info for target " << tripleName);
133 return;
134 }
135
136 // Set up disassembler.
137 AsmInfo.reset(target->createMCAsmInfo(*MRI, tripleName));
138 if (!AsmInfo) {
139 LOG4CXX_ERROR(logger, "no assembly info for target " << tripleName);
140 return;
141 }
142
143 STI.reset(target->createMCSubtargetInfo(tripleName, "", ""));
144 if (!STI) {
145 LOG4CXX_ERROR(logger, "no subtarget info for target " << tripleName);
146 return;
147 }
148
149 MII.reset(target->createMCInstrInfo());
150 if (!MII) {
151 LOG4CXX_ERROR(logger, "no instruction info for target " << tripleName);
152 return;
153 }
154
155 MOFI.reset(new MCObjectFileInfo);
156 MCContext Ctx(AsmInfo.get(), MRI.get(), MOFI.get());
157
158 DisAsm.reset(target->createMCDisassembler(*STI, Ctx));
159 if (!DisAsm) {
160 LOG4CXX_ERROR(logger, "no disassembler for target " << tripleName);
161 return;
162 }
163 RelInfo.reset(
164 target->createMCRelocationInfo(tripleName, Ctx));
165 if (RelInfo) {
166 // Symzer.reset(
167 // MCObjectSymbolizer::createObjectSymbolizer(Ctx, std::move(RelInfo), o));
168 // if (Symzer)
169 // DisAsm->setSymbolizer(std::move(Symzer));
170 }
171 RelInfo.release();
172 Symzer.release();
173
174 MIA.reset(target->createMCInstrAnalysis(MII.get()));
175 if (!MIA) {
176 LOG4CXX_ERROR(logger, "no instruction analysis for target " << tripleName);
177 return;
178 }
179
180 int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
181 IP.reset(target->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI));
182 if (!IP) {
183 LOG4CXX_ERROR(logger, "no instruction printer for target " << tripleName);
184 return;
185 }
186
187 IP->setPrintImmHex(llvm::HexStyle::C);
188 IP->setPrintImmHex(true);
189
190 // std::unique_ptr<MCObjectDisassembler> OD(
191 // new MCObjectDisassembler(*o, *DisAsm, *MIA));
192 //Mod.reset(OD->buildModule(false));
193
194 readSections();
195 }
196
197 template <typename ELFT>
198 void LLVMDisassembler<ELFT>::start() {
199 readSymbols();
200 disassemble();
201 readDynamicSymbols();
202 }
203
204 template <typename ELFT>
205 LLVMDisassembler<ELFT>::~LLVMDisassembler() {}
206
207 template <typename ELFT>
208 Function* LLVMDisassembler<ELFT>::disassembleFunctionAt(uint64_t address, const std::string& name) {
209 Function * function;
210 SectionRef text_section = getTextSection();
211 uint64_t base_address, size;
212 #if defined(LLVM_35)
213 text_section.getAddress(base_address);
214 text_section.getSize(size);
215 #elif defined(LLVM_36)
216 base_address = text_section.getAddress();
217 size = text_section.getSize();
218 #endif
219 if (address < base_address ||
220 address >= base_address + size) {
221 return NULL;
222 }
223
224 if (NULL == (function = manager->getFunction(address))) {
225
226 if (name == "") {
227 std::stringstream s;
228 s << "<Unnamed 0x" << std::hex << address << ">";
229 function = manager->newFunction(address);
230 function->setName(s.str());
231 } else {
232 function = manager->newFunction(address);
233 function->setName(name);
234 }
235 disassembleFunction(function);
236 }
237
238 return function;
239 }
240
241 template <typename ELFT>
242 void LLVMDisassembler<ELFT>::disassembleFunction(Function* function) {
243 std::vector<uint64_t> called_functions;
244 std::stack<BasicBlock*> remaining_blocks;
245 /* TODO:
246 * Do all blocks get added properly? We should take care to remove
247 * the other ones at the end of the function!
248 */
249 std::map<uint64_t, BasicBlock*> new_blocks;
250 SectionRef text_section = getTextSection();
251 StringRef bytes;
252 text_section.getContents(bytes);
253 #if defined(LLVM_35)
254 StringRefMemoryObject ref(bytes);
255 #elif defined(LLVM_36)
256 ArrayRef<uint8_t> bytearray(reinterpret_cast<const uint8_t *>(bytes.data()),
257 bytes.size());
258 #else
259 #error LLVM != 3.5 | 3.6 not supported
260 #endif
261
262 LOG4CXX_DEBUG(logger, "Handling function " << function->getName());
263
264 BasicBlock * block = manager->newBasicBlock(function->getStartAddress());
265 remaining_blocks.push(block);
266 new_blocks.insert(std::make_pair(block->getStartAddress(), block));
267 function->addBasicBlock(block);
268
269 uint64_t base_address, size;
270 #if defined(LLVM_35)
271 text_section.getAddress(base_address);
272 text_section.getSize(size);
273 #elif defined(LLVM_36)
274 base_address = text_section.getAddress();
275 size = text_section.getSize();
276 #endif
277 LOG4CXX_DEBUG(logger, "Text section at " << std::hex << base_address << " with size " << size);
278
279 while (remaining_blocks.size()) {
280 BasicBlock * current_block = remaining_blocks.top();
281 remaining_blocks.pop();
282
283 LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex
284 << current_block->getStartAddress());
285
286 uint64_t inst_size;
287 uint64_t current_address = current_block->getStartAddress() - base_address;
288 while(true) {
289 MCInst inst;
290 std::string buf;
291 llvm::raw_string_ostream s(buf);
292
293 if(llvm::MCDisassembler::Success ==
294 #if defined(LLVM_35)
295 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
296 #elif defined(LLVM_36)
297 DisAsm->getInstruction(inst, inst_size,
298 bytearray.slice(current_address),
299 base_address + current_address,
300 nulls(), nulls())) {
301 #endif
302 uint64_t jmptarget;
303
304 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
305 jmptarget += base_address;
306 if (!MIA->isIndirectBranch(inst)) {
307 if (MIA->isCall(inst)) {
308 if (NULL == manager->getFunction(jmptarget))
309 called_functions.push_back(jmptarget);
310 } else {
311 current_block->setNextBlock(0, jmptarget);
312 if (new_blocks.find(jmptarget) == new_blocks.end()) {
313 BasicBlock * block = manager->newBasicBlock(jmptarget);
314 assert(block);
315 new_blocks.insert(std::make_pair(block->getStartAddress(), block));
316 function->addBasicBlock(block);
317 remaining_blocks.push(block);
318 } else {
319 LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex
320 << current_block->getStartAddress());
321 function->addBasicBlock(new_blocks.find(jmptarget)->second);
322 }
323 if (MIA->isConditionalBranch(inst)) {
324 jmptarget = base_address + current_address + inst_size;
325 current_block->setNextBlock(1, jmptarget);
326 if (new_blocks.find(jmptarget) == new_blocks.end()) {
327 BasicBlock * block = manager->newBasicBlock(jmptarget);
328 assert(block);
329 new_blocks.insert(std::make_pair(block->getStartAddress(), block));
330 function->addBasicBlock(block);
331 remaining_blocks.push(block);
332 } else {
333 LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex
334 << current_block->getStartAddress());
335 function->addBasicBlock(new_blocks.find(jmptarget)->second);
336 }
337 }
338 }
339 }
340 }
341 } else {
342 inst_size = 0;
343 }
344
345
346 if (inst_size == 0 || MIA->isTerminator(inst) || MIA->isBranch(inst)) {
347 current_block->setEndAddress(current_address + base_address + inst_size);
348 LOG4CXX_DEBUG(logger, "Finished Block at " << std::hex <<
349 current_block->getEndAddress());
350 break;
351 }
352 current_address += inst_size;
353 }
354 }
355 splitBlocks(function);
356 LOG4CXX_DEBUG(logger, "Finished function " << function->getName());
357 manager->finishFunction(function);
358 for (uint64_t address : called_functions)
359 disassembleFunctionAt(address);
360 }
361
362 template <typename ELFT>
363 void LLVMDisassembler<ELFT>::disassemble() {
364 SectionRef text_section = getTextSection();
365 std::vector<Function*> remaining_functions;
366
367 // Assume all function symbols actually start a real function
368 for (auto x = symbols.begin(); x != symbols.end(); ++x) {
369 uint64_t result;
370 bool contains;
371 SymbolRef::Type symbol_type;
372
373 #if defined(LLVM_35)
374 if (text_section.containsSymbol(x->second, contains) || !contains)
375 #elif defined(LLVM_36)
376 if (text_section.containsSymbol(x->second))
377 #endif
378 continue;
379
380 if (x->second.getType(symbol_type)
381 || SymbolRef::ST_Function != symbol_type)
382 continue;
383
384 if (!x->second.getAddress(result)) {
385 Function * fun = manager->newFunction(result);
386 if (fun) {
387 fun->setName(x->first);
388 remaining_functions.push_back(fun);
389 LOG4CXX_DEBUG(logger, "Disasembling " << x->first);
390 } else {
391 LOG4CXX_DEBUG(logger, "Function at " << std::hex << result
392 << " already disassembled as " << manager->getFunction(result)->getName());
393 }
394 }
395 }
396
397 for (Function* function : remaining_functions) {
398 disassembleFunction(function);
399 manager->finishFunction(function);
400 }
401
402 if (binary->isELF()) {
403 uint64_t _entryAddress = entryAddress();
404 LOG4CXX_DEBUG(logger, "Adding entryAddress at: " << std::hex << _entryAddress);
405 std::stringstream s;
406 s << "<_start 0x" << std::hex << _entryAddress << ">";
407
408 disassembleFunctionAt(_entryAddress, s.str());
409 }
410
411 if (!manager->hasFunctions()) {
412 uint64_t text_entry;
413 #if defined(LLVM_35)
414 text_section.getAddress(text_entry);
415 #elif defined(LLVM_36)
416 text_entry = text_section.getAddress();
417 #endif
418 LOG4CXX_INFO(logger, "No Symbols found, starting at the beginning of the text segment");
419 disassembleFunctionAt(text_entry);
420 }
421 }
422
423 template <>
424 uint64_t LLVMDisassembler<COFFT>::entryAddress() {
425 const auto coffobject = dyn_cast<COFFObjectFile>(o);
426 const struct pe32_header* pe32_header;
427 const struct pe32plus_header* pe32plus_header;
428
429 coffobject->getPE32PlusHeader(pe32plus_header);
430
431 if (pe32plus_header) {
432 return pe32plus_header->AddressOfEntryPoint;
433 } else {
434 coffobject->getPE32Header(pe32_header);
435 return pe32_header->AddressOfEntryPoint;
436 }
437 }
438
439 template<>
440 uint64_t LLVMDisassembler<MACHOT>::entryAddress() {
441 // TODO
442 return 0;
443 }
444
445 template <typename ELFT>
446 uint64_t LLVMDisassembler<ELFT>::entryAddress() {
447 const auto elffile = dyn_cast<ELFObjectFile<ELFT>>(o)->getELFFile();
448 const auto * header = elffile->getHeader();
449
450 return header->e_entry;
451 }
452
453 template <typename ELFT>
454 void LLVMDisassembler<ELFT>::splitBlocks(Function* function) {
455 SectionRef text_section = getTextSection();
456 StringRef bytes;
457 text_section.getContents(bytes);
458 #if defined(LLVM_35)
459 StringRefMemoryObject ref(bytes);
460 #elif defined(LLVM_36)
461 ArrayRef<uint8_t> bytearray(reinterpret_cast<const uint8_t *>(bytes.data()),
462 bytes.size());
463 #endif
464
465
466 LOG4CXX_DEBUG(logger, "Splitting Blocks in Function " << function->getName());
467 // Split blocks where jumps are going inside the block
468 for (auto it = function->blocks().begin();
469 it != function->blocks().end();
470 ++it) {
471 BasicBlock * current_block = it->second;
472 if (current_block->getEndAddress() == 0) {
473 LOG4CXX_ERROR(logger, "UNFINISHED BLOCK " << std::hex << current_block->getStartAddress());
474 break;
475 }
476 uint64_t inst_size;
477 uint64_t base_address;
478 #if defined(LLVM_35)
479 text_section.getAddress(base_address);
480 #elif defined(LLVM_36)
481 base_address = text_section.getAddress();
482 #endif
483 uint64_t current_address = current_block->getStartAddress() - base_address;
484 while(current_block->getEndAddress() - base_address > current_address) {
485 MCInst inst;
486 std::string buf;
487 llvm::raw_string_ostream s(buf);
488
489 if(llvm::MCDisassembler::Success ==
490 #if defined(LLVM_35)
491 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
492 #elif defined(LLVM_36)
493 DisAsm->getInstruction(inst, inst_size,
494 bytearray.slice(current_address),
495 base_address + current_address,
496 nulls(), nulls())) {
497 #endif
498
499 // See if some other block starts here
500 BasicBlock* other = manager->getBasicBlock(current_address
501 + inst_size
502 + base_address);
503
504 // Special case, other block starts here but we are at the end anyway
505 if (other != NULL) {
506 uint64_t endaddress = current_address + inst_size + base_address;
507 if (endaddress != current_block->getEndAddress()) {
508 LOG4CXX_DEBUG(logger, "Shortening block starting at "
509 << std::hex
510 << current_block->getStartAddress()
511 << " now ending at "
512 << other->getStartAddress());
513 function->addBasicBlock(other);
514 current_block->setEndAddress(endaddress);
515 current_block->setNextBlock(0, other->getStartAddress());
516 current_block->setNextBlock(1, 0);
517 }
518 }
519 } else {
520 inst_size = 1;
521 }
522 current_address += inst_size;
523 }
524 }
525 }
526
527 template<>
528 void LLVMDisassembler<COFFT>::readDynamicSymbols() {
529 //TODO
530 }
531
532 template<>
533 void LLVMDisassembler<MACHOT>::readDynamicSymbols() {
534 //TODO
535 }
536
537 template <typename ELFT>
538 void LLVMDisassembler<ELFT>::readDynamicSymbols() {
539 const auto elffile = dyn_cast<ELFObjectFile<ELFT>>(o)->getELFFile();
540 for (auto it = elffile->begin_dynamic_symbols(),
541 end = elffile->end_dynamic_symbols();
542 it != end;
543 ++it) {
544 if (it->getType() == 2) { // Function
545 bool is_default;
546 // TODO: Error handling
547 std::string symbolname = *(elffile->getSymbolName(it));
548 std::string symbolversion = *(elffile->getSymbolVersion(nullptr, &*it, is_default));
549 // TODO: actually get the symbol address from relocations
550 Function* f = manager->newDynamicFunction(0);
551 f->setName(symbolname + (is_default? "@@" : "@") + symbolversion);
552 manager->finishFunction(f);
553
554 LOG4CXX_DEBUG(logger, "Adding dynamic Symbol " << symbolname << (is_default? "@@" : "@") << symbolversion);
555 }
556 }
557 }
558
559 template <typename ELFT>
560 void LLVMDisassembler<ELFT>::readSymbols() {
561 error_code ec;
562 symbol_iterator si(o->symbol_begin()), se(o->symbol_end());
563 for (; si != se; ++si) {
564 StringRef name;
565 if ((ec = si->getName(name))) {
566 LOG4CXX_ERROR(logger, ec.message());
567 break;
568 }
569 LOG4CXX_DEBUG(logger, "Added symbol " << name.str());
570 symbols.insert(make_pair(name.str(), *si));
571 }
572 }
573
574 template <typename ELFT>
575 void LLVMDisassembler<ELFT>::readSections() {
576 error_code ec;
577 section_iterator i(o->section_begin()), e(o->section_end());
578 for (; i != e; ++i) {
579 StringRef name;
580 if ((ec = i->getName(name))) {
581 LOG4CXX_ERROR(logger, ec.message());
582 break;
583 }
584 LOG4CXX_DEBUG(logger, "Added section " << name.str());
585 sections.insert(make_pair(name.str(), *i));
586 }
587
588 }
589
590 // template <typename ELFT>
591 // void LLVMDisassembler<ELFT>::forEachFunction(std::function<void (uint64_t, Function*)> callback) {
592 // // std::for_each(functions.begin(), functions.end(),
593 // // [&](std::pair<uint64_t, Function*> x) {
594 // // callback(x.first, x.second);
595 // // });
596 // }
597
598 template <typename ELFT>
599 std::vector<Instruction> LLVMDisassembler<ELFT>::getInstructions(const BasicBlock *block) {
600 std::vector<Instruction> result;
601 SectionRef text_section = getTextSection();
602 uint64_t base_address;
603 #if defined(LLVM_35)
604 text_section.getAddress(base_address);
605 #elif defined(LLVM_36)
606 base_address = text_section.getAddress();
607 #endif
608
609 uint64_t current_address = block->getStartAddress() - base_address;
610 uint64_t end_position = block->getEndAddress() - base_address;
611
612 StringRef bytes;
613 text_section.getContents(bytes);
614 #if defined(LLVM_35)
615 StringRefMemoryObject ref(bytes);
616 #elif defined(LLVM_36)
617 ArrayRef<uint8_t> bytearray(reinterpret_cast<const uint8_t *>(bytes.data()),
618 bytes.size());
619 #endif
620
621
622 while (current_address < end_position) {
623 uint64_t inst_size;
624 MCInst inst;
625 std::string buf;
626 llvm::raw_string_ostream s(buf);
627
628 if(llvm::MCDisassembler::Success ==
629 #if defined(LLVM_35)
630 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
631 #elif defined(LLVM_36)
632 DisAsm->getInstruction(inst, inst_size,
633 bytearray.slice(current_address),
634 base_address + current_address,
635 nulls(), nulls())) {
636 #endif
637
638 uint8_t bytes[inst_size+2];
639 #if defined(LLVM_35)
640 ref.readBytes(current_address, inst_size, bytes);
641 #elif defined(LLVM_36)
642 size_t bytesindex(0);
643 for (uint8_t byte : bytearray.slice(current_address, inst_size)) {
644 bytes[bytesindex++] = byte;
645 }
646 #endif
647
648 uint64_t jmptarget;
649 std::string ref("");
650 IP->printInst(&inst, s, "");
651 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
652 std::stringstream stream;
653 if (MIA->isCall(inst))
654 stream << "function:";
655 else
656 stream << "block:";
657
658 stream << std::hex << (base_address + jmptarget);
659 ref = stream.str();
660 }
661 result.push_back(Instruction(current_address + base_address, boost::algorithm::trim_copy(s.str()),
662 std::vector<uint8_t>(bytes, bytes+inst_size), ref));
663 } else {
664 LOG4CXX_WARN(logger, "Invalid byte at" << std::hex << current_address + base_address);
665 uint8_t bytes[1];
666 #if defined(LLVM_35)
667 ref.readBytes(current_address, 1, bytes);
668 #elif defined(LLVM_36)
669 bytes[0] = bytearray[current_address];
670 #endif
671 result.push_back(Instruction(current_address + base_address, "Invalid Instruction",
672 std::vector<uint8_t>(bytes, bytes+1), ""));
673 inst_size = 1;
674 }
675
676 current_address += inst_size;
677 }
678 return result;
679 }
680
681 template <typename ELFT>
682 void LLVMDisassembler<ELFT>::printEachInstruction(uint64_t start, uint64_t end,
683 std::function<void (uint8_t*, size_t,
684 const std::string&,
685 const std::string&)> fun) {
686 SectionRef text_section = getTextSection();
687 uint64_t base_address;
688 #if defined(LLVM_35)
689 text_section.getAddress(base_address);
690 #elif defined(LLVM_36)
691 base_address = text_section.getAddress();
692 #endif
693
694 uint64_t current_address = start - base_address;
695
696 StringRef bytes;
697 text_section.getContents(bytes);
698 #if defined(LLVM_35)
699 StringRefMemoryObject ref(bytes);
700 #elif defined(LLVM_36)
701 ArrayRef<uint8_t> bytearray(reinterpret_cast<const uint8_t *>(bytes.data()),
702 bytes.size());
703 #endif
704
705
706 while (current_address < end - base_address) {
707 uint64_t inst_size;
708 MCInst inst;
709 std::string buf;
710 llvm::raw_string_ostream s(buf);
711
712 if(llvm::MCDisassembler::Success ==
713 #if defined(LLVM_35)
714 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
715 #elif defined(LLVM_36)
716 DisAsm->getInstruction(inst, inst_size,
717 bytearray.slice(current_address),
718 base_address + current_address,
719 nulls(), nulls())) {
720 #endif
721
722 uint8_t bytes[inst_size+2];
723 #if defined(LLVM_35)
724 ref.readBytes(current_address, inst_size, bytes);
725 #elif defined(LLVM_36)
726 size_t bytesindex(0);
727 for (uint8_t byte : bytearray.slice(current_address, inst_size)) {
728 bytes[bytesindex++] = byte;
729 }
730 #endif
731
732 uint64_t jmptarget;
733 std::string ref("");
734 IP->printInst(&inst, s, "");
735 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
736 std::stringstream stream;
737 if (MIA->isCall(inst))
738 stream << "function:";
739 else
740 stream << "block:";
741
742 stream << std::hex << (base_address + jmptarget);
743 ref = stream.str();
744 }
745
746
747 fun(bytes, inst_size, s.str(), ref);
748 } else {
749 LOG4CXX_WARN(logger, "Invalid byte at" << std::hex << current_address + base_address);
750 fun(NULL, 0, "Invalid Byte", "");
751 inst_size = 1;
752 }
753
754 current_address += inst_size;
755 }
756 }
757
758 template <typename ELFT>
759 SectionRef LLVMDisassembler<ELFT>::getTextSection() {
760 return sections[".text"];
761 }
762
763 template <>
764 SectionRef LLVMDisassembler<MACHOT>::getTextSection() {
765 return sections["__text"];
766 }