]> git.siccegge.de Git - frida/frida.git/blob - src/disassembler/llvm/LLVMDisassembler.cxx
Use proper hierarchical names for loggers
[frida/frida.git] / src / disassembler / llvm / LLVMDisassembler.cxx
1 #include "disassembler/llvm/LLVMDisassembler.hxx"
2 #include "core/InformationManager.hxx"
3 #include "core/Function.hxx"
4 #include "core/BasicBlock.hxx"
5
6 #include <stack>
7 #include <algorithm>
8 #include <cassert>
9
10 using namespace llvm;
11 using namespace llvm::object;
12 using std::error_code;
13
namespace {

/**
 * Empty tag type. It is passed as the template argument of
 * LLVMDisassembler for COFF inputs, in the position where the ELF
 * variants pass an ELFType<...>.
 */
class COFFT {};

} // anonymous namespace
19
20 /*
21 *
22 */
23 Disassembler * createLLVMDisassembler(const std::string& filename, InformationManager* manager) {
24 if (filename == "")
25 return NULL;
26
27 std::unique_ptr<Binary> o;
28 o.reset(createBinary(filename).get());
29 Binary * op = o.release();
30
31 // ELFType<endian, maxalign, 64bit>
32 if (ELF32LEObjectFile * object = dyn_cast<ELF32LEObjectFile>(op)) {
33 return new LLVMDisassembler<ELFType<support::little, 2, false>>(filename, manager, object);
34 }
35 if (ELF64LEObjectFile * object = dyn_cast<ELF64LEObjectFile>(op)) {
36 return new LLVMDisassembler<ELFType<support::little, 2, true>>(filename, manager, object);
37 }
38 if (ELF32BEObjectFile * object = dyn_cast<ELF32BEObjectFile>(op)) {
39 return new LLVMDisassembler<ELFType<support::big, 2, false>>(filename, manager, object);
40 }
41 if (ELF64BEObjectFile * object = dyn_cast<ELF64BEObjectFile>(op)) {
42 return new LLVMDisassembler<ELFType<support::big, 2, true>>(filename, manager, object);
43 }
44 if (COFFObjectFile * object = dyn_cast<COFFObjectFile>(op)) {
45 return new LLVMDisassembler<COFFT>(filename, manager, object);
46 }
47
48 return NULL;
49 }
50
51 /*
52 * TODO: fallback code falls die Datei kein ELF/PE/COFF/MacO/.. binary
53 * ist sondern z.B. einfach nur Instruktionen oder ein Bootsektor oder
54 * foo
55 */
56 template <typename ELFT>
57 LLVMDisassembler<ELFT>::LLVMDisassembler(const std::string& filename,
58 InformationManager* manager,
59 ObjectFile* file)
60 : Disassembler()
61 , logger(log4cxx::Logger::getLogger("disassembler.LLVMDisassembler"))
62 , triple("unknown-unknown-unknown")
63 , manager(manager)
64 {
65 LOG4CXX_DEBUG(logger, "Handling file " << filename);
66
67 if (!file) {
68 auto result = createBinary(filename);
69
70 error_code ec;
71 if ((ec = result.getError())) {
72 LOG4CXX_ERROR(logger, "Failed to load Binary" << ec.message());
73 binary = NULL;
74 return;
75 }
76
77 binary.reset(result.get());
78
79 o = dyn_cast<ObjectFile>(binary.get());
80 } else {
81 o = file;
82 binary.reset(file);
83 }
84
85 triple.setArch(Triple::ArchType(o->getArch()));
86 std::string tripleName(triple.getTriple());
87
88 LOG4CXX_INFO(logger, "Architecture " << tripleName);
89
90
91 std::string es;
92 target = TargetRegistry::lookupTarget("", triple, es);
93 if (!target) {
94 LOG4CXX_ERROR(logger, es);
95 return;
96 }
97
98 LOG4CXX_INFO(logger, "Target " << target->getName());
99
100 MRI.reset(target->createMCRegInfo(tripleName));
101 if (!MRI) {
102 LOG4CXX_ERROR(logger, "no register info for target " << tripleName);
103 return;
104 }
105
106 // Set up disassembler.
107 AsmInfo.reset(target->createMCAsmInfo(*MRI, tripleName));
108 if (!AsmInfo) {
109 LOG4CXX_ERROR(logger, "no assembly info for target " << tripleName);
110 return;
111 }
112
113 STI.reset(target->createMCSubtargetInfo(tripleName, "", ""));
114 if (!STI) {
115 LOG4CXX_ERROR(logger, "no subtarget info for target " << tripleName);
116 return;
117 }
118
119 MII.reset(target->createMCInstrInfo());
120 if (!MII) {
121 LOG4CXX_ERROR(logger, "no instruction info for target " << tripleName);
122 return;
123 }
124
125 MOFI.reset(new MCObjectFileInfo);
126 MCContext Ctx(AsmInfo.get(), MRI.get(), MOFI.get());
127
128 DisAsm.reset(target->createMCDisassembler(*STI, Ctx));
129 if (!DisAsm) {
130 LOG4CXX_ERROR(logger, "no disassembler for target " << tripleName);
131 return;
132 }
133 RelInfo.reset(
134 target->createMCRelocationInfo(tripleName, Ctx));
135 if (RelInfo) {
136 // Symzer.reset(
137 // MCObjectSymbolizer::createObjectSymbolizer(Ctx, std::move(RelInfo), o));
138 // if (Symzer)
139 // DisAsm->setSymbolizer(std::move(Symzer));
140 }
141 RelInfo.release();
142 Symzer.release();
143
144 MIA.reset(target->createMCInstrAnalysis(MII.get()));
145 if (!MIA) {
146 LOG4CXX_ERROR(logger, "no instruction analysis for target " << tripleName);
147 return;
148 }
149
150 int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
151 IP.reset(target->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI));
152 if (!IP) {
153 LOG4CXX_ERROR(logger, "no instruction printer for target " << tripleName);
154 return;
155 }
156
157 IP->setPrintImmHex(llvm::HexStyle::C);
158 IP->setPrintImmHex(true);
159
160 std::unique_ptr<MCObjectDisassembler> OD(
161 new MCObjectDisassembler(*o, *DisAsm, *MIA));
162 Mod.reset(OD->buildModule(false));
163
164 readSections();
165 }
166
167 template <typename ELFT>
168 void LLVMDisassembler<ELFT>::start() {
169 readSymbols();
170 disassemble();
171 readDynamicSymbols();
172 }
173
174 template <typename ELFT>
175 LLVMDisassembler<ELFT>::~LLVMDisassembler() {}
176
177 template <typename ELFT>
178 Function* LLVMDisassembler<ELFT>::disassembleFunctionAt(uint64_t address, const std::string& name) {
179 Function * function;
180 SectionRef text_section = sections[".text"];
181 uint64_t base_address, size;
182 text_section.getAddress(base_address);
183 text_section.getSize(size);
184
185 if (address < base_address ||
186 address >= base_address + size) {
187 return NULL;
188 }
189
190 if (NULL == (function = manager->getFunction(address))) {
191
192 if (name == "") {
193 std::stringstream s;
194 s << "<Unnamed 0x" << std::hex << address << ">";
195 function = manager->newFunction(address);
196 function->setName(s.str());
197 } else {
198 function = manager->newFunction(address);
199 function->setName(name);
200 }
201 disassembleFunction(function);
202 }
203
204 return function;
205 }
206
207 template <typename ELFT>
208 void LLVMDisassembler<ELFT>::disassembleFunction(Function* function) {
209 std::vector<uint64_t> called_functions;
210 std::stack<BasicBlock*> remaining_blocks;
211 /* TODO:
212 * Do all blocks get added properly? We should take care to remove
213 * the other ones at the end of the function!
214 */
215 std::map<uint64_t, BasicBlock*> new_blocks;
216 SectionRef text_section = sections[".text"];
217 StringRef bytes;
218 text_section.getContents(bytes);
219 StringRefMemoryObject ref(bytes);
220
221 LOG4CXX_DEBUG(logger, "Handling function " << function->getName());
222
223 BasicBlock * block = manager->newBasicBlock(function->getStartAddress());
224 remaining_blocks.push(block);
225 new_blocks.insert(std::make_pair(block->getStartAddress(), block));
226 function->addBasicBlock(block);
227
228 while (remaining_blocks.size()) {
229 BasicBlock * current_block = remaining_blocks.top();
230 remaining_blocks.pop();
231
232 LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex
233 << current_block->getStartAddress());
234
235 uint64_t inst_size;
236 uint64_t base_address;
237 text_section.getAddress(base_address);
238 uint64_t current_address = current_block->getStartAddress() - base_address;
239 while(true) {
240 MCInst inst;
241 std::string buf;
242 llvm::raw_string_ostream s(buf);
243
244 if(llvm::MCDisassembler::Success ==
245 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
246 uint64_t jmptarget;
247
248 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
249 jmptarget += base_address;
250 if (!MIA->isIndirectBranch(inst)) {
251 if (MIA->isCall(inst)) {
252 if (NULL == manager->getFunction(jmptarget))
253 called_functions.push_back(jmptarget);
254 } else {
255 current_block->setNextBlock(0, jmptarget);
256 if (new_blocks.find(jmptarget) == new_blocks.end()) {
257 BasicBlock * block = manager->newBasicBlock(jmptarget);
258 assert(block);
259 new_blocks.insert(std::make_pair(block->getStartAddress(), block));
260 function->addBasicBlock(block);
261 remaining_blocks.push(block);
262 } else {
263 LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex
264 << current_block->getStartAddress());
265 function->addBasicBlock(new_blocks.find(jmptarget)->second);
266 }
267 if (MIA->isConditionalBranch(inst)) {
268 jmptarget = base_address + current_address + inst_size;
269 current_block->setNextBlock(1, jmptarget);
270 if (new_blocks.find(jmptarget) == new_blocks.end()) {
271 BasicBlock * block = manager->newBasicBlock(jmptarget);
272 assert(block);
273 new_blocks.insert(std::make_pair(block->getStartAddress(), block));
274 function->addBasicBlock(block);
275 remaining_blocks.push(block);
276 } else {
277 LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex
278 << current_block->getStartAddress());
279 function->addBasicBlock(new_blocks.find(jmptarget)->second);
280 }
281 }
282 }
283 }
284 }
285 } else {
286 inst_size = 0;
287 }
288
289
290 if (inst_size == 0 || MIA->isTerminator(inst) || MIA->isBranch(inst)) {
291 current_block->setEndAddress(current_address + base_address + inst_size);
292 LOG4CXX_DEBUG(logger, "Finished Block at " << std::hex <<
293 current_block->getEndAddress());
294 break;
295 }
296 current_address += inst_size;
297 }
298 }
299 splitBlocks(function);
300 LOG4CXX_DEBUG(logger, "Finished function " << function->getName());
301 manager->finishFunction(function);
302 for (uint64_t address : called_functions)
303 disassembleFunctionAt(address);
304 }
305
306 template <typename ELFT>
307 void LLVMDisassembler<ELFT>::disassemble() {
308 SectionRef text_section = sections[".text"];
309 std::vector<Function*> remaining_functions;
310
311 // Assume all function symbols actually start a real function
312 for (auto x = symbols.begin(); x != symbols.end(); ++x) {
313 uint64_t result;
314 bool contains;
315 SymbolRef::Type symbol_type;
316
317
318 if (text_section.containsSymbol(x->second, contains) || !contains)
319 continue;
320
321 if (x->second.getType(symbol_type)
322 || SymbolRef::ST_Function != symbol_type)
323 continue;
324
325 if (!x->second.getAddress(result)) {
326 Function * fun = manager->newFunction(result);
327 fun->setName(x->first);
328 remaining_functions.push_back(fun);
329 LOG4CXX_DEBUG(logger, "Disasembling " << x->first);
330 }
331 }
332
333 for (Function* function : remaining_functions) {
334 disassembleFunction(function);
335 manager->finishFunction(function);
336 }
337
338 if (binary->isELF()) {
339 uint64_t _entryAddress = entryAddress();
340 LOG4CXX_DEBUG(logger, "Adding entryAddress at: " << std::hex << _entryAddress);
341 std::stringstream s;
342 s << "<_start 0x" << std::hex << _entryAddress << ">";
343
344 disassembleFunctionAt(_entryAddress, s.str());
345 }
346
347 if (!manager->hasFunctions()) {
348 uint64_t text_entry;
349 text_section.getAddress(text_entry);
350 LOG4CXX_INFO(logger, "No Symbols found, starting at the beginning of the text segment");
351 disassembleFunctionAt(text_entry);
352 }
353 }
354
355 template <>
356 uint64_t LLVMDisassembler<COFFT>::entryAddress() {
357 const auto coffobject = dyn_cast<COFFObjectFile>(o);
358 const struct pe32_header* pe32_header;
359 const struct pe32plus_header* pe32plus_header;
360
361 coffobject->getPE32PlusHeader(pe32plus_header);
362
363 if (pe32plus_header) {
364 return pe32plus_header->AddressOfEntryPoint;
365 } else {
366 coffobject->getPE32Header(pe32_header);
367 return pe32_header->AddressOfEntryPoint;
368 }
369 }
370
371 template <typename ELFT>
372 uint64_t LLVMDisassembler<ELFT>::entryAddress() {
373 const auto elffile = dyn_cast<ELFObjectFile<ELFT>>(o)->getELFFile();
374 const auto * header = elffile->getHeader();
375
376 return header->e_entry;
377 }
378
379 template <typename ELFT>
380 void LLVMDisassembler<ELFT>::splitBlocks(Function* function) {
381 SectionRef text_section = sections[".text"];
382 StringRef bytes;
383 text_section.getContents(bytes);
384 StringRefMemoryObject ref(bytes);
385
386 // Split blocks where jumps are going inside the block
387 for (auto it = function->blocks().begin();
388 it != function->blocks().end();
389 ++it) {
390 BasicBlock * current_block = it->second;
391 uint64_t inst_size;
392 uint64_t base_address;
393 text_section.getAddress(base_address);
394 uint64_t current_address = current_block->getStartAddress() - base_address;
395 while(current_block->getEndAddress() - base_address > current_address) {
396 MCInst inst;
397 std::string buf;
398 llvm::raw_string_ostream s(buf);
399
400 if(llvm::MCDisassembler::Success ==
401 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
402 // See if some other block starts here
403 BasicBlock* other = manager->getBasicBlock(current_address
404 + inst_size
405 + base_address);
406
407 // Special case, other block starts here but we are at the end anyway
408 if (other != NULL) {
409 uint64_t endaddress = current_address + inst_size + base_address;
410 if (endaddress != current_block->getEndAddress()) {
411 LOG4CXX_DEBUG(logger, "Shortening block starting at "
412 << std::hex
413 << current_block->getStartAddress()
414 << " now ending at "
415 << other->getStartAddress());
416 function->addBasicBlock(other);
417 current_block->setEndAddress(endaddress);
418 current_block->setNextBlock(0, other->getStartAddress());
419 current_block->setNextBlock(1, 0);
420 }
421 }
422 } else {
423 inst_size = 1;
424 }
425 current_address += inst_size;
426 }
427 }
428 }
429
430 template<>
431 void LLVMDisassembler<COFFT>::readDynamicSymbols() {
432 //TODO
433 }
434
435 template <typename ELFT>
436 void LLVMDisassembler<ELFT>::readDynamicSymbols() {
437 const auto elffile = dyn_cast<ELFObjectFile<ELFT>>(o)->getELFFile();
438 for (auto it = elffile->begin_dynamic_symbols(),
439 end = elffile->end_dynamic_symbols();
440 it != end;
441 ++it) {
442 if (it->getType() == 2) { // Function
443 bool is_default;
444 // TODO: Error handling
445 std::string symbolname = *(elffile->getSymbolName(it));
446 std::string symbolversion = *(elffile->getSymbolVersion(nullptr, &*it, is_default));
447 manager->signal_new_dyn_symbol(symbolname + (is_default? "@@" : "@") + symbolversion);
448 LOG4CXX_DEBUG(logger, "Adding dynamic Symbol " << symbolname << (is_default? "@@" : "@") << symbolversion);
449 }
450 }
451 }
452
453 template <typename ELFT>
454 void LLVMDisassembler<ELFT>::readSymbols() {
455 error_code ec;
456 symbol_iterator si(o->symbol_begin()), se(o->symbol_end());
457 for (; si != se; ++si) {
458 StringRef name;
459 if ((ec = si->getName(name))) {
460 LOG4CXX_ERROR(logger, ec.message());
461 break;
462 }
463 LOG4CXX_DEBUG(logger, "Added symbol " << name.str());
464 symbols.insert(make_pair(name.str(), *si));
465 }
466 }
467
468 template <typename ELFT>
469 void LLVMDisassembler<ELFT>::readSections() {
470 error_code ec;
471 section_iterator i(o->section_begin()), e(o->section_end());
472 for (; i != e; ++i) {
473 StringRef name;
474 if ((ec = i->getName(name))) {
475 LOG4CXX_ERROR(logger, ec.message());
476 break;
477 }
478 LOG4CXX_DEBUG(logger, "Added section " << name.str());
479 sections.insert(make_pair(name.str(), *i));
480 }
481
482 }
483
484 // template <typename ELFT>
485 // void LLVMDisassembler<ELFT>::forEachFunction(std::function<void (uint64_t, Function*)> callback) {
486 // // std::for_each(functions.begin(), functions.end(),
487 // // [&](std::pair<uint64_t, Function*> x) {
488 // // callback(x.first, x.second);
489 // // });
490 // }
491
492 template <typename ELFT>
493 void LLVMDisassembler<ELFT>::printEachInstruction(uint64_t start, uint64_t end,
494 std::function<void (uint8_t*, size_t,
495 const std::string&,
496 const std::string&)> fun) {
497 SectionRef text_section = sections[".text"];
498 uint64_t base_address;
499 text_section.getAddress(base_address);
500 uint64_t current_address = start - base_address;
501
502 StringRef bytes;
503 text_section.getContents(bytes);
504 StringRefMemoryObject ref(bytes);
505
506 while (current_address < end - base_address) {
507 uint64_t inst_size;
508 MCInst inst;
509 std::string buf;
510 llvm::raw_string_ostream s(buf);
511
512 if(llvm::MCDisassembler::Success ==
513 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
514
515 uint8_t bytes[inst_size+2];
516 ref.readBytes(current_address, inst_size, bytes);
517
518 uint64_t jmptarget;
519 std::string ref("");
520 IP->printInst(&inst, s, "");
521 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
522 std::stringstream stream;
523 if (MIA->isCall(inst))
524 stream << "function:";
525 else
526 stream << "block:";
527
528 stream << std::hex << (base_address + jmptarget);
529 ref = stream.str();
530 }
531
532
533 fun(bytes, inst_size, s.str(), ref);
534 } else {
535 LOG4CXX_WARN(logger, "Invalid byte at" << std::hex << current_address + base_address);
536 fun(NULL, 0, "Invalid Byte", "");
537 inst_size = 1;
538 }
539
540 current_address += inst_size;
541 }
542 }