git.siccegge.de Git - frida/frida.git/blob - src/disassembler/llvm/LLVMDisassembler.cxx
Actually call finishFunction() when function is finished
[frida/frida.git] / src / disassembler / llvm / LLVMDisassembler.cxx
1 #include "disassembler/llvm/LLVMDisassembler.hxx"
2 #include "core/InformationManager.hxx"
3 #include "core/Function.hxx"
4 #include "core/BasicBlock.hxx"
5
6 #include <stack>
7 #include <algorithm>
8 #include <cassert>
9
10 using namespace llvm;
11 using namespace llvm::object;
12 using std::error_code;
13
namespace {
    /*
     * Empty tag type. It is used as the template argument of
     * LLVMDisassembler for COFF object files, which selects the
     * COFF-specific member specializations in this file.
     */
    class COFFT {};
}
19
20 /*
21 *
22 */
23 Disassembler * createLLVMDisassembler(const std::string& filename, InformationManager* manager) {
24 if (filename == "")
25 return NULL;
26
27 std::unique_ptr<Binary> o;
28 o.reset(createBinary(filename).get());
29 Binary * op = o.release();
30
31 // ELFType<endian, maxalign, 64bit>
32 if (ELF32LEObjectFile * object = dyn_cast<ELF32LEObjectFile>(op)) {
33 return new LLVMDisassembler<ELFType<support::little, 2, false>>(filename, manager, object);
34 }
35 if (ELF64LEObjectFile * object = dyn_cast<ELF64LEObjectFile>(op)) {
36 return new LLVMDisassembler<ELFType<support::little, 2, true>>(filename, manager, object);
37 }
38 if (ELF32BEObjectFile * object = dyn_cast<ELF32BEObjectFile>(op)) {
39 return new LLVMDisassembler<ELFType<support::big, 2, false>>(filename, manager, object);
40 }
41 if (ELF64BEObjectFile * object = dyn_cast<ELF64BEObjectFile>(op)) {
42 return new LLVMDisassembler<ELFType<support::big, 2, true>>(filename, manager, object);
43 }
44 if (COFFObjectFile * object = dyn_cast<COFFObjectFile>(op)) {
45 return new LLVMDisassembler<COFFT>(filename, manager, object);
46 }
47
48 return NULL;
49 }
50
51 /*
52 * TODO: fallback code falls die Datei kein ELF/PE/COFF/MacO/.. binary
53 * ist sondern z.B. einfach nur Instruktionen oder ein Bootsektor oder
54 * foo
55 */
56 template <typename ELFT>
57 LLVMDisassembler<ELFT>::LLVMDisassembler(const std::string& filename,
58 InformationManager* manager,
59 ObjectFile* file)
60 : Disassembler()
61 , logger(log4cxx::Logger::getLogger("LLVMDisassembler"))
62 , triple("unknown-unknown-unknown")
63 , manager(manager)
64 {
65 LOG4CXX_DEBUG(logger, "Handling file " << filename);
66
67 if (!file) {
68 auto result = createBinary(filename);
69
70 error_code ec;
71 if ((ec = result.getError())) {
72 LOG4CXX_ERROR(logger, "Failed to load Binary" << ec.message());
73 binary = NULL;
74 return;
75 }
76
77 binary.reset(result.get());
78
79 o = dyn_cast<ObjectFile>(binary.get());
80 } else {
81 o = file;
82 binary.reset(file);
83 }
84
85 triple.setArch(Triple::ArchType(o->getArch()));
86 std::string tripleName(triple.getTriple());
87
88 LOG4CXX_INFO(logger, "Architecture " << tripleName);
89
90
91 std::string es;
92 target = TargetRegistry::lookupTarget("", triple, es);
93 if (!target) {
94 LOG4CXX_ERROR(logger, es);
95 return;
96 }
97
98 LOG4CXX_INFO(logger, "Target " << target->getName());
99
100 MRI.reset(target->createMCRegInfo(tripleName));
101 if (!MRI) {
102 LOG4CXX_ERROR(logger, "no register info for target " << tripleName);
103 return;
104 }
105
106 // Set up disassembler.
107 AsmInfo.reset(target->createMCAsmInfo(*MRI, tripleName));
108 if (!AsmInfo) {
109 LOG4CXX_ERROR(logger, "no assembly info for target " << tripleName);
110 return;
111 }
112
113 STI.reset(target->createMCSubtargetInfo(tripleName, "", ""));
114 if (!STI) {
115 LOG4CXX_ERROR(logger, "no subtarget info for target " << tripleName);
116 return;
117 }
118
119 MII.reset(target->createMCInstrInfo());
120 if (!MII) {
121 LOG4CXX_ERROR(logger, "no instruction info for target " << tripleName);
122 return;
123 }
124
125 MOFI.reset(new MCObjectFileInfo);
126 MCContext Ctx(AsmInfo.get(), MRI.get(), MOFI.get());
127
128 DisAsm.reset(target->createMCDisassembler(*STI, Ctx));
129 if (!DisAsm) {
130 LOG4CXX_ERROR(logger, "no disassembler for target " << tripleName);
131 return;
132 }
133 RelInfo.reset(
134 target->createMCRelocationInfo(tripleName, Ctx));
135 if (RelInfo) {
136 // Symzer.reset(
137 // MCObjectSymbolizer::createObjectSymbolizer(Ctx, std::move(RelInfo), o));
138 // if (Symzer)
139 // DisAsm->setSymbolizer(std::move(Symzer));
140 }
141 RelInfo.release();
142 Symzer.release();
143
144 MIA.reset(target->createMCInstrAnalysis(MII.get()));
145 if (!MIA) {
146 LOG4CXX_ERROR(logger, "no instruction analysis for target " << tripleName);
147 return;
148 }
149
150 int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
151 IP.reset(target->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI));
152 if (!IP) {
153 LOG4CXX_ERROR(logger, "no instruction printer for target " << tripleName);
154 return;
155 }
156
157 IP->setPrintImmHex(llvm::HexStyle::C);
158 IP->setPrintImmHex(true);
159
160 std::unique_ptr<MCObjectDisassembler> OD(
161 new MCObjectDisassembler(*o, *DisAsm, *MIA));
162 Mod.reset(OD->buildModule(false));
163
164 readSections();
165 }
166
167 template <typename ELFT>
168 void LLVMDisassembler<ELFT>::start() {
169 readSymbols();
170 disassemble();
171 readDynamicSymbols();
172 }
173
174 template <typename ELFT>
175 LLVMDisassembler<ELFT>::~LLVMDisassembler() {}
176
177 template <typename ELFT>
178 Function* LLVMDisassembler<ELFT>::disassembleFunctionAt(uint64_t address, const std::string& name) {
179 Function * function;
180 SectionRef text_section = sections[".text"];
181 uint64_t base_address, size;
182 text_section.getAddress(base_address);
183 text_section.getSize(size);
184
185 if (address < base_address ||
186 address >= base_address + size) {
187 return NULL;
188 }
189
190 if (NULL == (function = manager->getFunction(address))) {
191
192 if (name == "") {
193 std::stringstream s;
194 s << "<Unnamed 0x" << std::hex << address << ">";
195 function = manager->newFunction(address);
196 function->setName(s.str());
197 } else {
198 function = manager->newFunction(address);
199 function->setName(name);
200 }
201 disassembleFunction(function);
202 }
203
204 return function;
205 }
206
207 template <typename ELFT>
208 void LLVMDisassembler<ELFT>::disassembleFunction(Function* function) {
209 std::stack<BasicBlock*> remaining_blocks;
210 /* TODO:
211 * Do all blocks get added properly? We should take care to remove
212 * the other ones at the end of the function!
213 */
214 std::map<uint64_t, BasicBlock*> new_blocks;
215 SectionRef text_section = sections[".text"];
216 StringRef bytes;
217 text_section.getContents(bytes);
218 StringRefMemoryObject ref(bytes);
219
220 LOG4CXX_DEBUG(logger, "Handling function " << function->getName());
221
222 BasicBlock * block = manager->newBasicBlock(function->getStartAddress());
223 remaining_blocks.push(block);
224 new_blocks.insert(std::make_pair(block->getStartAddress(), block));
225 function->addBasicBlock(block);
226
227 while (remaining_blocks.size()) {
228 BasicBlock * current_block = remaining_blocks.top();
229 remaining_blocks.pop();
230
231 LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex
232 << current_block->getStartAddress());
233
234 uint64_t inst_size;
235 uint64_t base_address;
236 text_section.getAddress(base_address);
237 uint64_t current_address = current_block->getStartAddress() - base_address;
238 while(true) {
239 MCInst inst;
240 std::string buf;
241 llvm::raw_string_ostream s(buf);
242
243 if(llvm::MCDisassembler::Success ==
244 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
245 uint64_t jmptarget;
246
247 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
248 jmptarget += base_address;
249 if (!MIA->isIndirectBranch(inst)) {
250 if (MIA->isCall(inst)) {
251 if (NULL == manager->getFunction(jmptarget))
252 disassembleFunctionAt(jmptarget);
253 } else {
254 current_block->setNextBlock(0, jmptarget);
255 if (new_blocks.find(jmptarget) == new_blocks.end()) {
256 BasicBlock * block = manager->newBasicBlock(jmptarget);
257 assert(block);
258 new_blocks.insert(std::make_pair(block->getStartAddress(), block));
259 function->addBasicBlock(block);
260 remaining_blocks.push(block);
261 } else {
262 LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex
263 << current_block->getStartAddress());
264 function->addBasicBlock(new_blocks.find(jmptarget)->second);
265 }
266 if (MIA->isConditionalBranch(inst)) {
267 jmptarget = base_address + current_address + inst_size;
268 current_block->setNextBlock(1, jmptarget);
269 if (new_blocks.find(jmptarget) == new_blocks.end()) {
270 BasicBlock * block = manager->newBasicBlock(jmptarget);
271 assert(block);
272 new_blocks.insert(std::make_pair(block->getStartAddress(), block));
273 function->addBasicBlock(block);
274 remaining_blocks.push(block);
275 } else {
276 LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex
277 << current_block->getStartAddress());
278 function->addBasicBlock(new_blocks.find(jmptarget)->second);
279 }
280 }
281 }
282 }
283 }
284 } else {
285 inst_size = 0;
286 }
287
288
289 if (inst_size == 0 || MIA->isTerminator(inst) || MIA->isBranch(inst)) {
290 current_block->setEndAddress(current_address + base_address + inst_size);
291 LOG4CXX_DEBUG(logger, "Finished Block at " << std::hex <<
292 current_block->getEndAddress());
293 break;
294 }
295 current_address += inst_size;
296 }
297 }
298 splitBlocks(function);
299 LOG4CXX_DEBUG(logger, "Finished function " << function->getName());
300 manager->finishFunction(function);
301 }
302
303 template <typename ELFT>
304 void LLVMDisassembler<ELFT>::disassemble() {
305 SectionRef text_section = sections[".text"];
306 std::vector<Function*> remaining_functions;
307
308 // Assume all function symbols actually start a real function
309 for (auto x = symbols.begin(); x != symbols.end(); ++x) {
310 uint64_t result;
311 bool contains;
312 SymbolRef::Type symbol_type;
313
314
315 if (text_section.containsSymbol(x->second, contains) || !contains)
316 continue;
317
318 if (x->second.getType(symbol_type)
319 || SymbolRef::ST_Function != symbol_type)
320 continue;
321
322 if (!x->second.getAddress(result)) {
323 Function * fun = manager->newFunction(result);
324 fun->setName(x->first);
325 remaining_functions.push_back(fun);
326 LOG4CXX_DEBUG(logger, "Disasembling " << x->first);
327 }
328 }
329
330 for (Function* function : remaining_functions) {
331 disassembleFunction(function);
332 manager->finishFunction(function);
333 }
334
335 if (binary->isELF()) {
336 uint64_t _entryAddress = entryAddress();
337 LOG4CXX_DEBUG(logger, "Adding entryAddress at: " << std::hex << _entryAddress);
338 std::stringstream s;
339 s << "<_start 0x" << std::hex << _entryAddress << ">";
340
341 disassembleFunctionAt(_entryAddress, s.str());
342 }
343
344 if (!manager->hasFunctions()) {
345 uint64_t text_entry;
346 text_section.getAddress(text_entry);
347 LOG4CXX_INFO(logger, "No Symbols found, starting at the beginning of the text segment");
348 disassembleFunctionAt(text_entry);
349 }
350 }
351
352 template <>
353 uint64_t LLVMDisassembler<COFFT>::entryAddress() {
354 const auto coffobject = dyn_cast<COFFObjectFile>(o);
355 const struct pe32_header* pe32_header;
356 const struct pe32plus_header* pe32plus_header;
357
358 coffobject->getPE32PlusHeader(pe32plus_header);
359
360 if (pe32plus_header) {
361 return pe32plus_header->AddressOfEntryPoint;
362 } else {
363 coffobject->getPE32Header(pe32_header);
364 return pe32_header->AddressOfEntryPoint;
365 }
366 }
367
368 template <typename ELFT>
369 uint64_t LLVMDisassembler<ELFT>::entryAddress() {
370 const auto elffile = dyn_cast<ELFObjectFile<ELFT>>(o)->getELFFile();
371 const auto * header = elffile->getHeader();
372
373 return header->e_entry;
374 }
375
376 template <typename ELFT>
377 void LLVMDisassembler<ELFT>::splitBlocks(Function* function) {
378 SectionRef text_section = sections[".text"];
379 StringRef bytes;
380 text_section.getContents(bytes);
381 StringRefMemoryObject ref(bytes);
382
383 // Split blocks where jumps are going inside the block
384 for (auto it = function->blocks().begin();
385 it != function->blocks().end();
386 ++it) {
387 BasicBlock * current_block = it->second;
388 uint64_t inst_size;
389 uint64_t base_address;
390 text_section.getAddress(base_address);
391 uint64_t current_address = current_block->getStartAddress() - base_address;
392 while(current_block->getEndAddress() - base_address > current_address) {
393 MCInst inst;
394 std::string buf;
395 llvm::raw_string_ostream s(buf);
396
397 if(llvm::MCDisassembler::Success ==
398 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
399 // See if some other block starts here
400 BasicBlock* other = manager->getBasicBlock(current_address
401 + inst_size
402 + base_address);
403
404 // Special case, other block starts here but we are at the end anyway
405 if (other != NULL) {
406 uint64_t endaddress = current_address + inst_size + base_address;
407 if (endaddress != current_block->getEndAddress()) {
408 LOG4CXX_DEBUG(logger, "Shortening block starting at "
409 << std::hex
410 << current_block->getStartAddress()
411 << " now ending at "
412 << other->getStartAddress());
413 function->addBasicBlock(other);
414 current_block->setEndAddress(endaddress);
415 current_block->setNextBlock(0, other->getStartAddress());
416 current_block->setNextBlock(1, 0);
417 }
418 }
419 } else {
420 inst_size = 1;
421 }
422 current_address += inst_size;
423 }
424 }
425 }
426
427 template<>
428 void LLVMDisassembler<COFFT>::readDynamicSymbols() {
429 //TODO
430 }
431
432 template <typename ELFT>
433 void LLVMDisassembler<ELFT>::readDynamicSymbols() {
434 const auto elffile = dyn_cast<ELFObjectFile<ELFT>>(o)->getELFFile();
435 for (auto it = elffile->begin_dynamic_symbols(),
436 end = elffile->end_dynamic_symbols();
437 it != end;
438 ++it) {
439 if (it->getType() == 2) { // Function
440 bool is_default;
441 // TODO: Error handling
442 std::string symbolname = *(elffile->getSymbolName(it));
443 std::string symbolversion = *(elffile->getSymbolVersion(nullptr, &*it, is_default));
444 manager->signal_new_dyn_symbol(symbolname + (is_default? "@@" : "@") + symbolversion);
445 LOG4CXX_DEBUG(logger, "Adding dynamic Symbol " << symbolname << (is_default? "@@" : "@") << symbolversion);
446 }
447 }
448 }
449
450 template <typename ELFT>
451 void LLVMDisassembler<ELFT>::readSymbols() {
452 error_code ec;
453 symbol_iterator si(o->symbol_begin()), se(o->symbol_end());
454 for (; si != se; ++si) {
455 StringRef name;
456 if ((ec = si->getName(name))) {
457 LOG4CXX_ERROR(logger, ec.message());
458 break;
459 }
460 LOG4CXX_DEBUG(logger, "Added symbol " << name.str());
461 symbols.insert(make_pair(name.str(), *si));
462 }
463 }
464
465 template <typename ELFT>
466 void LLVMDisassembler<ELFT>::readSections() {
467 error_code ec;
468 section_iterator i(o->section_begin()), e(o->section_end());
469 for (; i != e; ++i) {
470 StringRef name;
471 if ((ec = i->getName(name))) {
472 LOG4CXX_ERROR(logger, ec.message());
473 break;
474 }
475 LOG4CXX_DEBUG(logger, "Added section " << name.str());
476 sections.insert(make_pair(name.str(), *i));
477 }
478
479 }
480
481 // template <typename ELFT>
482 // void LLVMDisassembler<ELFT>::forEachFunction(std::function<void (uint64_t, Function*)> callback) {
483 // // std::for_each(functions.begin(), functions.end(),
484 // // [&](std::pair<uint64_t, Function*> x) {
485 // // callback(x.first, x.second);
486 // // });
487 // }
488
489 template <typename ELFT>
490 void LLVMDisassembler<ELFT>::printEachInstruction(uint64_t start, uint64_t end,
491 std::function<void (uint8_t*, size_t,
492 const std::string&,
493 const std::string&)> fun) {
494 SectionRef text_section = sections[".text"];
495 uint64_t base_address;
496 text_section.getAddress(base_address);
497 uint64_t current_address = start - base_address;
498
499 StringRef bytes;
500 text_section.getContents(bytes);
501 StringRefMemoryObject ref(bytes);
502
503 while (current_address < end - base_address) {
504 uint64_t inst_size;
505 MCInst inst;
506 std::string buf;
507 llvm::raw_string_ostream s(buf);
508
509 if(llvm::MCDisassembler::Success ==
510 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
511
512 uint8_t bytes[inst_size+2];
513 ref.readBytes(current_address, inst_size, bytes);
514
515 uint64_t jmptarget;
516 std::string ref("");
517 IP->printInst(&inst, s, "");
518 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
519 std::stringstream stream;
520 if (MIA->isCall(inst))
521 stream << "function:";
522 else
523 stream << "block:";
524
525 stream << std::hex << (base_address + jmptarget);
526 ref = stream.str();
527 }
528
529
530 fun(bytes, inst_size, s.str(), ref);
531 } else {
532 LOG4CXX_WARN(logger, "Invalid byte at" << std::hex << current_address + base_address);
533 fun(NULL, 0, "Invalid Byte", "");
534 inst_size = 1;
535 }
536
537 current_address += inst_size;
538 }
539 }