]> git.siccegge.de Git - frida/frida.git/blob - src/disassembler/llvm/LLVMDisassembler.cxx
Re-add support for PE binaries
[frida/frida.git] / src / disassembler / llvm / LLVMDisassembler.cxx
1 #include "disassembler/llvm/LLVMDisassembler.hxx"
2 #include "core/InformationManager.hxx"
3 #include "core/Function.hxx"
4 #include "core/BasicBlock.hxx"
5
6 #include <stack>
7 #include <algorithm>
8 #include <cassert>
9
10 using namespace llvm;
11 using namespace llvm::object;
12 using std::error_code;
13
namespace {
    // Empty tag type. It is used as the template argument of
    // LLVMDisassembler for COFF/PE input, in the slot that the ELF
    // instantiations fill with an ELFType<...>.
    class COFFT {};
}
19
20 /*
21 *
22 */
23 Disassembler * createLLVMDisassembler(const std::string& filename, InformationManager* manager) {
24 if (filename == "")
25 return NULL;
26
27 std::unique_ptr<Binary> o;
28 o.reset(createBinary(filename).get());
29 Binary * op = o.release();
30
31 // ELFType<endian, maxalign, 64bit>
32 if (ELF32LEObjectFile * object = dyn_cast<ELF32LEObjectFile>(op)) {
33 return new LLVMDisassembler<ELFType<support::little, 2, false>>(filename, manager, object);
34 }
35 if (ELF64LEObjectFile * object = dyn_cast<ELF64LEObjectFile>(op)) {
36 return new LLVMDisassembler<ELFType<support::little, 2, true>>(filename, manager, object);
37 }
38 if (ELF32BEObjectFile * object = dyn_cast<ELF32BEObjectFile>(op)) {
39 return new LLVMDisassembler<ELFType<support::big, 2, false>>(filename, manager, object);
40 }
41 if (ELF64BEObjectFile * object = dyn_cast<ELF64BEObjectFile>(op)) {
42 return new LLVMDisassembler<ELFType<support::big, 2, true>>(filename, manager, object);
43 }
44 if (COFFObjectFile * object = dyn_cast<COFFObjectFile>(op)) {
45 return new LLVMDisassembler<COFFT>(filename, manager, object);
46 }
47
48 return NULL;
49 }
50
51 /*
52 * TODO: fallback code falls die Datei kein ELF/PE/COFF/MacO/.. binary
53 * ist sondern z.B. einfach nur Instruktionen oder ein Bootsektor oder
54 * foo
55 */
56 template <typename ELFT>
57 LLVMDisassembler<ELFT>::LLVMDisassembler(const std::string& filename,
58 InformationManager* manager,
59 ObjectFile* file)
60 : Disassembler()
61 , logger(log4cxx::Logger::getLogger("LLVMDisassembler"))
62 , triple("unknown-unknown-unknown")
63 , manager(manager)
64 {
65 LOG4CXX_DEBUG(logger, "Handling file " << filename);
66
67 if (!file) {
68 auto result = createBinary(filename);
69
70 error_code ec;
71 if ((ec = result.getError())) {
72 LOG4CXX_ERROR(logger, "Failed to load Binary" << ec.message());
73 binary = NULL;
74 return;
75 }
76
77 binary.reset(result.get());
78
79 o = dyn_cast<ObjectFile>(binary.get());
80 } else {
81 o = file;
82 binary.reset(file);
83 }
84
85 triple.setArch(Triple::ArchType(o->getArch()));
86 std::string tripleName(triple.getTriple());
87
88 LOG4CXX_INFO(logger, "Architecture " << tripleName);
89
90
91 std::string es;
92 target = TargetRegistry::lookupTarget("", triple, es);
93 if (!target) {
94 LOG4CXX_ERROR(logger, es);
95 return;
96 }
97
98 LOG4CXX_INFO(logger, "Target " << target->getName());
99
100 MRI.reset(target->createMCRegInfo(tripleName));
101 if (!MRI) {
102 LOG4CXX_ERROR(logger, "no register info for target " << tripleName);
103 return;
104 }
105
106 // Set up disassembler.
107 AsmInfo.reset(target->createMCAsmInfo(*MRI, tripleName));
108 if (!AsmInfo) {
109 LOG4CXX_ERROR(logger, "no assembly info for target " << tripleName);
110 return;
111 }
112
113 STI.reset(target->createMCSubtargetInfo(tripleName, "", ""));
114 if (!STI) {
115 LOG4CXX_ERROR(logger, "no subtarget info for target " << tripleName);
116 return;
117 }
118
119 MII.reset(target->createMCInstrInfo());
120 if (!MII) {
121 LOG4CXX_ERROR(logger, "no instruction info for target " << tripleName);
122 return;
123 }
124
125 MOFI.reset(new MCObjectFileInfo);
126 MCContext Ctx(AsmInfo.get(), MRI.get(), MOFI.get());
127
128 DisAsm.reset(target->createMCDisassembler(*STI, Ctx));
129 if (!DisAsm) {
130 LOG4CXX_ERROR(logger, "no disassembler for target " << tripleName);
131 return;
132 }
133 RelInfo.reset(
134 target->createMCRelocationInfo(tripleName, Ctx));
135 if (RelInfo) {
136 Symzer.reset(
137 MCObjectSymbolizer::createObjectSymbolizer(Ctx, std::move(RelInfo), o));
138 if (Symzer)
139 DisAsm->setSymbolizer(std::move(Symzer));
140 }
141 RelInfo.release();
142 Symzer.release();
143
144 MIA.reset(target->createMCInstrAnalysis(MII.get()));
145 if (!MIA) {
146 LOG4CXX_ERROR(logger, "no instruction analysis for target " << tripleName);
147 return;
148 }
149
150 int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
151 IP.reset(target->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI));
152 if (!IP) {
153 LOG4CXX_ERROR(logger, "no instruction printer for target " << tripleName);
154 return;
155 }
156
157 IP->setPrintImmHex(llvm::HexStyle::C);
158 IP->setPrintImmHex(true);
159
160 std::unique_ptr<MCObjectDisassembler> OD(
161 new MCObjectDisassembler(*o, *DisAsm, *MIA));
162 Mod.reset(OD->buildModule(false));
163
164 readSections();
165 }
166
167 template <typename ELFT>
168 void LLVMDisassembler<ELFT>::start() {
169 readSymbols();
170 disassemble();
171 readDynamicSymbols();
172 }
173
174 template <typename ELFT>
175 LLVMDisassembler<ELFT>::~LLVMDisassembler() {}
176
177 template <typename ELFT>
178 Function* LLVMDisassembler<ELFT>::disassembleFunctionAt(uint64_t address, const std::string& name) {
179 Function * function;
180 SectionRef text_section = sections[".text"];
181 uint64_t base_address, size;
182 text_section.getAddress(base_address);
183 text_section.getSize(size);
184
185 if (address < base_address ||
186 address >= base_address + size) {
187 return NULL;
188 }
189
190 if (NULL == (function = manager->getFunction(address))) {
191
192 if (name == "") {
193 std::stringstream s;
194 s << "<Unnamed 0x" << std::hex << address << ">";
195 function = manager->newFunction(address);
196 function->setName(s.str());
197 } else {
198 function = manager->newFunction(address);
199 function->setName(name);
200 }
201 disassembleFunction(function);
202 manager->finishFunction(function);
203 }
204
205 return function;
206 }
207
208 template <typename ELFT>
209 void LLVMDisassembler<ELFT>::disassembleFunction(Function* function) {
210 std::stack<BasicBlock*> remaining_blocks;
211 /* TODO:
212 * Do all blocks get added properly? We should take care to remove
213 * the other ones at the end of the function!
214 */
215 std::map<uint64_t, BasicBlock*> new_blocks;
216 SectionRef text_section = sections[".text"];
217 StringRef bytes;
218 text_section.getContents(bytes);
219 StringRefMemoryObject ref(bytes);
220
221 LOG4CXX_DEBUG(logger, "Handling function " << function->getName());
222
223 BasicBlock * block = manager->newBasicBlock(function->getStartAddress());
224 remaining_blocks.push(block);
225 new_blocks.insert(std::make_pair(block->getStartAddress(), block));
226 function->addBasicBlock(block);
227
228 while (remaining_blocks.size()) {
229 BasicBlock * current_block = remaining_blocks.top();
230 remaining_blocks.pop();
231
232 LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex
233 << current_block->getStartAddress());
234
235 uint64_t inst_size;
236 uint64_t base_address;
237 text_section.getAddress(base_address);
238 uint64_t current_address = current_block->getStartAddress() - base_address;
239 while(true) {
240 MCInst inst;
241 std::string buf;
242 llvm::raw_string_ostream s(buf);
243
244 if(llvm::MCDisassembler::Success ==
245 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
246 uint64_t jmptarget;
247
248 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
249 jmptarget += base_address;
250 if (!MIA->isIndirectBranch(inst)) {
251 if (MIA->isCall(inst)) {
252 if (NULL == manager->getFunction(jmptarget))
253 disassembleFunctionAt(jmptarget);
254 } else {
255 current_block->setNextBlock(0, jmptarget);
256 if (new_blocks.find(jmptarget) == new_blocks.end()) {
257 BasicBlock * block = manager->newBasicBlock(jmptarget);
258 assert(block);
259 new_blocks.insert(std::make_pair(block->getStartAddress(), block));
260 function->addBasicBlock(block);
261 remaining_blocks.push(block);
262 } else {
263 LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex
264 << current_block->getStartAddress());
265 function->addBasicBlock(new_blocks.find(jmptarget)->second);
266 }
267 if (MIA->isConditionalBranch(inst)) {
268 jmptarget = base_address + current_address + inst_size;
269 current_block->setNextBlock(1, jmptarget);
270 if (new_blocks.find(jmptarget) == new_blocks.end()) {
271 BasicBlock * block = manager->newBasicBlock(jmptarget);
272 assert(block);
273 new_blocks.insert(std::make_pair(block->getStartAddress(), block));
274 function->addBasicBlock(block);
275 remaining_blocks.push(block);
276 } else {
277 LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex
278 << current_block->getStartAddress());
279 function->addBasicBlock(new_blocks.find(jmptarget)->second);
280 }
281 }
282 }
283 }
284 }
285 } else {
286 inst_size = 0;
287 }
288
289
290 if (inst_size == 0 || MIA->isTerminator(inst) || MIA->isBranch(inst)) {
291 current_block->setEndAddress(current_address + base_address + inst_size);
292 LOG4CXX_DEBUG(logger, "Finished Block at " << std::hex <<
293 current_block->getEndAddress());
294 break;
295 }
296 current_address += inst_size;
297 }
298 }
299 splitBlocks(function);
300 LOG4CXX_DEBUG(logger, "Finished function " << function->getName());
301 manager->signal_new_function(function);
302 }
303
304 template <typename ELFT>
305 void LLVMDisassembler<ELFT>::disassemble() {
306 SectionRef text_section = sections[".text"];
307 std::vector<Function*> remaining_functions;
308
309 // Assume all function symbols actually start a real function
310 for (auto x = symbols.begin(); x != symbols.end(); ++x) {
311 uint64_t result;
312 bool contains;
313 SymbolRef::Type symbol_type;
314
315
316 if (text_section.containsSymbol(x->second, contains) || !contains)
317 continue;
318
319 if (x->second.getType(symbol_type)
320 || SymbolRef::ST_Function != symbol_type)
321 continue;
322
323 if (!x->second.getAddress(result)) {
324 Function * fun = manager->newFunction(result);
325 fun->setName(x->first);
326 remaining_functions.push_back(fun);
327 LOG4CXX_DEBUG(logger, "Disasembling " << x->first);
328 }
329 }
330
331 for (Function* function : remaining_functions) {
332 disassembleFunction(function);
333 manager->finishFunction(function);
334 }
335
336 if (binary->isELF()) {
337 uint64_t _entryAddress = entryAddress();
338 LOG4CXX_DEBUG(logger, "Adding entryAddress at: " << std::hex << _entryAddress);
339 std::stringstream s;
340 s << "<_start 0x" << std::hex << _entryAddress << ">";
341
342 disassembleFunctionAt(_entryAddress, s.str());
343 }
344
345 if (!manager->hasFunctions()) {
346 uint64_t text_entry;
347 text_section.getAddress(text_entry);
348 LOG4CXX_INFO(logger, "No Symbols found, starting at the beginning of the text segment");
349 disassembleFunctionAt(text_entry);
350 }
351 }
352
353 template <>
354 uint64_t LLVMDisassembler<COFFT>::entryAddress() {
355 const auto coffobject = dyn_cast<COFFObjectFile>(o);
356 const struct pe32_header* pe32_header;
357 const struct pe32plus_header* pe32plus_header;
358
359 coffobject->getPE32PlusHeader(pe32plus_header);
360
361 if (pe32plus_header) {
362 return pe32plus_header->AddressOfEntryPoint;
363 } else {
364 coffobject->getPE32Header(pe32_header);
365 return pe32_header->AddressOfEntryPoint;
366 }
367 }
368
369 template <typename ELFT>
370 uint64_t LLVMDisassembler<ELFT>::entryAddress() {
371 const auto elffile = dyn_cast<ELFObjectFile<ELFT>>(o)->getELFFile();
372 const auto * header = elffile->getHeader();
373
374 return header->e_entry;
375 }
376
377 template <typename ELFT>
378 void LLVMDisassembler<ELFT>::splitBlocks(Function* function) {
379 SectionRef text_section = sections[".text"];
380 StringRef bytes;
381 text_section.getContents(bytes);
382 StringRefMemoryObject ref(bytes);
383
384 // Split blocks where jumps are going inside the block
385 for (auto it = function->blocks().begin();
386 it != function->blocks().end();
387 ++it) {
388 BasicBlock * current_block = it->second;
389 uint64_t inst_size;
390 uint64_t base_address;
391 text_section.getAddress(base_address);
392 uint64_t current_address = current_block->getStartAddress() - base_address;
393 while(current_block->getEndAddress() - base_address > current_address) {
394 MCInst inst;
395 std::string buf;
396 llvm::raw_string_ostream s(buf);
397
398 if(llvm::MCDisassembler::Success ==
399 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
400 // See if some other block starts here
401 BasicBlock* other = manager->getBasicBlock(current_address
402 + inst_size
403 + base_address);
404
405 // Special case, other block starts here but we are at the end anyway
406 if (other != NULL) {
407 uint64_t endaddress = current_address + inst_size + base_address;
408 if (endaddress != current_block->getEndAddress()) {
409 LOG4CXX_DEBUG(logger, "Shortening block starting at "
410 << std::hex
411 << current_block->getStartAddress()
412 << " now ending at "
413 << other->getStartAddress());
414 function->addBasicBlock(other);
415 current_block->setEndAddress(endaddress);
416 current_block->setNextBlock(0, other->getStartAddress());
417 current_block->setNextBlock(1, 0);
418 }
419 }
420 } else {
421 inst_size = 1;
422 }
423 current_address += inst_size;
424 }
425 }
426 }
427
428 template<>
429 void LLVMDisassembler<COFFT>::readDynamicSymbols() {
430 //TODO
431 }
432
433 template <typename ELFT>
434 void LLVMDisassembler<ELFT>::readDynamicSymbols() {
435 const auto elffile = dyn_cast<ELFObjectFile<ELFT>>(o)->getELFFile();
436 for (auto it = elffile->begin_dynamic_symbols(),
437 end = elffile->end_dynamic_symbols();
438 it != end;
439 ++it) {
440 if (it->getType() == 2) { // Function
441 bool is_default;
442 // TODO: Error handling
443 std::string symbolname = *(elffile->getSymbolName(it));
444 std::string symbolversion = *(elffile->getSymbolVersion(nullptr, &*it, is_default));
445 manager->signal_new_dyn_symbol(symbolname + (is_default? "@@" : "@") + symbolversion);
446 LOG4CXX_DEBUG(logger, "Adding dynamic Symbol " << symbolname << (is_default? "@@" : "@") << symbolversion);
447 }
448 }
449 }
450
451 template <typename ELFT>
452 void LLVMDisassembler<ELFT>::readSymbols() {
453 error_code ec;
454 symbol_iterator si(o->symbol_begin()), se(o->symbol_end());
455 for (; si != se; ++si) {
456 StringRef name;
457 if ((ec = si->getName(name))) {
458 LOG4CXX_ERROR(logger, ec.message());
459 break;
460 }
461 LOG4CXX_DEBUG(logger, "Added symbol " << name.str());
462 symbols.insert(make_pair(name.str(), *si));
463 }
464 }
465
466 template <typename ELFT>
467 void LLVMDisassembler<ELFT>::readSections() {
468 error_code ec;
469 section_iterator i(o->section_begin()), e(o->section_end());
470 for (; i != e; ++i) {
471 StringRef name;
472 if ((ec = i->getName(name))) {
473 LOG4CXX_ERROR(logger, ec.message());
474 break;
475 }
476 LOG4CXX_DEBUG(logger, "Added section " << name.str());
477 sections.insert(make_pair(name.str(), *i));
478 }
479
480 }
481
482 // template <typename ELFT>
483 // void LLVMDisassembler<ELFT>::forEachFunction(std::function<void (uint64_t, Function*)> callback) {
484 // // std::for_each(functions.begin(), functions.end(),
485 // // [&](std::pair<uint64_t, Function*> x) {
486 // // callback(x.first, x.second);
487 // // });
488 // }
489
490 template <typename ELFT>
491 void LLVMDisassembler<ELFT>::printEachInstruction(uint64_t start, uint64_t end,
492 std::function<void (uint8_t*, size_t,
493 const std::string&,
494 const std::string&)> fun) {
495 SectionRef text_section = sections[".text"];
496 uint64_t base_address;
497 text_section.getAddress(base_address);
498 uint64_t current_address = start - base_address;
499
500 StringRef bytes;
501 text_section.getContents(bytes);
502 StringRefMemoryObject ref(bytes);
503
504 while (current_address < end - base_address) {
505 uint64_t inst_size;
506 MCInst inst;
507 std::string buf;
508 llvm::raw_string_ostream s(buf);
509
510 if(llvm::MCDisassembler::Success ==
511 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
512
513 uint8_t bytes[inst_size+2];
514 ref.readBytes(current_address, inst_size, bytes);
515
516 uint64_t jmptarget;
517 std::string ref("");
518 IP->printInst(&inst, s, "");
519 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
520 std::stringstream stream;
521 if (MIA->isCall(inst))
522 stream << "function:";
523 else
524 stream << "block:";
525
526 stream << std::hex << (base_address + jmptarget);
527 ref = stream.str();
528 }
529
530
531 fun(bytes, inst_size, s.str(), ref);
532 } else {
533 LOG4CXX_WARN(logger, "Invalid byte at" << std::hex << current_address + base_address);
534 fun(NULL, 0, "Invalid Byte", "");
535 inst_size = 1;
536 }
537
538 current_address += inst_size;
539 }
540 }