]> git.siccegge.de Git - frida/frida.git/blob - src/disassembler/llvm/LLVMDisassembler.cxx
Add dynamically linked symbols
[frida/frida.git] / src / disassembler / llvm / LLVMDisassembler.cxx
1 #include "disassembler/llvm/LLVMDisassembler.hxx"
2 #include "disassembler/llvm/LLVMBasicBlock.hxx"
3 #include "disassembler/llvm/LLVMFunction.hxx"
4
5 #include <stack>
6 #include <algorithm>
7
8 using namespace llvm;
9 using namespace llvm::object;
10 using std::error_code;
11
12 /*
13 *
14 */
15 Disassembler * createLLVMDisassembler(const std::string& filename, InformationManager* manager) {
16 if (filename == "")
17 return NULL;
18
19 std::unique_ptr<Binary> o;
20 o.reset(createBinary(filename).get());
21 Binary * op = o.release();
22
23 // ELFType<endian, maxalign, 64bit>
24 if (ELF32LEObjectFile * object = dyn_cast<ELF32LEObjectFile>(op)) {
25 return new LLVMDisassembler<ELFType<support::little, 2, false>>(filename, manager, object);
26 }
27 if (ELF64LEObjectFile * object = dyn_cast<ELF64LEObjectFile>(op)) {
28 return new LLVMDisassembler<ELFType<support::little, 2, true>>(filename, manager, object);
29 }
30 if (ELF32BEObjectFile * object = dyn_cast<ELF32BEObjectFile>(op)) {
31 return new LLVMDisassembler<ELFType<support::big, 2, false>>(filename, manager, object);
32 }
33 if (ELF64BEObjectFile * object = dyn_cast<ELF64BEObjectFile>(op)) {
34 return new LLVMDisassembler<ELFType<support::big, 2, true>>(filename, manager, object);
35 }
36
37 return NULL;
38 }
39
40 /*
41 * TODO: fallback code falls die Datei kein ELF/PE/COFF/MacO/.. binary
42 * ist sondern z.B. einfach nur Instruktionen oder ein Bootsektor oder
43 * foo
44 */
45 template <typename ELFT>
46 LLVMDisassembler<ELFT>::LLVMDisassembler(const std::string& filename,
47 InformationManager* manager,
48 ELFObjectFile<ELFT>* file)
49 : Disassembler(filename, manager)
50 , logger(log4cxx::Logger::getLogger("LLVMDisassembler"))
51 , triple("unknown-unknown-unknown")
52 , manager(manager)
53 {
54 LOG4CXX_DEBUG(logger, "Handling file" << filename);
55
56 if (!file) {
57 auto result = createBinary(filename);
58
59 error_code ec;
60 if ((ec = result.getError())) {
61 LOG4CXX_ERROR(logger, "Failed to load Binary" << ec.message());
62 binary = NULL;
63 return;
64 }
65
66 binary.reset(result.get());
67
68 o = dyn_cast<ELFObjectFile<ELFT>>(binary.get());
69 } else {
70 o = file;
71 binary.reset(file);
72 }
73
74 triple.setArch(Triple::ArchType(o->getArch()));
75 std::string tripleName(triple.getTriple());
76
77 LOG4CXX_INFO(logger, "Architecture " << tripleName);
78
79
80 std::string es;
81 target = TargetRegistry::lookupTarget("", triple, es);
82 if (!target) {
83 LOG4CXX_ERROR(logger, es);
84 return;
85 }
86
87 LOG4CXX_INFO(logger, "Target " << target->getName());
88
89 MRI.reset(target->createMCRegInfo(tripleName));
90 if (!MRI) {
91 LOG4CXX_ERROR(logger, "no register info for target " << tripleName);
92 return;
93 }
94
95 // Set up disassembler.
96 AsmInfo.reset(target->createMCAsmInfo(*MRI, tripleName));
97 if (!AsmInfo) {
98 LOG4CXX_ERROR(logger, "no assembly info for target " << tripleName);
99 return;
100 }
101
102 STI.reset(target->createMCSubtargetInfo(tripleName, "", ""));
103 if (!STI) {
104 LOG4CXX_ERROR(logger, "no subtarget info for target " << tripleName);
105 return;
106 }
107
108 MII.reset(target->createMCInstrInfo());
109 if (!MII) {
110 LOG4CXX_ERROR(logger, "no instruction info for target " << tripleName);
111 return;
112 }
113
114 MOFI.reset(new MCObjectFileInfo);
115 MCContext Ctx(AsmInfo.get(), MRI.get(), MOFI.get());
116
117 DisAsm.reset(target->createMCDisassembler(*STI, Ctx));
118 if (!DisAsm) {
119 LOG4CXX_ERROR(logger, "no disassembler for target " << tripleName);
120 return;
121 }
122 RelInfo.reset(
123 target->createMCRelocationInfo(tripleName, Ctx));
124 if (RelInfo) {
125 Symzer.reset(
126 MCObjectSymbolizer::createObjectSymbolizer(Ctx, std::move(RelInfo), o));
127 if (Symzer)
128 DisAsm->setSymbolizer(std::move(Symzer));
129 }
130 RelInfo.release();
131 Symzer.release();
132
133 MIA.reset(target->createMCInstrAnalysis(MII.get()));
134 if (!MIA) {
135 LOG4CXX_ERROR(logger, "no instruction analysis for target " << tripleName);
136 return;
137 }
138
139 int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
140 IP.reset(target->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI));
141 if (!IP) {
142 LOG4CXX_ERROR(logger, "no instruction printer for target " << tripleName);
143 return;
144 }
145
146 IP->setPrintImmHex(llvm::HexStyle::C);
147 IP->setPrintImmHex(true);
148
149 std::unique_ptr<MCObjectDisassembler> OD(
150 new MCObjectDisassembler(*o, *DisAsm, *MIA));
151 Mod.reset(OD->buildModule(false));
152 }
153
154 template <typename ELFT>
155 void LLVMDisassembler<ELFT>::start() {
156 readSymbols();
157 readSections();
158 disassemble();
159 readDynamicSymbols();
160 }
161
162 template <typename ELFT>
163 LLVMDisassembler<ELFT>::~LLVMDisassembler() {
164 std::for_each(functions.begin(), functions.end(),
165 [](std::pair<uint64_t,LLVMFunction*> it) {
166 delete it.second;
167 });
168 std::for_each(blocks.begin(), blocks.end(),
169 [](std::pair<uint64_t, LLVMBasicBlock*> it) {
170 delete it.second;
171 });
172 }
173
174 template <typename ELFT>
175 Function* LLVMDisassembler<ELFT>::disassembleFunctionAt(uint64_t address, const std::string& name) {
176 SectionRef text_section = sections[".text"];
177 uint64_t base_address, size;
178 text_section.getAddress(base_address);
179 text_section.getSize(size);
180
181 if (address < base_address ||
182 address >= base_address + size) {
183 return NULL;
184 }
185
186 if (functions.find(address) != functions.end()) {
187 return functions[address];
188 }
189
190 LLVMFunction * function;
191 if (name == "") {
192 std::stringstream s;
193 s << "<Unnamed 0x" << std::hex << address << ">";
194 function = new LLVMFunction(s.str(), address);
195 } else {
196 function = new LLVMFunction(name, address);
197 }
198 functions.insert(std::make_pair(address, function));
199
200 disassembleFunction(function);
201
202 return function;
203 }
204
205 template <typename ELFT>
206 void LLVMDisassembler<ELFT>::disassembleFunction(LLVMFunction* function) {
207 std::stack<LLVMBasicBlock*> remaining_blocks;
208 SectionRef text_section = sections[".text"];
209 StringRef bytes;
210 text_section.getContents(bytes);
211 StringRefMemoryObject ref(bytes);
212
213 LOG4CXX_DEBUG(logger, "Handling function " << function->getName());
214
215 LLVMBasicBlock * block = new LLVMBasicBlock(function->getStartAddress(), this);
216 remaining_blocks.push(block);
217 blocks.insert(std::make_pair(block->getStartAddress(), block));
218 function->addBasicBlock(block);
219
220 while (remaining_blocks.size()) {
221 LLVMBasicBlock * current_block = remaining_blocks.top();
222 remaining_blocks.pop();
223
224 LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex << current_block->getStartAddress());
225
226 uint64_t inst_size;
227 uint64_t base_address;
228 text_section.getAddress(base_address);
229 uint64_t current_address = current_block->getStartAddress() - base_address;
230 while(true) {
231 MCInst inst;
232 std::string buf;
233 llvm::raw_string_ostream s(buf);
234
235 if(llvm::MCDisassembler::Success ==
236 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
237 uint64_t jmptarget;
238
239 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
240 jmptarget += base_address;
241 if (!MIA->isIndirectBranch(inst)) {
242 if (MIA->isCall(inst)) {
243 if (functions.find(jmptarget) == functions.end()) {
244 disassembleFunctionAt(jmptarget);
245 }
246 } else {
247 current_block->setNextBlock(0, jmptarget);
248 if (blocks.find(jmptarget) == blocks.end()) {
249 LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this);
250 blocks.insert(std::make_pair(block->getStartAddress(), block));
251 function->addBasicBlock(block);
252 remaining_blocks.push(block);
253 } else {
254 LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex << current_block->getStartAddress());
255 function->addBasicBlock(blocks.find(jmptarget)->second);
256 }
257 if (MIA->isConditionalBranch(inst)) {
258 jmptarget = base_address + current_address + inst_size;
259 current_block->setNextBlock(1, jmptarget);
260 if (blocks.find(jmptarget) == blocks.end()) {
261 LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this);
262 blocks.insert(std::make_pair(block->getStartAddress(), block));
263 function->addBasicBlock(block);
264 remaining_blocks.push(block);
265 } else {
266 LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex << current_block->getStartAddress());
267 function->addBasicBlock(blocks.find(jmptarget)->second);
268 }
269 }
270 }
271 }
272 }
273 } else {
274 inst_size = 0;
275 }
276
277
278 if (inst_size == 0 || MIA->isTerminator(inst) || MIA->isBranch(inst)) {
279 current_block->setEndAddress(current_address + base_address + inst_size);
280 LOG4CXX_DEBUG(logger, "Finished Block at " << std::hex <<
281 current_block->getEndAddress());
282 break;
283 }
284 current_address += inst_size;
285 }
286 }
287 splitBlocks(function);
288 LOG4CXX_DEBUG(logger, "Finished function " << function->getName());
289 manager->signal_new_function(function);
290 }
291
292 template <typename ELFT>
293 void LLVMDisassembler<ELFT>::disassemble() {
294 SectionRef text_section = sections[".text"];
295 std::vector<LLVMFunction*> remaining_functions;
296
297 // Assume all function symbols actually start a real function
298 for (auto x = symbols.begin(); x != symbols.end(); ++x) {
299 uint64_t result;
300 bool contains;
301 SymbolRef::Type symbol_type;
302
303
304 if (text_section.containsSymbol(x->second, contains) || !contains)
305 continue;
306
307 if (x->second.getType(symbol_type)
308 || SymbolRef::ST_Function != symbol_type)
309 continue;
310
311 if (!x->second.getAddress(result)) {
312 LLVMFunction * fun = new LLVMFunction(x->first, result);
313 remaining_functions.push_back(fun);
314 functions.insert(std::make_pair(result, fun));
315 LOG4CXX_DEBUG(logger, "Disasembling " << x->first);
316 }
317 }
318
319 for (LLVMFunction* function : remaining_functions) {
320 disassembleFunction(function);
321 }
322
323 if (binary->isELF()) {
324 const ELFO * elffile = o->getELFFile();
325 const typename ELFO::Elf_Ehdr * header = elffile->getHeader();
326
327 _entryAddress = header->e_entry;
328 LOG4CXX_DEBUG(logger, "Adding entryAddress at: " << std::hex << _entryAddress);
329 std::stringstream s;
330 s << "<_start 0x" << std::hex << _entryAddress << ">";
331
332 disassembleFunctionAt(_entryAddress, s.str());
333 }
334
335 if (functions.empty()) {
336 uint64_t text_entry;
337 text_section.getAddress(text_entry);
338 LOG4CXX_INFO(logger, "No Symbols found, starting at the beginning of the text segment");
339 disassembleFunctionAt(text_entry);
340 }
341 }
342
343 template <typename ELFT>
344 void LLVMDisassembler<ELFT>::splitBlocks(LLVMFunction* function) {
345 SectionRef text_section = sections[".text"];
346 StringRef bytes;
347 text_section.getContents(bytes);
348 StringRefMemoryObject ref(bytes);
349
350 // Split blocks where jumps are going inside the block
351 for (auto it = function->blocks().begin();
352 it != function->blocks().end();
353 ++it) {
354 BasicBlock * current_block = it->second;
355 uint64_t inst_size;
356 uint64_t base_address;
357 text_section.getAddress(base_address);
358 uint64_t current_address = current_block->getStartAddress() - base_address;
359 while(current_block->getEndAddress() - base_address > current_address) {
360 MCInst inst;
361 std::string buf;
362 llvm::raw_string_ostream s(buf);
363
364 if(llvm::MCDisassembler::Success ==
365 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
366 // See if some other block starts here
367 auto other = blocks.find(current_address + inst_size + base_address);
368
369 // Special case, other block starts here but we are at the end anyway
370 if (other != blocks.end()) {
371 uint64_t endaddress = current_address + inst_size + base_address;
372 if (endaddress != current_block->getEndAddress()) {
373 LOG4CXX_DEBUG(logger, "Shortening block starting at "
374 << std::hex
375 << current_block->getStartAddress()
376 << " now ending at "
377 << other->first);
378 function->addBasicBlock(other->second);
379 current_block->setEndAddress(endaddress);
380 current_block->setNextBlock(0, other->first);
381 current_block->setNextBlock(1, 0);
382 }
383 }
384 } else {
385 inst_size = 1;
386 }
387 current_address += inst_size;
388 }
389 }
390 }
391
392 template <typename ELFT>
393 void LLVMDisassembler<ELFT>::readDynamicSymbols() {
394 const ELFO * elffile = o->getELFFile();
395 for (typename ELFO::Elf_Sym_Iter
396 it = elffile->begin_dynamic_symbols(),
397 end = elffile->end_dynamic_symbols();
398 it != end;
399 ++it) {
400 if (it->getType() == 2) { // Function
401 bool is_default;
402 // TODO: Error handling
403 std::string symbolname = *(elffile->getSymbolName(it));
404 std::string symbolversion = *(elffile->getSymbolVersion(nullptr, &*it, is_default));
405 manager->signal_new_dyn_symbol(symbolname + (is_default? "@@" : "@") + symbolversion);
406 LOG4CXX_DEBUG(logger, "Adding dynamic Symbol " << symbolname << (is_default? "@@" : "@") << symbolversion);
407 }
408 }
409 }
410
411 template <typename ELFT>
412 void LLVMDisassembler<ELFT>::readSymbols() {
413 error_code ec;
414 symbol_iterator si(o->symbol_begin()), se(o->symbol_end());
415 for (; si != se; ++si) {
416 StringRef name;
417 if ((ec = si->getName(name))) {
418 LOG4CXX_ERROR(logger, ec.message());
419 break;
420 }
421 LOG4CXX_DEBUG(logger, "Added symbol " << name.str());
422 symbols.insert(make_pair(name.str(), *si));
423 }
424 }
425
426 template <typename ELFT>
427 void LLVMDisassembler<ELFT>::readSections() {
428 error_code ec;
429 section_iterator i(o->section_begin()), e(o->section_end());
430 for (; i != e; ++i) {
431 StringRef name;
432 if ((ec = i->getName(name))) {
433 LOG4CXX_ERROR(logger, ec.message());
434 break;
435 }
436 LOG4CXX_DEBUG(logger, "Added section " << name.str());
437 sections.insert(make_pair(name.str(), *i));
438 }
439
440 }
441
442 template <typename ELFT>
443 void LLVMDisassembler<ELFT>::forEachFunction(std::function<void (uint64_t, Function*)> callback) {
444 std::for_each(functions.begin(), functions.end(),
445 [&](std::pair<uint64_t, LLVMFunction*> x) {
446 callback(x.first, x.second);
447 });
448 }
449
450 template <typename ELFT>
451 void LLVMDisassembler<ELFT>::printEachInstruction(uint64_t start, uint64_t end,
452 std::function<void (uint8_t*, size_t,
453 const std::string&,
454 const std::string&)> fun) {
455 SectionRef text_section = sections[".text"];
456 uint64_t base_address;
457 text_section.getAddress(base_address);
458 uint64_t current_address = start - base_address;
459
460 StringRef bytes;
461 text_section.getContents(bytes);
462 StringRefMemoryObject ref(bytes);
463
464 while (current_address < end - base_address) {
465 uint64_t inst_size;
466 MCInst inst;
467 std::string buf;
468 llvm::raw_string_ostream s(buf);
469
470 if(llvm::MCDisassembler::Success ==
471 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
472
473 uint8_t bytes[inst_size+2];
474 ref.readBytes(current_address, inst_size, bytes);
475
476 uint64_t jmptarget;
477 std::string ref("");
478 IP->printInst(&inst, s, "");
479 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
480 std::stringstream stream;
481 if (MIA->isCall(inst))
482 stream << "function:";
483 else
484 stream << "block:";
485
486 stream << std::hex << (base_address + jmptarget);
487 ref = stream.str();
488 }
489
490
491 fun(bytes, inst_size, s.str(), ref);
492 } else {
493 LOG4CXX_WARN(logger, "Invalid byte at" << std::hex << current_address + base_address);
494 fun(NULL, 0, "Invalid Byte", "");
495 inst_size = 1;
496 }
497
498 current_address += inst_size;
499 }
500 }