]> git.siccegge.de Git - frida/frida.git/blob - src/disassembler/llvm/LLVMDisassembler.cxx
readSections() already in Disassembler constructor
[frida/frida.git] / src / disassembler / llvm / LLVMDisassembler.cxx
1 #include "disassembler/llvm/LLVMDisassembler.hxx"
2 #include "core/InformationManager.hxx"
3 #include "core/Function.hxx"
4 #include "core/BasicBlock.hxx"
5
6 #include <stack>
7 #include <algorithm>
8 #include <cassert>
9
10 using namespace llvm;
11 using namespace llvm::object;
12 using std::error_code;
13
14 /*
15 *
16 */
17 Disassembler * createLLVMDisassembler(const std::string& filename, InformationManager* manager) {
18 if (filename == "")
19 return NULL;
20
21 std::unique_ptr<Binary> o;
22 o.reset(createBinary(filename).get());
23 Binary * op = o.release();
24
25 // ELFType<endian, maxalign, 64bit>
26 if (ELF32LEObjectFile * object = dyn_cast<ELF32LEObjectFile>(op)) {
27 return new LLVMDisassembler<ELFType<support::little, 2, false>>(filename, manager, object);
28 }
29 if (ELF64LEObjectFile * object = dyn_cast<ELF64LEObjectFile>(op)) {
30 return new LLVMDisassembler<ELFType<support::little, 2, true>>(filename, manager, object);
31 }
32 if (ELF32BEObjectFile * object = dyn_cast<ELF32BEObjectFile>(op)) {
33 return new LLVMDisassembler<ELFType<support::big, 2, false>>(filename, manager, object);
34 }
35 if (ELF64BEObjectFile * object = dyn_cast<ELF64BEObjectFile>(op)) {
36 return new LLVMDisassembler<ELFType<support::big, 2, true>>(filename, manager, object);
37 }
38
39 return NULL;
40 }
41
42 /*
43 * TODO: fallback code falls die Datei kein ELF/PE/COFF/MacO/.. binary
44 * ist sondern z.B. einfach nur Instruktionen oder ein Bootsektor oder
45 * foo
46 */
47 template <typename ELFT>
48 LLVMDisassembler<ELFT>::LLVMDisassembler(const std::string& filename,
49 InformationManager* manager,
50 ELFObjectFile<ELFT>* file)
51 : Disassembler()
52 , logger(log4cxx::Logger::getLogger("LLVMDisassembler"))
53 , triple("unknown-unknown-unknown")
54 , manager(manager)
55 {
56 LOG4CXX_DEBUG(logger, "Handling file" << filename);
57
58 if (!file) {
59 auto result = createBinary(filename);
60
61 error_code ec;
62 if ((ec = result.getError())) {
63 LOG4CXX_ERROR(logger, "Failed to load Binary" << ec.message());
64 binary = NULL;
65 return;
66 }
67
68 binary.reset(result.get());
69
70 o = dyn_cast<ELFObjectFile<ELFT>>(binary.get());
71 } else {
72 o = file;
73 binary.reset(file);
74 }
75
76 triple.setArch(Triple::ArchType(o->getArch()));
77 std::string tripleName(triple.getTriple());
78
79 LOG4CXX_INFO(logger, "Architecture " << tripleName);
80
81
82 std::string es;
83 target = TargetRegistry::lookupTarget("", triple, es);
84 if (!target) {
85 LOG4CXX_ERROR(logger, es);
86 return;
87 }
88
89 LOG4CXX_INFO(logger, "Target " << target->getName());
90
91 MRI.reset(target->createMCRegInfo(tripleName));
92 if (!MRI) {
93 LOG4CXX_ERROR(logger, "no register info for target " << tripleName);
94 return;
95 }
96
97 // Set up disassembler.
98 AsmInfo.reset(target->createMCAsmInfo(*MRI, tripleName));
99 if (!AsmInfo) {
100 LOG4CXX_ERROR(logger, "no assembly info for target " << tripleName);
101 return;
102 }
103
104 STI.reset(target->createMCSubtargetInfo(tripleName, "", ""));
105 if (!STI) {
106 LOG4CXX_ERROR(logger, "no subtarget info for target " << tripleName);
107 return;
108 }
109
110 MII.reset(target->createMCInstrInfo());
111 if (!MII) {
112 LOG4CXX_ERROR(logger, "no instruction info for target " << tripleName);
113 return;
114 }
115
116 MOFI.reset(new MCObjectFileInfo);
117 MCContext Ctx(AsmInfo.get(), MRI.get(), MOFI.get());
118
119 DisAsm.reset(target->createMCDisassembler(*STI, Ctx));
120 if (!DisAsm) {
121 LOG4CXX_ERROR(logger, "no disassembler for target " << tripleName);
122 return;
123 }
124 RelInfo.reset(
125 target->createMCRelocationInfo(tripleName, Ctx));
126 if (RelInfo) {
127 Symzer.reset(
128 MCObjectSymbolizer::createObjectSymbolizer(Ctx, std::move(RelInfo), o));
129 if (Symzer)
130 DisAsm->setSymbolizer(std::move(Symzer));
131 }
132 RelInfo.release();
133 Symzer.release();
134
135 MIA.reset(target->createMCInstrAnalysis(MII.get()));
136 if (!MIA) {
137 LOG4CXX_ERROR(logger, "no instruction analysis for target " << tripleName);
138 return;
139 }
140
141 int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
142 IP.reset(target->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI));
143 if (!IP) {
144 LOG4CXX_ERROR(logger, "no instruction printer for target " << tripleName);
145 return;
146 }
147
148 IP->setPrintImmHex(llvm::HexStyle::C);
149 IP->setPrintImmHex(true);
150
151 std::unique_ptr<MCObjectDisassembler> OD(
152 new MCObjectDisassembler(*o, *DisAsm, *MIA));
153 Mod.reset(OD->buildModule(false));
154
155 readSections();
156 }
157
158 template <typename ELFT>
159 void LLVMDisassembler<ELFT>::start() {
160 readSymbols();
161 disassemble();
162 readDynamicSymbols();
163 }
164
165 template <typename ELFT>
166 LLVMDisassembler<ELFT>::~LLVMDisassembler() {}
167
168 template <typename ELFT>
169 Function* LLVMDisassembler<ELFT>::disassembleFunctionAt(uint64_t address, const std::string& name) {
170 Function * function;
171 SectionRef text_section = sections[".text"];
172 uint64_t base_address, size;
173 text_section.getAddress(base_address);
174 text_section.getSize(size);
175
176 if (address < base_address ||
177 address >= base_address + size) {
178 return NULL;
179 }
180
181 if (NULL == (function = manager->getFunction(address))) {
182
183 if (name == "") {
184 std::stringstream s;
185 s << "<Unnamed 0x" << std::hex << address << ">";
186 function = manager->newFunction(address);
187 function->setName(s.str());
188 } else {
189 function = manager->newFunction(address);
190 function->setName(name);
191 }
192 disassembleFunction(function);
193 manager->finishFunction(function);
194 }
195
196 return function;
197 }
198
199 template <typename ELFT>
200 void LLVMDisassembler<ELFT>::disassembleFunction(Function* function) {
201 std::stack<BasicBlock*> remaining_blocks;
202 /* TODO:
203 * Do all blocks get added properly? We should take care to remove
204 * the other ones at the end of the function!
205 */
206 std::map<uint64_t, BasicBlock*> new_blocks;
207 SectionRef text_section = sections[".text"];
208 StringRef bytes;
209 text_section.getContents(bytes);
210 StringRefMemoryObject ref(bytes);
211
212 LOG4CXX_DEBUG(logger, "Handling function " << function->getName());
213
214 BasicBlock * block = manager->newBasicBlock(function->getStartAddress());
215 remaining_blocks.push(block);
216 new_blocks.insert(std::make_pair(block->getStartAddress(), block));
217 function->addBasicBlock(block);
218
219 while (remaining_blocks.size()) {
220 BasicBlock * current_block = remaining_blocks.top();
221 remaining_blocks.pop();
222
223 LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex
224 << current_block->getStartAddress());
225
226 uint64_t inst_size;
227 uint64_t base_address;
228 text_section.getAddress(base_address);
229 uint64_t current_address = current_block->getStartAddress() - base_address;
230 while(true) {
231 MCInst inst;
232 std::string buf;
233 llvm::raw_string_ostream s(buf);
234
235 if(llvm::MCDisassembler::Success ==
236 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
237 uint64_t jmptarget;
238
239 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
240 jmptarget += base_address;
241 if (!MIA->isIndirectBranch(inst)) {
242 if (MIA->isCall(inst)) {
243 if (NULL == manager->getFunction(jmptarget))
244 disassembleFunctionAt(jmptarget);
245 } else {
246 current_block->setNextBlock(0, jmptarget);
247 if (new_blocks.find(jmptarget) == new_blocks.end()) {
248 BasicBlock * block = manager->newBasicBlock(jmptarget);
249 assert(block);
250 new_blocks.insert(std::make_pair(block->getStartAddress(), block));
251 function->addBasicBlock(block);
252 remaining_blocks.push(block);
253 } else {
254 LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex
255 << current_block->getStartAddress());
256 function->addBasicBlock(new_blocks.find(jmptarget)->second);
257 }
258 if (MIA->isConditionalBranch(inst)) {
259 jmptarget = base_address + current_address + inst_size;
260 current_block->setNextBlock(1, jmptarget);
261 if (new_blocks.find(jmptarget) == new_blocks.end()) {
262 BasicBlock * block = manager->newBasicBlock(jmptarget);
263 assert(block);
264 new_blocks.insert(std::make_pair(block->getStartAddress(), block));
265 function->addBasicBlock(block);
266 remaining_blocks.push(block);
267 } else {
268 LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex
269 << current_block->getStartAddress());
270 function->addBasicBlock(new_blocks.find(jmptarget)->second);
271 }
272 }
273 }
274 }
275 }
276 } else {
277 inst_size = 0;
278 }
279
280
281 if (inst_size == 0 || MIA->isTerminator(inst) || MIA->isBranch(inst)) {
282 current_block->setEndAddress(current_address + base_address + inst_size);
283 LOG4CXX_DEBUG(logger, "Finished Block at " << std::hex <<
284 current_block->getEndAddress());
285 break;
286 }
287 current_address += inst_size;
288 }
289 }
290 splitBlocks(function);
291 LOG4CXX_DEBUG(logger, "Finished function " << function->getName());
292 manager->signal_new_function(function);
293 }
294
295 template <typename ELFT>
296 void LLVMDisassembler<ELFT>::disassemble() {
297 SectionRef text_section = sections[".text"];
298 std::vector<Function*> remaining_functions;
299
300 // Assume all function symbols actually start a real function
301 for (auto x = symbols.begin(); x != symbols.end(); ++x) {
302 uint64_t result;
303 bool contains;
304 SymbolRef::Type symbol_type;
305
306
307 if (text_section.containsSymbol(x->second, contains) || !contains)
308 continue;
309
310 if (x->second.getType(symbol_type)
311 || SymbolRef::ST_Function != symbol_type)
312 continue;
313
314 if (!x->second.getAddress(result)) {
315 Function * fun = manager->newFunction(result);
316 fun->setName(x->first);
317 remaining_functions.push_back(fun);
318 LOG4CXX_DEBUG(logger, "Disasembling " << x->first);
319 }
320 }
321
322 for (Function* function : remaining_functions) {
323 disassembleFunction(function);
324 manager->finishFunction(function);
325 }
326
327 if (binary->isELF()) {
328 const ELFO * elffile = o->getELFFile();
329 const typename ELFO::Elf_Ehdr * header = elffile->getHeader();
330
331 _entryAddress = header->e_entry;
332 LOG4CXX_DEBUG(logger, "Adding entryAddress at: " << std::hex << _entryAddress);
333 std::stringstream s;
334 s << "<_start 0x" << std::hex << _entryAddress << ">";
335
336 disassembleFunctionAt(_entryAddress, s.str());
337 }
338
339 if (!manager->hasFunctions()) {
340 uint64_t text_entry;
341 text_section.getAddress(text_entry);
342 LOG4CXX_INFO(logger, "No Symbols found, starting at the beginning of the text segment");
343 disassembleFunctionAt(text_entry);
344 }
345 }
346
347 template <typename ELFT>
348 void LLVMDisassembler<ELFT>::splitBlocks(Function* function) {
349 SectionRef text_section = sections[".text"];
350 StringRef bytes;
351 text_section.getContents(bytes);
352 StringRefMemoryObject ref(bytes);
353
354 // Split blocks where jumps are going inside the block
355 for (auto it = function->blocks().begin();
356 it != function->blocks().end();
357 ++it) {
358 BasicBlock * current_block = it->second;
359 uint64_t inst_size;
360 uint64_t base_address;
361 text_section.getAddress(base_address);
362 uint64_t current_address = current_block->getStartAddress() - base_address;
363 while(current_block->getEndAddress() - base_address > current_address) {
364 MCInst inst;
365 std::string buf;
366 llvm::raw_string_ostream s(buf);
367
368 if(llvm::MCDisassembler::Success ==
369 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
370 // See if some other block starts here
371 BasicBlock* other = manager->getBasicBlock(current_address
372 + inst_size
373 + base_address);
374
375 // Special case, other block starts here but we are at the end anyway
376 if (other != NULL) {
377 uint64_t endaddress = current_address + inst_size + base_address;
378 if (endaddress != current_block->getEndAddress()) {
379 LOG4CXX_DEBUG(logger, "Shortening block starting at "
380 << std::hex
381 << current_block->getStartAddress()
382 << " now ending at "
383 << other->getStartAddress());
384 function->addBasicBlock(other);
385 current_block->setEndAddress(endaddress);
386 current_block->setNextBlock(0, other->getStartAddress());
387 current_block->setNextBlock(1, 0);
388 }
389 }
390 } else {
391 inst_size = 1;
392 }
393 current_address += inst_size;
394 }
395 }
396 }
397
398 template <typename ELFT>
399 void LLVMDisassembler<ELFT>::readDynamicSymbols() {
400 const ELFO * elffile = o->getELFFile();
401 for (typename ELFO::Elf_Sym_Iter
402 it = elffile->begin_dynamic_symbols(),
403 end = elffile->end_dynamic_symbols();
404 it != end;
405 ++it) {
406 if (it->getType() == 2) { // Function
407 bool is_default;
408 // TODO: Error handling
409 std::string symbolname = *(elffile->getSymbolName(it));
410 std::string symbolversion = *(elffile->getSymbolVersion(nullptr, &*it, is_default));
411 manager->signal_new_dyn_symbol(symbolname + (is_default? "@@" : "@") + symbolversion);
412 LOG4CXX_DEBUG(logger, "Adding dynamic Symbol " << symbolname << (is_default? "@@" : "@") << symbolversion);
413 }
414 }
415 }
416
417 template <typename ELFT>
418 void LLVMDisassembler<ELFT>::readSymbols() {
419 error_code ec;
420 symbol_iterator si(o->symbol_begin()), se(o->symbol_end());
421 for (; si != se; ++si) {
422 StringRef name;
423 if ((ec = si->getName(name))) {
424 LOG4CXX_ERROR(logger, ec.message());
425 break;
426 }
427 LOG4CXX_DEBUG(logger, "Added symbol " << name.str());
428 symbols.insert(make_pair(name.str(), *si));
429 }
430 }
431
432 template <typename ELFT>
433 void LLVMDisassembler<ELFT>::readSections() {
434 error_code ec;
435 section_iterator i(o->section_begin()), e(o->section_end());
436 for (; i != e; ++i) {
437 StringRef name;
438 if ((ec = i->getName(name))) {
439 LOG4CXX_ERROR(logger, ec.message());
440 break;
441 }
442 LOG4CXX_DEBUG(logger, "Added section " << name.str());
443 sections.insert(make_pair(name.str(), *i));
444 }
445
446 }
447
448 // template <typename ELFT>
449 // void LLVMDisassembler<ELFT>::forEachFunction(std::function<void (uint64_t, Function*)> callback) {
450 // // std::for_each(functions.begin(), functions.end(),
451 // // [&](std::pair<uint64_t, Function*> x) {
452 // // callback(x.first, x.second);
453 // // });
454 // }
455
456 template <typename ELFT>
457 void LLVMDisassembler<ELFT>::printEachInstruction(uint64_t start, uint64_t end,
458 std::function<void (uint8_t*, size_t,
459 const std::string&,
460 const std::string&)> fun) {
461 SectionRef text_section = sections[".text"];
462 uint64_t base_address;
463 text_section.getAddress(base_address);
464 uint64_t current_address = start - base_address;
465
466 StringRef bytes;
467 text_section.getContents(bytes);
468 StringRefMemoryObject ref(bytes);
469
470 while (current_address < end - base_address) {
471 uint64_t inst_size;
472 MCInst inst;
473 std::string buf;
474 llvm::raw_string_ostream s(buf);
475
476 if(llvm::MCDisassembler::Success ==
477 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
478
479 uint8_t bytes[inst_size+2];
480 ref.readBytes(current_address, inst_size, bytes);
481
482 uint64_t jmptarget;
483 std::string ref("");
484 IP->printInst(&inst, s, "");
485 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
486 std::stringstream stream;
487 if (MIA->isCall(inst))
488 stream << "function:";
489 else
490 stream << "block:";
491
492 stream << std::hex << (base_address + jmptarget);
493 ref = stream.str();
494 }
495
496
497 fun(bytes, inst_size, s.str(), ref);
498 } else {
499 LOG4CXX_WARN(logger, "Invalid byte at" << std::hex << current_address + base_address);
500 fun(NULL, 0, "Invalid Byte", "");
501 inst_size = 1;
502 }
503
504 current_address += inst_size;
505 }
506 }