]> git.siccegge.de Git - frida/frida.git/blob - src/disassembler/llvm/LLVMDisassembler.cxx
8dd548332bf4f1f01106cd2020db3b137e13b626
[frida/frida.git] / src / disassembler / llvm / LLVMDisassembler.cxx
1 #include "disassembler/llvm/LLVMDisassembler.hxx"
2 #include "disassembler/llvm/LLVMBasicBlock.hxx"
3 #include "disassembler/llvm/LLVMFunction.hxx"
4
5 #include <stack>
6 #include <algorithm>
7
8 using namespace llvm;
9 using namespace llvm::object;
10 using std::error_code;
11
12 /*
13 *
14 */
15 Disassembler * createLLVMDisassembler(const std::string& filename, InformationManager* manager) {
16 if (filename == "")
17 return NULL;
18
19 std::unique_ptr<Binary> o;
20 o.reset(createBinary(filename).get());
21 Binary * op = o.release();
22
23 // ELFType<endian, maxalign, 64bit>
24 if (ELF32LEObjectFile * object = dyn_cast<ELF32LEObjectFile>(op)) {
25 return new LLVMDisassembler<ELFType<support::little, 2, false>>(filename, manager, object);
26 }
27 if (ELF64LEObjectFile * object = dyn_cast<ELF64LEObjectFile>(op)) {
28 return new LLVMDisassembler<ELFType<support::little, 2, true>>(filename, manager, object);
29 }
30 if (ELF32BEObjectFile * object = dyn_cast<ELF32BEObjectFile>(op)) {
31 return new LLVMDisassembler<ELFType<support::big, 2, false>>(filename, manager, object);
32 }
33 if (ELF64BEObjectFile * object = dyn_cast<ELF64BEObjectFile>(op)) {
34 return new LLVMDisassembler<ELFType<support::big, 2, true>>(filename, manager, object);
35 }
36
37 return NULL;
38 }
39
40 /*
41 * TODO: fallback code falls die Datei kein ELF/PE/COFF/MacO/.. binary
42 * ist sondern z.B. einfach nur Instruktionen oder ein Bootsektor oder
43 * foo
44 */
45 template <typename ELFT>
46 LLVMDisassembler<ELFT>::LLVMDisassembler(const std::string& filename,
47 InformationManager* manager,
48 ELFObjectFile<ELFT>* file)
49 : Disassembler(filename, manager)
50 , logger(log4cxx::Logger::getLogger("LLVMDisassembler"))
51 , triple("unknown-unknown-unknown")
52 , manager(manager)
53 {
54 LOG4CXX_DEBUG(logger, "Handling file" << filename);
55
56 if (!file) {
57 auto result = createBinary(filename);
58
59 error_code ec;
60 if ((ec = result.getError())) {
61 LOG4CXX_ERROR(logger, "Failed to load Binary" << ec.message());
62 binary = NULL;
63 return;
64 }
65
66 binary.reset(result.get());
67
68 o = dyn_cast<ELFObjectFile<ELFT>>(binary.get());
69 } else {
70 o = file;
71 binary.reset(file);
72 }
73
74 triple.setArch(Triple::ArchType(o->getArch()));
75 std::string tripleName(triple.getTriple());
76
77 LOG4CXX_INFO(logger, "Architecture " << tripleName);
78
79
80 std::string es;
81 target = TargetRegistry::lookupTarget("", triple, es);
82 if (!target) {
83 LOG4CXX_ERROR(logger, es);
84 return;
85 }
86
87 LOG4CXX_INFO(logger, "Target " << target->getName());
88
89 MRI.reset(target->createMCRegInfo(tripleName));
90 if (!MRI) {
91 LOG4CXX_ERROR(logger, "no register info for target " << tripleName);
92 return;
93 }
94
95 // Set up disassembler.
96 AsmInfo.reset(target->createMCAsmInfo(*MRI, tripleName));
97 if (!AsmInfo) {
98 LOG4CXX_ERROR(logger, "no assembly info for target " << tripleName);
99 return;
100 }
101
102 STI.reset(target->createMCSubtargetInfo(tripleName, "", ""));
103 if (!STI) {
104 LOG4CXX_ERROR(logger, "no subtarget info for target " << tripleName);
105 return;
106 }
107
108 MII.reset(target->createMCInstrInfo());
109 if (!MII) {
110 LOG4CXX_ERROR(logger, "no instruction info for target " << tripleName);
111 return;
112 }
113
114 MOFI.reset(new MCObjectFileInfo);
115 MCContext Ctx(AsmInfo.get(), MRI.get(), MOFI.get());
116
117 DisAsm.reset(target->createMCDisassembler(*STI, Ctx));
118 if (!DisAsm) {
119 LOG4CXX_ERROR(logger, "no disassembler for target " << tripleName);
120 return;
121 }
122 RelInfo.reset(
123 target->createMCRelocationInfo(tripleName, Ctx));
124 if (RelInfo) {
125 Symzer.reset(
126 MCObjectSymbolizer::createObjectSymbolizer(Ctx, std::move(RelInfo), o));
127 if (Symzer)
128 DisAsm->setSymbolizer(std::move(Symzer));
129 }
130 RelInfo.release();
131 Symzer.release();
132
133 MIA.reset(target->createMCInstrAnalysis(MII.get()));
134 if (!MIA) {
135 LOG4CXX_ERROR(logger, "no instruction analysis for target " << tripleName);
136 return;
137 }
138
139 int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
140 IP.reset(target->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI));
141 if (!IP) {
142 LOG4CXX_ERROR(logger, "no instruction printer for target " << tripleName);
143 return;
144 }
145
146 IP->setPrintImmHex(llvm::HexStyle::C);
147 IP->setPrintImmHex(true);
148
149 std::unique_ptr<MCObjectDisassembler> OD(
150 new MCObjectDisassembler(*o, *DisAsm, *MIA));
151 Mod.reset(OD->buildModule(false));
152 }
153
154 template <typename ELFT>
155 void LLVMDisassembler<ELFT>::start() {
156 readSymbols();
157 readSections();
158 disassemble();
159 }
160
161 template <typename ELFT>
162 LLVMDisassembler<ELFT>::~LLVMDisassembler() {
163 std::for_each(functions.begin(), functions.end(),
164 [](std::pair<uint64_t,LLVMFunction*> it) {
165 delete it.second;
166 });
167 std::for_each(blocks.begin(), blocks.end(),
168 [](std::pair<uint64_t, LLVMBasicBlock*> it) {
169 delete it.second;
170 });
171 }
172
173 template <typename ELFT>
174 Function* LLVMDisassembler<ELFT>::disassembleFunctionAt(uint64_t address, const std::string& name) {
175 SectionRef text_section = sections[".text"];
176 uint64_t base_address, size;
177 text_section.getAddress(base_address);
178 text_section.getSize(size);
179
180 if (address < base_address ||
181 address >= base_address + size) {
182 return NULL;
183 }
184
185 if (functions.find(address) != functions.end()) {
186 return functions[address];
187 }
188
189 LLVMFunction * function;
190 if (name == "") {
191 std::stringstream s;
192 s << "<Unnamed 0x" << std::hex << address << ">";
193 function = new LLVMFunction(s.str(), address);
194 } else {
195 function = new LLVMFunction(name, address);
196 }
197 functions.insert(std::make_pair(address, function));
198
199 disassembleFunction(function);
200
201 return function;
202 }
203
204 template <typename ELFT>
205 void LLVMDisassembler<ELFT>::disassembleFunction(LLVMFunction* function) {
206 std::stack<LLVMBasicBlock*> remaining_blocks;
207 SectionRef text_section = sections[".text"];
208 StringRef bytes;
209 text_section.getContents(bytes);
210 StringRefMemoryObject ref(bytes);
211
212 LOG4CXX_DEBUG(logger, "Handling function " << function->getName());
213
214 LLVMBasicBlock * block = new LLVMBasicBlock(function->getStartAddress(), this);
215 remaining_blocks.push(block);
216 blocks.insert(std::make_pair(block->getStartAddress(), block));
217 function->addBasicBlock(block);
218
219 while (remaining_blocks.size()) {
220 LLVMBasicBlock * current_block = remaining_blocks.top();
221 remaining_blocks.pop();
222
223 LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex << current_block->getStartAddress());
224
225 uint64_t inst_size;
226 uint64_t base_address;
227 text_section.getAddress(base_address);
228 uint64_t current_address = current_block->getStartAddress() - base_address;
229 while(true) {
230 MCInst inst;
231 std::string buf;
232 llvm::raw_string_ostream s(buf);
233
234 if(llvm::MCDisassembler::Success ==
235 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
236 uint64_t jmptarget;
237
238 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
239 jmptarget += base_address;
240 if (!MIA->isIndirectBranch(inst)) {
241 if (MIA->isCall(inst)) {
242 if (functions.find(jmptarget) == functions.end()) {
243 disassembleFunctionAt(jmptarget);
244 }
245 } else {
246 current_block->setNextBlock(0, jmptarget);
247 if (blocks.find(jmptarget) == blocks.end()) {
248 LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this);
249 blocks.insert(std::make_pair(block->getStartAddress(), block));
250 function->addBasicBlock(block);
251 remaining_blocks.push(block);
252 } else {
253 LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex << current_block->getStartAddress());
254 function->addBasicBlock(blocks.find(jmptarget)->second);
255 }
256 if (MIA->isConditionalBranch(inst)) {
257 jmptarget = base_address + current_address + inst_size;
258 current_block->setNextBlock(1, jmptarget);
259 if (blocks.find(jmptarget) == blocks.end()) {
260 LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this);
261 blocks.insert(std::make_pair(block->getStartAddress(), block));
262 function->addBasicBlock(block);
263 remaining_blocks.push(block);
264 } else {
265 LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex << current_block->getStartAddress());
266 function->addBasicBlock(blocks.find(jmptarget)->second);
267 }
268 }
269 }
270 }
271 }
272 } else {
273 inst_size = 0;
274 }
275
276
277 if (inst_size == 0 || MIA->isTerminator(inst) || MIA->isBranch(inst)) {
278 current_block->setEndAddress(current_address + base_address + inst_size);
279 LOG4CXX_DEBUG(logger, "Finished Block at " << std::hex <<
280 current_block->getEndAddress());
281 break;
282 }
283 current_address += inst_size;
284 }
285 }
286 splitBlocks(function);
287 LOG4CXX_DEBUG(logger, "Finished function " << function->getName());
288 manager->signal_new_function(function);
289 }
290
291 template <typename ELFT>
292 void LLVMDisassembler<ELFT>::disassemble() {
293 SectionRef text_section = sections[".text"];
294 std::vector<LLVMFunction*> remaining_functions;
295
296 // Assume all function symbols actually start a real function
297 for (auto x = symbols.begin(); x != symbols.end(); ++x) {
298 uint64_t result;
299 bool contains;
300 SymbolRef::Type symbol_type;
301
302
303 if (text_section.containsSymbol(x->second, contains) || !contains)
304 continue;
305
306 if (x->second.getType(symbol_type)
307 || SymbolRef::ST_Function != symbol_type)
308 continue;
309
310 if (!x->second.getAddress(result)) {
311 LLVMFunction * fun = new LLVMFunction(x->first, result);
312 remaining_functions.push_back(fun);
313 functions.insert(std::make_pair(result, fun));
314 LOG4CXX_DEBUG(logger, "Disasembling " << x->first);
315 }
316 }
317
318 for (LLVMFunction* function : remaining_functions) {
319 disassembleFunction(function);
320 }
321
322 if (binary->isELF()) {
323 bool is64bit = (binary->getData()[4] == 0x02);
324
325 for (int i(0); i < (is64bit? 8 : 4); ++i) {
326 if (binary->isLittleEndian()) {
327 _entryAddress |= (unsigned int)((unsigned char)binary->getData()[0x18 + i]) << 8*i;
328 } else {
329 _entryAddress = _entryAddress << 8;
330 _entryAddress |= (unsigned char)binary->getData()[0x18 + i];
331 }
332 }
333 LOG4CXX_DEBUG(logger, "Adding entryAddress at: " << std::hex << _entryAddress);
334 std::stringstream s;
335 s << "<_start 0x" << std::hex << _entryAddress << ">";
336
337 disassembleFunctionAt(_entryAddress, s.str());
338 }
339
340 if (functions.empty()) {
341 uint64_t text_entry;
342 text_section.getAddress(text_entry);
343 LOG4CXX_INFO(logger, "No Symbols found, starting at the beginning of the text segment");
344 disassembleFunctionAt(text_entry);
345 }
346 }
347
348 template <typename ELFT>
349 void LLVMDisassembler<ELFT>::splitBlocks(LLVMFunction* function) {
350 SectionRef text_section = sections[".text"];
351 StringRef bytes;
352 text_section.getContents(bytes);
353 StringRefMemoryObject ref(bytes);
354
355 // Split blocks where jumps are going inside the block
356 for (auto it = function->blocks().begin();
357 it != function->blocks().end();
358 ++it) {
359 BasicBlock * current_block = it->second;
360 uint64_t inst_size;
361 uint64_t base_address;
362 text_section.getAddress(base_address);
363 uint64_t current_address = current_block->getStartAddress() - base_address;
364 while(current_block->getEndAddress() - base_address > current_address) {
365 MCInst inst;
366 std::string buf;
367 llvm::raw_string_ostream s(buf);
368
369 if(llvm::MCDisassembler::Success ==
370 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
371 // See if some other block starts here
372 auto other = blocks.find(current_address + inst_size + base_address);
373
374 // Special case, other block starts here but we are at the end anyway
375 if (other != blocks.end()) {
376 uint64_t endaddress = current_address + inst_size + base_address;
377 if (endaddress != current_block->getEndAddress()) {
378 LOG4CXX_DEBUG(logger, "Shortening block starting at "
379 << std::hex
380 << current_block->getStartAddress()
381 << " now ending at "
382 << other->first);
383 function->addBasicBlock(other->second);
384 current_block->setEndAddress(endaddress);
385 current_block->setNextBlock(0, other->first);
386 current_block->setNextBlock(1, 0);
387 }
388 }
389 } else {
390 inst_size = 1;
391 }
392 current_address += inst_size;
393 }
394 }
395 }
396
397 template <typename ELFT>
398 void LLVMDisassembler<ELFT>::readSymbols() {
399 error_code ec;
400 symbol_iterator si(o->symbol_begin()), se(o->symbol_end());
401 for (; si != se; ++si) {
402 StringRef name;
403 if ((ec = si->getName(name))) {
404 LOG4CXX_ERROR(logger, ec.message());
405 break;
406 }
407 LOG4CXX_DEBUG(logger, "Added symbol " << name.str());
408 symbols.insert(make_pair(name.str(), *si));
409 }
410 }
411
412 template <typename ELFT>
413 void LLVMDisassembler<ELFT>::readSections() {
414 error_code ec;
415 section_iterator i(o->section_begin()), e(o->section_end());
416 for (; i != e; ++i) {
417 StringRef name;
418 if ((ec = i->getName(name))) {
419 LOG4CXX_ERROR(logger, ec.message());
420 break;
421 }
422 LOG4CXX_DEBUG(logger, "Added section " << name.str());
423 sections.insert(make_pair(name.str(), *i));
424 }
425
426 }
427
428 template <typename ELFT>
429 void LLVMDisassembler<ELFT>::forEachFunction(std::function<void (uint64_t, Function*)> callback) {
430 std::for_each(functions.begin(), functions.end(),
431 [&](std::pair<uint64_t, LLVMFunction*> x) {
432 callback(x.first, x.second);
433 });
434 }
435
436 template <typename ELFT>
437 void LLVMDisassembler<ELFT>::printEachInstruction(uint64_t start, uint64_t end,
438 std::function<void (uint8_t*, size_t,
439 const std::string&,
440 const std::string&)> fun) {
441 SectionRef text_section = sections[".text"];
442 uint64_t base_address;
443 text_section.getAddress(base_address);
444 uint64_t current_address = start - base_address;
445
446 StringRef bytes;
447 text_section.getContents(bytes);
448 StringRefMemoryObject ref(bytes);
449
450 while (current_address < end - base_address) {
451 uint64_t inst_size;
452 MCInst inst;
453 std::string buf;
454 llvm::raw_string_ostream s(buf);
455
456 if(llvm::MCDisassembler::Success ==
457 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
458
459 uint8_t bytes[inst_size+2];
460 ref.readBytes(current_address, inst_size, bytes);
461
462 uint64_t jmptarget;
463 std::string ref("");
464 IP->printInst(&inst, s, "");
465 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
466 std::stringstream stream;
467 if (MIA->isCall(inst))
468 stream << "function:";
469 else
470 stream << "block:";
471
472 stream << std::hex << (base_address + jmptarget);
473 ref = stream.str();
474 }
475
476
477 fun(bytes, inst_size, s.str(), ref);
478 } else {
479 LOG4CXX_WARN(logger, "Invalid byte at" << std::hex << current_address + base_address);
480 fun(NULL, 0, "Invalid Byte", "");
481 inst_size = 1;
482 }
483
484 current_address += inst_size;
485 }
486 }