]> git.siccegge.de Git - frida/frida.git/blob - src/disassembler/llvm/LLVMDisassembler.cxx
Fix startup if no binary is specified on the commandline
[frida/frida.git] / src / disassembler / llvm / LLVMDisassembler.cxx
1 #include "disassembler/llvm/LLVMDisassembler.hxx"
2 #include "disassembler/llvm/LLVMBasicBlock.hxx"
3 #include "disassembler/llvm/LLVMFunction.hxx"
4
5 #include <stack>
6 #include <algorithm>
7
8 using namespace llvm;
9 using namespace llvm::object;
10 using std::error_code;
11
12 /*
13 *
14 */
15 Disassembler * createLLVMDisassembler(const std::string& filename, InformationManager* manager) {
16 if (filename == "")
17 return NULL;
18
19 std::unique_ptr<Binary> o;
20 o.reset(createBinary(filename).get());
21 const Binary * op = o.get();
22
23 // ELFType<endian, maxalign, 64bit>
24 if (const ELF32LEObjectFile * _ = dyn_cast<ELF32LEObjectFile>(op)) {
25 #pragma unused(_)
26 return new LLVMDisassembler<ELFType<support::little, 2, false>>(filename, manager);
27 }
28 if (const ELF64LEObjectFile * _ = dyn_cast<ELF64LEObjectFile>(op)) {
29 #pragma unused(_)
30 return new LLVMDisassembler<ELFType<support::little, 2, true>>(filename, manager);
31 }
32 if (const ELF32BEObjectFile * _ = dyn_cast<ELF32BEObjectFile>(op)) {
33 #pragma unused(_)
34 return new LLVMDisassembler<ELFType<support::big, 2, false>>(filename, manager);
35 }
36 if (const ELF64BEObjectFile * _ = dyn_cast<ELF64BEObjectFile>(op)) {
37 #pragma unused(_)
38 return new LLVMDisassembler<ELFType<support::big, 2, true>>(filename, manager);
39 }
40
41 return NULL;
42 }
43
/*
 * TODO: fallback code for the case where the file is not an
 * ELF/PE/COFF/Mach-O/... binary but, for example, just raw instructions,
 * a boot sector, or something else entirely
 */
49 template <typename ELFT>
50 LLVMDisassembler<ELFT>::LLVMDisassembler(const std::string& filename,
51 InformationManager* manager)
52 : Disassembler(filename, manager)
53 , logger(log4cxx::Logger::getLogger("LLVMDisassembler"))
54 , triple("unknown-unknown-unknown")
55 , manager(manager)
56 {
57 LOG4CXX_DEBUG(logger, "Handling file" << filename);
58 auto result = createBinary(filename);
59
60 error_code ec;
61 if ((ec = result.getError())) {
62 LOG4CXX_ERROR(logger, "Failed to load Binary" << ec.message());
63 binary = NULL;
64 return;
65 }
66
67 binary.reset(result.get());
68
69 o = dyn_cast<ObjectFile>(binary.get());
70
71 triple.setArch(Triple::ArchType(o->getArch()));
72 std::string tripleName(triple.getTriple());
73
74 LOG4CXX_INFO(logger, "Architecture " << tripleName);
75
76
77 std::string es;
78 target = TargetRegistry::lookupTarget("", triple, es);
79 if (!target) {
80 LOG4CXX_ERROR(logger, es);
81 return;
82 }
83
84 LOG4CXX_INFO(logger, "Target " << target->getName());
85
86 MRI.reset(target->createMCRegInfo(tripleName));
87 if (!MRI) {
88 LOG4CXX_ERROR(logger, "no register info for target " << tripleName);
89 return;
90 }
91
92 // Set up disassembler.
93 AsmInfo.reset(target->createMCAsmInfo(*MRI, tripleName));
94 if (!AsmInfo) {
95 LOG4CXX_ERROR(logger, "no assembly info for target " << tripleName);
96 return;
97 }
98
99 STI.reset(target->createMCSubtargetInfo(tripleName, "", ""));
100 if (!STI) {
101 LOG4CXX_ERROR(logger, "no subtarget info for target " << tripleName);
102 return;
103 }
104
105 MII.reset(target->createMCInstrInfo());
106 if (!MII) {
107 LOG4CXX_ERROR(logger, "no instruction info for target " << tripleName);
108 return;
109 }
110
111 MOFI.reset(new MCObjectFileInfo);
112 MCContext Ctx(AsmInfo.get(), MRI.get(), MOFI.get());
113
114 DisAsm.reset(target->createMCDisassembler(*STI, Ctx));
115 if (!DisAsm) {
116 LOG4CXX_ERROR(logger, "no disassembler for target " << tripleName);
117 return;
118 }
119 RelInfo.reset(
120 target->createMCRelocationInfo(tripleName, Ctx));
121 if (RelInfo) {
122 Symzer.reset(
123 MCObjectSymbolizer::createObjectSymbolizer(Ctx, std::move(RelInfo), o));
124 if (Symzer)
125 DisAsm->setSymbolizer(std::move(Symzer));
126 }
127 RelInfo.release();
128 Symzer.release();
129
130 MIA.reset(target->createMCInstrAnalysis(MII.get()));
131 if (!MIA) {
132 LOG4CXX_ERROR(logger, "no instruction analysis for target " << tripleName);
133 return;
134 }
135
136 int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
137 IP.reset(target->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI));
138 if (!IP) {
139 LOG4CXX_ERROR(logger, "no instruction printer for target " << tripleName);
140 return;
141 }
142
143 IP->setPrintImmHex(llvm::HexStyle::C);
144 IP->setPrintImmHex(true);
145
146 std::unique_ptr<MCObjectDisassembler> OD(
147 new MCObjectDisassembler(*o, *DisAsm, *MIA));
148 Mod.reset(OD->buildModule(false));
149 }
150
151 template <typename ELFT>
152 void LLVMDisassembler<ELFT>::start() {
153 readSymbols();
154 readSections();
155 disassemble();
156 }
157
158 template <typename ELFT>
159 LLVMDisassembler<ELFT>::~LLVMDisassembler() {
160 std::for_each(functions.begin(), functions.end(),
161 [](std::pair<uint64_t,LLVMFunction*> it) {
162 delete it.second;
163 });
164 std::for_each(blocks.begin(), blocks.end(),
165 [](std::pair<uint64_t, LLVMBasicBlock*> it) {
166 delete it.second;
167 });
168 }
169
170 template <typename ELFT>
171 Function* LLVMDisassembler<ELFT>::disassembleFunctionAt(uint64_t address, const std::string& name) {
172 SectionRef text_section = sections[".text"];
173 uint64_t base_address, size;
174 text_section.getAddress(base_address);
175 text_section.getSize(size);
176
177 if (address < base_address ||
178 address >= base_address + size) {
179 return NULL;
180 }
181
182 if (functions.find(address) != functions.end()) {
183 return functions[address];
184 }
185
186 LLVMFunction * function;
187 if (name == "") {
188 std::stringstream s;
189 s << "<Unnamed 0x" << std::hex << address << ">";
190 function = new LLVMFunction(s.str(), address);
191 } else {
192 function = new LLVMFunction(name, address);
193 }
194 functions.insert(std::make_pair(address, function));
195
196 disassembleFunction(function);
197
198 return function;
199 }
200
201 template <typename ELFT>
202 void LLVMDisassembler<ELFT>::disassembleFunction(LLVMFunction* function) {
203 std::stack<LLVMBasicBlock*> remaining_blocks;
204 SectionRef text_section = sections[".text"];
205 StringRef bytes;
206 text_section.getContents(bytes);
207 StringRefMemoryObject ref(bytes);
208
209 LOG4CXX_DEBUG(logger, "Handling function " << function->getName());
210
211 LLVMBasicBlock * block = new LLVMBasicBlock(function->getStartAddress(), this);
212 remaining_blocks.push(block);
213 blocks.insert(std::make_pair(block->getStartAddress(), block));
214 function->addBasicBlock(block);
215
216 while (remaining_blocks.size()) {
217 LLVMBasicBlock * current_block = remaining_blocks.top();
218 remaining_blocks.pop();
219
220 LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex << current_block->getStartAddress());
221
222 uint64_t inst_size;
223 uint64_t base_address;
224 text_section.getAddress(base_address);
225 uint64_t current_address = current_block->getStartAddress() - base_address;
226 while(true) {
227 MCInst inst;
228 std::string buf;
229 llvm::raw_string_ostream s(buf);
230
231 if(llvm::MCDisassembler::Success ==
232 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
233 uint64_t jmptarget;
234
235 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
236 jmptarget += base_address;
237 if (!MIA->isIndirectBranch(inst)) {
238 if (MIA->isCall(inst)) {
239 if (functions.find(jmptarget) == functions.end()) {
240 disassembleFunctionAt(jmptarget);
241 }
242 } else {
243 current_block->setNextBlock(0, jmptarget);
244 if (blocks.find(jmptarget) == blocks.end()) {
245 LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this);
246 blocks.insert(std::make_pair(block->getStartAddress(), block));
247 function->addBasicBlock(block);
248 remaining_blocks.push(block);
249 } else {
250 LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex << current_block->getStartAddress());
251 function->addBasicBlock(blocks.find(jmptarget)->second);
252 }
253 if (MIA->isConditionalBranch(inst)) {
254 jmptarget = base_address + current_address + inst_size;
255 current_block->setNextBlock(1, jmptarget);
256 if (blocks.find(jmptarget) == blocks.end()) {
257 LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this);
258 blocks.insert(std::make_pair(block->getStartAddress(), block));
259 function->addBasicBlock(block);
260 remaining_blocks.push(block);
261 } else {
262 LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex << current_block->getStartAddress());
263 function->addBasicBlock(blocks.find(jmptarget)->second);
264 }
265 }
266 }
267 }
268 }
269 } else {
270 inst_size = 0;
271 }
272
273
274 if (inst_size == 0 || MIA->isTerminator(inst) || MIA->isBranch(inst)) {
275 current_block->setEndAddress(current_address + base_address + inst_size);
276 LOG4CXX_DEBUG(logger, "Finished Block at " << std::hex <<
277 current_block->getEndAddress());
278 break;
279 }
280 current_address += inst_size;
281 }
282 }
283 splitBlocks(function);
284 LOG4CXX_DEBUG(logger, "Finished function " << function->getName());
285 manager->signal_new_function(function);
286 }
287
288 template <typename ELFT>
289 void LLVMDisassembler<ELFT>::disassemble() {
290 SectionRef text_section = sections[".text"];
291 std::vector<LLVMFunction*> remaining_functions;
292
293 // Assume all function symbols actually start a real function
294 for (auto x = symbols.begin(); x != symbols.end(); ++x) {
295 uint64_t result;
296 bool contains;
297 SymbolRef::Type symbol_type;
298
299
300 if (text_section.containsSymbol(x->second, contains) || !contains)
301 continue;
302
303 if (x->second.getType(symbol_type)
304 || SymbolRef::ST_Function != symbol_type)
305 continue;
306
307 if (!x->second.getAddress(result)) {
308 LLVMFunction * fun = new LLVMFunction(x->first, result);
309 remaining_functions.push_back(fun);
310 functions.insert(std::make_pair(result, fun));
311 LOG4CXX_DEBUG(logger, "Disasembling " << x->first);
312 }
313 }
314
315 for (LLVMFunction* function : remaining_functions) {
316 disassembleFunction(function);
317 }
318
319 if (binary->isELF()) {
320 bool is64bit = (binary->getData()[4] == 0x02);
321
322 for (int i(0); i < (is64bit? 8 : 4); ++i) {
323 if (binary->isLittleEndian()) {
324 _entryAddress |= (unsigned int)((unsigned char)binary->getData()[0x18 + i]) << 8*i;
325 } else {
326 _entryAddress = _entryAddress << 8;
327 _entryAddress |= (unsigned char)binary->getData()[0x18 + i];
328 }
329 }
330 LOG4CXX_DEBUG(logger, "Adding entryAddress at: " << std::hex << _entryAddress);
331 std::stringstream s;
332 s << "<_start 0x" << std::hex << _entryAddress << ">";
333
334 disassembleFunctionAt(_entryAddress, s.str());
335 }
336
337 if (functions.empty()) {
338 uint64_t text_entry;
339 text_section.getAddress(text_entry);
340 LOG4CXX_INFO(logger, "No Symbols found, starting at the beginning of the text segment");
341 disassembleFunctionAt(text_entry);
342 }
343 }
344
345 template <typename ELFT>
346 void LLVMDisassembler<ELFT>::splitBlocks(LLVMFunction* function) {
347 SectionRef text_section = sections[".text"];
348 StringRef bytes;
349 text_section.getContents(bytes);
350 StringRefMemoryObject ref(bytes);
351
352 // Split blocks where jumps are going inside the block
353 for (auto it = function->blocks().begin();
354 it != function->blocks().end();
355 ++it) {
356 BasicBlock * current_block = it->second;
357 uint64_t inst_size;
358 uint64_t base_address;
359 text_section.getAddress(base_address);
360 uint64_t current_address = current_block->getStartAddress() - base_address;
361 while(current_block->getEndAddress() - base_address > current_address) {
362 MCInst inst;
363 std::string buf;
364 llvm::raw_string_ostream s(buf);
365
366 if(llvm::MCDisassembler::Success ==
367 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
368 // See if some other block starts here
369 auto other = blocks.find(current_address + inst_size + base_address);
370
371 // Special case, other block starts here but we are at the end anyway
372 if (other != blocks.end()) {
373 uint64_t endaddress = current_address + inst_size + base_address;
374 if (endaddress != current_block->getEndAddress()) {
375 LOG4CXX_DEBUG(logger, "Shortening block starting at "
376 << std::hex
377 << current_block->getStartAddress()
378 << " now ending at "
379 << other->first);
380 function->addBasicBlock(other->second);
381 current_block->setEndAddress(endaddress);
382 current_block->setNextBlock(0, other->first);
383 current_block->setNextBlock(1, 0);
384 }
385 }
386 } else {
387 inst_size = 1;
388 }
389 current_address += inst_size;
390 }
391 }
392 }
393
394 template <typename ELFT>
395 void LLVMDisassembler<ELFT>::readSymbols() {
396 error_code ec;
397 symbol_iterator si(o->symbol_begin()), se(o->symbol_end());
398 for (; si != se; ++si) {
399 StringRef name;
400 if ((ec = si->getName(name))) {
401 LOG4CXX_ERROR(logger, ec.message());
402 break;
403 }
404 LOG4CXX_DEBUG(logger, "Added symbol " << name.str());
405 symbols.insert(make_pair(name.str(), *si));
406 }
407 }
408
409 template <typename ELFT>
410 void LLVMDisassembler<ELFT>::readSections() {
411 error_code ec;
412 section_iterator i(o->section_begin()), e(o->section_end());
413 for (; i != e; ++i) {
414 StringRef name;
415 if ((ec = i->getName(name))) {
416 LOG4CXX_ERROR(logger, ec.message());
417 break;
418 }
419 LOG4CXX_DEBUG(logger, "Added section " << name.str());
420 sections.insert(make_pair(name.str(), *i));
421 }
422
423 }
424
425 template <typename ELFT>
426 void LLVMDisassembler<ELFT>::forEachFunction(std::function<void (uint64_t, Function*)> callback) {
427 std::for_each(functions.begin(), functions.end(),
428 [&](std::pair<uint64_t, LLVMFunction*> x) {
429 callback(x.first, x.second);
430 });
431 }
432
433 template <typename ELFT>
434 void LLVMDisassembler<ELFT>::printEachInstruction(uint64_t start, uint64_t end,
435 std::function<void (uint8_t*, size_t,
436 const std::string&,
437 const std::string&)> fun) {
438 SectionRef text_section = sections[".text"];
439 uint64_t base_address;
440 text_section.getAddress(base_address);
441 uint64_t current_address = start - base_address;
442
443 StringRef bytes;
444 text_section.getContents(bytes);
445 StringRefMemoryObject ref(bytes);
446
447 while (current_address < end - base_address) {
448 uint64_t inst_size;
449 MCInst inst;
450 std::string buf;
451 llvm::raw_string_ostream s(buf);
452
453 if(llvm::MCDisassembler::Success ==
454 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
455
456 uint8_t bytes[inst_size+2];
457 ref.readBytes(current_address, inst_size, bytes);
458
459 uint64_t jmptarget;
460 std::string ref("");
461 IP->printInst(&inst, s, "");
462 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
463 std::stringstream stream;
464 if (MIA->isCall(inst))
465 stream << "function:";
466 else
467 stream << "block:";
468
469 stream << std::hex << (base_address + jmptarget);
470 ref = stream.str();
471 }
472
473
474 fun(bytes, inst_size, s.str(), ref);
475 } else {
476 LOG4CXX_WARN(logger, "Invalid byte at" << std::hex << current_address + base_address);
477 fun(NULL, 0, "Invalid Byte", "");
478 inst_size = 1;
479 }
480
481 current_address += inst_size;
482 }
483 }