]> git.siccegge.de Git - frida/frida.git/blob - src/disassembler/llvm/LLVMDisassembler.cxx
Fix Loading of symbolless files
[frida/frida.git] / src / disassembler / llvm / LLVMDisassembler.cxx
1 #include "disassembler/llvm/LLVMDisassembler.hxx"
2 #include "core/InformationManager.hxx"
3 #include "core/Function.hxx"
4 #include "core/BasicBlock.hxx"
5
6 #include <stack>
7 #include <algorithm>
8 #include <cassert>
9
10 using namespace llvm;
11 using namespace llvm::object;
12 using std::error_code;
13
14 /*
15 *
16 */
17 Disassembler * createLLVMDisassembler(const std::string& filename, InformationManager* manager) {
18 if (filename == "")
19 return NULL;
20
21 std::unique_ptr<Binary> o;
22 o.reset(createBinary(filename).get());
23 Binary * op = o.release();
24
25 // ELFType<endian, maxalign, 64bit>
26 if (ELF32LEObjectFile * object = dyn_cast<ELF32LEObjectFile>(op)) {
27 return new LLVMDisassembler<ELFType<support::little, 2, false>>(filename, manager, object);
28 }
29 if (ELF64LEObjectFile * object = dyn_cast<ELF64LEObjectFile>(op)) {
30 return new LLVMDisassembler<ELFType<support::little, 2, true>>(filename, manager, object);
31 }
32 if (ELF32BEObjectFile * object = dyn_cast<ELF32BEObjectFile>(op)) {
33 return new LLVMDisassembler<ELFType<support::big, 2, false>>(filename, manager, object);
34 }
35 if (ELF64BEObjectFile * object = dyn_cast<ELF64BEObjectFile>(op)) {
36 return new LLVMDisassembler<ELFType<support::big, 2, true>>(filename, manager, object);
37 }
38
39 return NULL;
40 }
41
42 /*
43 * TODO: fallback code falls die Datei kein ELF/PE/COFF/MacO/.. binary
44 * ist sondern z.B. einfach nur Instruktionen oder ein Bootsektor oder
45 * foo
46 */
47 template <typename ELFT>
48 LLVMDisassembler<ELFT>::LLVMDisassembler(const std::string& filename,
49 InformationManager* manager,
50 ELFObjectFile<ELFT>* file)
51 : Disassembler(filename, manager)
52 , logger(log4cxx::Logger::getLogger("LLVMDisassembler"))
53 , triple("unknown-unknown-unknown")
54 , manager(manager)
55 {
56 LOG4CXX_DEBUG(logger, "Handling file" << filename);
57
58 if (!file) {
59 auto result = createBinary(filename);
60
61 error_code ec;
62 if ((ec = result.getError())) {
63 LOG4CXX_ERROR(logger, "Failed to load Binary" << ec.message());
64 binary = NULL;
65 return;
66 }
67
68 binary.reset(result.get());
69
70 o = dyn_cast<ELFObjectFile<ELFT>>(binary.get());
71 } else {
72 o = file;
73 binary.reset(file);
74 }
75
76 triple.setArch(Triple::ArchType(o->getArch()));
77 std::string tripleName(triple.getTriple());
78
79 LOG4CXX_INFO(logger, "Architecture " << tripleName);
80
81
82 std::string es;
83 target = TargetRegistry::lookupTarget("", triple, es);
84 if (!target) {
85 LOG4CXX_ERROR(logger, es);
86 return;
87 }
88
89 LOG4CXX_INFO(logger, "Target " << target->getName());
90
91 MRI.reset(target->createMCRegInfo(tripleName));
92 if (!MRI) {
93 LOG4CXX_ERROR(logger, "no register info for target " << tripleName);
94 return;
95 }
96
97 // Set up disassembler.
98 AsmInfo.reset(target->createMCAsmInfo(*MRI, tripleName));
99 if (!AsmInfo) {
100 LOG4CXX_ERROR(logger, "no assembly info for target " << tripleName);
101 return;
102 }
103
104 STI.reset(target->createMCSubtargetInfo(tripleName, "", ""));
105 if (!STI) {
106 LOG4CXX_ERROR(logger, "no subtarget info for target " << tripleName);
107 return;
108 }
109
110 MII.reset(target->createMCInstrInfo());
111 if (!MII) {
112 LOG4CXX_ERROR(logger, "no instruction info for target " << tripleName);
113 return;
114 }
115
116 MOFI.reset(new MCObjectFileInfo);
117 MCContext Ctx(AsmInfo.get(), MRI.get(), MOFI.get());
118
119 DisAsm.reset(target->createMCDisassembler(*STI, Ctx));
120 if (!DisAsm) {
121 LOG4CXX_ERROR(logger, "no disassembler for target " << tripleName);
122 return;
123 }
124 RelInfo.reset(
125 target->createMCRelocationInfo(tripleName, Ctx));
126 if (RelInfo) {
127 Symzer.reset(
128 MCObjectSymbolizer::createObjectSymbolizer(Ctx, std::move(RelInfo), o));
129 if (Symzer)
130 DisAsm->setSymbolizer(std::move(Symzer));
131 }
132 RelInfo.release();
133 Symzer.release();
134
135 MIA.reset(target->createMCInstrAnalysis(MII.get()));
136 if (!MIA) {
137 LOG4CXX_ERROR(logger, "no instruction analysis for target " << tripleName);
138 return;
139 }
140
141 int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
142 IP.reset(target->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI));
143 if (!IP) {
144 LOG4CXX_ERROR(logger, "no instruction printer for target " << tripleName);
145 return;
146 }
147
148 IP->setPrintImmHex(llvm::HexStyle::C);
149 IP->setPrintImmHex(true);
150
151 std::unique_ptr<MCObjectDisassembler> OD(
152 new MCObjectDisassembler(*o, *DisAsm, *MIA));
153 Mod.reset(OD->buildModule(false));
154 }
155
156 template <typename ELFT>
157 void LLVMDisassembler<ELFT>::start() {
158 readSymbols();
159 readSections();
160 disassemble();
161 readDynamicSymbols();
162 }
163
164 template <typename ELFT>
165 LLVMDisassembler<ELFT>::~LLVMDisassembler() {}
166
167 template <typename ELFT>
168 Function* LLVMDisassembler<ELFT>::disassembleFunctionAt(uint64_t address, const std::string& name) {
169 Function * function;
170 SectionRef text_section = sections[".text"];
171 uint64_t base_address, size;
172 text_section.getAddress(base_address);
173 text_section.getSize(size);
174
175 if (address < base_address ||
176 address >= base_address + size) {
177 return NULL;
178 }
179
180 if (NULL == (function = manager->getFunction(address))) {
181
182 if (name == "") {
183 std::stringstream s;
184 s << "<Unnamed 0x" << std::hex << address << ">";
185 function = manager->newFunction(address);
186 function->setName(s.str());
187 } else {
188 function = manager->newFunction(address);
189 function->setName(name);
190 }
191 disassembleFunction(function);
192 manager->finishFunction(function);
193 }
194
195 return function;
196 }
197
198 template <typename ELFT>
199 void LLVMDisassembler<ELFT>::disassembleFunction(Function* function) {
200 std::stack<BasicBlock*> remaining_blocks;
201 /* TODO:
202 * Do all blocks get added properly? We should take care to remove
203 * the other ones at the end of the function!
204 */
205 std::map<uint64_t, BasicBlock*> new_blocks;
206 SectionRef text_section = sections[".text"];
207 StringRef bytes;
208 text_section.getContents(bytes);
209 StringRefMemoryObject ref(bytes);
210
211 LOG4CXX_DEBUG(logger, "Handling function " << function->getName());
212
213 BasicBlock * block = manager->newBasicBlock(function->getStartAddress());
214 remaining_blocks.push(block);
215 new_blocks.insert(std::make_pair(block->getStartAddress(), block));
216 function->addBasicBlock(block);
217
218 while (remaining_blocks.size()) {
219 BasicBlock * current_block = remaining_blocks.top();
220 remaining_blocks.pop();
221
222 LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex
223 << current_block->getStartAddress());
224
225 uint64_t inst_size;
226 uint64_t base_address;
227 text_section.getAddress(base_address);
228 uint64_t current_address = current_block->getStartAddress() - base_address;
229 while(true) {
230 MCInst inst;
231 std::string buf;
232 llvm::raw_string_ostream s(buf);
233
234 if(llvm::MCDisassembler::Success ==
235 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
236 uint64_t jmptarget;
237
238 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
239 jmptarget += base_address;
240 if (!MIA->isIndirectBranch(inst)) {
241 if (MIA->isCall(inst)) {
242 if (NULL == manager->getFunction(jmptarget))
243 disassembleFunctionAt(jmptarget);
244 } else {
245 current_block->setNextBlock(0, jmptarget);
246 if (new_blocks.find(jmptarget) == new_blocks.end()) {
247 BasicBlock * block = manager->newBasicBlock(jmptarget);
248 assert(block);
249 new_blocks.insert(std::make_pair(block->getStartAddress(), block));
250 function->addBasicBlock(block);
251 remaining_blocks.push(block);
252 } else {
253 LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex
254 << current_block->getStartAddress());
255 function->addBasicBlock(new_blocks.find(jmptarget)->second);
256 }
257 if (MIA->isConditionalBranch(inst)) {
258 jmptarget = base_address + current_address + inst_size;
259 current_block->setNextBlock(1, jmptarget);
260 if (new_blocks.find(jmptarget) == new_blocks.end()) {
261 BasicBlock * block = manager->newBasicBlock(jmptarget);
262 assert(block);
263 new_blocks.insert(std::make_pair(block->getStartAddress(), block));
264 function->addBasicBlock(block);
265 remaining_blocks.push(block);
266 } else {
267 LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex
268 << current_block->getStartAddress());
269 function->addBasicBlock(new_blocks.find(jmptarget)->second);
270 }
271 }
272 }
273 }
274 }
275 } else {
276 inst_size = 0;
277 }
278
279
280 if (inst_size == 0 || MIA->isTerminator(inst) || MIA->isBranch(inst)) {
281 current_block->setEndAddress(current_address + base_address + inst_size);
282 LOG4CXX_DEBUG(logger, "Finished Block at " << std::hex <<
283 current_block->getEndAddress());
284 break;
285 }
286 current_address += inst_size;
287 }
288 }
289 splitBlocks(function);
290 LOG4CXX_DEBUG(logger, "Finished function " << function->getName());
291 manager->signal_new_function(function);
292 }
293
294 template <typename ELFT>
295 void LLVMDisassembler<ELFT>::disassemble() {
296 SectionRef text_section = sections[".text"];
297 std::vector<Function*> remaining_functions;
298
299 // Assume all function symbols actually start a real function
300 for (auto x = symbols.begin(); x != symbols.end(); ++x) {
301 uint64_t result;
302 bool contains;
303 SymbolRef::Type symbol_type;
304
305
306 if (text_section.containsSymbol(x->second, contains) || !contains)
307 continue;
308
309 if (x->second.getType(symbol_type)
310 || SymbolRef::ST_Function != symbol_type)
311 continue;
312
313 if (!x->second.getAddress(result)) {
314 Function * fun = manager->newFunction(result);
315 fun->setName(x->first);
316 remaining_functions.push_back(fun);
317 LOG4CXX_DEBUG(logger, "Disasembling " << x->first);
318 }
319 }
320
321 for (Function* function : remaining_functions) {
322 disassembleFunction(function);
323 manager->finishFunction(function);
324 }
325
326 if (binary->isELF()) {
327 const ELFO * elffile = o->getELFFile();
328 const typename ELFO::Elf_Ehdr * header = elffile->getHeader();
329
330 _entryAddress = header->e_entry;
331 LOG4CXX_DEBUG(logger, "Adding entryAddress at: " << std::hex << _entryAddress);
332 std::stringstream s;
333 s << "<_start 0x" << std::hex << _entryAddress << ">";
334
335 disassembleFunctionAt(_entryAddress, s.str());
336 }
337
338 if (!manager->hasFunctions()) {
339 uint64_t text_entry;
340 text_section.getAddress(text_entry);
341 LOG4CXX_INFO(logger, "No Symbols found, starting at the beginning of the text segment");
342 disassembleFunctionAt(text_entry);
343 }
344 }
345
346 template <typename ELFT>
347 void LLVMDisassembler<ELFT>::splitBlocks(Function* function) {
348 SectionRef text_section = sections[".text"];
349 StringRef bytes;
350 text_section.getContents(bytes);
351 StringRefMemoryObject ref(bytes);
352
353 // Split blocks where jumps are going inside the block
354 for (auto it = function->blocks().begin();
355 it != function->blocks().end();
356 ++it) {
357 BasicBlock * current_block = it->second;
358 uint64_t inst_size;
359 uint64_t base_address;
360 text_section.getAddress(base_address);
361 uint64_t current_address = current_block->getStartAddress() - base_address;
362 while(current_block->getEndAddress() - base_address > current_address) {
363 MCInst inst;
364 std::string buf;
365 llvm::raw_string_ostream s(buf);
366
367 if(llvm::MCDisassembler::Success ==
368 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
369 // See if some other block starts here
370 BasicBlock* other = manager->getBasicBlock(current_address
371 + inst_size
372 + base_address);
373
374 // Special case, other block starts here but we are at the end anyway
375 if (other != NULL) {
376 uint64_t endaddress = current_address + inst_size + base_address;
377 if (endaddress != current_block->getEndAddress()) {
378 LOG4CXX_DEBUG(logger, "Shortening block starting at "
379 << std::hex
380 << current_block->getStartAddress()
381 << " now ending at "
382 << other->getStartAddress());
383 function->addBasicBlock(other);
384 current_block->setEndAddress(endaddress);
385 current_block->setNextBlock(0, other->getStartAddress());
386 current_block->setNextBlock(1, 0);
387 }
388 }
389 } else {
390 inst_size = 1;
391 }
392 current_address += inst_size;
393 }
394 }
395 }
396
397 template <typename ELFT>
398 void LLVMDisassembler<ELFT>::readDynamicSymbols() {
399 const ELFO * elffile = o->getELFFile();
400 for (typename ELFO::Elf_Sym_Iter
401 it = elffile->begin_dynamic_symbols(),
402 end = elffile->end_dynamic_symbols();
403 it != end;
404 ++it) {
405 if (it->getType() == 2) { // Function
406 bool is_default;
407 // TODO: Error handling
408 std::string symbolname = *(elffile->getSymbolName(it));
409 std::string symbolversion = *(elffile->getSymbolVersion(nullptr, &*it, is_default));
410 manager->signal_new_dyn_symbol(symbolname + (is_default? "@@" : "@") + symbolversion);
411 LOG4CXX_DEBUG(logger, "Adding dynamic Symbol " << symbolname << (is_default? "@@" : "@") << symbolversion);
412 }
413 }
414 }
415
416 template <typename ELFT>
417 void LLVMDisassembler<ELFT>::readSymbols() {
418 error_code ec;
419 symbol_iterator si(o->symbol_begin()), se(o->symbol_end());
420 for (; si != se; ++si) {
421 StringRef name;
422 if ((ec = si->getName(name))) {
423 LOG4CXX_ERROR(logger, ec.message());
424 break;
425 }
426 LOG4CXX_DEBUG(logger, "Added symbol " << name.str());
427 symbols.insert(make_pair(name.str(), *si));
428 }
429 }
430
431 template <typename ELFT>
432 void LLVMDisassembler<ELFT>::readSections() {
433 error_code ec;
434 section_iterator i(o->section_begin()), e(o->section_end());
435 for (; i != e; ++i) {
436 StringRef name;
437 if ((ec = i->getName(name))) {
438 LOG4CXX_ERROR(logger, ec.message());
439 break;
440 }
441 LOG4CXX_DEBUG(logger, "Added section " << name.str());
442 sections.insert(make_pair(name.str(), *i));
443 }
444
445 }
446
447 // template <typename ELFT>
448 // void LLVMDisassembler<ELFT>::forEachFunction(std::function<void (uint64_t, Function*)> callback) {
449 // // std::for_each(functions.begin(), functions.end(),
450 // // [&](std::pair<uint64_t, Function*> x) {
451 // // callback(x.first, x.second);
452 // // });
453 // }
454
455 template <typename ELFT>
456 void LLVMDisassembler<ELFT>::printEachInstruction(uint64_t start, uint64_t end,
457 std::function<void (uint8_t*, size_t,
458 const std::string&,
459 const std::string&)> fun) {
460 SectionRef text_section = sections[".text"];
461 uint64_t base_address;
462 text_section.getAddress(base_address);
463 uint64_t current_address = start - base_address;
464
465 StringRef bytes;
466 text_section.getContents(bytes);
467 StringRefMemoryObject ref(bytes);
468
469 while (current_address < end - base_address) {
470 uint64_t inst_size;
471 MCInst inst;
472 std::string buf;
473 llvm::raw_string_ostream s(buf);
474
475 if(llvm::MCDisassembler::Success ==
476 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
477
478 uint8_t bytes[inst_size+2];
479 ref.readBytes(current_address, inst_size, bytes);
480
481 uint64_t jmptarget;
482 std::string ref("");
483 IP->printInst(&inst, s, "");
484 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
485 std::stringstream stream;
486 if (MIA->isCall(inst))
487 stream << "function:";
488 else
489 stream << "block:";
490
491 stream << std::hex << (base_address + jmptarget);
492 ref = stream.str();
493 }
494
495
496 fun(bytes, inst_size, s.str(), ref);
497 } else {
498 LOG4CXX_WARN(logger, "Invalid byte at" << std::hex << current_address + base_address);
499 fun(NULL, 0, "Invalid Byte", "");
500 inst_size = 1;
501 }
502
503 current_address += inst_size;
504 }
505 }