]> git.siccegge.de Git - frida/frida.git/blob - src/disassembler/llvm/LLVMDisassembler.cxx
Get entry address from llvm
[frida/frida.git] / src / disassembler / llvm / LLVMDisassembler.cxx
1 #include "disassembler/llvm/LLVMDisassembler.hxx"
2 #include "disassembler/llvm/LLVMBasicBlock.hxx"
3 #include "disassembler/llvm/LLVMFunction.hxx"
4
5 #include <stack>
6 #include <algorithm>
7
8 using namespace llvm;
9 using namespace llvm::object;
10 using std::error_code;
11
12 /*
13 *
14 */
15 Disassembler * createLLVMDisassembler(const std::string& filename, InformationManager* manager) {
16 if (filename == "")
17 return NULL;
18
19 std::unique_ptr<Binary> o;
20 o.reset(createBinary(filename).get());
21 Binary * op = o.release();
22
23 // ELFType<endian, maxalign, 64bit>
24 if (ELF32LEObjectFile * object = dyn_cast<ELF32LEObjectFile>(op)) {
25 return new LLVMDisassembler<ELFType<support::little, 2, false>>(filename, manager, object);
26 }
27 if (ELF64LEObjectFile * object = dyn_cast<ELF64LEObjectFile>(op)) {
28 return new LLVMDisassembler<ELFType<support::little, 2, true>>(filename, manager, object);
29 }
30 if (ELF32BEObjectFile * object = dyn_cast<ELF32BEObjectFile>(op)) {
31 return new LLVMDisassembler<ELFType<support::big, 2, false>>(filename, manager, object);
32 }
33 if (ELF64BEObjectFile * object = dyn_cast<ELF64BEObjectFile>(op)) {
34 return new LLVMDisassembler<ELFType<support::big, 2, true>>(filename, manager, object);
35 }
36
37 return NULL;
38 }
39
/*
 * TODO: fallback code for the case where the file is not an
 * ELF/PE/COFF/Mach-O/... binary but, for example, just raw instructions,
 * a boot sector or something similar
 */
45 template <typename ELFT>
46 LLVMDisassembler<ELFT>::LLVMDisassembler(const std::string& filename,
47 InformationManager* manager,
48 ELFObjectFile<ELFT>* file)
49 : Disassembler(filename, manager)
50 , logger(log4cxx::Logger::getLogger("LLVMDisassembler"))
51 , triple("unknown-unknown-unknown")
52 , manager(manager)
53 {
54 LOG4CXX_DEBUG(logger, "Handling file" << filename);
55
56 if (!file) {
57 auto result = createBinary(filename);
58
59 error_code ec;
60 if ((ec = result.getError())) {
61 LOG4CXX_ERROR(logger, "Failed to load Binary" << ec.message());
62 binary = NULL;
63 return;
64 }
65
66 binary.reset(result.get());
67
68 o = dyn_cast<ELFObjectFile<ELFT>>(binary.get());
69 } else {
70 o = file;
71 binary.reset(file);
72 }
73
74 triple.setArch(Triple::ArchType(o->getArch()));
75 std::string tripleName(triple.getTriple());
76
77 LOG4CXX_INFO(logger, "Architecture " << tripleName);
78
79
80 std::string es;
81 target = TargetRegistry::lookupTarget("", triple, es);
82 if (!target) {
83 LOG4CXX_ERROR(logger, es);
84 return;
85 }
86
87 LOG4CXX_INFO(logger, "Target " << target->getName());
88
89 MRI.reset(target->createMCRegInfo(tripleName));
90 if (!MRI) {
91 LOG4CXX_ERROR(logger, "no register info for target " << tripleName);
92 return;
93 }
94
95 // Set up disassembler.
96 AsmInfo.reset(target->createMCAsmInfo(*MRI, tripleName));
97 if (!AsmInfo) {
98 LOG4CXX_ERROR(logger, "no assembly info for target " << tripleName);
99 return;
100 }
101
102 STI.reset(target->createMCSubtargetInfo(tripleName, "", ""));
103 if (!STI) {
104 LOG4CXX_ERROR(logger, "no subtarget info for target " << tripleName);
105 return;
106 }
107
108 MII.reset(target->createMCInstrInfo());
109 if (!MII) {
110 LOG4CXX_ERROR(logger, "no instruction info for target " << tripleName);
111 return;
112 }
113
114 MOFI.reset(new MCObjectFileInfo);
115 MCContext Ctx(AsmInfo.get(), MRI.get(), MOFI.get());
116
117 DisAsm.reset(target->createMCDisassembler(*STI, Ctx));
118 if (!DisAsm) {
119 LOG4CXX_ERROR(logger, "no disassembler for target " << tripleName);
120 return;
121 }
122 RelInfo.reset(
123 target->createMCRelocationInfo(tripleName, Ctx));
124 if (RelInfo) {
125 Symzer.reset(
126 MCObjectSymbolizer::createObjectSymbolizer(Ctx, std::move(RelInfo), o));
127 if (Symzer)
128 DisAsm->setSymbolizer(std::move(Symzer));
129 }
130 RelInfo.release();
131 Symzer.release();
132
133 MIA.reset(target->createMCInstrAnalysis(MII.get()));
134 if (!MIA) {
135 LOG4CXX_ERROR(logger, "no instruction analysis for target " << tripleName);
136 return;
137 }
138
139 int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
140 IP.reset(target->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI));
141 if (!IP) {
142 LOG4CXX_ERROR(logger, "no instruction printer for target " << tripleName);
143 return;
144 }
145
146 IP->setPrintImmHex(llvm::HexStyle::C);
147 IP->setPrintImmHex(true);
148
149 std::unique_ptr<MCObjectDisassembler> OD(
150 new MCObjectDisassembler(*o, *DisAsm, *MIA));
151 Mod.reset(OD->buildModule(false));
152 }
153
154 template <typename ELFT>
155 void LLVMDisassembler<ELFT>::start() {
156 readSymbols();
157 readSections();
158 disassemble();
159 }
160
161 template <typename ELFT>
162 LLVMDisassembler<ELFT>::~LLVMDisassembler() {
163 std::for_each(functions.begin(), functions.end(),
164 [](std::pair<uint64_t,LLVMFunction*> it) {
165 delete it.second;
166 });
167 std::for_each(blocks.begin(), blocks.end(),
168 [](std::pair<uint64_t, LLVMBasicBlock*> it) {
169 delete it.second;
170 });
171 }
172
173 template <typename ELFT>
174 Function* LLVMDisassembler<ELFT>::disassembleFunctionAt(uint64_t address, const std::string& name) {
175 SectionRef text_section = sections[".text"];
176 uint64_t base_address, size;
177 text_section.getAddress(base_address);
178 text_section.getSize(size);
179
180 if (address < base_address ||
181 address >= base_address + size) {
182 return NULL;
183 }
184
185 if (functions.find(address) != functions.end()) {
186 return functions[address];
187 }
188
189 LLVMFunction * function;
190 if (name == "") {
191 std::stringstream s;
192 s << "<Unnamed 0x" << std::hex << address << ">";
193 function = new LLVMFunction(s.str(), address);
194 } else {
195 function = new LLVMFunction(name, address);
196 }
197 functions.insert(std::make_pair(address, function));
198
199 disassembleFunction(function);
200
201 return function;
202 }
203
204 template <typename ELFT>
205 void LLVMDisassembler<ELFT>::disassembleFunction(LLVMFunction* function) {
206 std::stack<LLVMBasicBlock*> remaining_blocks;
207 SectionRef text_section = sections[".text"];
208 StringRef bytes;
209 text_section.getContents(bytes);
210 StringRefMemoryObject ref(bytes);
211
212 LOG4CXX_DEBUG(logger, "Handling function " << function->getName());
213
214 LLVMBasicBlock * block = new LLVMBasicBlock(function->getStartAddress(), this);
215 remaining_blocks.push(block);
216 blocks.insert(std::make_pair(block->getStartAddress(), block));
217 function->addBasicBlock(block);
218
219 while (remaining_blocks.size()) {
220 LLVMBasicBlock * current_block = remaining_blocks.top();
221 remaining_blocks.pop();
222
223 LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex << current_block->getStartAddress());
224
225 uint64_t inst_size;
226 uint64_t base_address;
227 text_section.getAddress(base_address);
228 uint64_t current_address = current_block->getStartAddress() - base_address;
229 while(true) {
230 MCInst inst;
231 std::string buf;
232 llvm::raw_string_ostream s(buf);
233
234 if(llvm::MCDisassembler::Success ==
235 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
236 uint64_t jmptarget;
237
238 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
239 jmptarget += base_address;
240 if (!MIA->isIndirectBranch(inst)) {
241 if (MIA->isCall(inst)) {
242 if (functions.find(jmptarget) == functions.end()) {
243 disassembleFunctionAt(jmptarget);
244 }
245 } else {
246 current_block->setNextBlock(0, jmptarget);
247 if (blocks.find(jmptarget) == blocks.end()) {
248 LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this);
249 blocks.insert(std::make_pair(block->getStartAddress(), block));
250 function->addBasicBlock(block);
251 remaining_blocks.push(block);
252 } else {
253 LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex << current_block->getStartAddress());
254 function->addBasicBlock(blocks.find(jmptarget)->second);
255 }
256 if (MIA->isConditionalBranch(inst)) {
257 jmptarget = base_address + current_address + inst_size;
258 current_block->setNextBlock(1, jmptarget);
259 if (blocks.find(jmptarget) == blocks.end()) {
260 LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this);
261 blocks.insert(std::make_pair(block->getStartAddress(), block));
262 function->addBasicBlock(block);
263 remaining_blocks.push(block);
264 } else {
265 LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex << current_block->getStartAddress());
266 function->addBasicBlock(blocks.find(jmptarget)->second);
267 }
268 }
269 }
270 }
271 }
272 } else {
273 inst_size = 0;
274 }
275
276
277 if (inst_size == 0 || MIA->isTerminator(inst) || MIA->isBranch(inst)) {
278 current_block->setEndAddress(current_address + base_address + inst_size);
279 LOG4CXX_DEBUG(logger, "Finished Block at " << std::hex <<
280 current_block->getEndAddress());
281 break;
282 }
283 current_address += inst_size;
284 }
285 }
286 splitBlocks(function);
287 LOG4CXX_DEBUG(logger, "Finished function " << function->getName());
288 manager->signal_new_function(function);
289 }
290
291 template <typename ELFT>
292 void LLVMDisassembler<ELFT>::disassemble() {
293 SectionRef text_section = sections[".text"];
294 std::vector<LLVMFunction*> remaining_functions;
295
296 // Assume all function symbols actually start a real function
297 for (auto x = symbols.begin(); x != symbols.end(); ++x) {
298 uint64_t result;
299 bool contains;
300 SymbolRef::Type symbol_type;
301
302
303 if (text_section.containsSymbol(x->second, contains) || !contains)
304 continue;
305
306 if (x->second.getType(symbol_type)
307 || SymbolRef::ST_Function != symbol_type)
308 continue;
309
310 if (!x->second.getAddress(result)) {
311 LLVMFunction * fun = new LLVMFunction(x->first, result);
312 remaining_functions.push_back(fun);
313 functions.insert(std::make_pair(result, fun));
314 LOG4CXX_DEBUG(logger, "Disasembling " << x->first);
315 }
316 }
317
318 for (LLVMFunction* function : remaining_functions) {
319 disassembleFunction(function);
320 }
321
322 if (binary->isELF()) {
323 typedef ELFFile<ELFT> ELFO;
324 const ELFO * elffile = o->getELFFile();
325 const typename ELFO::Elf_Ehdr * header = elffile->getHeader();
326
327 _entryAddress = header->e_entry;
328 LOG4CXX_DEBUG(logger, "Adding entryAddress at: " << std::hex << _entryAddress);
329 std::stringstream s;
330 s << "<_start 0x" << std::hex << _entryAddress << ">";
331
332 disassembleFunctionAt(_entryAddress, s.str());
333 }
334
335 if (functions.empty()) {
336 uint64_t text_entry;
337 text_section.getAddress(text_entry);
338 LOG4CXX_INFO(logger, "No Symbols found, starting at the beginning of the text segment");
339 disassembleFunctionAt(text_entry);
340 }
341 }
342
343 template <typename ELFT>
344 void LLVMDisassembler<ELFT>::splitBlocks(LLVMFunction* function) {
345 SectionRef text_section = sections[".text"];
346 StringRef bytes;
347 text_section.getContents(bytes);
348 StringRefMemoryObject ref(bytes);
349
350 // Split blocks where jumps are going inside the block
351 for (auto it = function->blocks().begin();
352 it != function->blocks().end();
353 ++it) {
354 BasicBlock * current_block = it->second;
355 uint64_t inst_size;
356 uint64_t base_address;
357 text_section.getAddress(base_address);
358 uint64_t current_address = current_block->getStartAddress() - base_address;
359 while(current_block->getEndAddress() - base_address > current_address) {
360 MCInst inst;
361 std::string buf;
362 llvm::raw_string_ostream s(buf);
363
364 if(llvm::MCDisassembler::Success ==
365 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
366 // See if some other block starts here
367 auto other = blocks.find(current_address + inst_size + base_address);
368
369 // Special case, other block starts here but we are at the end anyway
370 if (other != blocks.end()) {
371 uint64_t endaddress = current_address + inst_size + base_address;
372 if (endaddress != current_block->getEndAddress()) {
373 LOG4CXX_DEBUG(logger, "Shortening block starting at "
374 << std::hex
375 << current_block->getStartAddress()
376 << " now ending at "
377 << other->first);
378 function->addBasicBlock(other->second);
379 current_block->setEndAddress(endaddress);
380 current_block->setNextBlock(0, other->first);
381 current_block->setNextBlock(1, 0);
382 }
383 }
384 } else {
385 inst_size = 1;
386 }
387 current_address += inst_size;
388 }
389 }
390 }
391
392 template <typename ELFT>
393 void LLVMDisassembler<ELFT>::readSymbols() {
394 error_code ec;
395 symbol_iterator si(o->symbol_begin()), se(o->symbol_end());
396 for (; si != se; ++si) {
397 StringRef name;
398 if ((ec = si->getName(name))) {
399 LOG4CXX_ERROR(logger, ec.message());
400 break;
401 }
402 LOG4CXX_DEBUG(logger, "Added symbol " << name.str());
403 symbols.insert(make_pair(name.str(), *si));
404 }
405 }
406
407 template <typename ELFT>
408 void LLVMDisassembler<ELFT>::readSections() {
409 error_code ec;
410 section_iterator i(o->section_begin()), e(o->section_end());
411 for (; i != e; ++i) {
412 StringRef name;
413 if ((ec = i->getName(name))) {
414 LOG4CXX_ERROR(logger, ec.message());
415 break;
416 }
417 LOG4CXX_DEBUG(logger, "Added section " << name.str());
418 sections.insert(make_pair(name.str(), *i));
419 }
420
421 }
422
423 template <typename ELFT>
424 void LLVMDisassembler<ELFT>::forEachFunction(std::function<void (uint64_t, Function*)> callback) {
425 std::for_each(functions.begin(), functions.end(),
426 [&](std::pair<uint64_t, LLVMFunction*> x) {
427 callback(x.first, x.second);
428 });
429 }
430
431 template <typename ELFT>
432 void LLVMDisassembler<ELFT>::printEachInstruction(uint64_t start, uint64_t end,
433 std::function<void (uint8_t*, size_t,
434 const std::string&,
435 const std::string&)> fun) {
436 SectionRef text_section = sections[".text"];
437 uint64_t base_address;
438 text_section.getAddress(base_address);
439 uint64_t current_address = start - base_address;
440
441 StringRef bytes;
442 text_section.getContents(bytes);
443 StringRefMemoryObject ref(bytes);
444
445 while (current_address < end - base_address) {
446 uint64_t inst_size;
447 MCInst inst;
448 std::string buf;
449 llvm::raw_string_ostream s(buf);
450
451 if(llvm::MCDisassembler::Success ==
452 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
453
454 uint8_t bytes[inst_size+2];
455 ref.readBytes(current_address, inst_size, bytes);
456
457 uint64_t jmptarget;
458 std::string ref("");
459 IP->printInst(&inst, s, "");
460 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
461 std::stringstream stream;
462 if (MIA->isCall(inst))
463 stream << "function:";
464 else
465 stream << "block:";
466
467 stream << std::hex << (base_address + jmptarget);
468 ref = stream.str();
469 }
470
471
472 fun(bytes, inst_size, s.str(), ref);
473 } else {
474 LOG4CXX_WARN(logger, "Invalid byte at" << std::hex << current_address + base_address);
475 fun(NULL, 0, "Invalid Byte", "");
476 inst_size = 1;
477 }
478
479 current_address += inst_size;
480 }
481 }