]> git.siccegge.de Git - frida/frida.git/blob - src/disassembler/llvm/LLVMDisassembler.cxx
6b3402eb0b2414d56c4b415f7467e3c29d7e33cd
[frida/frida.git] / src / disassembler / llvm / LLVMDisassembler.cxx
1 #include "disassembler/llvm/LLVMDisassembler.hxx"
2 #include "disassembler/llvm/LLVMBasicBlock.hxx"
3 #include "disassembler/llvm/LLVMFunction.hxx"
4
5 #include <stack>
6 #include <algorithm>
7
8 using namespace llvm;
9 using namespace llvm::object;
10 using std::error_code;
11
12 /*
13 * TODO: fallback code falls die Datei kein ELF/PE/COFF/MacO/.. binary
14 * ist sondern z.B. einfach nur Instruktionen oder ein Bootsektor oder
15 * foo
16 */
17 LLVMDisassembler::LLVMDisassembler(const std::string& filename)
18 : Disassembler(filename)
19 , logger(log4cxx::Logger::getLogger("LLVMDisassembler"))
20 , triple("unknown-unknown-unknown")
21 {
22 LOG4CXX_DEBUG(logger, "Handling file" << filename);
23 auto result = createBinary(filename);
24
25 error_code ec;
26 if ((ec = result.getError())) {
27 LOG4CXX_ERROR(logger, "Failed to load Binary" << ec.message());
28 binary = NULL;
29 return;
30 }
31
32 binary.reset(result.get());
33
34 o = dyn_cast<ObjectFile>(binary.get());
35
36 triple.setArch(Triple::ArchType(o->getArch()));
37 std::string tripleName(triple.getTriple());
38
39 LOG4CXX_INFO(logger, "Architecture " << tripleName);
40
41
42 std::string es;
43 target = TargetRegistry::lookupTarget("", triple, es);
44 if (!target) {
45 LOG4CXX_ERROR(logger, es);
46 return;
47 }
48
49 LOG4CXX_INFO(logger, "Target " << target->getName());
50
51 MRI.reset(target->createMCRegInfo(tripleName));
52 if (!MRI) {
53 LOG4CXX_ERROR(logger, "no register info for target " << tripleName);
54 return;
55 }
56
57 // Set up disassembler.
58 AsmInfo.reset(target->createMCAsmInfo(*MRI, tripleName));
59 if (!AsmInfo) {
60 LOG4CXX_ERROR(logger, "no assembly info for target " << tripleName);
61 return;
62 }
63
64 STI.reset(target->createMCSubtargetInfo(tripleName, "", ""));
65 if (!STI) {
66 LOG4CXX_ERROR(logger, "no subtarget info for target " << tripleName);
67 return;
68 }
69
70 MII.reset(target->createMCInstrInfo());
71 if (!MII) {
72 LOG4CXX_ERROR(logger, "no instruction info for target " << tripleName);
73 return;
74 }
75
76 MOFI.reset(new MCObjectFileInfo);
77 MCContext Ctx(AsmInfo.get(), MRI.get(), MOFI.get());
78
79 DisAsm.reset(target->createMCDisassembler(*STI, Ctx));
80 if (!DisAsm) {
81 LOG4CXX_ERROR(logger, "no disassembler for target " << tripleName);
82 return;
83 }
84 RelInfo.reset(
85 target->createMCRelocationInfo(tripleName, Ctx));
86 if (RelInfo) {
87 Symzer.reset(
88 MCObjectSymbolizer::createObjectSymbolizer(Ctx, std::move(RelInfo), o));
89 if (Symzer)
90 DisAsm->setSymbolizer(std::move(Symzer));
91 }
92 RelInfo.release();
93 Symzer.release();
94
95 MIA.reset(target->createMCInstrAnalysis(MII.get()));
96 if (!MIA) {
97 LOG4CXX_ERROR(logger, "no instruction analysis for target " << tripleName);
98 return;
99 }
100
101 int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
102 IP.reset(target->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI));
103 if (!IP) {
104 LOG4CXX_ERROR(logger, "no instruction printer for target " << tripleName);
105 return;
106 }
107
108 IP->setPrintImmHex(llvm::HexStyle::C);
109 IP->setPrintImmHex(true);
110
111 std::unique_ptr<MCObjectDisassembler> OD(
112 new MCObjectDisassembler(*o, *DisAsm, *MIA));
113 Mod.reset(OD->buildModule(false));
114
115 readSymbols();
116 readSections();
117 disassemble();
118 }
119
120 LLVMDisassembler::~LLVMDisassembler() {
121 std::for_each(functions.begin(), functions.end(),
122 [](std::pair<uint64_t,LLVMFunction*> it) {
123 delete it.second;
124 });
125 std::for_each(blocks.begin(), blocks.end(),
126 [](std::pair<uint64_t, LLVMBasicBlock*> it) {
127 delete it.second;
128 });
129 }
130
131 Function* LLVMDisassembler::disassembleFunctionAt(uint64_t address, const std::string& name) {
132 SectionRef text_section = sections[".text"];
133 uint64_t base_address, size;
134 text_section.getAddress(base_address);
135 text_section.getSize(size);
136
137 if (address < base_address ||
138 address >= base_address + size) {
139 return NULL;
140 }
141
142 if (functions.find(address) != functions.end()) {
143 return functions[address];
144 }
145
146 LLVMFunction * function;
147 if (name == "") {
148 std::stringstream s;
149 s << "<Unnamed 0x" << std::hex << address << ">";
150 function = new LLVMFunction(s.str(), address);
151 } else {
152 function = new LLVMFunction(name, address);
153 }
154 functions.insert(std::make_pair(address, function));
155
156 disassembleFunction(function);
157
158 return function;
159 }
160
161 void LLVMDisassembler::disassembleFunction(LLVMFunction* function) {
162 std::stack<LLVMBasicBlock*> remaining_blocks;
163 SectionRef text_section = sections[".text"];
164 StringRef bytes;
165 text_section.getContents(bytes);
166 StringRefMemoryObject ref(bytes);
167
168 LOG4CXX_DEBUG(logger, "Handling function " << function->getName());
169
170 LLVMBasicBlock * block = new LLVMBasicBlock(function->getStartAddress(), this);
171 remaining_blocks.push(block);
172 blocks.insert(std::make_pair(block->getStartAddress(), block));
173
174 while (remaining_blocks.size()) {
175 LLVMBasicBlock * current_block = remaining_blocks.top();
176 remaining_blocks.pop();
177
178 LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex << current_block->getStartAddress());
179
180 uint64_t inst_size;
181 uint64_t base_address;
182 text_section.getAddress(base_address);
183 uint64_t current_address = current_block->getStartAddress() - base_address;
184 while(true) {
185 MCInst inst;
186 std::string buf;
187 llvm::raw_string_ostream s(buf);
188
189 if(llvm::MCDisassembler::Success ==
190 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
191 uint64_t jmptarget;
192
193 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
194 jmptarget += base_address;
195 if (!MIA->isIndirectBranch(inst)) {
196 if (MIA->isCall(inst)) {
197 if (functions.find(jmptarget) == functions.end()) {
198 disassembleFunctionAt(jmptarget);
199 }
200 } else {
201 current_block->setNextBlock(0, jmptarget);
202 if (blocks.find(jmptarget) == blocks.end()) {
203 LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this);
204 blocks.insert(std::make_pair(block->getStartAddress(), block));
205 remaining_blocks.push(block);
206 }
207 if (MIA->isConditionalBranch(inst)) {
208 jmptarget = base_address + current_address + inst_size;
209 current_block->setNextBlock(1, jmptarget);
210 if (blocks.find(jmptarget) == blocks.end()) {
211 LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this);
212 blocks.insert(std::make_pair(block->getStartAddress(), block));
213 remaining_blocks.push(block);
214 }
215 }
216 }
217 }
218 }
219 } else {
220 inst_size = 0;
221 }
222
223
224 if (inst_size == 0 || MIA->isTerminator(inst) || MIA->isBranch(inst)) {
225 current_block->setEndAddress(current_address + base_address + inst_size);
226 LOG4CXX_DEBUG(logger, "Finished Block at " << std::hex <<
227 current_block->getEndAddress());
228 break;
229 }
230 current_address += inst_size;
231 }
232 }
233 LOG4CXX_DEBUG(logger, "Finished function " << function->getName());
234 }
235
236 void LLVMDisassembler::disassemble() {
237 SectionRef text_section = sections[".text"];
238 std::vector<LLVMFunction*> remaining_functions;
239
240 // Assume all function symbols actually start a real function
241 for (auto x = symbols.begin(); x != symbols.end(); ++x) {
242 uint64_t result;
243 bool contains;
244 SymbolRef::Type symbol_type;
245
246
247 if (text_section.containsSymbol(x->second, contains) || !contains)
248 continue;
249
250 if (x->second.getType(symbol_type)
251 || SymbolRef::ST_Function != symbol_type)
252 continue;
253
254 if (!x->second.getAddress(result)) {
255 LLVMFunction * fun = new LLVMFunction(x->first, result);
256 remaining_functions.push_back(fun);
257 functions.insert(std::make_pair(result, fun));
258 LOG4CXX_DEBUG(logger, "Disasembling " << x->first);
259 }
260 }
261
262 for (LLVMFunction* function : remaining_functions) {
263 disassembleFunction(function);
264 }
265
266 if (binary->isELF()) {
267 bool is64bit = (binary->getData()[4] == 0x02);
268
269 uint64_t entry(0);
270 for (int i(0); i < (is64bit? 8 : 4); ++i) {
271 if (binary->isLittleEndian()) {
272 entry |= (unsigned int)((unsigned char)binary->getData()[0x18 + i]) << 8*i;
273 } else {
274 entry = entry << 8;
275 entry |= (unsigned char)binary->getData()[0x18 + i];
276 }
277 }
278 LOG4CXX_DEBUG(logger, "Adding entry at: " << std::hex << entry);
279 std::stringstream s;
280 s << "<_start 0x" << std::hex << entry << ">";
281
282 disassembleFunctionAt(entry, s.str());
283 }
284
285 if (functions.empty()) {
286 uint64_t text_entry;
287 text_section.getAddress(text_entry);
288 LOG4CXX_INFO(logger, "No Symbols found, starting at the beginning of the text segment");
289 disassembleFunctionAt(text_entry);
290 }
291
292 splitBlocks();
293 }
294
295 void LLVMDisassembler::splitBlocks() {
296 SectionRef text_section = sections[".text"];
297 StringRef bytes;
298 text_section.getContents(bytes);
299 StringRefMemoryObject ref(bytes);
300
301 // Split blocks where jumps are going inside the block
302 for (auto it = blocks.begin(); it != blocks.end(); ++it) {
303 LLVMBasicBlock * current_block = it->second;
304 uint64_t inst_size;
305 uint64_t base_address;
306 text_section.getAddress(base_address);
307 uint64_t current_address = current_block->getStartAddress() - base_address;
308 while(current_block->getEndAddress() - base_address > current_address) {
309 MCInst inst;
310 std::string buf;
311 llvm::raw_string_ostream s(buf);
312
313 if(llvm::MCDisassembler::Success ==
314 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
315 auto other = blocks.find(current_address + inst_size + base_address);
316
317 if (other != blocks.end()) {
318 uint64_t endaddress = current_address + inst_size + base_address;
319 if (endaddress != current_block->getEndAddress()) {
320 LOG4CXX_DEBUG(logger, "Shortening block starting at "
321 << std::hex
322 << current_block->getStartAddress()
323 << " now ending at "
324 << other->first);
325 current_block->setEndAddress(endaddress);
326 current_block->setNextBlock(0, other->first);
327 current_block->setNextBlock(1, 0);
328 }
329 }
330 } else {
331 inst_size = 1;
332 }
333 current_address += inst_size;
334 }
335 }
336 }
337
338 void LLVMDisassembler::readSymbols() {
339 error_code ec;
340 symbol_iterator si(o->symbol_begin()), se(o->symbol_end());
341 for (; si != se; ++si) {
342 StringRef name;
343 if ((ec = si->getName(name))) {
344 LOG4CXX_ERROR(logger, ec.message());
345 break;
346 }
347 LOG4CXX_DEBUG(logger, "Added symbol " << name.str());
348 symbols.insert(make_pair(name.str(), *si));
349 }
350 }
351
352 void LLVMDisassembler::readSections() {
353 error_code ec;
354 section_iterator i(o->section_begin()), e(o->section_end());
355 for (; i != e; ++i) {
356 StringRef name;
357 if ((ec = i->getName(name))) {
358 LOG4CXX_ERROR(logger, ec.message());
359 break;
360 }
361 LOG4CXX_DEBUG(logger, "Added section " << name.str());
362 sections.insert(make_pair(name.str(), *i));
363 }
364
365 }
366
367 void LLVMDisassembler::forEachFunction(std::function<void (uint64_t, Function*)> callback) {
368 std::for_each(functions.begin(), functions.end(),
369 [&](std::pair<uint64_t, LLVMFunction*> x) {
370 callback(x.first, x.second);
371 });
372 }
373
374 void LLVMDisassembler::printEachInstruction(uint64_t start, uint64_t end,
375 std::function<void (uint8_t*, size_t, const std::string&)> fun) {
376 SectionRef text_section = sections[".text"];
377 uint64_t base_address;
378 text_section.getAddress(base_address);
379 uint64_t current_address = start - base_address;
380
381 StringRef bytes;
382 text_section.getContents(bytes);
383 StringRefMemoryObject ref(bytes);
384
385 while (current_address < end - base_address) {
386 uint64_t inst_size;
387 MCInst inst;
388 std::string buf;
389 llvm::raw_string_ostream s(buf);
390
391 if(llvm::MCDisassembler::Success ==
392 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
393
394 uint8_t bytes[inst_size+2];
395 ref.readBytes(current_address, inst_size, bytes);
396
397 uint64_t jmptarget;
398 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
399 std::stringstream stream;
400 stream << std::hex << (base_address + jmptarget);
401 IP->printInst(&inst, s, stream.str());
402 } else
403 IP->printInst(&inst, s, "");
404
405 fun(bytes, inst_size, s.str());
406 } else {
407 LOG4CXX_WARN(logger, "Invalid byte at" << std::hex << current_address + base_address);
408 fun(NULL, 0, "Invalid Byte");
409 inst_size = 1;
410 }
411
412 current_address += inst_size;
413 }
414 }