]> git.siccegge.de Git - frida/frida.git/blob - src/disassembler/llvm/LLVMDisassembler.cxx
Logging improvements
[frida/frida.git] / src / disassembler / llvm / LLVMDisassembler.cxx
1 #include "disassembler/llvm/LLVMDisassembler.hxx"
2 #include "disassembler/llvm/LLVMBasicBlock.hxx"
3 #include "disassembler/llvm/LLVMFunction.hxx"
4
5 #include <stack>
6 #include <algorithm>
7
8 using namespace llvm;
9 using namespace llvm::object;
10 using std::error_code;
11
12 /*
13 * TODO: fallback code falls die Datei kein ELF/PE/COFF/MacO/.. binary
14 * ist sondern z.B. einfach nur Instruktionen oder ein Bootsektor oder
15 * foo
16 */
17 LLVMDisassembler::LLVMDisassembler(const std::string& filename,
18 InformationManager* manager)
19 : Disassembler(filename, manager)
20 , logger(log4cxx::Logger::getLogger("LLVMDisassembler"))
21 , triple("unknown-unknown-unknown")
22 , manager(manager)
23 {
24 LOG4CXX_DEBUG(logger, "Handling file" << filename);
25 auto result = createBinary(filename);
26
27 error_code ec;
28 if ((ec = result.getError())) {
29 LOG4CXX_ERROR(logger, "Failed to load Binary" << ec.message());
30 binary = NULL;
31 return;
32 }
33
34 binary.reset(result.get());
35
36 o = dyn_cast<ObjectFile>(binary.get());
37
38 triple.setArch(Triple::ArchType(o->getArch()));
39 std::string tripleName(triple.getTriple());
40
41 LOG4CXX_INFO(logger, "Architecture " << tripleName);
42
43
44 std::string es;
45 target = TargetRegistry::lookupTarget("", triple, es);
46 if (!target) {
47 LOG4CXX_ERROR(logger, es);
48 return;
49 }
50
51 LOG4CXX_INFO(logger, "Target " << target->getName());
52
53 MRI.reset(target->createMCRegInfo(tripleName));
54 if (!MRI) {
55 LOG4CXX_ERROR(logger, "no register info for target " << tripleName);
56 return;
57 }
58
59 // Set up disassembler.
60 AsmInfo.reset(target->createMCAsmInfo(*MRI, tripleName));
61 if (!AsmInfo) {
62 LOG4CXX_ERROR(logger, "no assembly info for target " << tripleName);
63 return;
64 }
65
66 STI.reset(target->createMCSubtargetInfo(tripleName, "", ""));
67 if (!STI) {
68 LOG4CXX_ERROR(logger, "no subtarget info for target " << tripleName);
69 return;
70 }
71
72 MII.reset(target->createMCInstrInfo());
73 if (!MII) {
74 LOG4CXX_ERROR(logger, "no instruction info for target " << tripleName);
75 return;
76 }
77
78 MOFI.reset(new MCObjectFileInfo);
79 MCContext Ctx(AsmInfo.get(), MRI.get(), MOFI.get());
80
81 DisAsm.reset(target->createMCDisassembler(*STI, Ctx));
82 if (!DisAsm) {
83 LOG4CXX_ERROR(logger, "no disassembler for target " << tripleName);
84 return;
85 }
86 RelInfo.reset(
87 target->createMCRelocationInfo(tripleName, Ctx));
88 if (RelInfo) {
89 Symzer.reset(
90 MCObjectSymbolizer::createObjectSymbolizer(Ctx, std::move(RelInfo), o));
91 if (Symzer)
92 DisAsm->setSymbolizer(std::move(Symzer));
93 }
94 RelInfo.release();
95 Symzer.release();
96
97 MIA.reset(target->createMCInstrAnalysis(MII.get()));
98 if (!MIA) {
99 LOG4CXX_ERROR(logger, "no instruction analysis for target " << tripleName);
100 return;
101 }
102
103 int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
104 IP.reset(target->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI));
105 if (!IP) {
106 LOG4CXX_ERROR(logger, "no instruction printer for target " << tripleName);
107 return;
108 }
109
110 IP->setPrintImmHex(llvm::HexStyle::C);
111 IP->setPrintImmHex(true);
112
113 std::unique_ptr<MCObjectDisassembler> OD(
114 new MCObjectDisassembler(*o, *DisAsm, *MIA));
115 Mod.reset(OD->buildModule(false));
116 }
117
118 void LLVMDisassembler::start() {
119 readSymbols();
120 readSections();
121 disassemble();
122 }
123
124 LLVMDisassembler::~LLVMDisassembler() {
125 std::for_each(functions.begin(), functions.end(),
126 [](std::pair<uint64_t,LLVMFunction*> it) {
127 delete it.second;
128 });
129 std::for_each(blocks.begin(), blocks.end(),
130 [](std::pair<uint64_t, LLVMBasicBlock*> it) {
131 delete it.second;
132 });
133 }
134
135 Function* LLVMDisassembler::disassembleFunctionAt(uint64_t address, const std::string& name) {
136 SectionRef text_section = sections[".text"];
137 uint64_t base_address, size;
138 text_section.getAddress(base_address);
139 text_section.getSize(size);
140
141 if (address < base_address ||
142 address >= base_address + size) {
143 return NULL;
144 }
145
146 if (functions.find(address) != functions.end()) {
147 return functions[address];
148 }
149
150 LLVMFunction * function;
151 if (name == "") {
152 std::stringstream s;
153 s << "<Unnamed 0x" << std::hex << address << ">";
154 function = new LLVMFunction(s.str(), address);
155 } else {
156 function = new LLVMFunction(name, address);
157 }
158 functions.insert(std::make_pair(address, function));
159
160 disassembleFunction(function);
161
162 return function;
163 }
164
165 void LLVMDisassembler::disassembleFunction(LLVMFunction* function) {
166 std::stack<LLVMBasicBlock*> remaining_blocks;
167 SectionRef text_section = sections[".text"];
168 StringRef bytes;
169 text_section.getContents(bytes);
170 StringRefMemoryObject ref(bytes);
171
172 LOG4CXX_DEBUG(logger, "Handling function " << function->getName());
173
174 LLVMBasicBlock * block = new LLVMBasicBlock(function->getStartAddress(), this);
175 remaining_blocks.push(block);
176 blocks.insert(std::make_pair(block->getStartAddress(), block));
177 function->addBasicBlock(block);
178
179 while (remaining_blocks.size()) {
180 LLVMBasicBlock * current_block = remaining_blocks.top();
181 remaining_blocks.pop();
182
183 LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex << current_block->getStartAddress());
184
185 uint64_t inst_size;
186 uint64_t base_address;
187 text_section.getAddress(base_address);
188 uint64_t current_address = current_block->getStartAddress() - base_address;
189 while(true) {
190 MCInst inst;
191 std::string buf;
192 llvm::raw_string_ostream s(buf);
193
194 if(llvm::MCDisassembler::Success ==
195 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
196 uint64_t jmptarget;
197
198 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
199 jmptarget += base_address;
200 if (!MIA->isIndirectBranch(inst)) {
201 if (MIA->isCall(inst)) {
202 if (functions.find(jmptarget) == functions.end()) {
203 disassembleFunctionAt(jmptarget);
204 }
205 } else {
206 current_block->setNextBlock(0, jmptarget);
207 if (blocks.find(jmptarget) == blocks.end()) {
208 LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this);
209 blocks.insert(std::make_pair(block->getStartAddress(), block));
210 function->addBasicBlock(block);
211 remaining_blocks.push(block);
212 } else {
213 LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex << current_block->getStartAddress());
214 function->addBasicBlock(blocks.find(jmptarget)->second);
215 }
216 if (MIA->isConditionalBranch(inst)) {
217 jmptarget = base_address + current_address + inst_size;
218 current_block->setNextBlock(1, jmptarget);
219 if (blocks.find(jmptarget) == blocks.end()) {
220 LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this);
221 blocks.insert(std::make_pair(block->getStartAddress(), block));
222 function->addBasicBlock(block);
223 remaining_blocks.push(block);
224 } else {
225 LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex << current_block->getStartAddress());
226 function->addBasicBlock(blocks.find(jmptarget)->second);
227 }
228 }
229 }
230 }
231 }
232 } else {
233 inst_size = 0;
234 }
235
236
237 if (inst_size == 0 || MIA->isTerminator(inst) || MIA->isBranch(inst)) {
238 current_block->setEndAddress(current_address + base_address + inst_size);
239 LOG4CXX_DEBUG(logger, "Finished Block at " << std::hex <<
240 current_block->getEndAddress());
241 break;
242 }
243 current_address += inst_size;
244 }
245 }
246 splitBlocks(function);
247 LOG4CXX_DEBUG(logger, "Finished function " << function->getName());
248 manager->signal_new_function(function);
249 }
250
251 void LLVMDisassembler::disassemble() {
252 SectionRef text_section = sections[".text"];
253 std::vector<LLVMFunction*> remaining_functions;
254
255 // Assume all function symbols actually start a real function
256 for (auto x = symbols.begin(); x != symbols.end(); ++x) {
257 uint64_t result;
258 bool contains;
259 SymbolRef::Type symbol_type;
260
261
262 if (text_section.containsSymbol(x->second, contains) || !contains)
263 continue;
264
265 if (x->second.getType(symbol_type)
266 || SymbolRef::ST_Function != symbol_type)
267 continue;
268
269 if (!x->second.getAddress(result)) {
270 LLVMFunction * fun = new LLVMFunction(x->first, result);
271 remaining_functions.push_back(fun);
272 functions.insert(std::make_pair(result, fun));
273 LOG4CXX_DEBUG(logger, "Disasembling " << x->first);
274 }
275 }
276
277 for (LLVMFunction* function : remaining_functions) {
278 disassembleFunction(function);
279 }
280
281 if (binary->isELF()) {
282 bool is64bit = (binary->getData()[4] == 0x02);
283
284 uint64_t entry(0);
285 for (int i(0); i < (is64bit? 8 : 4); ++i) {
286 if (binary->isLittleEndian()) {
287 entry |= (unsigned int)((unsigned char)binary->getData()[0x18 + i]) << 8*i;
288 } else {
289 entry = entry << 8;
290 entry |= (unsigned char)binary->getData()[0x18 + i];
291 }
292 }
293 LOG4CXX_DEBUG(logger, "Adding entry at: " << std::hex << entry);
294 std::stringstream s;
295 s << "<_start 0x" << std::hex << entry << ">";
296
297 disassembleFunctionAt(entry, s.str());
298 }
299
300 if (functions.empty()) {
301 uint64_t text_entry;
302 text_section.getAddress(text_entry);
303 LOG4CXX_INFO(logger, "No Symbols found, starting at the beginning of the text segment");
304 disassembleFunctionAt(text_entry);
305 }
306 }
307
308 void LLVMDisassembler::splitBlocks(LLVMFunction* function) {
309 SectionRef text_section = sections[".text"];
310 StringRef bytes;
311 text_section.getContents(bytes);
312 StringRefMemoryObject ref(bytes);
313
314 // Split blocks where jumps are going inside the block
315 for (auto it = function->blocks().begin();
316 it != function->blocks().end();
317 ++it) {
318 BasicBlock * current_block = it->second;
319 uint64_t inst_size;
320 uint64_t base_address;
321 text_section.getAddress(base_address);
322 uint64_t current_address = current_block->getStartAddress() - base_address;
323 while(current_block->getEndAddress() - base_address > current_address) {
324 MCInst inst;
325 std::string buf;
326 llvm::raw_string_ostream s(buf);
327
328 if(llvm::MCDisassembler::Success ==
329 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
330 // See if some other block starts here
331 auto other = blocks.find(current_address + inst_size + base_address);
332
333 // Special case, other block starts here but we are at the end anyway
334 if (other != blocks.end()) {
335 uint64_t endaddress = current_address + inst_size + base_address;
336 if (endaddress != current_block->getEndAddress()) {
337 LOG4CXX_DEBUG(logger, "Shortening block starting at "
338 << std::hex
339 << current_block->getStartAddress()
340 << " now ending at "
341 << other->first);
342 function->addBasicBlock(other->second);
343 current_block->setEndAddress(endaddress);
344 current_block->setNextBlock(0, other->first);
345 current_block->setNextBlock(1, 0);
346 }
347 }
348 } else {
349 inst_size = 1;
350 }
351 current_address += inst_size;
352 }
353 }
354 }
355
356 void LLVMDisassembler::readSymbols() {
357 error_code ec;
358 symbol_iterator si(o->symbol_begin()), se(o->symbol_end());
359 for (; si != se; ++si) {
360 StringRef name;
361 if ((ec = si->getName(name))) {
362 LOG4CXX_ERROR(logger, ec.message());
363 break;
364 }
365 LOG4CXX_DEBUG(logger, "Added symbol " << name.str());
366 symbols.insert(make_pair(name.str(), *si));
367 }
368 }
369
370 void LLVMDisassembler::readSections() {
371 error_code ec;
372 section_iterator i(o->section_begin()), e(o->section_end());
373 for (; i != e; ++i) {
374 StringRef name;
375 if ((ec = i->getName(name))) {
376 LOG4CXX_ERROR(logger, ec.message());
377 break;
378 }
379 LOG4CXX_DEBUG(logger, "Added section " << name.str());
380 sections.insert(make_pair(name.str(), *i));
381 }
382
383 }
384
385 void LLVMDisassembler::forEachFunction(std::function<void (uint64_t, Function*)> callback) {
386 std::for_each(functions.begin(), functions.end(),
387 [&](std::pair<uint64_t, LLVMFunction*> x) {
388 callback(x.first, x.second);
389 });
390 }
391
392 void LLVMDisassembler::printEachInstruction(uint64_t start, uint64_t end,
393 std::function<void (uint8_t*, size_t,
394 const std::string&)> fun) {
395 SectionRef text_section = sections[".text"];
396 uint64_t base_address;
397 text_section.getAddress(base_address);
398 uint64_t current_address = start - base_address;
399
400 StringRef bytes;
401 text_section.getContents(bytes);
402 StringRefMemoryObject ref(bytes);
403
404 while (current_address < end - base_address) {
405 uint64_t inst_size;
406 MCInst inst;
407 std::string buf;
408 llvm::raw_string_ostream s(buf);
409
410 if(llvm::MCDisassembler::Success ==
411 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
412
413 uint8_t bytes[inst_size+2];
414 ref.readBytes(current_address, inst_size, bytes);
415
416 uint64_t jmptarget;
417 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
418 std::stringstream stream;
419 stream << std::hex << (base_address + jmptarget);
420 IP->printInst(&inst, s, stream.str());
421 } else
422 IP->printInst(&inst, s, "");
423
424 fun(bytes, inst_size, s.str());
425 } else {
426 LOG4CXX_WARN(logger, "Invalid byte at" << std::hex << current_address + base_address);
427 fun(NULL, 0, "Invalid Byte");
428 inst_size = 1;
429 }
430
431 current_address += inst_size;
432 }
433 }