]> git.siccegge.de Git - frida/frida.git/blob - src/disassembler/llvm/LLVMDisassembler.cxx
Split blocks before finishing function
[frida/frida.git] / src / disassembler / llvm / LLVMDisassembler.cxx
1 #include "disassembler/llvm/LLVMDisassembler.hxx"
2 #include "disassembler/llvm/LLVMBasicBlock.hxx"
3 #include "disassembler/llvm/LLVMFunction.hxx"
4
5 #include <stack>
6 #include <algorithm>
7
8 using namespace llvm;
9 using namespace llvm::object;
10 using std::error_code;
11
12 /*
13 * TODO: fallback code falls die Datei kein ELF/PE/COFF/MacO/.. binary
14 * ist sondern z.B. einfach nur Instruktionen oder ein Bootsektor oder
15 * foo
16 */
17 LLVMDisassembler::LLVMDisassembler(const std::string& filename,
18 InformationManager* manager)
19 : Disassembler(filename, manager)
20 , logger(log4cxx::Logger::getLogger("LLVMDisassembler"))
21 , triple("unknown-unknown-unknown")
22 , manager(manager)
23 {
24 LOG4CXX_DEBUG(logger, "Handling file" << filename);
25 auto result = createBinary(filename);
26
27 error_code ec;
28 if ((ec = result.getError())) {
29 LOG4CXX_ERROR(logger, "Failed to load Binary" << ec.message());
30 binary = NULL;
31 return;
32 }
33
34 binary.reset(result.get());
35
36 o = dyn_cast<ObjectFile>(binary.get());
37
38 triple.setArch(Triple::ArchType(o->getArch()));
39 std::string tripleName(triple.getTriple());
40
41 LOG4CXX_INFO(logger, "Architecture " << tripleName);
42
43
44 std::string es;
45 target = TargetRegistry::lookupTarget("", triple, es);
46 if (!target) {
47 LOG4CXX_ERROR(logger, es);
48 return;
49 }
50
51 LOG4CXX_INFO(logger, "Target " << target->getName());
52
53 MRI.reset(target->createMCRegInfo(tripleName));
54 if (!MRI) {
55 LOG4CXX_ERROR(logger, "no register info for target " << tripleName);
56 return;
57 }
58
59 // Set up disassembler.
60 AsmInfo.reset(target->createMCAsmInfo(*MRI, tripleName));
61 if (!AsmInfo) {
62 LOG4CXX_ERROR(logger, "no assembly info for target " << tripleName);
63 return;
64 }
65
66 STI.reset(target->createMCSubtargetInfo(tripleName, "", ""));
67 if (!STI) {
68 LOG4CXX_ERROR(logger, "no subtarget info for target " << tripleName);
69 return;
70 }
71
72 MII.reset(target->createMCInstrInfo());
73 if (!MII) {
74 LOG4CXX_ERROR(logger, "no instruction info for target " << tripleName);
75 return;
76 }
77
78 MOFI.reset(new MCObjectFileInfo);
79 MCContext Ctx(AsmInfo.get(), MRI.get(), MOFI.get());
80
81 DisAsm.reset(target->createMCDisassembler(*STI, Ctx));
82 if (!DisAsm) {
83 LOG4CXX_ERROR(logger, "no disassembler for target " << tripleName);
84 return;
85 }
86 RelInfo.reset(
87 target->createMCRelocationInfo(tripleName, Ctx));
88 if (RelInfo) {
89 Symzer.reset(
90 MCObjectSymbolizer::createObjectSymbolizer(Ctx, std::move(RelInfo), o));
91 if (Symzer)
92 DisAsm->setSymbolizer(std::move(Symzer));
93 }
94 RelInfo.release();
95 Symzer.release();
96
97 MIA.reset(target->createMCInstrAnalysis(MII.get()));
98 if (!MIA) {
99 LOG4CXX_ERROR(logger, "no instruction analysis for target " << tripleName);
100 return;
101 }
102
103 int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
104 IP.reset(target->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI));
105 if (!IP) {
106 LOG4CXX_ERROR(logger, "no instruction printer for target " << tripleName);
107 return;
108 }
109
110 IP->setPrintImmHex(llvm::HexStyle::C);
111 IP->setPrintImmHex(true);
112
113 std::unique_ptr<MCObjectDisassembler> OD(
114 new MCObjectDisassembler(*o, *DisAsm, *MIA));
115 Mod.reset(OD->buildModule(false));
116 }
117
118 void LLVMDisassembler::start() {
119 readSymbols();
120 readSections();
121 disassemble();
122 }
123
124 LLVMDisassembler::~LLVMDisassembler() {
125 std::for_each(functions.begin(), functions.end(),
126 [](std::pair<uint64_t,LLVMFunction*> it) {
127 delete it.second;
128 });
129 std::for_each(blocks.begin(), blocks.end(),
130 [](std::pair<uint64_t, LLVMBasicBlock*> it) {
131 delete it.second;
132 });
133 }
134
135 Function* LLVMDisassembler::disassembleFunctionAt(uint64_t address, const std::string& name) {
136 SectionRef text_section = sections[".text"];
137 uint64_t base_address, size;
138 text_section.getAddress(base_address);
139 text_section.getSize(size);
140
141 if (address < base_address ||
142 address >= base_address + size) {
143 return NULL;
144 }
145
146 if (functions.find(address) != functions.end()) {
147 return functions[address];
148 }
149
150 LLVMFunction * function;
151 if (name == "") {
152 std::stringstream s;
153 s << "<Unnamed 0x" << std::hex << address << ">";
154 function = new LLVMFunction(s.str(), address);
155 } else {
156 function = new LLVMFunction(name, address);
157 }
158 functions.insert(std::make_pair(address, function));
159
160 disassembleFunction(function);
161
162 return function;
163 }
164
165 void LLVMDisassembler::disassembleFunction(LLVMFunction* function) {
166 std::stack<LLVMBasicBlock*> remaining_blocks;
167 SectionRef text_section = sections[".text"];
168 StringRef bytes;
169 text_section.getContents(bytes);
170 StringRefMemoryObject ref(bytes);
171
172 LOG4CXX_DEBUG(logger, "Handling function " << function->getName());
173
174 LLVMBasicBlock * block = new LLVMBasicBlock(function->getStartAddress(), this);
175 remaining_blocks.push(block);
176 blocks.insert(std::make_pair(block->getStartAddress(), block));
177 function->addBasicBlock(block);
178
179 while (remaining_blocks.size()) {
180 LLVMBasicBlock * current_block = remaining_blocks.top();
181 remaining_blocks.pop();
182
183 LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex << current_block->getStartAddress());
184
185 uint64_t inst_size;
186 uint64_t base_address;
187 text_section.getAddress(base_address);
188 uint64_t current_address = current_block->getStartAddress() - base_address;
189 while(true) {
190 MCInst inst;
191 std::string buf;
192 llvm::raw_string_ostream s(buf);
193
194 if(llvm::MCDisassembler::Success ==
195 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
196 uint64_t jmptarget;
197
198 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
199 jmptarget += base_address;
200 if (!MIA->isIndirectBranch(inst)) {
201 if (MIA->isCall(inst)) {
202 if (functions.find(jmptarget) == functions.end()) {
203 disassembleFunctionAt(jmptarget);
204 }
205 } else {
206 current_block->setNextBlock(0, jmptarget);
207 if (blocks.find(jmptarget) == blocks.end()) {
208 LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this);
209 blocks.insert(std::make_pair(block->getStartAddress(), block));
210 function->addBasicBlock(block);
211 remaining_blocks.push(block);
212 }
213 if (MIA->isConditionalBranch(inst)) {
214 jmptarget = base_address + current_address + inst_size;
215 current_block->setNextBlock(1, jmptarget);
216 if (blocks.find(jmptarget) == blocks.end()) {
217 LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this);
218 blocks.insert(std::make_pair(block->getStartAddress(), block));
219 function->addBasicBlock(block);
220 remaining_blocks.push(block);
221 }
222 }
223 }
224 }
225 }
226 } else {
227 inst_size = 0;
228 }
229
230
231 if (inst_size == 0 || MIA->isTerminator(inst) || MIA->isBranch(inst)) {
232 current_block->setEndAddress(current_address + base_address + inst_size);
233 LOG4CXX_DEBUG(logger, "Finished Block at " << std::hex <<
234 current_block->getEndAddress());
235 break;
236 }
237 current_address += inst_size;
238 }
239 }
240 splitBlocks(function);
241 LOG4CXX_DEBUG(logger, "Finished function " << function->getName());
242 manager->signal_new_function(function);
243 }
244
245 void LLVMDisassembler::disassemble() {
246 SectionRef text_section = sections[".text"];
247 std::vector<LLVMFunction*> remaining_functions;
248
249 // Assume all function symbols actually start a real function
250 for (auto x = symbols.begin(); x != symbols.end(); ++x) {
251 uint64_t result;
252 bool contains;
253 SymbolRef::Type symbol_type;
254
255
256 if (text_section.containsSymbol(x->second, contains) || !contains)
257 continue;
258
259 if (x->second.getType(symbol_type)
260 || SymbolRef::ST_Function != symbol_type)
261 continue;
262
263 if (!x->second.getAddress(result)) {
264 LLVMFunction * fun = new LLVMFunction(x->first, result);
265 remaining_functions.push_back(fun);
266 functions.insert(std::make_pair(result, fun));
267 LOG4CXX_DEBUG(logger, "Disasembling " << x->first);
268 }
269 }
270
271 for (LLVMFunction* function : remaining_functions) {
272 disassembleFunction(function);
273 }
274
275 if (binary->isELF()) {
276 bool is64bit = (binary->getData()[4] == 0x02);
277
278 uint64_t entry(0);
279 for (int i(0); i < (is64bit? 8 : 4); ++i) {
280 if (binary->isLittleEndian()) {
281 entry |= (unsigned int)((unsigned char)binary->getData()[0x18 + i]) << 8*i;
282 } else {
283 entry = entry << 8;
284 entry |= (unsigned char)binary->getData()[0x18 + i];
285 }
286 }
287 LOG4CXX_DEBUG(logger, "Adding entry at: " << std::hex << entry);
288 std::stringstream s;
289 s << "<_start 0x" << std::hex << entry << ">";
290
291 disassembleFunctionAt(entry, s.str());
292 }
293
294 if (functions.empty()) {
295 uint64_t text_entry;
296 text_section.getAddress(text_entry);
297 LOG4CXX_INFO(logger, "No Symbols found, starting at the beginning of the text segment");
298 disassembleFunctionAt(text_entry);
299 }
300 }
301
302 void LLVMDisassembler::splitBlocks(LLVMFunction* function) {
303 SectionRef text_section = sections[".text"];
304 StringRef bytes;
305 text_section.getContents(bytes);
306 StringRefMemoryObject ref(bytes);
307
308 // Split blocks where jumps are going inside the block
309 for (auto it = function->blocks().begin();
310 it != function->blocks().end();
311 ++it) {
312 BasicBlock * current_block = it->second;
313 uint64_t inst_size;
314 uint64_t base_address;
315 text_section.getAddress(base_address);
316 uint64_t current_address = current_block->getStartAddress() - base_address;
317 while(current_block->getEndAddress() - base_address > current_address) {
318 MCInst inst;
319 std::string buf;
320 llvm::raw_string_ostream s(buf);
321
322 if(llvm::MCDisassembler::Success ==
323 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
324 // See if some other block starts here
325 auto other = blocks.find(current_address + inst_size + base_address);
326
327 // Special case, other block starts here but we are at the end anyway
328 if (other != blocks.end()) {
329 uint64_t endaddress = current_address + inst_size + base_address;
330 if (endaddress != current_block->getEndAddress()) {
331 LOG4CXX_DEBUG(logger, "Shortening block starting at "
332 << std::hex
333 << current_block->getStartAddress()
334 << " now ending at "
335 << other->first);
336 current_block->setEndAddress(endaddress);
337 current_block->setNextBlock(0, other->first);
338 current_block->setNextBlock(1, 0);
339 }
340 }
341 } else {
342 inst_size = 1;
343 }
344 current_address += inst_size;
345 }
346 }
347 }
348
349 void LLVMDisassembler::readSymbols() {
350 error_code ec;
351 symbol_iterator si(o->symbol_begin()), se(o->symbol_end());
352 for (; si != se; ++si) {
353 StringRef name;
354 if ((ec = si->getName(name))) {
355 LOG4CXX_ERROR(logger, ec.message());
356 break;
357 }
358 LOG4CXX_DEBUG(logger, "Added symbol " << name.str());
359 symbols.insert(make_pair(name.str(), *si));
360 }
361 }
362
363 void LLVMDisassembler::readSections() {
364 error_code ec;
365 section_iterator i(o->section_begin()), e(o->section_end());
366 for (; i != e; ++i) {
367 StringRef name;
368 if ((ec = i->getName(name))) {
369 LOG4CXX_ERROR(logger, ec.message());
370 break;
371 }
372 LOG4CXX_DEBUG(logger, "Added section " << name.str());
373 sections.insert(make_pair(name.str(), *i));
374 }
375
376 }
377
378 void LLVMDisassembler::forEachFunction(std::function<void (uint64_t, Function*)> callback) {
379 std::for_each(functions.begin(), functions.end(),
380 [&](std::pair<uint64_t, LLVMFunction*> x) {
381 callback(x.first, x.second);
382 });
383 }
384
385 void LLVMDisassembler::printEachInstruction(uint64_t start, uint64_t end,
386 std::function<void (uint8_t*, size_t,
387 const std::string&)> fun) {
388 SectionRef text_section = sections[".text"];
389 uint64_t base_address;
390 text_section.getAddress(base_address);
391 uint64_t current_address = start - base_address;
392
393 StringRef bytes;
394 text_section.getContents(bytes);
395 StringRefMemoryObject ref(bytes);
396
397 while (current_address < end - base_address) {
398 uint64_t inst_size;
399 MCInst inst;
400 std::string buf;
401 llvm::raw_string_ostream s(buf);
402
403 if(llvm::MCDisassembler::Success ==
404 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
405
406 uint8_t bytes[inst_size+2];
407 ref.readBytes(current_address, inst_size, bytes);
408
409 uint64_t jmptarget;
410 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
411 std::stringstream stream;
412 stream << std::hex << (base_address + jmptarget);
413 IP->printInst(&inst, s, stream.str());
414 } else
415 IP->printInst(&inst, s, "");
416
417 fun(bytes, inst_size, s.str());
418 } else {
419 LOG4CXX_WARN(logger, "Invalid byte at" << std::hex << current_address + base_address);
420 fun(NULL, 0, "Invalid Byte");
421 inst_size = 1;
422 }
423
424 current_address += inst_size;
425 }
426 }