]> git.siccegge.de Git - frida/frida.git/blob - src/disassembler/llvm/LLVMDisassembler.cxx
Add in an Information Manager
[frida/frida.git] / src / disassembler / llvm / LLVMDisassembler.cxx
1 #include "disassembler/llvm/LLVMDisassembler.hxx"
2 #include "disassembler/llvm/LLVMBasicBlock.hxx"
3 #include "disassembler/llvm/LLVMFunction.hxx"
4
5 #include <stack>
6 #include <algorithm>
7
8 using namespace llvm;
9 using namespace llvm::object;
10 using std::error_code;
11
12 /*
13 * TODO: fallback code falls die Datei kein ELF/PE/COFF/MacO/.. binary
14 * ist sondern z.B. einfach nur Instruktionen oder ein Bootsektor oder
15 * foo
16 */
17 LLVMDisassembler::LLVMDisassembler(const std::string& filename,
18 InformationManager* manager)
19 : Disassembler(filename, manager)
20 , logger(log4cxx::Logger::getLogger("LLVMDisassembler"))
21 , triple("unknown-unknown-unknown")
22 , manager(manager)
23 {
24 LOG4CXX_DEBUG(logger, "Handling file" << filename);
25 auto result = createBinary(filename);
26
27 error_code ec;
28 if ((ec = result.getError())) {
29 LOG4CXX_ERROR(logger, "Failed to load Binary" << ec.message());
30 binary = NULL;
31 return;
32 }
33
34 binary.reset(result.get());
35
36 o = dyn_cast<ObjectFile>(binary.get());
37
38 triple.setArch(Triple::ArchType(o->getArch()));
39 std::string tripleName(triple.getTriple());
40
41 LOG4CXX_INFO(logger, "Architecture " << tripleName);
42
43
44 std::string es;
45 target = TargetRegistry::lookupTarget("", triple, es);
46 if (!target) {
47 LOG4CXX_ERROR(logger, es);
48 return;
49 }
50
51 LOG4CXX_INFO(logger, "Target " << target->getName());
52
53 MRI.reset(target->createMCRegInfo(tripleName));
54 if (!MRI) {
55 LOG4CXX_ERROR(logger, "no register info for target " << tripleName);
56 return;
57 }
58
59 // Set up disassembler.
60 AsmInfo.reset(target->createMCAsmInfo(*MRI, tripleName));
61 if (!AsmInfo) {
62 LOG4CXX_ERROR(logger, "no assembly info for target " << tripleName);
63 return;
64 }
65
66 STI.reset(target->createMCSubtargetInfo(tripleName, "", ""));
67 if (!STI) {
68 LOG4CXX_ERROR(logger, "no subtarget info for target " << tripleName);
69 return;
70 }
71
72 MII.reset(target->createMCInstrInfo());
73 if (!MII) {
74 LOG4CXX_ERROR(logger, "no instruction info for target " << tripleName);
75 return;
76 }
77
78 MOFI.reset(new MCObjectFileInfo);
79 MCContext Ctx(AsmInfo.get(), MRI.get(), MOFI.get());
80
81 DisAsm.reset(target->createMCDisassembler(*STI, Ctx));
82 if (!DisAsm) {
83 LOG4CXX_ERROR(logger, "no disassembler for target " << tripleName);
84 return;
85 }
86 RelInfo.reset(
87 target->createMCRelocationInfo(tripleName, Ctx));
88 if (RelInfo) {
89 Symzer.reset(
90 MCObjectSymbolizer::createObjectSymbolizer(Ctx, std::move(RelInfo), o));
91 if (Symzer)
92 DisAsm->setSymbolizer(std::move(Symzer));
93 }
94 RelInfo.release();
95 Symzer.release();
96
97 MIA.reset(target->createMCInstrAnalysis(MII.get()));
98 if (!MIA) {
99 LOG4CXX_ERROR(logger, "no instruction analysis for target " << tripleName);
100 return;
101 }
102
103 int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
104 IP.reset(target->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI));
105 if (!IP) {
106 LOG4CXX_ERROR(logger, "no instruction printer for target " << tripleName);
107 return;
108 }
109
110 IP->setPrintImmHex(llvm::HexStyle::C);
111 IP->setPrintImmHex(true);
112
113 std::unique_ptr<MCObjectDisassembler> OD(
114 new MCObjectDisassembler(*o, *DisAsm, *MIA));
115 Mod.reset(OD->buildModule(false));
116 }
117
118 void LLVMDisassembler::start() {
119 readSymbols();
120 readSections();
121 disassemble();
122 }
123
124 LLVMDisassembler::~LLVMDisassembler() {
125 std::for_each(functions.begin(), functions.end(),
126 [](std::pair<uint64_t,LLVMFunction*> it) {
127 delete it.second;
128 });
129 std::for_each(blocks.begin(), blocks.end(),
130 [](std::pair<uint64_t, LLVMBasicBlock*> it) {
131 delete it.second;
132 });
133 }
134
135 Function* LLVMDisassembler::disassembleFunctionAt(uint64_t address, const std::string& name) {
136 SectionRef text_section = sections[".text"];
137 uint64_t base_address, size;
138 text_section.getAddress(base_address);
139 text_section.getSize(size);
140
141 if (address < base_address ||
142 address >= base_address + size) {
143 return NULL;
144 }
145
146 if (functions.find(address) != functions.end()) {
147 return functions[address];
148 }
149
150 LLVMFunction * function;
151 if (name == "") {
152 std::stringstream s;
153 s << "<Unnamed 0x" << std::hex << address << ">";
154 function = new LLVMFunction(s.str(), address);
155 } else {
156 function = new LLVMFunction(name, address);
157 }
158 functions.insert(std::make_pair(address, function));
159
160 disassembleFunction(function);
161
162 return function;
163 }
164
165 void LLVMDisassembler::disassembleFunction(LLVMFunction* function) {
166 std::stack<LLVMBasicBlock*> remaining_blocks;
167 SectionRef text_section = sections[".text"];
168 StringRef bytes;
169 text_section.getContents(bytes);
170 StringRefMemoryObject ref(bytes);
171
172 LOG4CXX_DEBUG(logger, "Handling function " << function->getName());
173
174 LLVMBasicBlock * block = new LLVMBasicBlock(function->getStartAddress(), this);
175 remaining_blocks.push(block);
176 blocks.insert(std::make_pair(block->getStartAddress(), block));
177
178 while (remaining_blocks.size()) {
179 LLVMBasicBlock * current_block = remaining_blocks.top();
180 remaining_blocks.pop();
181
182 LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex << current_block->getStartAddress());
183
184 uint64_t inst_size;
185 uint64_t base_address;
186 text_section.getAddress(base_address);
187 uint64_t current_address = current_block->getStartAddress() - base_address;
188 while(true) {
189 MCInst inst;
190 std::string buf;
191 llvm::raw_string_ostream s(buf);
192
193 if(llvm::MCDisassembler::Success ==
194 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
195 uint64_t jmptarget;
196
197 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
198 jmptarget += base_address;
199 if (!MIA->isIndirectBranch(inst)) {
200 if (MIA->isCall(inst)) {
201 if (functions.find(jmptarget) == functions.end()) {
202 disassembleFunctionAt(jmptarget);
203 }
204 } else {
205 current_block->setNextBlock(0, jmptarget);
206 if (blocks.find(jmptarget) == blocks.end()) {
207 LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this);
208 blocks.insert(std::make_pair(block->getStartAddress(), block));
209 remaining_blocks.push(block);
210 }
211 if (MIA->isConditionalBranch(inst)) {
212 jmptarget = base_address + current_address + inst_size;
213 current_block->setNextBlock(1, jmptarget);
214 if (blocks.find(jmptarget) == blocks.end()) {
215 LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this);
216 blocks.insert(std::make_pair(block->getStartAddress(), block));
217 remaining_blocks.push(block);
218 }
219 }
220 }
221 }
222 }
223 } else {
224 inst_size = 0;
225 }
226
227
228 if (inst_size == 0 || MIA->isTerminator(inst) || MIA->isBranch(inst)) {
229 current_block->setEndAddress(current_address + base_address + inst_size);
230 LOG4CXX_DEBUG(logger, "Finished Block at " << std::hex <<
231 current_block->getEndAddress());
232 break;
233 }
234 current_address += inst_size;
235 }
236 }
237 LOG4CXX_DEBUG(logger, "Finished function " << function->getName());
238 manager->signal_new_function(function);
239 }
240
241 void LLVMDisassembler::disassemble() {
242 SectionRef text_section = sections[".text"];
243 std::vector<LLVMFunction*> remaining_functions;
244
245 // Assume all function symbols actually start a real function
246 for (auto x = symbols.begin(); x != symbols.end(); ++x) {
247 uint64_t result;
248 bool contains;
249 SymbolRef::Type symbol_type;
250
251
252 if (text_section.containsSymbol(x->second, contains) || !contains)
253 continue;
254
255 if (x->second.getType(symbol_type)
256 || SymbolRef::ST_Function != symbol_type)
257 continue;
258
259 if (!x->second.getAddress(result)) {
260 LLVMFunction * fun = new LLVMFunction(x->first, result);
261 remaining_functions.push_back(fun);
262 functions.insert(std::make_pair(result, fun));
263 LOG4CXX_DEBUG(logger, "Disasembling " << x->first);
264 }
265 }
266
267 for (LLVMFunction* function : remaining_functions) {
268 disassembleFunction(function);
269 }
270
271 if (binary->isELF()) {
272 bool is64bit = (binary->getData()[4] == 0x02);
273
274 uint64_t entry(0);
275 for (int i(0); i < (is64bit? 8 : 4); ++i) {
276 if (binary->isLittleEndian()) {
277 entry |= (unsigned int)((unsigned char)binary->getData()[0x18 + i]) << 8*i;
278 } else {
279 entry = entry << 8;
280 entry |= (unsigned char)binary->getData()[0x18 + i];
281 }
282 }
283 LOG4CXX_DEBUG(logger, "Adding entry at: " << std::hex << entry);
284 std::stringstream s;
285 s << "<_start 0x" << std::hex << entry << ">";
286
287 disassembleFunctionAt(entry, s.str());
288 }
289
290 if (functions.empty()) {
291 uint64_t text_entry;
292 text_section.getAddress(text_entry);
293 LOG4CXX_INFO(logger, "No Symbols found, starting at the beginning of the text segment");
294 disassembleFunctionAt(text_entry);
295 }
296
297 splitBlocks();
298 }
299
300 void LLVMDisassembler::splitBlocks() {
301 SectionRef text_section = sections[".text"];
302 StringRef bytes;
303 text_section.getContents(bytes);
304 StringRefMemoryObject ref(bytes);
305
306 // Split blocks where jumps are going inside the block
307 for (auto it = blocks.begin(); it != blocks.end(); ++it) {
308 LLVMBasicBlock * current_block = it->second;
309 uint64_t inst_size;
310 uint64_t base_address;
311 text_section.getAddress(base_address);
312 uint64_t current_address = current_block->getStartAddress() - base_address;
313 while(current_block->getEndAddress() - base_address > current_address) {
314 MCInst inst;
315 std::string buf;
316 llvm::raw_string_ostream s(buf);
317
318 if(llvm::MCDisassembler::Success ==
319 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
320 auto other = blocks.find(current_address + inst_size + base_address);
321
322 if (other != blocks.end()) {
323 uint64_t endaddress = current_address + inst_size + base_address;
324 if (endaddress != current_block->getEndAddress()) {
325 LOG4CXX_DEBUG(logger, "Shortening block starting at "
326 << std::hex
327 << current_block->getStartAddress()
328 << " now ending at "
329 << other->first);
330 current_block->setEndAddress(endaddress);
331 current_block->setNextBlock(0, other->first);
332 current_block->setNextBlock(1, 0);
333 }
334 }
335 } else {
336 inst_size = 1;
337 }
338 current_address += inst_size;
339 }
340 }
341 }
342
343 void LLVMDisassembler::readSymbols() {
344 error_code ec;
345 symbol_iterator si(o->symbol_begin()), se(o->symbol_end());
346 for (; si != se; ++si) {
347 StringRef name;
348 if ((ec = si->getName(name))) {
349 LOG4CXX_ERROR(logger, ec.message());
350 break;
351 }
352 LOG4CXX_DEBUG(logger, "Added symbol " << name.str());
353 symbols.insert(make_pair(name.str(), *si));
354 }
355 }
356
357 void LLVMDisassembler::readSections() {
358 error_code ec;
359 section_iterator i(o->section_begin()), e(o->section_end());
360 for (; i != e; ++i) {
361 StringRef name;
362 if ((ec = i->getName(name))) {
363 LOG4CXX_ERROR(logger, ec.message());
364 break;
365 }
366 LOG4CXX_DEBUG(logger, "Added section " << name.str());
367 sections.insert(make_pair(name.str(), *i));
368 }
369
370 }
371
372 void LLVMDisassembler::forEachFunction(std::function<void (uint64_t, Function*)> callback) {
373 std::for_each(functions.begin(), functions.end(),
374 [&](std::pair<uint64_t, LLVMFunction*> x) {
375 callback(x.first, x.second);
376 });
377 }
378
379 void LLVMDisassembler::printEachInstruction(uint64_t start, uint64_t end,
380 std::function<void (uint8_t*, size_t, const std::string&)> fun) {
381 SectionRef text_section = sections[".text"];
382 uint64_t base_address;
383 text_section.getAddress(base_address);
384 uint64_t current_address = start - base_address;
385
386 StringRef bytes;
387 text_section.getContents(bytes);
388 StringRefMemoryObject ref(bytes);
389
390 while (current_address < end - base_address) {
391 uint64_t inst_size;
392 MCInst inst;
393 std::string buf;
394 llvm::raw_string_ostream s(buf);
395
396 if(llvm::MCDisassembler::Success ==
397 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
398
399 uint8_t bytes[inst_size+2];
400 ref.readBytes(current_address, inst_size, bytes);
401
402 uint64_t jmptarget;
403 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
404 std::stringstream stream;
405 stream << std::hex << (base_address + jmptarget);
406 IP->printInst(&inst, s, stream.str());
407 } else
408 IP->printInst(&inst, s, "");
409
410 fun(bytes, inst_size, s.str());
411 } else {
412 LOG4CXX_WARN(logger, "Invalid byte at" << std::hex << current_address + base_address);
413 fun(NULL, 0, "Invalid Byte");
414 inst_size = 1;
415 }
416
417 current_address += inst_size;
418 }
419 }