]> git.siccegge.de Git - frida/frida.git/blob - src/disassembler/llvm/LLVMDisassembler.cxx
Forward-port to LLVM 3.5 release
[frida/frida.git] / src / disassembler / llvm / LLVMDisassembler.cxx
#include "disassembler/llvm/LLVMDisassembler.hxx"
#include "disassembler/llvm/LLVMBasicBlock.hxx"
#include "disassembler/llvm/LLVMFunction.hxx"

#include <algorithm>
#include <sstream>
#include <stack>
#include <string>
#include <vector>
7
8 using namespace llvm;
9 using namespace llvm::object;
10 using std::error_code;
11
12 /*
13 * TODO: fallback code falls die Datei kein ELF/PE/COFF/MacO/.. binary
14 * ist sondern z.B. einfach nur Instruktionen oder ein Bootsektor oder
15 * foo
16 */
17 LLVMDisassembler::LLVMDisassembler(const std::string& filename)
18 : Disassembler(filename)
19 , logger(log4cxx::Logger::getLogger("LLVMDisassembler"))
20 , triple("unknown-unknown-unknown")
21 {
22 LOG4CXX_DEBUG(logger, "Handling file" << filename);
23 auto result = createBinary(filename);
24
25 error_code ec;
26 if ((ec = result.getError())) {
27 LOG4CXX_ERROR(logger, "Failed to load Binary" << ec.message());
28 binary = NULL;
29 return;
30 }
31
32 binary.reset(result.get());
33
34 o = dyn_cast<ObjectFile>(binary.get());
35
36 triple.setArch(Triple::ArchType(o->getArch()));
37 std::string tripleName(triple.getTriple());
38
39 LOG4CXX_INFO(logger, "Architecture " << tripleName);
40
41
42 std::string es;
43 target = TargetRegistry::lookupTarget("", triple, es);
44 if (!target) {
45 LOG4CXX_ERROR(logger, es);
46 return;
47 }
48
49 LOG4CXX_INFO(logger, "Target " << target->getName());
50
51 MRI.reset(target->createMCRegInfo(tripleName));
52 if (!MRI) {
53 LOG4CXX_ERROR(logger, "no register info for target " << tripleName);
54 return;
55 }
56
57 // Set up disassembler.
58 AsmInfo.reset(target->createMCAsmInfo(*MRI, tripleName));
59 if (!AsmInfo) {
60 LOG4CXX_ERROR(logger, "no assembly info for target " << tripleName);
61 return;
62 }
63
64 STI.reset(target->createMCSubtargetInfo(tripleName, "", ""));
65 if (!STI) {
66 LOG4CXX_ERROR(logger, "no subtarget info for target " << tripleName);
67 return;
68 }
69
70 MII.reset(target->createMCInstrInfo());
71 if (!MII) {
72 LOG4CXX_ERROR(logger, "no instruction info for target " << tripleName);
73 return;
74 }
75
76 MOFI.reset(new MCObjectFileInfo);
77 MCContext Ctx(AsmInfo.get(), MRI.get(), MOFI.get());
78
79 DisAsm.reset(target->createMCDisassembler(*STI, Ctx));
80 if (!DisAsm) {
81 LOG4CXX_ERROR(logger, "no disassembler for target " << tripleName);
82 return;
83 }
84 RelInfo.reset(
85 target->createMCRelocationInfo(tripleName, Ctx));
86 if (RelInfo) {
87 Symzer.reset(
88 MCObjectSymbolizer::createObjectSymbolizer(Ctx, std::move(RelInfo), o));
89 if (Symzer)
90 DisAsm->setSymbolizer(std::move(Symzer));
91 }
92 RelInfo.release();
93 Symzer.release();
94
95
96 MIA.reset(target->createMCInstrAnalysis(MII.get()));
97
98 int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
99 IP.reset(target->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI));
100 if (!IP) {
101 LOG4CXX_ERROR(logger, "no instruction printer for target " << tripleName);
102 return;
103 }
104
105 IP->setPrintImmHex(llvm::HexStyle::C);
106 IP->setPrintImmHex(true);
107
108 std::unique_ptr<MCObjectDisassembler> OD(
109 new MCObjectDisassembler(*o, *DisAsm, *MIA));
110 Mod.reset(OD->buildModule(false));
111
112 readSymbols();
113 readSections();
114 disassemble();
115 }
116
117 LLVMDisassembler::~LLVMDisassembler() {
118 std::for_each(functions.begin(), functions.end(),
119 [](std::pair<uint64_t,LLVMFunction*> it) {
120 delete it.second;
121 });
122 std::for_each(blocks.begin(), blocks.end(),
123 [](std::pair<uint64_t, LLVMBasicBlock*> it) {
124 delete it.second;
125 });
126 }
127
128 void LLVMDisassembler::disassemble() {
129 std::stack<LLVMFunction*> remaining_functions;
130 std::stack<LLVMBasicBlock*> remaining_blocks;
131 SectionRef text_section = sections[".text"];
132
133 // Assume all function symbols actually start a real function
134 for (auto x = symbols.begin(); x != symbols.end(); ++x) {
135 uint64_t result;
136 bool contains;
137 SymbolRef::Type symbol_type;
138
139
140 if (text_section.containsSymbol(x->second, contains) || !contains)
141 continue;
142
143 if (x->second.getType(symbol_type)
144 || SymbolRef::ST_Function != symbol_type)
145 continue;
146
147 if (!x->second.getAddress(result)) {
148 LLVMFunction * fun = new LLVMFunction(x->first, result);
149 remaining_functions.push(fun);
150 functions.insert(std::make_pair(result, fun));
151 LOG4CXX_DEBUG(logger, "Disasembling " << x->first);
152 }
153 }
154
155 StringRef bytes;
156 text_section.getContents(bytes);
157 StringRefMemoryObject ref(bytes);
158
159 while (remaining_functions.size()) {
160 LLVMFunction * current_function = remaining_functions.top();
161 remaining_functions.pop();
162
163 LOG4CXX_DEBUG(logger, "Handling function " << current_function->getName());
164
165 LLVMBasicBlock * block = new LLVMBasicBlock(current_function->getStartAddress(), this);
166 remaining_blocks.push(block);
167 blocks.insert(std::make_pair(block->getStartAddress(), block));
168
169 while (remaining_blocks.size()) {
170 LLVMBasicBlock * current_block = remaining_blocks.top();
171 remaining_blocks.pop();
172
173 LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex << current_block->getStartAddress());
174
175 uint64_t inst_size;
176 uint64_t base_address;
177 text_section.getAddress(base_address);
178 uint64_t current_address = current_block->getStartAddress() - base_address;
179 while(true) {
180 MCInst inst;
181 std::string buf;
182 llvm::raw_string_ostream s(buf);
183
184 if(llvm::MCDisassembler::Success ==
185 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
186
187 uint64_t jmptarget;
188 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
189 jmptarget += base_address;
190 if (!MIA->isIndirectBranch(inst)) {
191 if (MIA->isCall(inst)) {
192 if (functions.find(jmptarget) == functions.end()) {
193 std::stringstream s;
194 s << "<Unnamed 0x" << std::hex << jmptarget << ">";
195 LLVMFunction * fun = new LLVMFunction(s.str(), jmptarget);
196 functions.insert(std::make_pair(jmptarget, fun));
197 remaining_functions.push(fun);
198 }
199 } else {
200 current_block->setNextBlock(0, jmptarget);
201 if (blocks.find(jmptarget) == blocks.end()) {
202 LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this);
203 blocks.insert(std::make_pair(block->getStartAddress(), block));
204 remaining_blocks.push(block);
205 }
206 if (MIA->isConditionalBranch(inst)) {
207 jmptarget = base_address + current_address + inst_size;
208 current_block->setNextBlock(1, jmptarget);
209 if (blocks.find(jmptarget) == blocks.end()) {
210 LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this);
211 blocks.insert(std::make_pair(block->getStartAddress(), block));
212 remaining_blocks.push(block);
213 }
214 }
215 }
216 }
217 }
218 } else {
219 inst_size = 0;
220 }
221
222
223 if (inst_size == 0 || MIA->isTerminator(inst) || MIA->isBranch(inst)) {
224 current_block->setEndAddress(current_address + base_address + inst_size);
225 LOG4CXX_DEBUG(logger, "Finished Block at " << std::hex <<
226 current_block->getEndAddress());
227 break;
228 }
229 current_address += inst_size;
230 }
231 }
232 LOG4CXX_DEBUG(logger, "Finished function " << current_function->getName());
233 }
234
235 // Split blocks where jumps are going inside the block
236 for (auto it = blocks.begin(); it != blocks.end(); ++it) {
237 LLVMBasicBlock * current_block = it->second;
238 uint64_t inst_size;
239 uint64_t base_address;
240 text_section.getAddress(base_address);
241 uint64_t current_address = current_block->getStartAddress() - base_address;
242 while(current_block->getEndAddress() - base_address > current_address) {
243 MCInst inst;
244 std::string buf;
245 llvm::raw_string_ostream s(buf);
246
247 if(llvm::MCDisassembler::Success ==
248 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
249 auto other = blocks.find(current_address + inst_size + base_address);
250
251 if (other != blocks.end()) {
252 uint64_t endaddress = current_address + inst_size + base_address;
253 if (endaddress != current_block->getEndAddress()) {
254 LOG4CXX_DEBUG(logger, "Shortening block starting at "
255 << std::hex
256 << current_block->getStartAddress()
257 << " now ending at "
258 << other->first);
259 current_block->setEndAddress(endaddress);
260 current_block->setNextBlock(0, other->first);
261 current_block->setNextBlock(1, 0);
262 }
263 }
264 } else {
265 inst_size = 1;
266 }
267 current_address += inst_size;
268 }
269 }
270 }
271
272 void LLVMDisassembler::readSymbols() {
273 error_code ec;
274 symbol_iterator si(o->symbol_begin()), se(o->symbol_end());
275 for (; si != se; ++si) {
276 StringRef name;
277 if ((ec = si->getName(name))) {
278 LOG4CXX_ERROR(logger, ec.message());
279 break;
280 }
281 LOG4CXX_DEBUG(logger, "Added symbol " << name.str());
282 symbols.insert(make_pair(name.str(), *si));
283 }
284 }
285
286 void LLVMDisassembler::readSections() {
287 error_code ec;
288 section_iterator i(o->section_begin()), e(o->section_end());
289 for (; i != e; ++i) {
290 StringRef name;
291 if ((ec = i->getName(name))) {
292 LOG4CXX_ERROR(logger, ec.message());
293 break;
294 }
295 LOG4CXX_DEBUG(logger, "Added section " << name.str());
296 sections.insert(make_pair(name.str(), *i));
297 }
298
299 }
300
301 void LLVMDisassembler::forEachFunction(std::function<void (uint64_t, Function*)> callback) {
302 std::for_each(functions.begin(), functions.end(),
303 [&](std::pair<uint64_t, LLVMFunction*> x) {
304 callback(x.first, x.second);
305 });
306 }
307
308 void LLVMDisassembler::printEachInstruction(uint64_t start, uint64_t end,
309 std::function<void (uint8_t*, size_t, const std::string&)> fun) {
310 SectionRef text_section = sections[".text"];
311 uint64_t base_address;
312 text_section.getAddress(base_address);
313 uint64_t current_address = start - base_address;
314
315 StringRef bytes;
316 text_section.getContents(bytes);
317 StringRefMemoryObject ref(bytes);
318
319 while (current_address < end - base_address) {
320 uint64_t inst_size;
321 MCInst inst;
322 std::string buf;
323 llvm::raw_string_ostream s(buf);
324
325 if(llvm::MCDisassembler::Success ==
326 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
327
328 uint8_t bytes[inst_size+2];
329 ref.readBytes(current_address, inst_size, bytes);
330
331 uint64_t jmptarget;
332 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
333 std::stringstream stream;
334 stream << std::hex << (base_address + jmptarget);
335 IP->printInst(&inst, s, stream.str());
336 } else
337 IP->printInst(&inst, s, "");
338
339 fun(bytes, inst_size, s.str());
340 } else {
341 LOG4CXX_WARN(logger, "Invalid byte at" << std::hex << current_address + base_address);
342 fun(NULL, 0, "Invalid Byte");
343 inst_size = 1;
344 }
345
346 current_address += inst_size;
347 }
348 }