]> git.siccegge.de Git - frida/frida.git/blob - src/disassembler/llvm/LLVMDisassembler.cxx
Include absolute branch address
[frida/frida.git] / src / disassembler / llvm / LLVMDisassembler.cxx
#include "disassembler/llvm/LLVMDisassembler.hxx"
#include "disassembler/llvm/LLVMBasicBlock.hxx"
#include "disassembler/llvm/LLVMFunction.hxx"

#include <algorithm>
#include <stack>
#include <vector>
7
8 using namespace llvm;
9 using namespace llvm::object;
10
/*
 * TODO: add fallback code for the case that the file is not an
 * ELF/PE/COFF/Mach-O/... binary but, e.g., just raw instructions, a boot
 * sector or something similar.
 */
16 LLVMDisassembler::LLVMDisassembler(const std::string& filename)
17 : Disassembler(filename)
18 , logger(log4cxx::Logger::getLogger("LLVMDisassembler"))
19 , triple("unknown-unknown-unknown")
20 {
21 LOG4CXX_DEBUG(logger, "Handling file" << filename);
22 auto result = createBinary(filename);
23
24 error_code ec;
25 if ((ec = result.getError())) {
26 LOG4CXX_ERROR(logger, "Failed to load Binary" << ec.message());
27 binary = NULL;
28 return;
29 }
30
31 binary.reset(result.get());
32
33 o = dyn_cast<ObjectFile>(binary.get());
34
35 triple.setArch(Triple::ArchType(o->getArch()));
36 std::string tripleName(triple.getTriple());
37
38 LOG4CXX_INFO(logger, "Architecture " << tripleName);
39
40
41 std::string es;
42 target = TargetRegistry::lookupTarget("", triple, es);
43 if (!target) {
44 LOG4CXX_ERROR(logger, es);
45 return;
46 }
47
48 LOG4CXX_INFO(logger, "Target " << target->getName());
49
50 MRI.reset(target->createMCRegInfo(tripleName));
51 if (!MRI) {
52 LOG4CXX_ERROR(logger, "no register info for target " << tripleName);
53 return;
54 }
55
56 // Set up disassembler.
57 AsmInfo.reset(target->createMCAsmInfo(*MRI, tripleName));
58 if (!AsmInfo) {
59 LOG4CXX_ERROR(logger, "no assembly info for target " << tripleName);
60 return;
61 }
62
63 STI.reset(target->createMCSubtargetInfo(tripleName, "", ""));
64 if (!STI) {
65 LOG4CXX_ERROR(logger, "no subtarget info for target " << tripleName);
66 return;
67 }
68
69 MII.reset(target->createMCInstrInfo());
70 if (!MII) {
71 LOG4CXX_ERROR(logger, "no instruction info for target " << tripleName);
72 return;
73 }
74
75 DisAsm.reset(target->createMCDisassembler(*STI));
76 if (!DisAsm) {
77 LOG4CXX_ERROR(logger, "no disassembler for target " << tripleName);
78 return;
79 }
80
81 MOFI.reset(new MCObjectFileInfo);
82 Ctx.reset(new MCContext(AsmInfo.get(), MRI.get(), MOFI.get()));
83 RelInfo.reset(
84 target->createMCRelocationInfo(tripleName, *Ctx.get()));
85 if (RelInfo) {
86 Symzer.reset(
87 MCObjectSymbolizer::createObjectSymbolizer(*Ctx.get(), RelInfo, o));
88 if (Symzer)
89 DisAsm->setSymbolizer(Symzer);
90 }
91
92 MIA.reset(target->createMCInstrAnalysis(MII.get()));
93
94 int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
95 IP.reset(target->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI));
96 if (!IP) {
97 LOG4CXX_ERROR(logger, "no instruction printer for target " << tripleName);
98 return;
99 }
100
101 IP->setPrintImmHex(llvm::HexStyle::C);
102 IP->setPrintImmHex(true);
103
104 OwningPtr<MCObjectDisassembler> OD(
105 new MCObjectDisassembler(*o, *DisAsm, *MIA));
106 Mod.reset(OD->buildModule(false));
107
108 readSymbols();
109 readSections();
110 disassemble();
111 }
112
113 LLVMDisassembler::~LLVMDisassembler() {
114 std::for_each(functions.begin(), functions.end(),
115 [](std::pair<uint64_t,LLVMFunction*> it) {
116 delete it.second;
117 });
118 std::for_each(blocks.begin(), blocks.end(),
119 [](std::pair<uint64_t, LLVMBasicBlock*> it) {
120 delete it.second;
121 });
122 }
123
124 void LLVMDisassembler::disassemble() {
125 std::stack<LLVMFunction*> remaining_functions;
126 std::stack<LLVMBasicBlock*> remaining_blocks;
127 SectionRef text_section = sections[".text"];
128
129 // Assume all function symbols actually start a real function
130 for (auto x = symbols.begin(); x != symbols.end(); ++x) {
131 uint64_t result;
132 bool contains;
133 SymbolRef::Type symbol_type;
134
135
136 if (text_section.containsSymbol(x->second, contains) || !contains)
137 continue;
138
139 if (x->second.getType(symbol_type)
140 || SymbolRef::ST_Function != symbol_type)
141 continue;
142
143 if (!x->second.getAddress(result)) {
144 LLVMFunction * fun = new LLVMFunction(x->first, result);
145 remaining_functions.push(fun);
146 functions.insert(std::make_pair(result, fun));
147 LOG4CXX_DEBUG(logger, "Disasembling " << x->first);
148 }
149 }
150
151 StringRef bytes;
152 text_section.getContents(bytes);
153 StringRefMemoryObject ref(bytes);
154
155 while (remaining_functions.size()) {
156 LLVMFunction * current_function = remaining_functions.top();
157 remaining_functions.pop();
158
159 LOG4CXX_DEBUG(logger, "Handling function " << current_function->getName());
160
161 LLVMBasicBlock * block = new LLVMBasicBlock(current_function->getStartAddress(), this);
162 remaining_blocks.push(block);
163 blocks.insert(std::make_pair(block->getStartAddress(), block));
164
165 while (remaining_blocks.size()) {
166 LLVMBasicBlock * current_block = remaining_blocks.top();
167 remaining_blocks.pop();
168
169 LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex << current_block->getStartAddress());
170
171 uint64_t inst_size;
172 uint64_t base_address;
173 text_section.getAddress(base_address);
174 uint64_t current_address = current_block->getStartAddress() - base_address;
175 while(true) {
176 MCInst inst;
177 std::string buf;
178 llvm::raw_string_ostream s(buf);
179
180 if(llvm::MCDisassembler::Success ==
181 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
182
183 uint64_t jmptarget;
184 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
185 jmptarget += base_address;
186 if (!MIA->isIndirectBranch(inst)) {
187 if (MIA->isCall(inst)) {
188 if (functions.find(jmptarget) == functions.end()) {
189 std::stringstream s;
190 s << "<Unnamed 0x" << std::hex << jmptarget << ">";
191 LLVMFunction * fun = new LLVMFunction(s.str(), jmptarget);
192 functions.insert(std::make_pair(jmptarget, fun));
193 remaining_functions.push(fun);
194 }
195 } else {
196 current_block->setNextBlock(0, jmptarget);
197 if (blocks.find(jmptarget) == blocks.end()) {
198 LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this);
199 blocks.insert(std::make_pair(block->getStartAddress(), block));
200 remaining_blocks.push(block);
201 }
202 if (MIA->isConditionalBranch(inst)) {
203 jmptarget = base_address + current_address + inst_size;
204 current_block->setNextBlock(1, jmptarget);
205 if (blocks.find(jmptarget) == blocks.end()) {
206 LLVMBasicBlock * block = new LLVMBasicBlock(jmptarget, this);
207 blocks.insert(std::make_pair(block->getStartAddress(), block));
208 remaining_blocks.push(block);
209 }
210 }
211 }
212 }
213 }
214 } else {
215 inst_size = 0;
216 }
217
218
219 if (inst_size == 0 || MIA->isTerminator(inst) || MIA->isBranch(inst)) {
220 current_block->setEndAddress(current_address + base_address + inst_size);
221 LOG4CXX_DEBUG(logger, "Finished Block at " << std::hex <<
222 current_block->getEndAddress());
223 break;
224 }
225 current_address += inst_size;
226 }
227 }
228 LOG4CXX_DEBUG(logger, "Finished function " << current_function->getName());
229 }
230
231 // Split blocks where jumps are going inside the block
232 for (auto it = blocks.begin(); it != blocks.end(); ++it) {
233 LLVMBasicBlock * current_block = it->second;
234 uint64_t inst_size;
235 uint64_t base_address;
236 text_section.getAddress(base_address);
237 uint64_t current_address = current_block->getStartAddress() - base_address;
238 while(current_block->getEndAddress() - base_address > current_address) {
239 MCInst inst;
240 std::string buf;
241 llvm::raw_string_ostream s(buf);
242
243 if(llvm::MCDisassembler::Success ==
244 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
245 auto other = blocks.find(current_address + inst_size + base_address);
246
247 if (other != blocks.end()) {
248 uint64_t endaddress = current_address + inst_size + base_address;
249 if (endaddress != current_block->getEndAddress()) {
250 LOG4CXX_DEBUG(logger, "Shortening block starting at "
251 << std::hex
252 << current_block->getStartAddress()
253 << " now ending at "
254 << other->first);
255 current_block->setEndAddress(endaddress);
256 current_block->setNextBlock(0, other->first);
257 current_block->setNextBlock(1, 0);
258 }
259 }
260 } else {
261 inst_size = 1;
262 }
263 current_address += inst_size;
264 }
265 }
266 }
267
268 void LLVMDisassembler::readSymbols() {
269 error_code ec;
270 symbol_iterator si(o->symbol_begin()), se(o->symbol_end());
271 for (; si != se; ++si) {
272 StringRef name;
273 if ((ec = si->getName(name))) {
274 LOG4CXX_ERROR(logger, ec.message());
275 break;
276 }
277 LOG4CXX_DEBUG(logger, "Added symbol " << name.str());
278 symbols.insert(make_pair(name.str(), *si));
279 }
280 }
281
282 void LLVMDisassembler::readSections() {
283 error_code ec;
284 section_iterator i(o->section_begin()), e(o->section_end());
285 for (; i != e; ++i) {
286 StringRef name;
287 if ((ec = i->getName(name))) {
288 LOG4CXX_ERROR(logger, ec.message());
289 break;
290 }
291 LOG4CXX_DEBUG(logger, "Added section " << name.str());
292 sections.insert(make_pair(name.str(), *i));
293 }
294
295 }
296
297 void LLVMDisassembler::forEachFunction(std::function<void (uint64_t, Function*)> callback) {
298 std::for_each(functions.begin(), functions.end(),
299 [&](std::pair<uint64_t, LLVMFunction*> x) {
300 callback(x.first, x.second);
301 });
302 }
303
304 void LLVMDisassembler::printEachInstruction(uint64_t start, uint64_t end,
305 std::function<void (uint8_t*, size_t, const std::string&)> fun) {
306 SectionRef text_section = sections[".text"];
307 uint64_t base_address;
308 text_section.getAddress(base_address);
309 uint64_t current_address = start - base_address;
310
311 StringRef bytes;
312 text_section.getContents(bytes);
313 StringRefMemoryObject ref(bytes);
314
315 while (current_address < end - base_address) {
316 uint64_t inst_size;
317 MCInst inst;
318 std::string buf;
319 llvm::raw_string_ostream s(buf);
320
321 if(llvm::MCDisassembler::Success ==
322 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
323
324 uint8_t bytes[inst_size+2];
325 ref.readBytes(current_address, inst_size, bytes);
326
327 uint64_t jmptarget;
328 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
329 std::stringstream stream;
330 stream << std::hex << (base_address + jmptarget);
331 IP->printInst(&inst, s, stream.str());
332 } else
333 IP->printInst(&inst, s, "");
334
335 fun(bytes, inst_size, s.str());
336 } else {
337 LOG4CXX_WARN(logger, "Invalid byte at" << std::hex << current_address + base_address);
338 fun(NULL, 0, "Invalid Byte");
339 inst_size = 1;
340 }
341
342 current_address += inst_size;
343 }
344 }