Recursive disassembler
[frida/frida.git] / src / disassembler / llvm / LLVMDisassembler.cxx
1 #include "disassembler/llvm/LLVMDisassembler.hxx"
2 #include "disassembler/llvm/LLVMBasicBlock.hxx"
3 #include "disassembler/llvm/LLVMFunction.hxx"
4
5 #include <stack>
6 #include <algorithm>
7
8 using namespace llvm;
9 using namespace llvm::object;
10
/*
 * TODO: fallback code in case the file is not an ELF/PE/COFF/Mach-O/...
 * binary but, e.g., just raw instructions, a boot sector or something
 * similar.
 */
16
17 LLVMDisassembler::LLVMDisassembler(const std::string& filename)
18 : Disassembler(filename)
19 , logger(log4cxx::Logger::getLogger("LLVMDisassembler"))
20 , triple("unknown-unknown-unknown")
21 {
22 LOG4CXX_DEBUG(logger, "Handling file" << filename);
23 auto result = createBinary(filename);
24
25 error_code ec;
26 if ((ec = result.getError())) {
27 LOG4CXX_ERROR(logger, "Failed to load Binary" << ec.message());
28 binary = NULL;
29 return;
30 }
31
32 binary.reset(result.get());
33
34 o = dyn_cast<ObjectFile>(binary.get());
35
36 triple.setArch(Triple::ArchType(o->getArch()));
37 std::string tripleName(triple.getTriple());
38
39 LOG4CXX_INFO(logger, "Architecture " << tripleName);
40
41
42 std::string es;
43 target = TargetRegistry::lookupTarget("", triple, es);
44 if (!target) {
45 LOG4CXX_ERROR(logger, es);
46 return;
47 }
48
49 LOG4CXX_INFO(logger, "Target " << target->getName());
50
51 MRI.reset(target->createMCRegInfo(tripleName));
52 if (!MRI) {
53 LOG4CXX_ERROR(logger, "no register info for target " << tripleName);
54 return;
55 }
56
57 // Set up disassembler.
58 AsmInfo.reset(target->createMCAsmInfo(*MRI, tripleName));
59 if (!AsmInfo) {
60 LOG4CXX_ERROR(logger, "no assembly info for target " << tripleName);
61 return;
62 }
63
64 STI.reset(target->createMCSubtargetInfo(tripleName, "", ""));
65 if (!STI) {
66 LOG4CXX_ERROR(logger, "no subtarget info for target " << tripleName);
67 return;
68 }
69
70 MII.reset(target->createMCInstrInfo());
71 if (!MII) {
72 LOG4CXX_ERROR(logger, "no instruction info for target " << tripleName);
73 return;
74 }
75
76 DisAsm.reset(target->createMCDisassembler(*STI));
77 if (!DisAsm) {
78 LOG4CXX_ERROR(logger, "no disassembler for target " << tripleName);
79 return;
80 }
81
82 MOFI.reset(new MCObjectFileInfo);
83 Ctx.reset(new MCContext(AsmInfo.get(), MRI.get(), MOFI.get()));
84 RelInfo.reset(
85 target->createMCRelocationInfo(tripleName, *Ctx.get()));
86 if (RelInfo) {
87 Symzer.reset(
88 MCObjectSymbolizer::createObjectSymbolizer(*Ctx.get(), RelInfo, o));
89 if (Symzer)
90 DisAsm->setSymbolizer(Symzer);
91 }
92
93 MIA.reset(target->createMCInstrAnalysis(MII.get()));
94
95 int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
96 IP.reset(target->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI));
97 if (!IP) {
98 LOG4CXX_ERROR(logger, "no instruction printer for target " << tripleName);
99 return;
100 }
101
102 IP->setPrintImmHex(llvm::HexStyle::C);
103 IP->setPrintImmHex(true);
104
105 OwningPtr<MCObjectDisassembler> OD(
106 new MCObjectDisassembler(*o, *DisAsm, *MIA));
107 Mod.reset(OD->buildModule(false));
108
109 readSymbols();
110 readSections();
111 disassemble();
112 }
113
114
115 void LLVMDisassembler::disassemble() {
116 std::stack<LLVMFunction*> remaining_functions;
117 std::stack<LLVMBasicBlock*> remaining_blocks;
118 SectionRef text_section = sections[".text"];
119
120 std::for_each(symbols.begin(), symbols.end(),
121 [&](std::pair<const std::string, SymbolRef> x) {
122 uint64_t result;
123 bool contains;
124 SymbolRef::Type symbol_type;
125
126 if (text_section.containsSymbol(x.second, contains) || !contains)
127 return;
128
129 if (x.second.getType(symbol_type)
130 || SymbolRef::ST_Function != symbol_type)
131 return;
132
133 if (!x.second.getAddress(result)) {
134 remaining_functions.push(new LLVMFunction(x.first, result));
135 LOG4CXX_DEBUG(logger, "Disasembling " << x.first);
136 }
137 });
138
139 StringRef bytes;
140 text_section.getContents(bytes);
141 StringRefMemoryObject ref(bytes);
142
143 while (remaining_functions.size()) {
144 LLVMFunction * current_function = remaining_functions.top();
145 remaining_functions.pop();
146
147 LOG4CXX_INFO(logger, "Handling function " << current_function->getName());
148
149 // if ("_start" != current_function->getName())
150 // continue;
151
152 remaining_blocks.push(new LLVMBasicBlock(current_function->getStartAddress()));
153
154 while (remaining_blocks.size()) {
155 LLVMBasicBlock * current_block = remaining_blocks.top();
156 remaining_blocks.pop();
157
158 LOG4CXX_INFO(logger, "Handling Block starting at " << std::hex << current_block->getStartAddress());
159
160 uint64_t inst_size;
161 uint64_t base_address;
162 text_section.getAddress(base_address);
163 uint64_t current_address = current_block->getStartAddress() - base_address;
164 while(true) {
165 MCInst inst;
166 std::string buf;
167 llvm::raw_string_ostream s(buf);
168
169 if(llvm::MCDisassembler::Success ==
170 DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
171 LOG4CXX_DEBUG(logger, "Inst Size " << inst_size);
172
173 uint8_t bytes[inst_size+2];
174 ref.readBytes(current_address, inst_size, bytes);
175 s << '\t';
176 for(uint8_t* cur = bytes; cur < bytes + inst_size; ++cur) {
177 s.write_hex(*cur);
178 s << ' ';
179 }
180 s << '\t';
181
182 IP->printInst(&inst, s, "");
183
184 LOG4CXX_DEBUG(logger, std::hex << current_address + base_address << s.str());
185
186 uint64_t jmptarget;
187 if (MIA->evaluateBranch(inst, current_address, inst_size, jmptarget)) {
188 jmptarget += base_address;
189 if (!MIA->isIndirectBranch(inst)) {
190 if (MIA->isCall(inst)) {
191 if (blocks.find(jmptarget) == blocks.end())
192 remaining_functions.push(new LLVMFunction("<Unnamed>", jmptarget));
193 } else {
194 if (blocks.find(jmptarget) == blocks.end())
195 remaining_blocks.push(new LLVMBasicBlock(jmptarget));
196 if (MIA->isConditionalBranch(inst)) {
197 jmptarget = base_address + current_address + inst_size;
198 if (blocks.find(jmptarget) == blocks.end())
199 remaining_blocks.push(new LLVMBasicBlock(jmptarget));
200 }
201 }
202 }
203 }
204 } else {
205 inst_size = 0;
206 }
207
208
209 if (inst_size == 0 || MIA->isTerminator(inst) || MIA->isBranch(inst)) {
210 current_block->setEndAddress(current_address + base_address);
211 blocks.insert(std::make_pair(current_block->getStartAddress(), current_block));
212 LOG4CXX_INFO(logger, "Finished Block at " << current_block->getEndAddress());
213 break;
214 }
215 current_address += inst_size;
216 }
217 }
218 LOG4CXX_INFO(logger, "Finished function " << current_function->getName());
219 }
220 }
221
222 void LLVMDisassembler::readSymbols() {
223 error_code ec;
224 symbol_iterator si(o->symbol_begin()), se(o->symbol_end());
225 for (; si != se; ++si) {
226 StringRef name;
227 if ((ec = si->getName(name))) {
228 LOG4CXX_ERROR(logger, ec.message());
229 break;
230 }
231 LOG4CXX_DEBUG(logger, "Added symbol " << name.str());
232 symbols.insert(make_pair(name.str(), *si));
233 }
234 }
235
236 void LLVMDisassembler::readSections() {
237 error_code ec;
238 section_iterator i(o->section_begin()), e(o->section_end());
239 for (; i != e; ++i) {
240 StringRef name;
241 if ((ec = i->getName(name))) {
242 LOG4CXX_ERROR(logger, ec.message());
243 break;
244 }
245 LOG4CXX_DEBUG(logger, "Added section " << name.str());
246 sections.insert(make_pair(name.str(), *i));
247 }
248
249 }
250
251 BasicBlock * LLVMDisassembler::generateControlFlowGraph(uint64_t address) {
252
253 }