]>
git.siccegge.de Git - frida/frida.git/blob - LLVMDisassembler.cxx
6b3402eb0b2414d56c4b415f7467e3c29d7e33cd
1 #include "disassembler/llvm/LLVMDisassembler.hxx"
2 #include "disassembler/llvm/LLVMBasicBlock.hxx"
3 #include "disassembler/llvm/LLVMFunction.hxx"
9 using namespace llvm::object
;
10 using std::error_code
;
13 * TODO: fallback code for when the file is not an ELF/PE/COFF/Mach-O/... binary
14 * but, e.g., just raw instructions or a boot sector or
// Constructs a disassembler for the binary stored at `filename`.
//
// Visible steps: load the file with createBinary(), view it as an ObjectFile,
// set the triple's architecture from that object, look up the matching LLVM
// target, create the MC helper objects (register info, asm info, subtarget
// info, instruction info, disassembler, relocation info / symbolizer,
// instruction analysis, instruction printer) and finally build the MC module.
//
// NOTE(review): the embedded line numbers skip (e.g. 20 -> 22, 23 -> 26), so
// some original lines are absent from this extract. In particular the guards
// around the LOG4CXX_ERROR calls, and whatever follows each of them, are not
// visible here.
17 LLVMDisassembler::LLVMDisassembler(const std::string
& filename
)
18 : Disassembler(filename
)
19 , logger(log4cxx::Logger::getLogger("LLVMDisassembler"))
20 , triple("unknown-unknown-unknown")
22 LOG4CXX_DEBUG(logger
, "Handling file" << filename
);
// The result object is queried for an error code first (getError()) and for
// the loaded binary afterwards (get()).
23 auto result
= createBinary(filename
);
26 if ((ec
= result
.getError())) {
27 LOG4CXX_ERROR(logger
, "Failed to load Binary" << ec
.message());
// Hand the loaded binary over to `binary`.
32 binary
.reset(result
.get());
// View the binary as an ObjectFile.
// NOTE(review): dyn_cast presumably yields null for other binary kinds, and
// `o` is dereferenced right below — confirm a check exists on the omitted lines.
34 o
= dyn_cast
<ObjectFile
>(binary
.get());
// Only the architecture component of the triple is taken from the object file.
36 triple
.setArch(Triple::ArchType(o
->getArch()));
37 std::string
tripleName(triple
.getTriple());
39 LOG4CXX_INFO(logger
, "Architecture " << tripleName
);
// Look up the LLVM target for the triple; `es` is passed to the lookup and
// logged as the error text below.
43 target
= TargetRegistry::lookupTarget("", triple
, es
);
45 LOG4CXX_ERROR(logger
, es
);
49 LOG4CXX_INFO(logger
, "Target " << target
->getName());
51 MRI
.reset(target
->createMCRegInfo(tripleName
));
53 LOG4CXX_ERROR(logger
, "no register info for target " << tripleName
);
57 // Set up disassembler.
58 AsmInfo
.reset(target
->createMCAsmInfo(*MRI
, tripleName
));
60 LOG4CXX_ERROR(logger
, "no assembly info for target " << tripleName
);
// Subtarget info is requested with empty CPU and feature strings.
64 STI
.reset(target
->createMCSubtargetInfo(tripleName
, "", ""));
66 LOG4CXX_ERROR(logger
, "no subtarget info for target " << tripleName
);
70 MII
.reset(target
->createMCInstrInfo());
72 LOG4CXX_ERROR(logger
, "no instruction info for target " << tripleName
);
// The MCContext is a local object built from the info objects created above.
76 MOFI
.reset(new MCObjectFileInfo
);
77 MCContext
Ctx(AsmInfo
.get(), MRI
.get(), MOFI
.get());
79 DisAsm
.reset(target
->createMCDisassembler(*STI
, Ctx
));
81 LOG4CXX_ERROR(logger
, "no disassembler for target " << tripleName
);
// Relocation info and an object symbolizer are created and the symbolizer is
// attached to the disassembler. The statements that declare RelInfo and
// Symzer start on lines that are not part of this extract.
85 target
->createMCRelocationInfo(tripleName
, Ctx
));
88 MCObjectSymbolizer::createObjectSymbolizer(Ctx
, std::move(RelInfo
), o
));
90 DisAsm
->setSymbolizer(std::move(Symzer
));
95 MIA
.reset(target
->createMCInstrAnalysis(MII
.get()));
97 LOG4CXX_ERROR(logger
, "no instruction analysis for target " << tripleName
);
// The printer uses the assembler dialect reported by the asm info.
101 int AsmPrinterVariant
= AsmInfo
->getAssemblerDialect();
102 IP
.reset(target
->createMCInstPrinter(AsmPrinterVariant
, *AsmInfo
, *MII
, *MRI
, *STI
));
104 LOG4CXX_ERROR(logger
, "no instruction printer for target " << tripleName
);
// setPrintImmHex is called twice, once with HexStyle::C and once with true.
// NOTE(review): presumably the first selects C-style hex and the second
// enables hex immediates — confirm against the LLVM version in use.
108 IP
->setPrintImmHex(llvm::HexStyle::C
);
109 IP
->setPrintImmHex(true);
// Build the MC module from the object file (buildModule is called with false).
111 std::unique_ptr
<MCObjectDisassembler
> OD(
112 new MCObjectDisassembler(*o
, *DisAsm
, *MIA
));
113 Mod
.reset(OD
->buildModule(false));
// Destructor: visits every entry of `functions` and then every entry of
// `blocks` with std::for_each; each lambda receives the (address, pointer)
// pair by value.
// NOTE(review): the lambda bodies are not part of this extract; presumably
// they release the pointed-to LLVMFunction / LLVMBasicBlock objects — confirm.
120 LLVMDisassembler::~LLVMDisassembler() {
121 std::for_each(functions
.begin(), functions
.end(),
122 [](std::pair
<uint64_t,LLVMFunction
*> it
) {
125 std::for_each(blocks
.begin(), blocks
.end(),
126 [](std::pair
<uint64_t, LLVMBasicBlock
*> it
) {
// Returns the function starting at `address`, creating, registering and
// disassembling it first when it is not yet present in `functions`.
//
// address: absolute start address of the function. It is range-checked
//          against the ".text" section, [base_address, base_address + size).
//          The action taken for an out-of-range address is on a line that is
//          not part of this extract.
// name:    name given to a newly created function. One branch instead builds
//          a "<Unnamed 0x...>" placeholder from the address; the condition
//          choosing between the two `new LLVMFunction` calls is not visible
//          here (presumably an empty `name` — confirm).
//
// NOTE(review): the return statement that follows disassembleFunction() is
// not part of this extract.
131 Function
* LLVMDisassembler::disassembleFunctionAt(uint64_t address
, const std::string
& name
) {
132 SectionRef text_section
= sections
[".text"];
133 uint64_t base_address
, size
;
134 text_section
.getAddress(base_address
);
135 text_section
.getSize(size
);
// Range check: true when `address` lies outside the ".text" section.
137 if (address
< base_address
||
138 address
>= base_address
+ size
) {
// Already known: hand back the existing entry instead of disassembling again.
142 if (functions
.find(address
) != functions
.end()) {
143 return functions
[address
];
146 LLVMFunction
* function
;
// Placeholder name built from the hexadecimal address.
149 s
<< "<Unnamed 0x" << std::hex
<< address
<< ">";
150 function
= new LLVMFunction(s
.str(), address
);
152 function
= new LLVMFunction(name
, address
);
// Registered before disassembly, so the `functions` lookup performed in
// disassembleFunction() for call targets finds this entry if the function is
// reached again through a call.
154 functions
.insert(std::make_pair(address
, function
));
156 disassembleFunction(function
);
// Disassembles `function` basic block by basic block, starting with a block
// at the function's start address.
//
// A stack of pending blocks drives the traversal. Each popped block is decoded
// from the ".text" section contents; `current_address` is an offset relative
// to the section's base address, and `base_address` is added back whenever an
// absolute address is stored or looked up.
//
// For an instruction whose branch target evaluateBranch() can compute and
// that is not an indirect branch, the visible code
//   - disassembles call targets that are not yet in `functions`,
//   - records the target as successor 0 of the current block and queues a new
//     block for it if none exists,
//   - for conditional branches additionally records the address just past the
//     instruction as successor 1 and queues a block for it if none exists.
// A block ends at an instruction with inst_size == 0, a terminator or a branch.
//
// NOTE(review): the per-block decoding loop header, several declarations
// (inst, inst_size, jmptarget, buf, bytes) and the else-branches are on lines
// not part of this extract; the nesting described above is presumed from the
// visible order — confirm against the full file.
161 void LLVMDisassembler::disassembleFunction(LLVMFunction
* function
) {
162 std::stack
<LLVMBasicBlock
*> remaining_blocks
;
163 SectionRef text_section
= sections
[".text"];
// Raw contents of ".text", wrapped as a memory object for getInstruction().
165 text_section
.getContents(bytes
);
166 StringRefMemoryObject
ref(bytes
);
168 LOG4CXX_DEBUG(logger
, "Handling function " << function
->getName());
// Seed the work stack with the entry block and register it in `blocks`.
170 LLVMBasicBlock
* block
= new LLVMBasicBlock(function
->getStartAddress(), this);
171 remaining_blocks
.push(block
);
172 blocks
.insert(std::make_pair(block
->getStartAddress(), block
));
// Process pending blocks until the stack is empty (last pushed, first handled).
174 while (remaining_blocks
.size()) {
175 LLVMBasicBlock
* current_block
= remaining_blocks
.top();
176 remaining_blocks
.pop();
178 LOG4CXX_DEBUG(logger
, "Handling Block starting at " << std::hex
<< current_block
->getStartAddress());
// Convert the block's absolute start address into a section-relative offset.
181 uint64_t base_address
;
182 text_section
.getAddress(base_address
);
183 uint64_t current_address
= current_block
->getStartAddress() - base_address
;
187 llvm::raw_string_ostream
s(buf
);
// Decode one instruction at the current offset; both output streams passed to
// getInstruction() are nulls().
189 if(llvm::MCDisassembler::Success
==
190 DisAsm
->getInstruction(inst
, inst_size
, ref
, current_address
, nulls(), nulls())) {
// evaluateBranch() is given the section-relative offset, so the base address
// is added to obtain the absolute target.
193 if (MIA
->evaluateBranch(inst
, current_address
, inst_size
, jmptarget
)) {
194 jmptarget
+= base_address
;
195 if (!MIA
->isIndirectBranch(inst
)) {
// Call: disassemble the callee unless it is already registered.
196 if (MIA
->isCall(inst
)) {
197 if (functions
.find(jmptarget
) == functions
.end()) {
198 disassembleFunctionAt(jmptarget
);
// Successor 0 is the branch target; queue a block for it if none exists yet.
201 current_block
->setNextBlock(0, jmptarget
);
202 if (blocks
.find(jmptarget
) == blocks
.end()) {
203 LLVMBasicBlock
* block
= new LLVMBasicBlock(jmptarget
, this);
204 blocks
.insert(std::make_pair(block
->getStartAddress(), block
));
205 remaining_blocks
.push(block
);
// Conditional branch: successor 1 is the fall-through address, i.e. the
// absolute address just past this instruction.
207 if (MIA
->isConditionalBranch(inst
)) {
208 jmptarget
= base_address
+ current_address
+ inst_size
;
209 current_block
->setNextBlock(1, jmptarget
);
210 if (blocks
.find(jmptarget
) == blocks
.end()) {
211 LLVMBasicBlock
* block
= new LLVMBasicBlock(jmptarget
, this);
212 blocks
.insert(std::make_pair(block
->getStartAddress(), block
));
213 remaining_blocks
.push(block
);
// End of block: the end address is the absolute address just past the
// current instruction.
224 if (inst_size
== 0 || MIA
->isTerminator(inst
) || MIA
->isBranch(inst
)) {
225 current_block
->setEndAddress(current_address
+ base_address
+ inst_size
);
226 LOG4CXX_DEBUG(logger
, "Finished Block at " << std::hex
<<
227 current_block
->getEndAddress());
// Advance to the next instruction.
230 current_address
+= inst_size
;
233 LOG4CXX_DEBUG(logger
, "Finished function " << function
->getName());
// Top-level driver: determines function entry points and disassembles them.
//
// Visible steps:
//  1. Walk `symbols`. After a ".text" containment check and an ST_Function
//     type check, a symbol whose address can be read gets an LLVMFunction
//     that is queued in `remaining_functions` and registered in `functions`.
//  2. Disassemble every queued function.
//  3. For ELF binaries, read the entry address from the raw file data at
//     offset 0x18 (8 bytes when the byte at offset 4 equals 0x02, else 4) and
//     disassemble a function named "<_start 0x...>" there.
//  4. If `functions` is empty, start at the address of the ".text" section.
//
// NOTE(review): the statements guarded by the two symbol checks (presumably
// `continue`), several declarations (contains, result, entry, s, text_entry)
// and the closing braces are on lines not part of this extract, so the exact
// nesting of steps 3 and 4 cannot be confirmed from here.
236 void LLVMDisassembler::disassemble() {
237 SectionRef text_section
= sections
[".text"];
238 std::vector
<LLVMFunction
*> remaining_functions
;
240 // Assume all function symbols actually start a real function
241 for (auto x
= symbols
.begin(); x
!= symbols
.end(); ++x
) {
244 SymbolRef::Type symbol_type
;
// True when the containment query reports an error or the symbol is not
// inside ".text".
247 if (text_section
.containsSymbol(x
->second
, contains
) || !contains
)
// True when the type query reports an error or the symbol is not a function.
250 if (x
->second
.getType(symbol_type
)
251 || SymbolRef::ST_Function
!= symbol_type
)
// getAddress() returning a false-y value is treated as success here.
254 if (!x
->second
.getAddress(result
)) {
255 LLVMFunction
* fun
= new LLVMFunction(x
->first
, result
);
256 remaining_functions
.push_back(fun
);
257 functions
.insert(std::make_pair(result
, fun
));
258 LOG4CXX_DEBUG(logger
, "Disasembling " << x
->first
);
// Disassembly starts only after all symbol-derived functions are registered.
262 for (LLVMFunction
* function
: remaining_functions
) {
263 disassembleFunction(function
);
266 if (binary
->isELF()) {
// Byte 4 of the file selects the width of the entry field read below.
267 bool is64bit
= (binary
->getData()[4] == 0x02);
// Assemble the entry address byte by byte from file offset 0x18.
270 for (int i(0); i
< (is64bit
? 8 : 4); ++i
) {
271 if (binary
->isLittleEndian()) {
// NOTE(review): the byte is cast to unsigned int before being shifted by 8*i.
// With is64bit, i reaches 7 (shift by 56), which exceeds the width of a
// 32-bit unsigned int — confirm the type of `entry` and consider a wider cast.
272 entry
|= (unsigned int)((unsigned char)binary
->getData()[0x18 + i
]) << 8*i
;
// NOTE(review): in this other branch the byte is ORed in without a shift; any
// shifting of `entry` would have to be on a line not part of this extract —
// confirm.
275 entry
|= (unsigned char)binary
->getData()[0x18 + i
];
278 LOG4CXX_DEBUG(logger
, "Adding entry at: " << std::hex
<< entry
);
// Synthetic name for the function at the entry point.
280 s
<< "<_start 0x" << std::hex
<< entry
<< ">";
282 disassembleFunctionAt(entry
, s
.str());
// Fallback when nothing has been registered in `functions`.
285 if (functions
.empty()) {
287 text_section
.getAddress(text_entry
);
288 LOG4CXX_INFO(logger
, "No Symbols found, starting at the beginning of the text segment");
289 disassembleFunctionAt(text_entry
);
// Shortens basic blocks that run into the start of another known block.
//
// Every block in `blocks` is re-decoded from its start to its end address.
// When the absolute address just past an instruction is the start of another
// block in `blocks`, and that address is not already the current block's end,
// the current block is cut there: its end address is set, successor 0 becomes
// the other block and successor 1 is reset to 0.
//
// NOTE(review): declarations (bytes, inst, inst_size, buf), the remainder of
// the debug log statement and the closing braces are on lines not part of
// this extract.
295 void LLVMDisassembler::splitBlocks() {
296 SectionRef text_section
= sections
[".text"];
// Raw contents of ".text", wrapped as a memory object for getInstruction().
298 text_section
.getContents(bytes
);
299 StringRefMemoryObject
ref(bytes
);
301 // Split blocks where jumps are going inside the block
302 for (auto it
= blocks
.begin(); it
!= blocks
.end(); ++it
) {
303 LLVMBasicBlock
* current_block
= it
->second
;
// Work with section-relative offsets while decoding.
305 uint64_t base_address
;
306 text_section
.getAddress(base_address
);
307 uint64_t current_address
= current_block
->getStartAddress() - base_address
;
// The block's end address is re-read on every iteration, so shortening the
// block below also bounds this loop.
308 while(current_block
->getEndAddress() - base_address
> current_address
) {
311 llvm::raw_string_ostream
s(buf
);
313 if(llvm::MCDisassembler::Success
==
314 DisAsm
->getInstruction(inst
, inst_size
, ref
, current_address
, nulls(), nulls())) {
// Is there a block starting at the absolute address right after this
// instruction?
315 auto other
= blocks
.find(current_address
+ inst_size
+ base_address
);
317 if (other
!= blocks
.end()) {
318 uint64_t endaddress
= current_address
+ inst_size
+ base_address
;
// Only act when the current block actually extends past that address.
319 if (endaddress
!= current_block
->getEndAddress()) {
320 LOG4CXX_DEBUG(logger
, "Shortening block starting at "
322 << current_block
->getStartAddress()
// Cut the block and let it continue into the other block only.
325 current_block
->setEndAddress(endaddress
);
326 current_block
->setNextBlock(0, other
->first
);
327 current_block
->setNextBlock(1, 0);
333 current_address
+= inst_size
;
// Collects the symbols of the object file `o` into `symbols`, keyed by the
// symbol name.
//
// A symbol whose name cannot be read has the error message logged.
// NOTE(review): the declarations of `ec` and `name`, and whatever follows the
// error log (presumably skipping the symbol), are on lines not part of this
// extract.
338 void LLVMDisassembler::readSymbols() {
340 symbol_iterator
si(o
->symbol_begin()), se(o
->symbol_end());
341 for (; si
!= se
; ++si
) {
// A truthy error code from getName() means the name could not be read.
343 if ((ec
= si
->getName(name
))) {
344 LOG4CXX_ERROR(logger
, ec
.message());
347 LOG4CXX_DEBUG(logger
, "Added symbol " << name
.str());
// The name is stored as a string key together with a copy of the symbol.
348 symbols
.insert(make_pair(name
.str(), *si
));
// Collects the sections of the object file `o` into `sections`, keyed by the
// section name; other methods in this file look up ".text" in that container.
//
// A section whose name cannot be read has the error message logged.
// NOTE(review): the declarations of `ec` and `name`, and whatever follows the
// error log (presumably skipping the section), are on lines not part of this
// extract.
352 void LLVMDisassembler::readSections() {
354 section_iterator
i(o
->section_begin()), e(o
->section_end());
355 for (; i
!= e
; ++i
) {
// A truthy error code from getName() means the name could not be read.
357 if ((ec
= i
->getName(name
))) {
358 LOG4CXX_ERROR(logger
, ec
.message());
361 LOG4CXX_DEBUG(logger
, "Added section " << name
.str());
// The name is stored as a string key together with a copy of the section.
362 sections
.insert(make_pair(name
.str(), *i
));
// Invokes `callback(address, function)` once for every entry in `functions`.
//
// callback: receives the key of the entry and the function pointer stored
//           under it.
//
// The lambda captures by reference and takes each entry by value as a
// (uint64_t, LLVMFunction*) pair.
367 void LLVMDisassembler::forEachFunction(std::function
<void (uint64_t, Function
*)> callback
) {
368 std::for_each(functions
.begin(), functions
.end(),
369 [&](std::pair
<uint64_t, LLVMFunction
*> x
) {
370 callback(x
.first
, x
.second
);
// Decodes the instructions between `start` and `end` in the ".text" section
// and reports each one to `fun`.
//
// start, end: absolute addresses; decoding runs while the section-relative
//             offset is below end - base_address.
// fun:        called with (raw instruction bytes, instruction size, printed
//             text) for each decoded instruction, and with
//             (NULL, 0, "Invalid Byte") when decoding fails.
//
// For an instruction whose branch target evaluateBranch() can compute, the
// absolute target address in hexadecimal is passed to printInst() as its
// third argument; otherwise an empty string is passed.
//
// NOTE(review): declarations (the outer bytes, inst, inst_size, buf,
// jmptarget), the else-branches and the closing braces are on lines not part
// of this extract.
374 void LLVMDisassembler::printEachInstruction(uint64_t start
, uint64_t end
,
375 std::function
<void (uint8_t*, size_t, const std::string
&)> fun
) {
376 SectionRef text_section
= sections
[".text"];
// Work with offsets relative to the section's base address.
377 uint64_t base_address
;
378 text_section
.getAddress(base_address
);
379 uint64_t current_address
= start
- base_address
;
// Raw contents of ".text", wrapped as a memory object for getInstruction().
382 text_section
.getContents(bytes
);
383 StringRefMemoryObject
ref(bytes
);
385 while (current_address
< end
- base_address
) {
// `s` writes the printed instruction text into `buf`.
389 llvm::raw_string_ostream
s(buf
);
391 if(llvm::MCDisassembler::Success
==
392 DisAsm
->getInstruction(inst
, inst_size
, ref
, current_address
, nulls(), nulls())) {
// NOTE(review): this local array reuses the name `bytes` that is passed to
// getContents() above, and its length depends on inst_size, which
// getInstruction() fills in at run time — i.e. a variable-length array, which
// is a compiler extension in C++. Confirm this is intended.
394 uint8_t bytes
[inst_size
+2];
395 ref
.readBytes(current_address
, inst_size
, bytes
);
// evaluateBranch() is given the section-relative offset, so the base address
// is added to print the absolute target.
398 if (MIA
->evaluateBranch(inst
, current_address
, inst_size
, jmptarget
)) {
399 std::stringstream stream
;
400 stream
<< std::hex
<< (base_address
+ jmptarget
);
401 IP
->printInst(&inst
, s
, stream
.str());
// No resolvable branch target: print without the extra text.
403 IP
->printInst(&inst
, s
, "");
405 fun(bytes
, inst_size
, s
.str());
// Decoding failed: log the absolute address and report a placeholder.
407 LOG4CXX_WARN(logger
, "Invalid byte at" << std::hex
<< current_address
+ base_address
);
408 fun(NULL
, 0, "Invalid Byte");
// Advance to the next instruction.
412 current_address
+= inst_size
;