]>
git.siccegge.de Git - frida/frida.git/blob - LLVMDisassembler.cxx
b6f8dae75ae2d6cc33b2586bc2b45df68b899eec
1 #include "disassembler/llvm/LLVMDisassembler.hxx"
2 #include "core/InformationManager.hxx"
3 #include "core/Function.hxx"
4 #include "core/BasicBlock.hxx"
10 using namespace llvm::object
;
11 using std::error_code
;
16 Disassembler
* createLLVMDisassembler(const std::string
& filename
, InformationManager
* manager
) {
20 std::unique_ptr
<Binary
> o
;
21 o
.reset(createBinary(filename
).get());
22 Binary
* op
= o
.release();
24 // ELFType<endian, maxalign, 64bit>
25 if (ELF32LEObjectFile
* object
= dyn_cast
<ELF32LEObjectFile
>(op
)) {
26 return new LLVMDisassembler
<ELFType
<support::little
, 2, false>>(filename
, manager
, object
);
28 if (ELF64LEObjectFile
* object
= dyn_cast
<ELF64LEObjectFile
>(op
)) {
29 return new LLVMDisassembler
<ELFType
<support::little
, 2, true>>(filename
, manager
, object
);
31 if (ELF32BEObjectFile
* object
= dyn_cast
<ELF32BEObjectFile
>(op
)) {
32 return new LLVMDisassembler
<ELFType
<support::big
, 2, false>>(filename
, manager
, object
);
34 if (ELF64BEObjectFile
* object
= dyn_cast
<ELF64BEObjectFile
>(op
)) {
35 return new LLVMDisassembler
<ELFType
<support::big
, 2, true>>(filename
, manager
, object
);
42 * TODO: fallback code for when the file is not an ELF/PE/COFF/Mach-O/... binary
43 * but, e.g., just raw instructions or a boot sector or
// Constructor. From the statements visible here it: logs the file name, loads
// the binary with createBinary(), dyn_casts it to ELFObjectFile<ELFT>, sets
// the triple's architecture from the object, looks up the LLVM target, and
// then creates the MC helper objects (register info, asm info, subtarget
// info, instruction info, object file info, disassembler, symbolizer,
// instruction analysis, instruction printer) before building the module
// through MCObjectDisassembler.
// NOTE(review): many source lines are absent from this copy -- in particular
// the conditions guarding each LOG4CXX_ERROR call, whatever follows them, and
// any use of the `file` parameter. Confirm against the repository.
46 template <typename ELFT
>
47 LLVMDisassembler
<ELFT
>::LLVMDisassembler(const std::string
& filename
,
48 InformationManager
* manager
,
49 ELFObjectFile
<ELFT
>* file
)
50 : Disassembler(filename
, manager
)
51 , logger(log4cxx::Logger::getLogger("LLVMDisassembler"))
52 , triple("unknown-unknown-unknown")
55 LOG4CXX_DEBUG(logger
, "Handling file" << filename
);
// Load the binary; a failure is read back through result.getError().
58 auto result
= createBinary(filename
);
61 if ((ec
= result
.getError())) {
62 LOG4CXX_ERROR(logger
, "Failed to load Binary" << ec
.message());
67 binary
.reset(result
.get());
69 o
= dyn_cast
<ELFObjectFile
<ELFT
>>(binary
.get());
// Take the architecture from the object file and look up the LLVM target.
75 triple
.setArch(Triple::ArchType(o
->getArch()));
76 std::string
tripleName(triple
.getTriple());
78 LOG4CXX_INFO(logger
, "Architecture " << tripleName
);
82 target
= TargetRegistry::lookupTarget("", triple
, es
);
84 LOG4CXX_ERROR(logger
, es
);
88 LOG4CXX_INFO(logger
, "Target " << target
->getName());
90 MRI
.reset(target
->createMCRegInfo(tripleName
));
92 LOG4CXX_ERROR(logger
, "no register info for target " << tripleName
);
96 // Set up disassembler.
97 AsmInfo
.reset(target
->createMCAsmInfo(*MRI
, tripleName
));
99 LOG4CXX_ERROR(logger
, "no assembly info for target " << tripleName
);
103 STI
.reset(target
->createMCSubtargetInfo(tripleName
, "", ""));
105 LOG4CXX_ERROR(logger
, "no subtarget info for target " << tripleName
);
109 MII
.reset(target
->createMCInstrInfo());
111 LOG4CXX_ERROR(logger
, "no instruction info for target " << tripleName
);
115 MOFI
.reset(new MCObjectFileInfo
);
116 MCContext
Ctx(AsmInfo
.get(), MRI
.get(), MOFI
.get());
118 DisAsm
.reset(target
->createMCDisassembler(*STI
, Ctx
));
120 LOG4CXX_ERROR(logger
, "no disassembler for target " << tripleName
);
// NOTE(review): the start of the next statement (source line 123) is absent
// from this copy; only its continuation is visible.
124 target
->createMCRelocationInfo(tripleName
, Ctx
));
127 MCObjectSymbolizer::createObjectSymbolizer(Ctx
, std::move(RelInfo
), o
));
129 DisAsm
->setSymbolizer(std::move(Symzer
));
134 MIA
.reset(target
->createMCInstrAnalysis(MII
.get()));
136 LOG4CXX_ERROR(logger
, "no instruction analysis for target " << tripleName
);
140 int AsmPrinterVariant
= AsmInfo
->getAssemblerDialect();
141 IP
.reset(target
->createMCInstPrinter(AsmPrinterVariant
, *AsmInfo
, *MII
, *MRI
, *STI
));
143 LOG4CXX_ERROR(logger
, "no instruction printer for target " << tripleName
);
// setPrintImmHex is called twice: once with a HexStyle, once with `true`.
147 IP
->setPrintImmHex(llvm::HexStyle::C
);
148 IP
->setPrintImmHex(true);
// Build the MC module for the object (buildModule is called with false).
150 std::unique_ptr
<MCObjectDisassembler
> OD(
151 new MCObjectDisassembler(*o
, *DisAsm
, *MIA
));
152 Mod
.reset(OD
->buildModule(false));
// start(): the only body statement visible here is the call to
// readDynamicSymbols().
// NOTE(review): source lines 157-159 are absent from this copy, so anything
// executed before readDynamicSymbols() is not shown -- confirm against the
// repository.
155 template <typename ELFT
>
156 void LLVMDisassembler
<ELFT
>::start() {
160 readDynamicSymbols();
// Destructor. Every body line visible here is commented-out std::for_each
// code over `functions` and `blocks`; no live statement is visible.
// NOTE(review): source lines 167-168 and 171-174 are absent from this copy.
163 template <typename ELFT
>
164 LLVMDisassembler
<ELFT
>::~LLVMDisassembler() {
165 // std::for_each(functions.begin(), functions.end(),
166 // [](std::pair<uint64_t,LLVMFunction*> it) {
169 // std::for_each(blocks.begin(), blocks.end(),
170 // [](std::pair<uint64_t, LLVMBasicBlock*> it) {
175 template <typename ELFT
>
176 Function
* LLVMDisassembler
<ELFT
>::disassembleFunctionAt(uint64_t address
, const std::string
& name
) {
177 SectionRef text_section
= sections
[".text"];
178 uint64_t base_address
, size
;
179 text_section
.getAddress(base_address
);
180 text_section
.getSize(size
);
182 if (address
< base_address
||
183 address
>= base_address
+ size
) {
187 if (functions
.find(address
) != functions
.end()) {
188 return functions
[address
];
194 s
<< "<Unnamed 0x" << std::hex
<< address
<< ">";
195 function
= new Function(s
.str(), address
, manager
);
197 function
= new Function(name
, address
, manager
);
199 functions
.insert(std::make_pair(address
, function
));
201 disassembleFunction(function
);
206 template <typename ELFT
>
207 void LLVMDisassembler
<ELFT
>::disassembleFunction(Function
* function
) {
208 std::stack
<BasicBlock
*> remaining_blocks
;
209 SectionRef text_section
= sections
[".text"];
211 text_section
.getContents(bytes
);
212 StringRefMemoryObject
ref(bytes
);
214 LOG4CXX_DEBUG(logger
, "Handling function " << function
->getName());
216 BasicBlock
* block
= new BasicBlock(function
->getStartAddress(), this, manager
);
217 remaining_blocks
.push(block
);
218 blocks
.insert(std::make_pair(block
->getStartAddress(), block
));
219 function
->addBasicBlock(block
);
221 while (remaining_blocks
.size()) {
222 BasicBlock
* current_block
= remaining_blocks
.top();
223 remaining_blocks
.pop();
225 LOG4CXX_DEBUG(logger
, "Handling Block starting at " << std::hex
<< current_block
->getStartAddress());
228 uint64_t base_address
;
229 text_section
.getAddress(base_address
);
230 uint64_t current_address
= current_block
->getStartAddress() - base_address
;
234 llvm::raw_string_ostream
s(buf
);
236 if(llvm::MCDisassembler::Success
==
237 DisAsm
->getInstruction(inst
, inst_size
, ref
, current_address
, nulls(), nulls())) {
240 if (MIA
->evaluateBranch(inst
, current_address
, inst_size
, jmptarget
)) {
241 jmptarget
+= base_address
;
242 if (!MIA
->isIndirectBranch(inst
)) {
243 if (MIA
->isCall(inst
)) {
244 if (functions
.find(jmptarget
) == functions
.end()) {
245 disassembleFunctionAt(jmptarget
);
248 current_block
->setNextBlock(0, jmptarget
);
249 if (blocks
.find(jmptarget
) == blocks
.end()) {
250 BasicBlock
* block
= new BasicBlock(jmptarget
, this, manager
);
251 blocks
.insert(std::make_pair(block
->getStartAddress(), block
));
252 function
->addBasicBlock(block
);
253 remaining_blocks
.push(block
);
255 LOG4CXX_DEBUG(logger
, "Reusing Block starting at " << std::hex
<< current_block
->getStartAddress());
256 function
->addBasicBlock(blocks
.find(jmptarget
)->second
);
258 if (MIA
->isConditionalBranch(inst
)) {
259 jmptarget
= base_address
+ current_address
+ inst_size
;
260 current_block
->setNextBlock(1, jmptarget
);
261 if (blocks
.find(jmptarget
) == blocks
.end()) {
262 BasicBlock
* block
= new BasicBlock(jmptarget
, this, manager
);
263 blocks
.insert(std::make_pair(block
->getStartAddress(), block
));
264 function
->addBasicBlock(block
);
265 remaining_blocks
.push(block
);
267 LOG4CXX_DEBUG(logger
, "Reusing Block starting at " << std::hex
<< current_block
->getStartAddress());
268 function
->addBasicBlock(blocks
.find(jmptarget
)->second
);
279 if (inst_size
== 0 || MIA
->isTerminator(inst
) || MIA
->isBranch(inst
)) {
280 current_block
->setEndAddress(current_address
+ base_address
+ inst_size
);
281 LOG4CXX_DEBUG(logger
, "Finished Block at " << std::hex
<<
282 current_block
->getEndAddress());
285 current_address
+= inst_size
;
288 splitBlocks(function
);
289 LOG4CXX_DEBUG(logger
, "Finished function " << function
->getName());
290 manager
->signal_new_function(function
);
293 template <typename ELFT
>
294 void LLVMDisassembler
<ELFT
>::disassemble() {
295 SectionRef text_section
= sections
[".text"];
296 std::vector
<Function
*> remaining_functions
;
298 // Assume all function symbols actually start a real function
299 for (auto x
= symbols
.begin(); x
!= symbols
.end(); ++x
) {
302 SymbolRef::Type symbol_type
;
305 if (text_section
.containsSymbol(x
->second
, contains
) || !contains
)
308 if (x
->second
.getType(symbol_type
)
309 || SymbolRef::ST_Function
!= symbol_type
)
312 if (!x
->second
.getAddress(result
)) {
313 Function
* fun
= new Function(x
->first
, result
, manager
);
314 remaining_functions
.push_back(fun
);
315 functions
.insert(std::make_pair(result
, fun
));
316 LOG4CXX_DEBUG(logger
, "Disasembling " << x
->first
);
320 for (Function
* function
: remaining_functions
) {
321 disassembleFunction(function
);
324 if (binary
->isELF()) {
325 const ELFO
* elffile
= o
->getELFFile();
326 const typename
ELFO::Elf_Ehdr
* header
= elffile
->getHeader();
328 _entryAddress
= header
->e_entry
;
329 LOG4CXX_DEBUG(logger
, "Adding entryAddress at: " << std::hex
<< _entryAddress
);
331 s
<< "<_start 0x" << std::hex
<< _entryAddress
<< ">";
333 disassembleFunctionAt(_entryAddress
, s
.str());
336 if (functions
.empty()) {
338 text_section
.getAddress(text_entry
);
339 LOG4CXX_INFO(logger
, "No Symbols found, starting at the beginning of the text segment");
340 disassembleFunctionAt(text_entry
);
// Walks every basic block of `function`, decoding instruction by instruction
// from the ".text" section. When another block known in `blocks` starts right
// after an instruction that is not the current block's last one, the current
// block's end address is moved to that boundary, the other block is added to
// the function and set as successor 0, and successor 1 is set to 0.
// NOTE(review): several source lines are absent from this copy (local
// declarations, part of the debug message, the handling of a failed decode
// and the closing braces) -- confirm against the repository.
344 template <typename ELFT
>
345 void LLVMDisassembler
<ELFT
>::splitBlocks(Function
* function
) {
346 SectionRef text_section
= sections
[".text"];
348 text_section
.getContents(bytes
);
349 StringRefMemoryObject
ref(bytes
);
351 // Split blocks where jumps are going inside the block
352 for (auto it
= function
->blocks().begin();
353 it
!= function
->blocks().end();
355 BasicBlock
* current_block
= it
->second
;
// Addresses passed to the disassembler are offsets from the section base.
357 uint64_t base_address
;
358 text_section
.getAddress(base_address
);
359 uint64_t current_address
= current_block
->getStartAddress() - base_address
;
360 while(current_block
->getEndAddress() - base_address
> current_address
) {
363 llvm::raw_string_ostream
s(buf
);
365 if(llvm::MCDisassembler::Success
==
366 DisAsm
->getInstruction(inst
, inst_size
, ref
, current_address
, nulls(), nulls())) {
367 // See if some other block starts here
368 auto other
= blocks
.find(current_address
+ inst_size
+ base_address
);
370 // Special case, other block starts here but we are at the end anyway
371 if (other
!= blocks
.end()) {
372 uint64_t endaddress
= current_address
+ inst_size
+ base_address
;
373 if (endaddress
!= current_block
->getEndAddress()) {
374 LOG4CXX_DEBUG(logger
, "Shortening block starting at "
376 << current_block
->getStartAddress()
379 function
->addBasicBlock(other
->second
);
380 current_block
->setEndAddress(endaddress
);
381 current_block
->setNextBlock(0, other
->first
);
382 current_block
->setNextBlock(1, 0);
388 current_address
+= inst_size
;
393 template <typename ELFT
>
394 void LLVMDisassembler
<ELFT
>::readDynamicSymbols() {
395 const ELFO
* elffile
= o
->getELFFile();
396 for (typename
ELFO::Elf_Sym_Iter
397 it
= elffile
->begin_dynamic_symbols(),
398 end
= elffile
->end_dynamic_symbols();
401 if (it
->getType() == 2) { // Function
403 // TODO: Error handling
404 std::string symbolname
= *(elffile
->getSymbolName(it
));
405 std::string symbolversion
= *(elffile
->getSymbolVersion(nullptr, &*it
, is_default
));
406 manager
->signal_new_dyn_symbol(symbolname
+ (is_default
? "@@" : "@") + symbolversion
);
407 LOG4CXX_DEBUG(logger
, "Adding dynamic Symbol " << symbolname
<< (is_default
? "@@" : "@") << symbolversion
);
// Iterates over the object's symbols (symbol_begin() to symbol_end()), reads
// each symbol's name and stores the symbol in `symbols` keyed by that name.
// A failure to read the name is logged through LOG4CXX_ERROR.
// NOTE(review): the lines after the error log are absent from this copy, so
// whether the loop skips the symbol or stops is not visible -- confirm.
412 template <typename ELFT
>
413 void LLVMDisassembler
<ELFT
>::readSymbols() {
415 symbol_iterator
si(o
->symbol_begin()), se(o
->symbol_end());
416 for (; si
!= se
; ++si
) {
418 if ((ec
= si
->getName(name
))) {
419 LOG4CXX_ERROR(logger
, ec
.message());
422 LOG4CXX_DEBUG(logger
, "Added symbol " << name
.str());
423 symbols
.insert(make_pair(name
.str(), *si
));
// Iterates over the object's sections (section_begin() to section_end()),
// reads each section's name and stores the section in `sections` keyed by
// that name. A failure to read the name is logged through LOG4CXX_ERROR.
// NOTE(review): the lines after the error log are absent from this copy, so
// whether the loop skips the section or stops is not visible -- confirm.
427 template <typename ELFT
>
428 void LLVMDisassembler
<ELFT
>::readSections() {
430 section_iterator
i(o
->section_begin()), e(o
->section_end());
431 for (; i
!= e
; ++i
) {
433 if ((ec
= i
->getName(name
))) {
434 LOG4CXX_ERROR(logger
, ec
.message());
437 LOG4CXX_DEBUG(logger
, "Added section " << name
.str());
438 sections
.insert(make_pair(name
.str(), *i
));
443 template <typename ELFT
>
444 void LLVMDisassembler
<ELFT
>::forEachFunction(std::function
<void (uint64_t, Function
*)> callback
) {
445 std::for_each(functions
.begin(), functions
.end(),
446 [&](std::pair
<uint64_t, Function
*> x
) {
447 callback(x
.first
, x
.second
);
451 template <typename ELFT
>
452 void LLVMDisassembler
<ELFT
>::printEachInstruction(uint64_t start
, uint64_t end
,
453 std::function
<void (uint8_t*, size_t,
455 const std::string
&)> fun
) {
456 SectionRef text_section
= sections
[".text"];
457 uint64_t base_address
;
458 text_section
.getAddress(base_address
);
459 uint64_t current_address
= start
- base_address
;
462 text_section
.getContents(bytes
);
463 StringRefMemoryObject
ref(bytes
);
465 while (current_address
< end
- base_address
) {
469 llvm::raw_string_ostream
s(buf
);
471 if(llvm::MCDisassembler::Success
==
472 DisAsm
->getInstruction(inst
, inst_size
, ref
, current_address
, nulls(), nulls())) {
474 uint8_t bytes
[inst_size
+2];
475 ref
.readBytes(current_address
, inst_size
, bytes
);
479 IP
->printInst(&inst
, s
, "");
480 if (MIA
->evaluateBranch(inst
, current_address
, inst_size
, jmptarget
)) {
481 std::stringstream stream
;
482 if (MIA
->isCall(inst
))
483 stream
<< "function:";
487 stream
<< std::hex
<< (base_address
+ jmptarget
);
492 fun(bytes
, inst_size
, s
.str(), ref
);
494 LOG4CXX_WARN(logger
, "Invalid byte at" << std::hex
<< current_address
+ base_address
);
495 fun(NULL
, 0, "Invalid Byte", "");
499 current_address
+= inst_size
;