]>
git.siccegge.de Git - frida/frida.git/blob - src/disassembler/llvm/LLVMDisassembler.cxx
875a474b239c1d69607b55f60c4b1d564fa5d048
1 #include "disassembler/llvm/LLVMDisassembler.hxx"
2 #include "core/InformationManager.hxx"
3 #include "core/Function.hxx"
4 #include "core/BasicBlock.hxx"
11 using namespace llvm::object
;
12 using std::error_code
;
23 Disassembler
* createLLVMDisassembler(const std::string
& filename
, InformationManager
* manager
) {
27 std::unique_ptr
<Binary
> o
;
28 o
.reset(createBinary(filename
).get());
29 Binary
* op
= o
.release();
31 // ELFType<endian, maxalign, 64bit>
32 if (ELF32LEObjectFile
* object
= dyn_cast
<ELF32LEObjectFile
>(op
)) {
33 return new LLVMDisassembler
<ELFType
<support::little
, 2, false>>(filename
, manager
, object
);
35 if (ELF64LEObjectFile
* object
= dyn_cast
<ELF64LEObjectFile
>(op
)) {
36 return new LLVMDisassembler
<ELFType
<support::little
, 2, true>>(filename
, manager
, object
);
38 if (ELF32BEObjectFile
* object
= dyn_cast
<ELF32BEObjectFile
>(op
)) {
39 return new LLVMDisassembler
<ELFType
<support::big
, 2, false>>(filename
, manager
, object
);
41 if (ELF64BEObjectFile
* object
= dyn_cast
<ELF64BEObjectFile
>(op
)) {
42 return new LLVMDisassembler
<ELFType
<support::big
, 2, true>>(filename
, manager
, object
);
44 if (COFFObjectFile
* object
= dyn_cast
<COFFObjectFile
>(op
)) {
45 return new LLVMDisassembler
<COFFT
>(filename
, manager
, object
);
52 * TODO: fallback code in case the file is not an ELF/PE/COFF/Mach-O/... binary
53 * but, e.g., just raw instructions or a boot sector or
// Constructor: loads `filename` with createBinary(), sets the target triple's
// architecture from the object file, looks up the matching LLVM target and
// creates the MC helper objects (register info, asm info, subtarget info,
// instruction info, disassembler, instruction analysis, instruction printer),
// then builds an MC module for the object.
// NOTE(review): this listing skips original lines (e.g. 59-60, 63-64, 69-70,
// 73-76, 80-84); the rest of the parameter list, part of the initializer list
// and the conditions guarding the LOG4CXX_ERROR calls are not visible here.
56 template <typename ELFT
>
57 LLVMDisassembler
<ELFT
>::LLVMDisassembler(const std::string
& filename
,
58 InformationManager
* manager
,
61 , logger(log4cxx::Logger::getLogger("LLVMDisassembler"))
62 , triple("unknown-unknown-unknown")
65 LOG4CXX_DEBUG(logger
, "Handling file " << filename
);
// Open the file; the result object carries either the binary or an error code.
68 auto result
= createBinary(filename
);
71 if ((ec
= result
.getError())) {
72 LOG4CXX_ERROR(logger
, "Failed to load Binary" << ec
.message());
// Take over the loaded binary.
77 binary
.reset(result
.get());
// View the binary as an ObjectFile.
// NOTE(review): dyn_cast yields null on a type mismatch; no null check is
// visible before o->getArch() below (lines 80-84 are missing) — confirm.
79 o
= dyn_cast
<ObjectFile
>(binary
.get());
// Only the architecture component of the triple is replaced; the other
// components keep the "unknown" values set in the initializer list.
85 triple
.setArch(Triple::ArchType(o
->getArch()));
86 std::string
tripleName(triple
.getTriple());
88 LOG4CXX_INFO(logger
, "Architecture " << tripleName
);
// Look up the LLVM target for the triple; `es` is passed to the lookup and
// logged below (presumably it receives the error text — verify).
92 target
= TargetRegistry::lookupTarget("", triple
, es
);
94 LOG4CXX_ERROR(logger
, es
);
98 LOG4CXX_INFO(logger
, "Target " << target
->getName());
100 MRI
.reset(target
->createMCRegInfo(tripleName
));
102 LOG4CXX_ERROR(logger
, "no register info for target " << tripleName
);
106 // Set up disassembler.
107 AsmInfo
.reset(target
->createMCAsmInfo(*MRI
, tripleName
));
109 LOG4CXX_ERROR(logger
, "no assembly info for target " << tripleName
);
// Subtarget info is created with empty second and third arguments.
113 STI
.reset(target
->createMCSubtargetInfo(tripleName
, "", ""));
115 LOG4CXX_ERROR(logger
, "no subtarget info for target " << tripleName
);
119 MII
.reset(target
->createMCInstrInfo());
121 LOG4CXX_ERROR(logger
, "no instruction info for target " << tripleName
);
// The MC context is a local variable built from the objects created above.
125 MOFI
.reset(new MCObjectFileInfo
);
126 MCContext
Ctx(AsmInfo
.get(), MRI
.get(), MOFI
.get());
128 DisAsm
.reset(target
->createMCDisassembler(*STI
, Ctx
));
130 LOG4CXX_ERROR(logger
, "no disassembler for target " << tripleName
);
// Relocation info creation; the receiving statement head (line 133) is
// missing from this listing.
134 target
->createMCRelocationInfo(tripleName
, Ctx
));
137 // MCObjectSymbolizer::createObjectSymbolizer(Ctx, std::move(RelInfo), o));
139 // DisAsm->setSymbolizer(std::move(Symzer));
144 MIA
.reset(target
->createMCInstrAnalysis(MII
.get()));
146 LOG4CXX_ERROR(logger
, "no instruction analysis for target " << tripleName
);
// The instruction printer uses the assembler dialect reported by AsmInfo.
150 int AsmPrinterVariant
= AsmInfo
->getAssemblerDialect();
151 IP
.reset(target
->createMCInstPrinter(AsmPrinterVariant
, *AsmInfo
, *MII
, *MRI
, *STI
));
153 LOG4CXX_ERROR(logger
, "no instruction printer for target " << tripleName
);
// setPrintImmHex is invoked twice: once with a HexStyle value, once with a bool.
157 IP
->setPrintImmHex(llvm::HexStyle::C
);
158 IP
->setPrintImmHex(true);
// Build the MC module for the object file (buildModule is called with false).
160 std::unique_ptr
<MCObjectDisassembler
> OD(
161 new MCObjectDisassembler(*o
, *DisAsm
, *MIA
));
162 Mod
.reset(OD
->buildModule(false));
// start(): the only statement visible in this listing is the call to
// readDynamicSymbols().
// NOTE(review): original lines 169-170 are missing here, so further
// statements may precede that call — check the real file.
167 template <typename ELFT
>
168 void LLVMDisassembler
<ELFT
>::start() {
171 readDynamicSymbols();
174 template <typename ELFT
>
175 LLVMDisassembler
<ELFT
>::~LLVMDisassembler() {}
// disassembleFunctionAt(address, name): obtains a Function for `address` from
// the manager, creating and naming one when none exists, and passes it to
// disassembleFunction().
// NOTE(review): the declaration of `function` and `s`, the body of the range
// check, the condition choosing between the two naming paths and the return
// statement are on lines missing from this listing (179, 187-189, 191-193,
// 197, 200, 202+).
177 template <typename ELFT
>
178 Function
* LLVMDisassembler
<ELFT
>::disassembleFunctionAt(uint64_t address
, const std::string
& name
) {
// Bounds of the ".text" section: start address and size.
180 SectionRef text_section
= sections
[".text"];
181 uint64_t base_address
, size
;
182 text_section
.getAddress(base_address
);
183 text_section
.getSize(size
);
// True when `address` lies outside [base_address, base_address + size);
// what happens in that case is not visible here.
185 if (address
< base_address
||
186 address
>= base_address
+ size
) {
// Only create a Function when the manager has none at this address.
190 if (NULL
== (function
= manager
->getFunction(address
))) {
// Naming path 1: generated name "<Unnamed 0x<hex address>>".
194 s
<< "<Unnamed 0x" << std::hex
<< address
<< ">";
195 function
= manager
->newFunction(address
);
196 function
->setName(s
.str());
// Naming path 2: the caller-supplied `name` (presumably taken when `name`
// is non-empty — the selecting condition is not visible).
198 function
= manager
->newFunction(address
);
199 function
->setName(name
);
201 disassembleFunction(function
);
// disassembleFunction(function): work-list disassembly of one function.
// Starting with a block at the function's start address, each pending block
// is decoded instruction by instruction from the ".text" contents; branch
// targets reported by the instruction analysis become further blocks that are
// pushed onto the stack. Afterwards splitBlocks() and
// manager->finishFunction() are called for the function.
// NOTE(review): many original lines are missing from this listing (local
// declarations such as bytes/inst/inst_size/buf/jmptarget, else branches,
// closing braces and whatever ends the inner decode loop).
207 template <typename ELFT
>
208 void LLVMDisassembler
<ELFT
>::disassembleFunction(Function
* function
) {
// Blocks that still have to be decoded.
209 std::stack
<BasicBlock
*> remaining_blocks
;
211 * Do all blocks get added properly? We should take care to remove
212 * the other ones at the end of the function!
// Blocks created during this call, keyed by start address; consulted before
// creating a block for a branch target.
214 std::map
<uint64_t, BasicBlock
*> new_blocks
;
215 SectionRef text_section
= sections
[".text"];
// Raw contents of ".text", wrapped for the MC disassembler.
217 text_section
.getContents(bytes
);
218 StringRefMemoryObject
ref(bytes
);
220 LOG4CXX_DEBUG(logger
, "Handling function " << function
->getName());
// Seed the work list with the block at the function's start address.
222 BasicBlock
* block
= manager
->newBasicBlock(function
->getStartAddress());
223 remaining_blocks
.push(block
);
224 new_blocks
.insert(std::make_pair(block
->getStartAddress(), block
));
225 function
->addBasicBlock(block
);
// Process pending blocks until the stack is empty.
227 while (remaining_blocks
.size()) {
228 BasicBlock
* current_block
= remaining_blocks
.top();
229 remaining_blocks
.pop();
231 LOG4CXX_DEBUG(logger
, "Handling Block starting at " << std::hex
232 << current_block
->getStartAddress());
// The decoder is addressed with offsets into ".text": absolute address minus
// the section's base address.
235 uint64_t base_address
;
236 text_section
.getAddress(base_address
);
237 uint64_t current_address
= current_block
->getStartAddress() - base_address
;
241 llvm::raw_string_ostream
s(buf
);
// Decode one instruction at the current offset; both output streams are nulls().
243 if(llvm::MCDisassembler::Success
==
244 DisAsm
->getInstruction(inst
, inst_size
, ref
, current_address
, nulls(), nulls())) {
// evaluateBranch() is given the section offset, so the target it yields is
// turned into an absolute address by adding base_address.
247 if (MIA
->evaluateBranch(inst
, current_address
, inst_size
, jmptarget
)) {
248 jmptarget
+= base_address
;
249 if (!MIA
->isIndirectBranch(inst
)) {
// Call: disassemble the callee unless the manager already has a function at
// the target. This re-enters disassembleFunction() via disassembleFunctionAt().
250 if (MIA
->isCall(inst
)) {
251 if (NULL
== manager
->getFunction(jmptarget
))
252 disassembleFunctionAt(jmptarget
);
// Branch target becomes successor 0 of the current block (presumably the
// else-branch of the isCall() test; line 253 is missing).
254 current_block
->setNextBlock(0, jmptarget
);
// Create and queue a block for the target only if this call has not created
// one for that address yet.
255 if (new_blocks
.find(jmptarget
) == new_blocks
.end()) {
256 BasicBlock
* block
= manager
->newBasicBlock(jmptarget
);
258 new_blocks
.insert(std::make_pair(block
->getStartAddress(), block
));
259 function
->addBasicBlock(block
);
260 remaining_blocks
.push(block
);
// NOTE(review): the message prints current_block's start address although
// the block being reused is the one looked up by jmptarget — looks like a
// logging slip; confirm.
262 LOG4CXX_DEBUG(logger
, "Reusing Block starting at " << std::hex
263 << current_block
->getStartAddress());
264 function
->addBasicBlock(new_blocks
.find(jmptarget
)->second
);
// Conditional branch: the fall-through address (instruction end) becomes
// successor 1; jmptarget is reused to hold that address.
266 if (MIA
->isConditionalBranch(inst
)) {
267 jmptarget
= base_address
+ current_address
+ inst_size
;
268 current_block
->setNextBlock(1, jmptarget
);
269 if (new_blocks
.find(jmptarget
) == new_blocks
.end()) {
270 BasicBlock
* block
= manager
->newBasicBlock(jmptarget
);
272 new_blocks
.insert(std::make_pair(block
->getStartAddress(), block
));
273 function
->addBasicBlock(block
);
274 remaining_blocks
.push(block
);
// NOTE(review): same as above — logs current_block's start, not jmptarget.
276 LOG4CXX_DEBUG(logger
, "Reusing Block starting at " << std::hex
277 << current_block
->getStartAddress());
278 function
->addBasicBlock(new_blocks
.find(jmptarget
)->second
);
// A block ends after a zero-size decode, a terminator or a branch; its end
// address is the absolute address just past that instruction.
289 if (inst_size
== 0 || MIA
->isTerminator(inst
) || MIA
->isBranch(inst
)) {
290 current_block
->setEndAddress(current_address
+ base_address
+ inst_size
);
291 LOG4CXX_DEBUG(logger
, "Finished Block at " << std::hex
<<
292 current_block
->getEndAddress());
// Advance to the next instruction.
295 current_address
+= inst_size
;
// Post-processing once the work list is (presumably) drained.
298 splitBlocks(function
);
299 LOG4CXX_DEBUG(logger
, "Finished function " << function
->getName());
300 manager
->finishFunction(function
);
// disassemble(): top-level driver. Creates a Function for each entry of
// `symbols` that passes the checks below and disassembles them; for ELF
// binaries additionally disassembles at the entry address; if the manager
// ends up without any function, starts at the beginning of ".text".
// NOTE(review): several original lines are missing from this listing
// (declarations of contains/result/s/text_entry, the bodies of the two filter
// ifs — presumably `continue` — and closing braces).
303 template <typename ELFT
>
304 void LLVMDisassembler
<ELFT
>::disassemble() {
305 SectionRef text_section
= sections
[".text"];
306 std::vector
<Function
*> remaining_functions
;
308 // Assume all function symbols actually start a real function
309 for (auto x
= symbols
.begin(); x
!= symbols
.end(); ++x
) {
312 SymbolRef::Type symbol_type
;
// Filter 1: true when the containsSymbol() call reports an error or the
// symbol is not contained in ".text".
315 if (text_section
.containsSymbol(x
->second
, contains
) || !contains
)
// Filter 2: true when the type query fails or the symbol is not a function.
318 if (x
->second
.getType(symbol_type
)
319 || SymbolRef::ST_Function
!= symbol_type
)
// Address query succeeded (a false result means no error): register the function.
322 if (!x
->second
.getAddress(result
)) {
323 Function
* fun
= manager
->newFunction(result
);
324 fun
->setName(x
->first
);
325 remaining_functions
.push_back(fun
);
326 LOG4CXX_DEBUG(logger
, "Disasembling " << x
->first
);
// NOTE(review): disassembleFunction() itself ends with
// manager->finishFunction(function), so the call on line 332 is a second
// finishFunction() for the same function — confirm this is intended.
330 for (Function
* function
: remaining_functions
) {
331 disassembleFunction(function
);
332 manager
->finishFunction(function
);
// ELF only: also disassemble from the header's entry address, named
// "<_start 0x<hex address>>".
335 if (binary
->isELF()) {
336 uint64_t _entryAddress
= entryAddress();
337 LOG4CXX_DEBUG(logger
, "Adding entryAddress at: " << std::hex
<< _entryAddress
);
339 s
<< "<_start 0x" << std::hex
<< _entryAddress
<< ">";
341 disassembleFunctionAt(_entryAddress
, s
.str());
// Fallback: nothing registered so far, start at the address of ".text".
344 if (!manager
->hasFunctions()) {
346 text_section
.getAddress(text_entry
);
347 LOG4CXX_INFO(logger
, "No Symbols found, starting at the beginning of the text segment");
348 disassembleFunctionAt(text_entry
);
353 uint64_t LLVMDisassembler
<COFFT
>::entryAddress() {
354 const auto coffobject
= dyn_cast
<COFFObjectFile
>(o
);
355 const struct pe32_header
* pe32_header
;
356 const struct pe32plus_header
* pe32plus_header
;
358 coffobject
->getPE32PlusHeader(pe32plus_header
);
360 if (pe32plus_header
) {
361 return pe32plus_header
->AddressOfEntryPoint
;
363 coffobject
->getPE32Header(pe32_header
);
364 return pe32_header
->AddressOfEntryPoint
;
368 template <typename ELFT
>
369 uint64_t LLVMDisassembler
<ELFT
>::entryAddress() {
370 const auto elffile
= dyn_cast
<ELFObjectFile
<ELFT
>>(o
)->getELFFile();
371 const auto * header
= elffile
->getHeader();
373 return header
->e_entry
;
// splitBlocks(function): walks every block of `function`, re-decodes its
// instructions and, where another basic block known to the manager starts
// inside it, shortens the current block so that it ends there and continues
// into that other block.
// NOTE(review): the listing is missing the loop increment (line 386), local
// declarations (bytes/inst/inst_size/buf), the rest of the getBasicBlock()
// call (lines 401-403), the test on `other`, parts of the log statement and
// closing braces.
376 template <typename ELFT
>
377 void LLVMDisassembler
<ELFT
>::splitBlocks(Function
* function
) {
378 SectionRef text_section
= sections
[".text"];
// Raw contents of ".text", wrapped for the MC disassembler.
380 text_section
.getContents(bytes
);
381 StringRefMemoryObject
ref(bytes
);
383 // Split blocks where jumps are going inside the block
384 for (auto it
= function
->blocks().begin();
385 it
!= function
->blocks().end();
387 BasicBlock
* current_block
= it
->second
;
// Work with offsets into ".text" (absolute address minus section base).
389 uint64_t base_address
;
390 text_section
.getAddress(base_address
);
391 uint64_t current_address
= current_block
->getStartAddress() - base_address
;
// Step through the block until its end address is reached.
392 while(current_block
->getEndAddress() - base_address
> current_address
) {
395 llvm::raw_string_ostream
s(buf
);
397 if(llvm::MCDisassembler::Success
==
398 DisAsm
->getInstruction(inst
, inst_size
, ref
, current_address
, nulls(), nulls())) {
399 // See if some other block starts here
400 BasicBlock
* other
= manager
->getBasicBlock(current_address
404 // Special case, other block starts here but we are at the end anyway
// Absolute address just past the current instruction; splitting is only
// needed when that is not already the block's end.
406 uint64_t endaddress
= current_address
+ inst_size
+ base_address
;
407 if (endaddress
!= current_block
->getEndAddress()) {
408 LOG4CXX_DEBUG(logger
, "Shortening block starting at "
410 << current_block
->getStartAddress()
412 << other
->getStartAddress());
// Make `other` part of the function and let the shortened block end at
// `endaddress`, with `other` as successor 0 and successor 1 reset to 0.
413 function
->addBasicBlock(other
);
414 current_block
->setEndAddress(endaddress
);
415 current_block
->setNextBlock(0, other
->getStartAddress());
416 current_block
->setNextBlock(1, 0);
// Advance to the next instruction.
422 current_address
+= inst_size
;
// COFFT specialization of readDynamicSymbols().
// NOTE(review): only the signature is visible in this listing (original
// lines 427 and 429-431 are missing), so nothing can be said about the body.
428 void LLVMDisassembler
<COFFT
>::readDynamicSymbols() {
// readDynamicSymbols(): iterates over the ELF file's dynamic symbols and, for
// every symbol whose type equals 2, reports "<name>@@<version>" (default
// version) or "<name>@<version>" to the manager and logs it.
// NOTE(review): the loop condition/increment (lines 437-438), the declaration
// of is_default (line 440) and the closing braces are missing from this
// listing.
432 template <typename ELFT
>
433 void LLVMDisassembler
<ELFT
>::readDynamicSymbols() {
// NOTE(review): the dyn_cast result is dereferenced without a null check.
434 const auto elffile
= dyn_cast
<ELFObjectFile
<ELFT
>>(o
)->getELFFile();
435 for (auto it
= elffile
->begin_dynamic_symbols(),
436 end
= elffile
->end_dynamic_symbols();
// Type value 2 is labelled "Function" by the trailing comment (presumably
// the ELF STT_FUNC constant — verify).
439 if (it
->getType() == 2) { // Function
441 // TODO: Error handling
// Both lookups are dereferenced directly; failures are not handled (see the
// TODO above).
442 std::string symbolname
= *(elffile
->getSymbolName(it
));
443 std::string symbolversion
= *(elffile
->getSymbolVersion(nullptr, &*it
, is_default
));
444 manager
->signal_new_dyn_symbol(symbolname
+ (is_default
? "@@" : "@") + symbolversion
);
445 LOG4CXX_DEBUG(logger
, "Adding dynamic Symbol " << symbolname
<< (is_default
? "@@" : "@") << symbolversion
);
// readSymbols(): iterates over the object file's symbols and stores each one
// in `symbols` under its name; a failing name lookup is logged as an error.
// NOTE(review): the declarations of ec/name and whatever follows the error
// log (lines 452, 455, 458-459 — presumably a `continue`) are missing from
// this listing.
450 template <typename ELFT
>
451 void LLVMDisassembler
<ELFT
>::readSymbols() {
453 symbol_iterator
si(o
->symbol_begin()), se(o
->symbol_end());
454 for (; si
!= se
; ++si
) {
// A non-zero error code from getName() means the name could not be read.
456 if ((ec
= si
->getName(name
))) {
457 LOG4CXX_ERROR(logger
, ec
.message());
460 LOG4CXX_DEBUG(logger
, "Added symbol " << name
.str());
// insert() is used, so the call does not replace an entry already stored
// under the same name if `symbols` has unique keys — container type not
// visible here.
461 symbols
.insert(make_pair(name
.str(), *si
));
// readSections(): iterates over the object file's sections and stores each
// one in `sections` under its name; a failing name lookup is logged as an
// error. Other methods look up ".text" in this container.
// NOTE(review): the declarations of ec/name and whatever follows the error
// log (lines 467, 470, 473-474 — presumably a `continue`) are missing from
// this listing.
465 template <typename ELFT
>
466 void LLVMDisassembler
<ELFT
>::readSections() {
468 section_iterator
i(o
->section_begin()), e(o
->section_end());
469 for (; i
!= e
; ++i
) {
// A non-zero error code from getName() means the name could not be read.
471 if ((ec
= i
->getName(name
))) {
472 LOG4CXX_ERROR(logger
, ec
.message());
475 LOG4CXX_DEBUG(logger
, "Added section " << name
.str());
476 sections
.insert(make_pair(name
.str(), *i
));
481 // template <typename ELFT>
482 // void LLVMDisassembler<ELFT>::forEachFunction(std::function<void (uint64_t, Function*)> callback) {
483 // // std::for_each(functions.begin(), functions.end(),
484 // // [&](std::pair<uint64_t, Function*> x) {
485 // // callback(x.first, x.second);
// printEachInstruction(start, end, fun): decodes the instructions of ".text"
// between the absolute addresses `start` and `end` and invokes `fun` once per
// instruction with the raw bytes, their count, the printed instruction text
// and a fourth argument; undecodable positions are reported with
// fun(NULL, 0, "Invalid Byte", "").
// NOTE(review): this listing is missing part of the callback signature
// (line 492), local declarations (bytes/inst/inst_size/buf/jmptarget and
// possibly an inner `ref`), else branches and closing braces.
489 template <typename ELFT
>
490 void LLVMDisassembler
<ELFT
>::printEachInstruction(uint64_t start
, uint64_t end
,
491 std::function
<void (uint8_t*, size_t,
493 const std::string
&)> fun
) {
494 SectionRef text_section
= sections
[".text"];
// The decoder works with offsets into ".text" (absolute address minus base).
495 uint64_t base_address
;
496 text_section
.getAddress(base_address
);
497 uint64_t current_address
= start
- base_address
;
500 text_section
.getContents(bytes
);
501 StringRefMemoryObject
ref(bytes
);
503 while (current_address
< end
- base_address
) {
507 llvm::raw_string_ostream
s(buf
);
509 if(llvm::MCDisassembler::Success
==
510 DisAsm
->getInstruction(inst
, inst_size
, ref
, current_address
, nulls(), nulls())) {
// Copy the instruction's raw bytes into a local buffer.
// NOTE(review): the array size depends on inst_size, which is filled in by
// getInstruction() above, so this looks like a variable-length array (a
// compiler extension in C++); the name also shadows the outer `bytes`.
512 uint8_t bytes
[inst_size
+2];
513 ref
.readBytes(current_address
, inst_size
, bytes
);
// Render the instruction text into `s`.
517 IP
->printInst(&inst
, s
, "");
// For branches with a computable target, build a reference string: the
// "function:" prefix for calls, followed by the absolute target in hex.
518 if (MIA
->evaluateBranch(inst
, current_address
, inst_size
, jmptarget
)) {
519 std::stringstream stream
;
520 if (MIA
->isCall(inst
))
521 stream
<< "function:";
525 stream
<< std::hex
<< (base_address
+ jmptarget
);
// NOTE(review): `ref` here is presumably a string declared on a missing line
// (the callback's last parameter is const std::string&) — verify.
530 fun(bytes
, inst_size
, s
.str(), ref
);
// Decoding failed at this position.
532 LOG4CXX_WARN(logger
, "Invalid byte at" << std::hex
<< current_address
+ base_address
);
533 fun(NULL
, 0, "Invalid Byte", "");
// Advance to the next instruction.
537 current_address
+= inst_size
;