]>
git.siccegge.de Git - frida/frida.git/blob - disassembler/llvm/LLVMDisassembler.cxx
1 #include "disassembler/llvm/LLVMDisassembler.hxx"
2 #include "disassembler/llvm/LLVMBasicBlock.hxx"
3 #include "disassembler/llvm/LLVMFunction.hxx"
9 using namespace llvm::object
;
10 using std::error_code
;
// Factory for the ELF-specific disassembler.
// Opens `filename` with createBinary() and probes the resulting Binary with
// dyn_cast for, in this order, ELF32LE, ELF64LE, ELF32BE and ELF64BE object
// files. The first match returns a heap-allocated
// LLVMDisassembler<ELFType<endianness, 2, is64bit>> constructed from
// (filename, manager, object); the caller receives it as a Disassembler*.
// NOTE(review): several original lines (closing braces, the path taken when
// no cast matches) are not part of this view -- confirm the fallthrough
// behaviour against the full file.
15 Disassembler
* createLLVMDisassembler(const std::string
& filename
, InformationManager
* manager
) {
19 std::unique_ptr
<Binary
> o
;
// NOTE(review): no error check on the createBinary() result is visible before
// .get() is called. The pointer is stored in `o` and released again right
// away, so from here on it is a plain raw pointer; if none of the casts below
// succeeds it looks like `op` is never freed -- confirm.
20 o
.reset(createBinary(filename
).get());
21 Binary
* op
= o
.release();
23 // ELFType<endian, maxalign, 64bit>
24 if (ELF32LEObjectFile
* object
= dyn_cast
<ELF32LEObjectFile
>(op
)) {
25 return new LLVMDisassembler
<ELFType
<support::little
, 2, false>>(filename
, manager
, object
);
27 if (ELF64LEObjectFile
* object
= dyn_cast
<ELF64LEObjectFile
>(op
)) {
28 return new LLVMDisassembler
<ELFType
<support::little
, 2, true>>(filename
, manager
, object
);
30 if (ELF32BEObjectFile
* object
= dyn_cast
<ELF32BEObjectFile
>(op
)) {
31 return new LLVMDisassembler
<ELFType
<support::big
, 2, false>>(filename
, manager
, object
);
33 if (ELF64BEObjectFile
* object
= dyn_cast
<ELF64BEObjectFile
>(op
)) {
34 return new LLVMDisassembler
<ELFType
<support::big
, 2, true>>(filename
, manager
, object
);
41 * TODO: fallback code for when the file is not an ELF/PE/COFF/Mach-O/... binary
42 * but, e.g., just raw instructions or a boot sector or
// Constructor: sets up the whole LLVM MC disassembly pipeline for `filename`.
// Visible steps, in order:
//   - initialise the Disassembler base with (filename, manager), the log4cxx
//     logger "LLVMDisassembler" and a placeholder triple;
//   - load the file with createBinary() and cast it to ELFObjectFile<ELFT>;
//   - derive the target triple from the object's architecture and look the
//     target up in the TargetRegistry;
//   - create register info, asm info, subtarget info, instruction info,
//     object file info, an MCContext, the MCDisassembler, relocation info,
//     an object symbolizer, instruction analysis and an instruction printer;
//   - build the MC module with MCObjectDisassembler::buildModule(false).
// Every creation step is followed by a LOG4CXX_ERROR line; the conditions
// guarding those log calls (and what happens afterwards) are on lines that
// are not part of this view.
// NOTE(review): the `file` parameter is not referenced in any visible line;
// the binary is opened again from `filename` instead -- confirm intent.
45 template <typename ELFT
>
46 LLVMDisassembler
<ELFT
>::LLVMDisassembler(const std::string
& filename
,
47 InformationManager
* manager
,
48 ELFObjectFile
<ELFT
>* file
)
49 : Disassembler(filename
, manager
)
50 , logger(log4cxx::Logger::getLogger("LLVMDisassembler"))
51 , triple("unknown-unknown-unknown")
// NOTE(review): the message has no separator between "Handling file" and the
// file name (same for "Failed to load Binary" below).
54 LOG4CXX_DEBUG(logger
, "Handling file" << filename
);
// Load the binary; a failure is reported through the error code of `result`.
57 auto result
= createBinary(filename
);
60 if ((ec
= result
.getError())) {
61 LOG4CXX_ERROR(logger
, "Failed to load Binary" << ec
.message());
66 binary
.reset(result
.get());
// `o` is the typed ELF view of `binary`; dyn_cast yields null on a mismatch.
68 o
= dyn_cast
<ELFObjectFile
<ELFT
>>(binary
.get());
// Replace the architecture component of the placeholder triple with the one
// reported by the object file.
74 triple
.setArch(Triple::ArchType(o
->getArch()));
75 std::string
tripleName(triple
.getTriple());
77 LOG4CXX_INFO(logger
, "Architecture " << tripleName
);
// Target lookup; `es` receives the error string that is logged below.
81 target
= TargetRegistry::lookupTarget("", triple
, es
);
83 LOG4CXX_ERROR(logger
, es
);
87 LOG4CXX_INFO(logger
, "Target " << target
->getName());
89 MRI
.reset(target
->createMCRegInfo(tripleName
));
91 LOG4CXX_ERROR(logger
, "no register info for target " << tripleName
);
95 // Set up disassembler.
96 AsmInfo
.reset(target
->createMCAsmInfo(*MRI
, tripleName
));
98 LOG4CXX_ERROR(logger
, "no assembly info for target " << tripleName
);
102 STI
.reset(target
->createMCSubtargetInfo(tripleName
, "", ""));
104 LOG4CXX_ERROR(logger
, "no subtarget info for target " << tripleName
);
108 MII
.reset(target
->createMCInstrInfo());
110 LOG4CXX_ERROR(logger
, "no instruction info for target " << tripleName
);
// NOTE(review): `Ctx` is declared here as a local of the constructor, yet it
// is handed to createMCDisassembler()/createObjectSymbolizer(), whose results
// are used by other methods of this class (DisAsm). If those objects keep a
// reference to the context it would dangle once the constructor returns --
// confirm the lifetime requirements.
114 MOFI
.reset(new MCObjectFileInfo
);
115 MCContext
Ctx(AsmInfo
.get(), MRI
.get(), MOFI
.get());
117 DisAsm
.reset(target
->createMCDisassembler(*STI
, Ctx
));
119 LOG4CXX_ERROR(logger
, "no disassembler for target " << tripleName
);
// Relocation info and symbolizer; the declarations of RelInfo/Symzer are on
// lines not shown here.
123 target
->createMCRelocationInfo(tripleName
, Ctx
));
126 MCObjectSymbolizer::createObjectSymbolizer(Ctx
, std::move(RelInfo
), o
));
128 DisAsm
->setSymbolizer(std::move(Symzer
));
133 MIA
.reset(target
->createMCInstrAnalysis(MII
.get()));
135 LOG4CXX_ERROR(logger
, "no instruction analysis for target " << tripleName
);
139 int AsmPrinterVariant
= AsmInfo
->getAssemblerDialect();
140 IP
.reset(target
->createMCInstPrinter(AsmPrinterVariant
, *AsmInfo
, *MII
, *MRI
, *STI
));
142 LOG4CXX_ERROR(logger
, "no instruction printer for target " << tripleName
);
// NOTE(review): setPrintImmHex is called twice in a row, first with
// HexStyle::C and then with `true`; check whether both calls are needed.
146 IP
->setPrintImmHex(llvm::HexStyle::C
);
147 IP
->setPrintImmHex(true);
// Build the MC module for the whole object and keep it in `Mod`.
149 std::unique_ptr
<MCObjectDisassembler
> OD(
150 new MCObjectDisassembler(*o
, *DisAsm
, *MIA
));
151 Mod
.reset(OD
->buildModule(false));
// start(): takes no arguments and returns nothing. Its body lines are not
// part of this view, so its behaviour is intentionally left undocumented here.
154 template <typename ELFT
>
155 void LLVMDisassembler
<ELFT
>::start() {
// Destructor: walks every entry of `functions` and then every entry of
// `blocks` with std::for_each. Each lambda receives one
// (address, pointer) pair by value. The lambda bodies are on lines not shown
// in this view -- presumably they release the pointed-to objects; confirm.
161 template <typename ELFT
>
162 LLVMDisassembler
<ELFT
>::~LLVMDisassembler() {
163 std::for_each(functions
.begin(), functions
.end(),
164 [](std::pair
<uint64_t,LLVMFunction
*> it
) {
167 std::for_each(blocks
.begin(), blocks
.end(),
168 [](std::pair
<uint64_t, LLVMBasicBlock
*> it
) {
// Returns the function starting at `address`, creating and disassembling it
// if it is not known yet.
//   address - absolute start address of the function.
//   name    - name for a newly created function; the visible code also has a
//             path that builds "<Unnamed 0x<hex address>>" instead (the
//             condition selecting between the two is on a line not shown).
// The address is first checked against the [base, base + size) range of the
// ".text" section; what happens when it lies outside is not visible here.
// An address already present in `functions` returns the stored entry.
// NOTE(review): `sections[".text"]` uses operator[]; if `sections` is a
// std::map-like container this inserts a default SectionRef when the section
// is missing -- confirm.
173 template <typename ELFT
>
174 Function
* LLVMDisassembler
<ELFT
>::disassembleFunctionAt(uint64_t address
, const std::string
& name
) {
175 SectionRef text_section
= sections
[".text"];
176 uint64_t base_address
, size
;
177 text_section
.getAddress(base_address
);
178 text_section
.getSize(size
);
// Range check against the text section.
180 if (address
< base_address
||
181 address
>= base_address
+ size
) {
// Already known: hand back the existing function (find followed by a second
// lookup through operator[]).
185 if (functions
.find(address
) != functions
.end()) {
186 return functions
[address
];
189 LLVMFunction
* function
;
192 s
<< "<Unnamed 0x" << std::hex
<< address
<< ">";
193 function
= new LLVMFunction(s
.str(), address
);
195 function
= new LLVMFunction(name
, address
);
// Register the function before disassembling it, so the lookup above finds it
// if the same address is requested again during disassembly.
197 functions
.insert(std::make_pair(address
, function
));
199 disassembleFunction(function
);
// Disassembles one function into basic blocks.
// A stack of pending blocks is seeded with a block at the function's start
// address. Each popped block is decoded instruction by instruction from the
// ".text" contents (addresses are handled as offsets relative to the section
// base and converted back by adding base_address):
//   - when MIA->evaluateBranch() resolves a target and the instruction is not
//     an indirect branch, a call target that is not yet in `functions` is
//     disassembled through disassembleFunctionAt();
//   - the branch target is recorded as next block 0 and a block for it is
//     created (and pushed) or reused;
//   - for conditional branches the fall-through address is recorded as next
//     block 1 in the same way;
//   - a block ends when inst_size is 0 or the instruction is a terminator or
//     a branch.
// Afterwards splitBlocks() is run and the manager is notified through
// signal_new_function().
// NOTE(review): lines carrying `else`, closing braces and several local
// declarations (bytes, inst, inst_size, jmptarget, buf) are not part of this
// view, so the exact nesting of the branches above should be confirmed.
204 template <typename ELFT
>
205 void LLVMDisassembler
<ELFT
>::disassembleFunction(LLVMFunction
* function
) {
206 std::stack
<LLVMBasicBlock
*> remaining_blocks
;
207 SectionRef text_section
= sections
[".text"];
209 text_section
.getContents(bytes
);
210 StringRefMemoryObject
ref(bytes
);
212 LOG4CXX_DEBUG(logger
, "Handling function " << function
->getName());
// Seed the worklist with the entry block of the function.
214 LLVMBasicBlock
* block
= new LLVMBasicBlock(function
->getStartAddress(), this);
215 remaining_blocks
.push(block
);
216 blocks
.insert(std::make_pair(block
->getStartAddress(), block
));
217 function
->addBasicBlock(block
);
219 while (remaining_blocks
.size()) {
220 LLVMBasicBlock
* current_block
= remaining_blocks
.top();
221 remaining_blocks
.pop();
223 LOG4CXX_DEBUG(logger
, "Handling Block starting at " << std::hex
<< current_block
->getStartAddress());
// current_address is an offset into the text section, not an absolute address.
226 uint64_t base_address
;
227 text_section
.getAddress(base_address
);
228 uint64_t current_address
= current_block
->getStartAddress() - base_address
;
232 llvm::raw_string_ostream
s(buf
);
234 if(llvm::MCDisassembler::Success
==
235 DisAsm
->getInstruction(inst
, inst_size
, ref
, current_address
, nulls(), nulls())) {
238 if (MIA
->evaluateBranch(inst
, current_address
, inst_size
, jmptarget
)) {
// evaluateBranch worked on the section-relative address; make it absolute.
239 jmptarget
+= base_address
;
240 if (!MIA
->isIndirectBranch(inst
)) {
241 if (MIA
->isCall(inst
)) {
242 if (functions
.find(jmptarget
) == functions
.end()) {
243 disassembleFunctionAt(jmptarget
);
246 current_block
->setNextBlock(0, jmptarget
);
247 if (blocks
.find(jmptarget
) == blocks
.end()) {
248 LLVMBasicBlock
* block
= new LLVMBasicBlock(jmptarget
, this);
249 blocks
.insert(std::make_pair(block
->getStartAddress(), block
));
250 function
->addBasicBlock(block
);
251 remaining_blocks
.push(block
);
// NOTE(review): the message says "Reusing Block starting at" but prints the
// start address of current_block, while the block being reused is the one at
// jmptarget -- likely the wrong value is logged (same below).
253 LOG4CXX_DEBUG(logger
, "Reusing Block starting at " << std::hex
<< current_block
->getStartAddress());
254 function
->addBasicBlock(blocks
.find(jmptarget
)->second
);
256 if (MIA
->isConditionalBranch(inst
)) {
// Fall-through successor: the address right after this instruction.
257 jmptarget
= base_address
+ current_address
+ inst_size
;
258 current_block
->setNextBlock(1, jmptarget
);
259 if (blocks
.find(jmptarget
) == blocks
.end()) {
260 LLVMBasicBlock
* block
= new LLVMBasicBlock(jmptarget
, this);
261 blocks
.insert(std::make_pair(block
->getStartAddress(), block
));
262 function
->addBasicBlock(block
);
263 remaining_blocks
.push(block
);
265 LOG4CXX_DEBUG(logger
, "Reusing Block starting at " << std::hex
<< current_block
->getStartAddress());
266 function
->addBasicBlock(blocks
.find(jmptarget
)->second
);
// End of block: nothing decoded, or a terminator/branch instruction. The end
// address is the absolute address just past this instruction.
277 if (inst_size
== 0 || MIA
->isTerminator(inst
) || MIA
->isBranch(inst
)) {
278 current_block
->setEndAddress(current_address
+ base_address
+ inst_size
);
279 LOG4CXX_DEBUG(logger
, "Finished Block at " << std::hex
<<
280 current_block
->getEndAddress());
283 current_address
+= inst_size
;
// Post-processing and notification once all reachable blocks are handled.
286 splitBlocks(function
);
287 LOG4CXX_DEBUG(logger
, "Finished function " << function
->getName());
288 manager
->signal_new_function(function
);
// Top-level disassembly driver.
//   1. Iterates over `symbols`; each entry is tested with
//      text_section.containsSymbol() and its type is compared against
//      SymbolRef::ST_Function (the statements executed when these tests
//      fail are on lines not shown). For a symbol whose address can be read,
//      an LLVMFunction named after the symbol is created, queued and
//      registered in `functions`.
//   2. Runs disassembleFunction() on every queued function.
//   3. If the binary is ELF, reads e_entry from the ELF header, stores it in
//      _entryAddress and disassembles a function named
//      "<_start 0x<hex entry>>" there.
//   4. If no function is known at all, starts at the address of the text
//      section.
291 template <typename ELFT
>
292 void LLVMDisassembler
<ELFT
>::disassemble() {
293 SectionRef text_section
= sections
[".text"];
294 std::vector
<LLVMFunction
*> remaining_functions
;
296 // Assume all function symbols actually start a real function
297 for (auto x
= symbols
.begin(); x
!= symbols
.end(); ++x
) {
300 SymbolRef::Type symbol_type
;
// A non-zero (error) result or `contains == false` triggers this branch.
303 if (text_section
.containsSymbol(x
->second
, contains
) || !contains
)
// Likewise for an error from getType() or a non-function symbol.
306 if (x
->second
.getType(symbol_type
)
307 || SymbolRef::ST_Function
!= symbol_type
)
// getAddress() reports success with a false-y error code.
310 if (!x
->second
.getAddress(result
)) {
311 LLVMFunction
* fun
= new LLVMFunction(x
->first
, result
);
312 remaining_functions
.push_back(fun
);
313 functions
.insert(std::make_pair(result
, fun
));
// NOTE(review): "Disasembling" is misspelled in this runtime log message.
314 LOG4CXX_DEBUG(logger
, "Disasembling " << x
->first
);
318 for (LLVMFunction
* function
: remaining_functions
) {
319 disassembleFunction(function
);
// ELF entry point handling.
322 if (binary
->isELF()) {
323 typedef ELFFile
<ELFT
> ELFO
;
324 const ELFO
* elffile
= o
->getELFFile();
325 const typename
ELFO::Elf_Ehdr
* header
= elffile
->getHeader();
327 _entryAddress
= header
->e_entry
;
328 LOG4CXX_DEBUG(logger
, "Adding entryAddress at: " << std::hex
<< _entryAddress
);
330 s
<< "<_start 0x" << std::hex
<< _entryAddress
<< ">";
332 disassembleFunctionAt(_entryAddress
, s
.str());
// Fallback when neither symbols nor the entry point produced a function.
335 if (functions
.empty()) {
337 text_section
.getAddress(text_entry
);
338 LOG4CXX_INFO(logger
, "No Symbols found, starting at the beginning of the text segment");
339 disassembleFunctionAt(text_entry
);
// Splits basic blocks of `function` that have another known block starting
// in their middle.
// For every block of the function the instructions between its start and end
// address are decoded again. If `blocks` contains a block that starts right
// after the current instruction and that address is not already the end of
// the current block, the current block is shortened to end there: the other
// block is added to the function, it becomes next block 0 and next block 1 is
// cleared (set to 0).
// Addresses inside the loop are offsets relative to the ".text" base address.
// NOTE(review): the iterator increment, several local declarations and the
// lines after the shortening (e.g. whether the inner loop is left) are not
// part of this view -- confirm against the full file.
343 template <typename ELFT
>
344 void LLVMDisassembler
<ELFT
>::splitBlocks(LLVMFunction
* function
) {
345 SectionRef text_section
= sections
[".text"];
347 text_section
.getContents(bytes
);
348 StringRefMemoryObject
ref(bytes
);
350 // Split blocks where jumps are going inside the block
351 for (auto it
= function
->blocks().begin();
352 it
!= function
->blocks().end();
354 BasicBlock
* current_block
= it
->second
;
356 uint64_t base_address
;
357 text_section
.getAddress(base_address
);
358 uint64_t current_address
= current_block
->getStartAddress() - base_address
;
359 while(current_block
->getEndAddress() - base_address
> current_address
) {
362 llvm::raw_string_ostream
s(buf
);
364 if(llvm::MCDisassembler::Success
==
365 DisAsm
->getInstruction(inst
, inst_size
, ref
, current_address
, nulls(), nulls())) {
366 // See if some other block starts here
367 auto other
= blocks
.find(current_address
+ inst_size
+ base_address
);
369 // Special case, other block starts here but we are at the end anyway
370 if (other
!= blocks
.end()) {
371 uint64_t endaddress
= current_address
+ inst_size
+ base_address
;
372 if (endaddress
!= current_block
->getEndAddress()) {
373 LOG4CXX_DEBUG(logger
, "Shortening block starting at "
375 << current_block
->getStartAddress()
378 function
->addBasicBlock(other
->second
);
379 current_block
->setEndAddress(endaddress
);
380 current_block
->setNextBlock(0, other
->first
);
381 current_block
->setNextBlock(1, 0);
387 current_address
+= inst_size
;
// Fills `symbols` from the object file's symbol table.
// Iterates from o->symbol_begin() to o->symbol_end(); a symbol whose name
// cannot be read has the error message logged (what follows that log call is
// on a line not shown). Otherwise the symbol is stored under its name.
// Because insert() is used, a later symbol with an already present name does
// not replace the earlier one if `symbols` has unique keys -- confirm the
// container type.
392 template <typename ELFT
>
393 void LLVMDisassembler
<ELFT
>::readSymbols() {
395 symbol_iterator
si(o
->symbol_begin()), se(o
->symbol_end());
396 for (; si
!= se
; ++si
) {
398 if ((ec
= si
->getName(name
))) {
399 LOG4CXX_ERROR(logger
, ec
.message());
402 LOG4CXX_DEBUG(logger
, "Added symbol " << name
.str());
403 symbols
.insert(make_pair(name
.str(), *si
));
// Fills `sections` from the object file's section table.
// Iterates from o->section_begin() to o->section_end(); a section whose name
// cannot be read has the error message logged (what follows that log call is
// on a line not shown). Otherwise the section is stored under its name; the
// other methods look up ".text" in this container.
407 template <typename ELFT
>
408 void LLVMDisassembler
<ELFT
>::readSections() {
410 section_iterator
i(o
->section_begin()), e(o
->section_end());
411 for (; i
!= e
; ++i
) {
413 if ((ec
= i
->getName(name
))) {
414 LOG4CXX_ERROR(logger
, ec
.message());
417 LOG4CXX_DEBUG(logger
, "Added section " << name
.str());
418 sections
.insert(make_pair(name
.str(), *i
));
// Invokes `callback(address, function)` once for every entry of `functions`,
// in the container's iteration order.
//   callback - receives the map key (start address) and the function pointer.
// The lambda captures by reference and takes each pair by value, i.e. the
// (address, pointer) pair is copied per element.
423 template <typename ELFT
>
424 void LLVMDisassembler
<ELFT
>::forEachFunction(std::function
<void (uint64_t, Function
*)> callback
) {
425 std::for_each(functions
.begin(), functions
.end(),
426 [&](std::pair
<uint64_t, LLVMFunction
*> x
) {
427 callback(x
.first
, x
.second
);
// Decodes the instructions in the absolute address range [start, end) of the
// ".text" section and reports each one through `fun`.
//   start, end - absolute addresses; both are converted to section offsets by
//                subtracting the section base address.
//   fun        - called per instruction with (raw bytes, byte count, printed
//                instruction text, a further string argument); for bytes that
//                cannot be decoded it is called with
//                (NULL, 0, "Invalid Byte", "") and a warning is logged.
// For instructions where MIA->evaluateBranch() resolves a target, a string is
// built from an optional "function:" prefix (for calls) and the absolute
// target address in hex; lines not shown here add further pieces.
// NOTE(review): `start - base_address` and `end - base_address` are unsigned
// subtractions; addresses below the section base wrap around -- confirm that
// callers only pass addresses inside the section.
431 template <typename ELFT
>
432 void LLVMDisassembler
<ELFT
>::printEachInstruction(uint64_t start
, uint64_t end
,
433 std::function
<void (uint8_t*, size_t,
435 const std::string
&)> fun
) {
436 SectionRef text_section
= sections
[".text"];
437 uint64_t base_address
;
438 text_section
.getAddress(base_address
);
439 uint64_t current_address
= start
- base_address
;
442 text_section
.getContents(bytes
);
443 StringRefMemoryObject
ref(bytes
);
445 while (current_address
< end
- base_address
) {
449 llvm::raw_string_ostream
s(buf
);
451 if(llvm::MCDisassembler::Success
==
452 DisAsm
->getInstruction(inst
, inst_size
, ref
, current_address
, nulls(), nulls())) {
// NOTE(review): this is a variable-length array (a compiler extension, not
// standard C++) and it shadows the outer `bytes` that holds the section
// contents.
454 uint8_t bytes
[inst_size
+2];
455 ref
.readBytes(current_address
, inst_size
, bytes
);
459 IP
->printInst(&inst
, s
, "");
460 if (MIA
->evaluateBranch(inst
, current_address
, inst_size
, jmptarget
)) {
461 std::stringstream stream
;
462 if (MIA
->isCall(inst
))
463 stream
<< "function:";
467 stream
<< std::hex
<< (base_address
+ jmptarget
);
// NOTE(review): in the visible lines `ref` is the StringRefMemoryObject
// declared above, while the callback's last parameter is a
// const std::string& -- presumably a std::string named `ref` is declared on a
// line not shown and shadows it; confirm.
472 fun(bytes
, inst_size
, s
.str(), ref
);
// NOTE(review): the message has no separator between "Invalid byte at" and
// the address.
474 LOG4CXX_WARN(logger
, "Invalid byte at" << std::hex
<< current_address
+ base_address
);
475 fun(NULL
, 0, "Invalid Byte", "");
479 current_address
+= inst_size
;