]>
git.siccegge.de Git - frida/frida.git/blob - LLVMDisassembler.cxx
a194d2144d459d407efa769596a2975e72ed0a44
1 #include "disassembler/llvm/LLVMDisassembler.hxx"
2 #include "disassembler/llvm/LLVMBasicBlock.hxx"
3 #include "disassembler/llvm/LLVMFunction.hxx"
9 using namespace llvm::object
;
12 * TODO: fallback code in case the file is not an ELF/PE/COFF/Mach-O/.. binary
13 * but, e.g., just raw instructions or a boot sector or
// Constructs a disassembler for the file `filename`.
//
// The file name is forwarded to the Disassembler base class, a log4cxx logger
// named "LLVMDisassembler" is obtained, and `triple` starts out as
// "unknown-unknown-unknown". The body then loads the binary and creates, in
// order, the LLVM MC objects used later: target, register info (MRI), asm
// info (AsmInfo), subtarget info (STI), instruction info (MII), disassembler
// (DisAsm), object file info (MOFI), context (Ctx), symbolizer, instruction
// analysis (MIA) and instruction printer (IP). Finally an MCObjectDisassembler
// builds the module stored in `Mod`.
//
// NOTE(review): the conditions guarding the LOG4CXX_ERROR calls below, and
// whatever follows them (early return?), are not visible in this view — confirm
// against the full file.
16 LLVMDisassembler::LLVMDisassembler(const std::string
& filename
)
17 : Disassembler(filename
)
18 , logger(log4cxx::Logger::getLogger("LLVMDisassembler"))
19 , triple("unknown-unknown-unknown")
21 LOG4CXX_DEBUG(logger
, "Handling file" << filename
);
// Load the binary; `result` carries either the binary or an error code.
22 auto result
= createBinary(filename
);
// A non-zero error code means loading failed; its message is logged.
25 if ((ec
= result
.getError())) {
26 LOG4CXX_ERROR(logger
, "Failed to load Binary" << ec
.message());
// Store the pointer obtained from `result` in `binary`.
31 binary
.reset(result
.get());
// NOTE(review): dyn_cast yields a null pointer when the binary is not an
// ObjectFile; no null check on `o` is visible before `o->getArch()` below —
// confirm.
33 o
= dyn_cast
<ObjectFile
>(binary
.get());
// Derive the target triple from the architecture reported by the object file.
35 triple
.setArch(Triple::ArchType(o
->getArch()));
36 std::string
tripleName(triple
.getTriple());
38 LOG4CXX_INFO(logger
, "Architecture " << tripleName
);
// Look up the LLVM target for the triple; `es` receives the error text.
42 target
= TargetRegistry::lookupTarget("", triple
, es
);
// Logs the lookup error text (presumably only when `target` is null).
44 LOG4CXX_ERROR(logger
, es
);
48 LOG4CXX_INFO(logger
, "Target " << target
->getName());
// Register info for the target.
50 MRI
.reset(target
->createMCRegInfo(tripleName
));
52 LOG4CXX_ERROR(logger
, "no register info for target " << tripleName
);
56 // Set up disassembler.
57 AsmInfo
.reset(target
->createMCAsmInfo(*MRI
, tripleName
));
59 LOG4CXX_ERROR(logger
, "no assembly info for target " << tripleName
);
// Subtarget info with empty CPU and feature strings.
63 STI
.reset(target
->createMCSubtargetInfo(tripleName
, "", ""));
65 LOG4CXX_ERROR(logger
, "no subtarget info for target " << tripleName
);
69 MII
.reset(target
->createMCInstrInfo());
71 LOG4CXX_ERROR(logger
, "no instruction info for target " << tripleName
);
// The instruction decoder used by disassemble() and printEachInstruction().
75 DisAsm
.reset(target
->createMCDisassembler(*STI
));
77 LOG4CXX_ERROR(logger
, "no disassembler for target " << tripleName
);
// MC context built from the asm info, register info and object file info.
81 MOFI
.reset(new MCObjectFileInfo
);
82 Ctx
.reset(new MCContext(AsmInfo
.get(), MRI
.get(), MOFI
.get()));
// Relocation info for the context. The receiver of this value is not visible
// here; presumably it ends up in `RelInfo`, which is used below — TODO confirm.
84 target
->createMCRelocationInfo(tripleName
, *Ctx
.get()));
// Object symbolizer built from the context, relocation info and object file.
// The receiver is not visible here; presumably `Symzer` — TODO confirm.
87 MCObjectSymbolizer::createObjectSymbolizer(*Ctx
.get(), RelInfo
, o
));
// Attach the symbolizer to the disassembler.
89 DisAsm
->setSymbolizer(Symzer
);
// Instruction analysis, used for branch/call classification in disassemble().
92 MIA
.reset(target
->createMCInstrAnalysis(MII
.get()));
// Instruction printer using the assembler dialect reported by AsmInfo.
94 int AsmPrinterVariant
= AsmInfo
->getAssemblerDialect();
95 IP
.reset(target
->createMCInstPrinter(AsmPrinterVariant
, *AsmInfo
, *MII
, *MRI
, *STI
));
97 LOG4CXX_ERROR(logger
, "no instruction printer for target " << tripleName
);
// setPrintImmHex is called twice: once with a hex style, once with `true`.
// NOTE(review): confirm both calls are needed.
101 IP
->setPrintImmHex(llvm::HexStyle::C
);
102 IP
->setPrintImmHex(true);
// Build the module from the object file; the result is stored in `Mod`.
104 OwningPtr
<MCObjectDisassembler
> OD(
105 new MCObjectDisassembler(*o
, *DisAsm
, *MIA
));
106 Mod
.reset(OD
->buildModule(false));
// Destructor: visits every entry of `functions` and then every entry of
// `blocks` with std::for_each. Each lambda receives the (address, pointer)
// pair by value.
// NOTE(review): the lambda bodies are not visible in this view. Both maps are
// filled with objects created via `new` in disassemble(), so the lambdas
// presumably delete `it.second` — TODO confirm.
113 LLVMDisassembler::~LLVMDisassembler() {
114 std::for_each(functions
.begin(), functions
.end(),
115 [](std::pair
<uint64_t,LLVMFunction
*> it
) {
118 std::for_each(blocks
.begin(), blocks
.end(),
119 [](std::pair
<uint64_t, LLVMBasicBlock
*> it
) {
125 * TODO: If we jump into some Basic Block we need to split it there into two
// Discovers functions and basic blocks in the ".text" section.
//
// Two work stacks drive the traversal: `remaining_functions` is seeded from the
// symbol table, and `remaining_blocks` is filled per function. For each block,
// instructions are decoded one at a time; direct calls add new functions and
// direct branches add new blocks. Results are recorded in the `functions` and
// `blocks` members, keyed by absolute address.
//
// NOTE(review): several statements (local declarations, the bodies of some
// `if`s, `else` lines, loop exits) are not visible in this view; comments below
// describe only what is shown and hedge the rest.
127 void LLVMDisassembler::disassemble() {
128 std::stack
<LLVMFunction
*> remaining_functions
;
129 std::stack
<LLVMBasicBlock
*> remaining_blocks
;
130 SectionRef text_section
= sections
[".text"];
// Seed the function work stack from the symbol table.
132 for (auto x
= symbols
.begin(); x
!= symbols
.end(); ++x
) {
135 SymbolRef::Type symbol_type
;
// True when the containment query fails or the symbol is not in .text; the
// statement this guards is not visible here (presumably skips the symbol).
138 if (text_section
.containsSymbol(x
->second
, contains
) || !contains
)
// True when the type query fails or the symbol is not a function; the guarded
// statement is not visible here (presumably skips the symbol).
141 if (x
->second
.getType(symbol_type
)
142 || SymbolRef::ST_Function
!= symbol_type
)
// Elsewhere in this file a non-zero error code means failure (see the getName
// calls), so this branch presumably runs when the address lookup succeeded.
// The new function is queued and registered under its address.
145 if (!x
->second
.getAddress(result
)) {
146 LLVMFunction
* fun
= new LLVMFunction(x
->first
, result
);
147 remaining_functions
.push(fun
);
148 functions
.insert(std::make_pair(result
, fun
));
149 LOG4CXX_DEBUG(logger
, "Disasembling " << x
->first
);
// Wrap the raw contents of the text section so the decoder can read from it.
154 text_section
.getContents(bytes
);
155 StringRefMemoryObject
ref(bytes
);
// Process functions until the work stack is empty; calls found while decoding
// push further functions onto it.
157 while (remaining_functions
.size()) {
158 LLVMFunction
* current_function
= remaining_functions
.top();
159 remaining_functions
.pop();
161 LOG4CXX_DEBUG(logger
, "Handling function " << current_function
->getName());
163 // if ("_start" != current_function->getName())
// The function's entry block starts at the function's start address.
166 LLVMBasicBlock
* block
= new LLVMBasicBlock(current_function
->getStartAddress(), this);
167 remaining_blocks
.push(block
);
168 blocks
.insert(std::make_pair(block
->getStartAddress(), block
));
// Process blocks until the block work stack is empty.
170 while (remaining_blocks
.size()) {
171 LLVMBasicBlock
* current_block
= remaining_blocks
.top();
172 remaining_blocks
.pop();
174 LOG4CXX_DEBUG(logger
, "Handling Block starting at " << std::hex
<< current_block
->getStartAddress());
// `current_address` is the offset into the text section: the block's start
// address minus the section's base address.
177 uint64_t base_address
;
178 text_section
.getAddress(base_address
);
179 uint64_t current_address
= current_block
->getStartAddress() - base_address
;
// String stream that receives the printed form of the instruction.
183 llvm::raw_string_ostream
s(buf
);
// Decode one instruction at `current_address`; `inst_size` receives its size.
185 if(llvm::MCDisassembler::Success
==
186 DisAsm
->getInstruction(inst
, inst_size
, ref
, current_address
, nulls(), nulls())) {
// Copy the instruction's raw bytes into a local buffer.
// NOTE(review): the array size is a runtime value (a variable-length array,
// which is a compiler extension in C++).
188 uint8_t bytes
[inst_size
+2];
189 ref
.readBytes(current_address
, inst_size
, bytes
);
// Iterates over the raw instruction bytes; the loop body is not visible here.
191 for(uint8_t* cur
= bytes
; cur
< bytes
+ inst_size
; ++cur
) {
// Print the instruction text and log it with its absolute address.
197 IP
->printInst(&inst
, s
, "");
199 LOG4CXX_DEBUG(logger
, std::hex
<< current_address
+ base_address
<< s
.str());
// If the branch target can be evaluated, convert it to an absolute address by
// adding the section base.
202 if (MIA
->evaluateBranch(inst
, current_address
, inst_size
, jmptarget
)) {
203 jmptarget
+= base_address
;
// Only direct branches are followed.
204 if (!MIA
->isIndirectBranch(inst
)) {
// Call: if no function is registered at the target yet, create one with a
// synthetic "<Unnamed 0x...>" name, register it and queue it.
205 if (MIA
->isCall(inst
)) {
206 if (functions
.find(jmptarget
) == functions
.end()) {
208 s
<< "<Unnamed 0x" << std::hex
<< jmptarget
<< ">";
209 LLVMFunction
* fun
= new LLVMFunction(s
.str(), jmptarget
);
210 functions
.insert(std::make_pair(jmptarget
, fun
));
211 remaining_functions
.push(fun
);
// Presumably the non-call case (the `else` is not visible here): if no block
// is registered at the target, create one, register it, record it as successor
// 0 of the current block and queue it.
// NOTE(review): as written here, setNextBlock(0, ...) sits inside the
// "block not yet known" branch, so the edge is only recorded for newly created
// blocks — confirm that is intended.
214 if (blocks
.find(jmptarget
) == blocks
.end()) {
215 LLVMBasicBlock
* block
= new LLVMBasicBlock(jmptarget
, this);
216 blocks
.insert(std::make_pair(block
->getStartAddress(), block
));
217 current_block
->setNextBlock(0, block
->getStartAddress());
218 remaining_blocks
.push(block
);
// Conditional branch: the fall-through address (the address right after this
// instruction) is handled the same way and recorded as successor 1.
220 if (MIA
->isConditionalBranch(inst
)) {
221 jmptarget
= base_address
+ current_address
+ inst_size
;
222 if (blocks
.find(jmptarget
) == blocks
.end()) {
223 LLVMBasicBlock
* block
= new LLVMBasicBlock(jmptarget
, this);
224 blocks
.insert(std::make_pair(block
->getStartAddress(), block
));
225 current_block
->setNextBlock(1, block
->getStartAddress());
226 remaining_blocks
.push(block
);
// The block ends at a zero-sized instruction, a terminator or a branch; its end
// address is the absolute address just past the current instruction.
// The statement that leaves the decoding loop is not visible here.
237 if (inst_size
== 0 || MIA
->isTerminator(inst
) || MIA
->isBranch(inst
)) {
238 current_block
->setEndAddress(current_address
+ base_address
+ inst_size
);
239 LOG4CXX_DEBUG(logger
, "Finished Block at " << std::hex
<<
240 current_block
->getEndAddress());
// Advance to the next instruction.
243 current_address
+= inst_size
;
246 LOG4CXX_DEBUG(logger
, "Finished function " << current_function
->getName());
// Fills the `symbols` member from the symbol table of the object file `o`.
// Each symbol whose name can be read is logged and inserted under that name.
250 void LLVMDisassembler::readSymbols() {
// Iterate from the first to the one-past-last symbol of the object file.
252 symbol_iterator
si(o
->symbol_begin()), se(o
->symbol_end());
253 for (; si
!= se
; ++si
) {
// A non-zero error code from getName means the name could not be read; the
// error message is logged. What follows the log (presumably skipping the
// symbol) is not visible here.
255 if ((ec
= si
->getName(name
))) {
256 LOG4CXX_ERROR(logger
, ec
.message());
// Record the symbol under its name.
259 LOG4CXX_DEBUG(logger
, "Added symbol " << name
.str());
260 symbols
.insert(make_pair(name
.str(), *si
));
// Fills the `sections` member from the section table of the object file `o`.
// Each section whose name can be read is logged and inserted under that name.
264 void LLVMDisassembler::readSections() {
// Iterate from the first to the one-past-last section of the object file.
266 section_iterator
i(o
->section_begin()), e(o
->section_end());
267 for (; i
!= e
; ++i
) {
// A non-zero error code from getName means the name could not be read; the
// error message is logged. What follows the log (presumably skipping the
// section) is not visible here.
269 if ((ec
= i
->getName(name
))) {
270 LOG4CXX_ERROR(logger
, ec
.message());
// Record the section under its name.
273 LOG4CXX_DEBUG(logger
, "Added section " << name
.str());
274 sections
.insert(make_pair(name
.str(), *i
));
// Invokes `callback` once for every entry of `functions`, passing the entry's
// key (a uint64_t) and its function pointer.
//
// @param callback  callable taking (uint64_t, Function*); called once per entry.
279 void LLVMDisassembler::forEachFunction(std::function
<void (uint64_t, Function
*)> callback
) {
// The lambda captures `callback` by reference and receives each map entry as a
// pair by value.
280 std::for_each(functions
.begin(), functions
.end(),
281 [&](std::pair
<uint64_t, LLVMFunction
*> x
) {
282 callback(x
.first
, x
.second
);
// Decodes the instructions of the ".text" section between the absolute
// addresses `start` and `end` and reports each one through `fun`.
//
// @param start  absolute address of the first instruction to decode.
// @param end    absolute address at which decoding stops (exclusive).
// @param fun    called with (raw bytes, size, printed text) for each decoded
//               instruction, and with (NULL, 0, "Invalid Byte") otherwise.
//
// NOTE(review): the end of this function and the `else` between the two `fun`
// calls are not visible in this view.
286 void LLVMDisassembler::printEachInstruction(uint64_t start
, uint64_t end
,
287 std::function
<void (uint8_t*, size_t, const std::string
&)> fun
) {
// Work with offsets into the text section: absolute address minus section base.
288 SectionRef text_section
= sections
[".text"];
289 uint64_t base_address
;
290 text_section
.getAddress(base_address
);
291 uint64_t current_address
= start
- base_address
;
// Wrap the raw contents of the text section so the decoder can read from it.
294 text_section
.getContents(bytes
);
295 StringRefMemoryObject
ref(bytes
);
297 while (current_address
< end
- base_address
) {
// String stream that receives the printed form of the instruction.
301 llvm::raw_string_ostream
s(buf
);
// Decode one instruction at `current_address`; `inst_size` receives its size.
303 if(llvm::MCDisassembler::Success
==
304 DisAsm
->getInstruction(inst
, inst_size
, ref
, current_address
, nulls(), nulls())) {
// Copy the raw bytes of the instruction into a local buffer.
// NOTE(review): the array size is a runtime value (a variable-length array,
// which is a compiler extension in C++).
306 uint8_t bytes
[inst_size
+2];
307 ref
.readBytes(current_address
, inst_size
, bytes
);
// Print the instruction and hand bytes, size and text to the callback.
309 IP
->printInst(&inst
, s
, "");
310 fun(bytes
, inst_size
, s
.str());
// Presumably the failure path of the decode check above.
312 fun(NULL
, 0, "Invalid Byte");
// Advance to the next instruction.
// NOTE(review): if `inst_size` is 0 after a failed decode the loop would not
// advance; any adjustment of `inst_size` on the failure path is not visible
// here — confirm.
316 current_address
+= inst_size
;