]>
git.siccegge.de Git - frida/frida.git/blob - src/Binary.cxx
1d64b8445a0ef80bfcdfd3d87d1aa90302d8c6c5
3 #include "disassembler/Disassembler.hxx"
9 #include "llvm/Support/raw_ostream.h"
12 using namespace llvm::object
;
// Reports a failed error_code.
// Returns false immediately when `ec` is falsy (i.e. there is no error);
// otherwise writes "error reading file: <message>." to outs().
// NOTE(review): the statements that follow the print (presumably
// `return true;`) are not visible in this listing -- confirm against the
// full file. Callers in this file treat a true result as "stop/skip".
15 bool error(error_code ec
) {
16 if (!ec
) return false;
18 outs() << "error reading file: " << ec
.message() << ".\n";
// Ordering predicate for relocations: `a` sorts before `b` when the offset
// reported by a.getOffset() is smaller than the one reported by b.getOffset().
// If either offset query fails (error() reports it and returns true) the
// predicate yields false.
// Used as the comparator for std::sort in Binary::disassemble_functions.
23 bool RelocAddressLess(RelocationRef a
, RelocationRef b
) {
24 uint64_t a_addr
, b_addr
;
25 if (error(a
.getOffset(a_addr
))) return false;
26 if (error(b
.getOffset(b_addr
))) return false;
27 return a_addr
< b_addr
;
// Renders the raw bytes of one instruction as lowercase hex digit pairs into
// a fixed-size, space-padded, NUL-terminated character buffer.
// `bytes` must hold at most 15 bytes (asserted below).
// NOTE(review): the declaration and per-iteration advance of `index`, and
// whatever finally consumes `output` (presumably a write to outs()), are not
// visible in this listing -- confirm against the full file.
30 void DumpBytes(StringRef bytes
) {
31 static const char hex_rep
[] = "0123456789abcdef";
32 // FIXME: The real way to do this is to figure out the longest instruction
33 // and align to that size before printing. I'll fix this when I get
34 // around to outputting relocations.
35 // 15 is the longest x86 instruction
36 // 3 is for the hex rep of a byte + a space.
37 // 1 is for the null terminator.
38 enum { OutputSize
= (15 * 3) + 1 };
39 char output
[OutputSize
];
41 assert(bytes
.size() <= 15
42 && "DumpBytes only supports instructions of up to 15 bytes");
// Pre-fill the whole buffer with spaces so slots that are never written
// stay blank.
43 memset(output
, ' ', sizeof(output
));
45 for (StringRef::iterator i
= bytes
.begin(),
46 e
= bytes
.end(); i
!= e
; ++i
) {
// High nibble first, then low nibble, each mapped through hex_rep.
47 output
[index
] = hex_rep
[(*i
& 0xF0) >> 4];
48 output
[index
+ 1] = hex_rep
[*i
& 0xF];
// The last slot of the buffer is reserved for the terminator.
52 output
[sizeof(output
) - 1] = 0;
// Builds a map from section name to SectionRef for the sections of `o`.
// Iteration stops at the first section whose name cannot be read (error()
// returns true). Entries are added with std::map::insert, so when two
// sections share a name the first one encountered is kept.
// NOTE(review): the declarations of `ec` and `name` and the statement that
// returns `result` are not visible in this listing.
56 std::map
<std::string
, SectionRef
> readSections(const ObjectFile
& o
) {
58 std::map
<std::string
, SectionRef
> result
;
59 section_iterator
i(o
.begin_sections()), e(o
.end_sections());
60 for (; i
!= e
; i
.increment(ec
)) {
62 if (error(i
->getName(name
))) break;
64 result
.insert(make_pair(name
.str(), *i
));
// Builds a map from symbol name to SymbolRef for the symbols of `o`.
// Mirrors readSections: iteration stops at the first symbol whose name
// cannot be read, and std::map::insert keeps the first entry for a
// duplicated name.
// NOTE(review): the declarations of `ec` and `name` and the statement that
// returns `result` are not visible in this listing.
69 std::map
<std::string
, SymbolRef
> readSymbols(const ObjectFile
& o
) {
71 std::map
<std::string
, SymbolRef
> result
;
72 symbol_iterator
si(o
.begin_symbols()), se(o
.end_symbols());
73 for (; si
!= se
; si
.increment(ec
)) {
75 if (error(si
->getName(name
))) break;
77 result
.insert(make_pair(name
.str(), *si
));
// Constructs a Binary for the file at `filename`.
// Visible steps, in order:
//   1. createBinary() loads the file into `binary`; an Archive is only
//      announced on std::cerr.
//   2. `o` is set to the ObjectFile view of `binary`, and its architecture is
//      copied into `triple`.
//   3. The target is looked up from `triple`, then the MC helper objects are
//      created from it: MRI, AsmInfo, STI, MII, DisAsm, MOFI, Ctx, a relocation
//      info object, an object symbolizer, MIA and IP.
//   4. An MCObjectDisassembler builds `Mod` without a CFG.
//   5. `symbols` and `sections` are filled via readSymbols()/readSections().
// NOTE(review): many lines of this constructor (the opening brace, local
// declarations, the null checks that guard each "error: no ..." message and
// whatever follows them) are not visible in this listing.
// NOTE(review): the initial triple string is spelled "unkown-unknown-unknown";
// the first component looks like a typo for "unknown" -- confirm whether the
// spelling matters before changing it.
85 Binary::Binary(const std::string
& filename
)
86 : triple("unkown-unknown-unknown")
88 ::Disassembler
d(filename
);
91 createBinary(filename
, binary
);
92 if (Archive
*a
= dyn_cast
<Archive
>(binary
.get())) {
93 std::cerr
<< "Got an archive!" << std::endl
;
// NOTE(review): no null check on `o` is visible between this dyn_cast and
// the o->getArch() call below -- confirm one exists in the full file.
97 o
= dyn_cast
<ObjectFile
>(binary
.get());
99 triple
.setArch(Triple::ArchType(o
->getArch()));
100 std::string
tripleName(triple
.getTriple());
102 outs() << tripleName
<< "\n";
// NOTE(review): `error` here is passed as an argument, so it presumably
// names a local that shadows the free function error(); its declaration is
// not visible in this listing.
104 target
= TargetRegistry::lookupTarget("", triple
, error
);
110 outs() << target
->getName() << "\n";
112 MRI
.reset(target
->createMCRegInfo(tripleName
));
// NOTE(review): diagnostics in this constructor go to std::cerr in most
// places but to errs() for the subtarget-info message below.
114 std::cerr
<< "error: no register info for target " << tripleName
<< "\n";
118 // Set up disassembler.
119 AsmInfo
.reset(target
->createMCAsmInfo(*MRI
, tripleName
));
121 std::cerr
<< "error: no assembly info for target " << tripleName
<< "\n";
125 STI
.reset(target
->createMCSubtargetInfo(tripleName
, "", ""));
127 errs() << "error: no subtarget info for target " << tripleName
<< "\n";
131 MII
.reset(target
->createMCInstrInfo());
133 std::cerr
<< "error: no instruction info for target " << tripleName
<< "\n";
137 DisAsm
.reset(target
->createMCDisassembler(*STI
));
139 std::cerr
<< "error: no disassembler for target " << tripleName
<< "\n";
143 MOFI
.reset(new MCObjectFileInfo
);
144 Ctx
.reset(new MCContext(AsmInfo
.get(), MRI
.get(), MOFI
.get()));
146 target
->createMCRelocationInfo(tripleName
, *Ctx
.get()));
149 MCObjectSymbolizer::createObjectSymbolizer(*Ctx
.get(), RelInfo
, o
));
151 DisAsm
->setSymbolizer(Symzer
);
154 MIA
.reset(target
->createMCInstrAnalysis(MII
.get()));
156 int AsmPrinterVariant
= AsmInfo
->getAssemblerDialect();
157 IP
.reset(target
->createMCInstPrinter(AsmPrinterVariant
, *AsmInfo
, *MII
, *MRI
, *STI
));
159 std::cerr
<< "error: no instruction printer for target " << tripleName
// Build the MC module for the object file; the CFG is not requested.
164 OwningPtr
<MCObjectDisassembler
> OD(
165 new MCObjectDisassembler(*o
, *DisAsm
, *MIA
));
166 Mod
.reset(OD
->buildModule(/* withCFG */ false));
// Cache name -> ref lookups used by getSymbols() and for_each_instruction().
168 symbols
= readSymbols(*o
);
169 sections
= readSections(*o
);
171 // for_each(sections.begin(), sections.end(), [](const std::pair<std::string, SectionRef>& i){
172 // std::cout << "Section: " << i.first << std::endl;
// Collects the names of all cached symbols that the ".text" section reports
// as contained (SectionRef::containsSymbol succeeds and sets `contains`).
// NOTE(review): `sections[".text"]` uses operator[]; if `sections` is the
// std::map returned by readSections(), a missing ".text" entry would be
// default-inserted here -- confirm that is acceptable.
// NOTE(review): the declaration of `contains` and the return of `result`
// are not visible in this listing.
178 std::vector
<std::string
> Binary::getSymbols() {
180 SectionRef r
= sections
[".text"];
181 std::vector
<std::string
> result
;
182 for_each(symbols
.begin(), symbols
.end(), [&](const std::pair
<std::string
, SymbolRef
>& i
) {
184 if (!error(r
.containsSymbol(i
.second
, contains
)) && contains
)
185 result
.push_back(i
.first
);
// Disassembles the symbol or section called `function` and invokes
// `callback` for decoded instructions.
//
// `function` is looked up in `symbols` first and in `sections` second.
//   - Symbol: the range starts at the symbol's address, spans the symbol's
//     size, and is decoded from the contents of the section the symbol
//     belongs to (base_address is that section's address).
//   - Section: the range is the whole section (base_address == address).
// Any failed query, or an address equal to UnknownAddressOrSize, returns
// without calling `callback`.
//
// `callback` receives (absolute address, raw instruction bytes as a string,
// printed instruction text).
// NOTE(review): several declarations (`bytes`, `ref`, `Inst`, `buf`), the
// remaining getInstruction arguments and the branch structure around the
// warning are not visible in this listing.
// NOTE(review): when `function` matches neither map, no assignment to
// `address`/`ssize`/`base_address` is visible before the loop reads them --
// confirm the full file returns early in that case.
190 void Binary::for_each_instruction(const std::string
& function
,
191 std::function
<void (long, std::string
, std::string
)> callback
) {
193 uint64_t base_address
, address
, ssize
, size(0), index
, end
;
194 StringRefMemoryObject
memoryObject("");
196 if (symbols
.end() != symbols
.find(function
)) {
198 section_iterator
sec(o
->begin_sections());
200 ref
= symbols
.at(function
);
201 if (error(ref
.getSection(sec
))) return;
202 if (error(ref
.getAddress(address
))) return;
203 if (address
== UnknownAddressOrSize
) return;
204 if (error(ref
.getSize(ssize
))) return;
205 if (error(sec
->getAddress(base_address
))) return;
206 if (error(sec
->getContents(bytes
))) return;
207 memoryObject
= bytes
;
210 else if (sections
.end() != sections
.find(function
)) {
211 SectionRef sref
= sections
.at(function
);
212 if (error(sref
.getAddress(address
))) return;
213 if (address
== UnknownAddressOrSize
) return;
214 if (error(sref
.getSize(ssize
))) return;
215 if (error(sref
.getContents(bytes
))) return;
216 base_address
= address
;
217 memoryObject
= bytes
;
221 // outs() << "Start for_each_instruction " << function << "\n";
// `index` and `end` are offsets relative to base_address; `size` is set by
// getInstruction and used as the step.
224 for (end
= address
+ ssize
- base_address
, index
= address
- base_address
; index
< end
; index
+= size
) {
227 if (DisAsm
->getInstruction(Inst
, size
, memoryObject
, index
,
230 llvm::raw_string_ostream
s(buf
);
231 IP
->printInst(&Inst
, s
, "");
// NOTE(review): the strict `<` means an instruction that ends exactly at
// the end of `bytes` is not reported to the callback -- confirm whether
// `<=` was intended.
233 if (index
+ size
< bytes
.str().length())
234 callback(base_address
+ index
, bytes
.str().substr(index
, size
), s
.str());
237 errs() << "warning: invalid instruction encoding\n";
239 size
= 1; // skip illegible bytes
242 // outs() << "End for_each_instruction\n";
// Walks every atom of `Mod`, tests its kind against MCAtom::TextAtom, prints
// an "Atom <name>:" header to outs() and, for atoms that cast to MCTextAtom,
// prints each contained instruction through `IP` to outs().
// Finishes by printing "binary <arch name>" taken from `triple`.
// NOTE(review): the statement controlled by the TextAtom kind test (presumably
// a `continue`), the loop increments and any per-instruction separators are
// not visible in this listing.
246 void Binary::disassemble() {
247 for (MCModule::const_atom_iterator AI
= Mod
->atom_begin(),
248 AE
= Mod
->atom_end();
251 if ((*AI
)->getKind() != llvm::MCAtom::TextAtom
)
254 outs() << "\n\nAtom " << (*AI
)->getName() << ": \n";
255 if (const MCTextAtom
*TA
= dyn_cast
<MCTextAtom
>(*AI
)) {
256 for (MCTextAtom::const_iterator II
= TA
->begin(), IE
= TA
->end();
260 IP
->printInst(&II
->Inst
, outs(), "");
266 outs() << "binary " << triple
.getArchName() << "\n";
// Prints a symbol-by-symbol disassembly listing of the object file `o` to
// outs().
//
// For each section the visible code:
//   1. queries isText() and the section address;
//   2. gathers (section-relative address, name) pairs for the symbols the
//      section contains, echoing each name as "XXX <name>", and sorts them;
//   3. prints "Disassembly of section <name>:";
//   4. decodes each symbol's byte range instruction by instruction, printing
//      the address, the raw bytes (DumpBytes) and the instruction text;
//      undecodable bytes produce a warning on errs() and are skipped one
//      byte at a time;
//   5. prints relocations whose offset falls inside the current instruction.
// Any failed query on a section aborts the section loop (`break`).
// NOTE(review): many declarations and several branch/brace lines are not
// visible in this listing, including what happens for non-text sections.
// NOTE(review): in the visible code `Rels` is never filled (the code that
// would populate it is commented out), so the relocation sort and the
// relocation printing loop operate on an empty vector.
269 void Binary::disassemble_functions() {
271 for (section_iterator i
= o
->begin_sections(),
272 e
= o
->end_sections();
273 i
!= e
; i
.increment(ec
)) {
274 if (error(ec
)) break;
276 if (error(i
->isText(text
))) break;
279 uint64_t SectionAddr
;
280 if (error(i
->getAddress(SectionAddr
))) break;
282 // Make a list of all the symbols in this section.
283 std::vector
<std::pair
<uint64_t, StringRef
> > Symbols
;
284 for (symbol_iterator si
= o
->begin_symbols(),
285 se
= o
->end_symbols();
286 si
!= se
; si
.increment(ec
)) {
288 if (!error(i
->containsSymbol(*si
, contains
)) && contains
) {
290 if (error(si
->getAddress(Address
))) break;
291 if (Address
== UnknownAddressOrSize
) continue;
// Store addresses relative to the start of the section; they are later
// used directly as offsets into the section contents.
292 Address
-= SectionAddr
;
295 if (error(si
->getName(Name
))) break;
297 outs() << "\nXXX " << Name
<< "\n";
299 Symbols
.push_back(std::make_pair(Address
, Name
));
303 // Sort the symbols by address, just in case they didn't come in that way.
304 array_pod_sort(Symbols
.begin(), Symbols
.end());
306 // Make a list of all the relocations for this section.
307 std::vector
<RelocationRef
> Rels
;
308 // if (InlineRelocs) {
309 // for (relocation_iterator ri = i->begin_relocations(),
310 // re = i->end_relocations();
311 // ri != re; ri.increment(ec)) {
312 // if (error(ec)) break;
313 // Rels.push_back(*ri);
317 // Sort relocations by address.
318 std::sort(Rels
.begin(), Rels
.end(), RelocAddressLess
);
// SegmentName stays empty in the visible code because the Mach-O lookup
// below is commented out, so no "<segment>," prefix is printed.
320 StringRef SegmentName
= "";
321 // if (const MachOObjectFile *MachO =
322 // dyn_cast<const MachOObjectFile>(o)) {
323 // DataRefImpl DR = i->getRawDataRefImpl();
324 // SegmentName = MachO->getSectionFinalSegmentName(DR);
327 if (error(i
->getName(name
))) break;
328 outs() << "Disassembly of section ";
329 if (!SegmentName
.empty())
330 outs() << SegmentName
<< ",";
331 outs() << name
<< ':';
333 // If the section has no symbols just insert a dummy one and disassemble
334 // the whole section.
336 Symbols
.push_back(std::make_pair(0, name
));
340 if (error(i
->getContents(Bytes
))) break;
341 StringRefMemoryObject
memoryObject(Bytes
);
345 if (error(i
->getSize(SectSize
))) break;
347 std::vector
<RelocationRef
>::const_iterator rel_cur
= Rels
.begin();
348 std::vector
<RelocationRef
>::const_iterator rel_end
= Rels
.end();
349 // Disassemble symbol by symbol.
350 for (unsigned si
= 0, se
= Symbols
.size(); si
!= se
; ++si
) {
351 uint64_t Start
= Symbols
[si
].first
;
353 // The end is either the size of the section or the beginning of the next
357 // Make sure this symbol takes up space.
358 else if (Symbols
[si
+ 1].first
!= Start
)
359 End
= Symbols
[si
+ 1].first
- 1;
361 // This symbol has the same address as the next symbol. Skip it.
364 outs() << '\n' << Symbols
[si
].second
<< ":\n";
// NOTE(review): DebugOut is declared twice below; presumably the two
// declarations sit in alternative branches of a preprocessor conditional
// that is not visible in this listing.
367 raw_ostream
&DebugOut
= DebugFlag
? dbgs() : nulls();
369 raw_ostream
&DebugOut
= nulls();
372 for (Index
= Start
; Index
< End
; Index
+= Size
) {
375 if (DisAsm
->getInstruction(Inst
, Size
, memoryObject
, Index
,
376 DebugOut
, nulls())) {
377 outs() << format("%8" PRIx64
":", SectionAddr
+ Index
);
379 DumpBytes(StringRef(Bytes
.data() + Index
, Size
));
381 IP
->printInst(&Inst
, outs(), "");
384 errs() << "warning: invalid instruction encoding\n";
386 Size
= 1; // skip illegible bytes
389 // Print relocation for instruction.
390 while (rel_cur
!= rel_end
) {
393 SmallString
<16> name
;
396 // If this relocation is hidden, skip it.
397 if (error(rel_cur
->getHidden(hidden
))) goto skip_print_rel
;
398 if (hidden
) goto skip_print_rel
;
400 if (error(rel_cur
->getOffset(addr
))) goto skip_print_rel
;
401 // Stop when rel_cur's address is past the current instruction.
402 if (addr
>= Index
+ Size
) break;
403 if (error(rel_cur
->getTypeName(name
))) goto skip_print_rel
;
404 if (error(rel_cur
->getValueString(val
))) goto skip_print_rel
;
406 outs() << format("\t\t\t%8" PRIx64
": ", SectionAddr
+ addr
) << name
407 << "\t" << val
<< "\n";
// Writes a Graphviz description of each function in `Mod`.
//
// For every function a file named "dot/<function name>_<filenum>.dot" is
// opened (the name is also echoed on std::cerr). The graph is emitted as a
// left-to-right digraph: each basic block becomes a record-shaped node keyed
// by the begin address of its instructions and labelled with the escaped
// instruction text, and each successor relation becomes an edge from the
// block's ":o" port to the successor's ":a" port.
// Blocks are tested for predecessors; the test excludes the function's first
// block.
// NOTE(review): the statements controlled by the predecessor test and the
// row/port tests, the loop increments, the update of `filenum`, the handling
// after a failed open, and the end of the function are not visible in this
// listing.
// NOTE(review): no code creating the "dot/" directory is visible here --
// presumably it must already exist; confirm.
417 void Binary::disassemble_cfg() {
418 for (MCModule::const_func_iterator FI
= Mod
->func_begin(),
419 FE
= Mod
->func_end();
// Function-local static: the counter persists across calls.
421 static int filenum
= 0;
422 std::string FileName
= std::string("dot/") + (Twine((*FI
)->getName()) + "_" + utostr(filenum
) + ".dot").str();
424 std::cerr
<< FileName
<< std::endl
;
426 // Start a new dot file.
428 raw_fd_ostream
Out(FileName
.c_str(), Error
);
429 if (!Error
.empty()) {
430 errs() << "llvm-objdump: warning: " << Error
<< '\n';
434 Out
<< "digraph \"" << (*FI
)->getName() << "\" {\n";
435 Out
<< "graph [ rankdir = \"LR\" ];\n";
436 for (MCFunction::const_iterator i
= (*FI
)->begin(), e
= (*FI
)->end(); i
!= e
; ++i
) {
437 // Only print blocks that have predecessors.
438 bool hasPreds
= (*i
)->pred_begin() != (*i
)->pred_end();
440 if (!hasPreds
&& i
!= (*FI
)->begin())
443 Out
<< '"' << (*i
)->getInsts()->getBeginAddr() << "\" [ label=\"<a>";
444 // Print instructions.
445 for (unsigned ii
= 0, ie
= (*i
)->getInsts()->size(); ii
!= ie
;
447 if (ii
!= 0) // Not the first line, start a new row.
449 if (ii
+ 1 == ie
) // Last line, add an end id.
452 // Escape special chars and print the instruction in mnemonic form.
454 raw_string_ostream
OS(Str
);
455 IP
->printInst(&(*i
)->getInsts()->at(ii
).Inst
, OS
, "");
456 Out
<< DOT::EscapeString(OS
.str());
458 Out
<< "\" shape=\"record\" ];\n";
// One edge per successor, from this block's ":o" port to the successor's
// ":a" port.
461 for (MCBasicBlock::succ_const_iterator si
= (*i
)->succ_begin(),
462 se
= (*i
)->succ_end(); si
!= se
; ++si
)
463 Out
<< (*i
)->getInsts()->getBeginAddr() << ":o -> "
464 << (*si
)->getInsts()->getBeginAddr() << ":a\n";