From 0daf9a157f3d41690cf4a0287db1adecc4ad0b71 Mon Sep 17 00:00:00 2001 From: Christoph Egger Date: Tue, 24 Feb 2015 23:19:13 +0100 Subject: [PATCH] Reorganize Function/BasicBlock creation More control in the hands of the information manager. Mid-Term goal is to support loading of files so the Disassembler shouldn't have separate storage of all BasicBlocks and Functions that we would have to also populate upon loading saves --- src/core/BasicBlock.hxx | 22 ++-- src/core/Comment.hxx | 21 ++++ src/core/Function.cxx | 7 +- src/core/Function.hxx | 5 +- src/core/InformationManager.cxx | 61 ++++++++++- src/core/InformationManager.hxx | 39 +++++-- src/disassembler/Disassembler.hxx | 2 - src/disassembler/llvm/LLVMDisassembler.cxx | 118 +++++++++++---------- src/disassembler/llvm/LLVMDisassembler.hxx | 7 -- src/gui/Mainwindow.cxx | 46 ++++---- 10 files changed, 211 insertions(+), 117 deletions(-) create mode 100644 src/core/Comment.hxx diff --git a/src/core/BasicBlock.hxx b/src/core/BasicBlock.hxx index 4790b1c..69d4f37 100644 --- a/src/core/BasicBlock.hxx +++ b/src/core/BasicBlock.hxx @@ -10,15 +10,6 @@ class InformationManager; class BasicBlock { public: - BasicBlock(uint64_t start_address, Disassembler * disassembler, - InformationManager* manager) - : start_address(start_address) - , disassembler(disassembler) - , manager(manager) { - next_blocks[0] = 0; - next_blocks[1] = 0; - } - uint64_t getStartAddress() const { return start_address; } @@ -51,19 +42,24 @@ public: return s.str(); } - Disassembler * getDisassembler() const { - return disassembler; - } - InformationManager* getManager() const { return manager; } private: + BasicBlock(uint64_t start_address, InformationManager* manager) + : start_address(start_address) + , manager(manager) { + next_blocks[0] = 0; + next_blocks[1] = 0; + } + uint64_t start_address; uint64_t end_address; Disassembler* disassembler; InformationManager* manager; uint64_t next_blocks[2]; + + friend class InformationManager; }; #endif diff --git 
a/src/core/Comment.hxx b/src/core/Comment.hxx new file mode 100644 index 0000000..e151f5a --- /dev/null +++ b/src/core/Comment.hxx @@ -0,0 +1,21 @@ +#ifndef INCLUDE__Comment_hxx +#define INCLUDE__Comment_hxx + +#include + +class Function; + +class Comment { +public: + bool isLocal() const {return location == NULL;} + +private: + Comment(uint64_t address); + Comment(uint64_t address, Function* location); + + uint64_t address; + Function* location; + std::string text; +}; + +#endif /* INCLUDE__Comment_hxx */ diff --git a/src/core/Function.cxx b/src/core/Function.cxx index e9bb173..539192c 100644 --- a/src/core/Function.cxx +++ b/src/core/Function.cxx @@ -4,12 +4,9 @@ #include -Function::Function(const std::string& name, uint64_t start_address, - InformationManager* manager) +Function::Function(uint64_t start_address, InformationManager* manager) : start_address(start_address) - , manager(manager) { - setName(name); -} + , manager(manager) {} void Function::setName(const std::string& new_name) { diff --git a/src/core/Function.hxx b/src/core/Function.hxx index b57dca2..5497800 100644 --- a/src/core/Function.hxx +++ b/src/core/Function.hxx @@ -8,7 +8,6 @@ class InformationManager; class Function { public: - Function(const std::string& name, uint64_t start_address, InformationManager* manager); uint64_t getStartAddress() const { return start_address; @@ -30,10 +29,14 @@ public: return _blocks; } private: + Function(uint64_t start_address, InformationManager* manager); + std::string name; uint64_t start_address; InformationManager * manager; std::map _blocks; + + friend class InformationManager; }; #endif diff --git a/src/core/InformationManager.cxx b/src/core/InformationManager.cxx index a274c41..294dc2f 100644 --- a/src/core/InformationManager.cxx +++ b/src/core/InformationManager.cxx @@ -2,6 +2,7 @@ #include "disassembler/llvm/LLVMDisassembler.hxx" #include "core/Function.hxx" #include "core/BasicBlock.hxx" +#include "core/Comment.hxx" #include "gui/qt.hxx" #include @@ 
-61,10 +62,68 @@ void InformationManager::save(const QString& filename) { } void InformationManager::signal_new_function(Function* fun) { - functions.insert(std::make_pair(fun->getStartAddress(), fun)); +} + +Function* InformationManager::getFunction(uint64_t address) { + auto it = functions.find(address); + if (it != functions.end()) + return it->second; + else + return NULL; +} + +BasicBlock* InformationManager::getBasicBlock(uint64_t address) { + auto it = blocks.find(address); + if (it != blocks.end()) + return it->second; + else + return NULL; +} + +Function* InformationManager::newFunction(uint64_t address) { + Function* fun = new Function(address, this); + functions.insert(std::make_pair(address, fun)); + return fun; +} + +BasicBlock* InformationManager::newBasicBlock(uint64_t address) { + BasicBlock* block = new BasicBlock(address, this); + blocks.insert(std::make_pair(address, block)); + return block; +} + +Comment* InformationManager::newGlobalComment(uint64_t address) { + return NULL; +} + +Comment* InformationManager::newLocalComment(uint64_t address, Function* f) { + return NULL; +} + +void InformationManager::finishFunction(Function* fun) { for (auto b : fun->blocks()) { BasicBlock* bl = b.second; blocks.insert(std::make_pair(bl->getStartAddress(), bl)); } new_function_signal(fun); } + +void InformationManager::finishBasicBlock(BasicBlock* b) { +} + +void InformationManager::finnishComment(Comment* c) { +} + +void InformationManager::deleteFunction(Function* f) { + functions.erase(f->getStartAddress()); + delete f; +} + +void InformationManager::deleteBasicBlock(BasicBlock* b) { + blocks.erase(b->getStartAddress()); + delete b; +} + +void InformationManager::deleteComment(Comment* c) { + delete c; +} diff --git a/src/core/InformationManager.hxx b/src/core/InformationManager.hxx index dc78128..03cb3b8 100644 --- a/src/core/InformationManager.hxx +++ b/src/core/InformationManager.hxx @@ -9,6 +9,7 @@ class Disassembler; class Function; class BasicBlock; 
+class Comment; class QString; @@ -47,12 +48,38 @@ public: void dispatch(RenameFunctionEvent* event) { rename_function_signal(event); } - Function* getFunction(uint64_t address) - { return functions[address]; } - - BasicBlock* getBlock(uint64_t address) - { return blocks[address]; } - + Function* getFunction(uint64_t address); + BasicBlock* getBasicBlock(uint64_t address); + + /* Protocol: + * + * Users may allocate new Data containers with the new*() + * functions. Once they have populated the information they hand + * over the object to the information manager using the finish*() + * functions. + * + * If new*() returns NULL there already exists a function at the + * specified address. Users may then get the old object if they + * wish or (more likely) skip creating it. Uniqueness of the + * object is only guaranteed as compared to the finish()ed + * objects. + * + * Users are responsible for destroying functions iff they do not + * finish them using the delete*() functions. Once the objects are + * finished, the information manager is responsible for cleaning + * up the memory. If delete*() is called on a finished object, bad + * things may happen. 
+ */ + Function* newFunction(uint64_t address); + BasicBlock* newBasicBlock(uint64_t address); + Comment* newGlobalComment(uint64_t address); + Comment* newLocalComment(uint64_t address, Function* f); + void finishFunction(Function* f); + void finishBasicBlock(BasicBlock* b); + void finnishComment(Comment* c); + void deleteFunction(Function* f); + void deleteBasicBlock(BasicBlock* b); + void deleteComment(Comment* c); private: boost::signals2::signal reset_signal; boost::signals2::signal new_function_signal; diff --git a/src/disassembler/Disassembler.hxx b/src/disassembler/Disassembler.hxx index 4feeddf..776796a 100644 --- a/src/disassembler/Disassembler.hxx +++ b/src/disassembler/Disassembler.hxx @@ -16,9 +16,7 @@ public: virtual void start() = 0; virtual void getSymbols() = 0; virtual uint64_t entryAddress() = 0; - virtual BasicBlock * getBasicBlock(uint64_t address) = 0; - virtual void forEachFunction(std::function callback) = 0; virtual void printEachInstruction(uint64_t start, uint64_t end, std::function fun) = 0; diff --git a/src/disassembler/llvm/LLVMDisassembler.cxx b/src/disassembler/llvm/LLVMDisassembler.cxx index b6f8dae..773a518 100644 --- a/src/disassembler/llvm/LLVMDisassembler.cxx +++ b/src/disassembler/llvm/LLVMDisassembler.cxx @@ -5,6 +5,7 @@ #include #include +#include using namespace llvm; using namespace llvm::object; @@ -161,19 +162,11 @@ void LLVMDisassembler::start() { } template -LLVMDisassembler::~LLVMDisassembler() { - // std::for_each(functions.begin(), functions.end(), - // [](std::pair it) { - // delete it.second; - // }); - // std::for_each(blocks.begin(), blocks.end(), - // [](std::pair it) { - // delete it.second; - // }); -} +LLVMDisassembler::~LLVMDisassembler() {} template Function* LLVMDisassembler::disassembleFunctionAt(uint64_t address, const std::string& name) { + Function * function; SectionRef text_section = sections[".text"]; uint64_t base_address, size; text_section.getAddress(base_address); @@ -184,21 +177,20 @@ Function* 
LLVMDisassembler::disassembleFunctionAt(uint64_t address, const return NULL; } - if (functions.find(address) != functions.end()) { - return functions[address]; - } + if (NULL == (function = manager->getFunction(address))) { - Function * function; - if (name == "") { - std::stringstream s; - s << ""; - function = new Function(s.str(), address, manager); - } else { - function = new Function(name, address, manager); + if (name == "") { + std::stringstream s; + s << ""; + function = manager->newFunction(address); + function->setName(s.str()); + } else { + function = manager->newFunction(address); + function->setName(name); + } + disassembleFunction(function); + manager->finishFunction(function); } - functions.insert(std::make_pair(address, function)); - - disassembleFunction(function); return function; } @@ -206,6 +198,11 @@ Function* LLVMDisassembler::disassembleFunctionAt(uint64_t address, const template void LLVMDisassembler::disassembleFunction(Function* function) { std::stack remaining_blocks; + /* TODO: + * Do all blocks get added properly? We should take care to remove + * the other ones at the end of the function! 
+ */ + std::map new_blocks; SectionRef text_section = sections[".text"]; StringRef bytes; text_section.getContents(bytes); @@ -213,16 +210,17 @@ void LLVMDisassembler::disassembleFunction(Function* function) { LOG4CXX_DEBUG(logger, "Handling function " << function->getName()); - BasicBlock * block = new BasicBlock(function->getStartAddress(), this, manager); + BasicBlock * block = manager->newBasicBlock(function->getStartAddress()); remaining_blocks.push(block); - blocks.insert(std::make_pair(block->getStartAddress(), block)); + new_blocks.insert(std::make_pair(block->getStartAddress(), block)); function->addBasicBlock(block); while (remaining_blocks.size()) { BasicBlock * current_block = remaining_blocks.top(); remaining_blocks.pop(); - LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex << current_block->getStartAddress()); + LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex + << current_block->getStartAddress()); uint64_t inst_size; uint64_t base_address; @@ -241,31 +239,34 @@ void LLVMDisassembler::disassembleFunction(Function* function) { jmptarget += base_address; if (!MIA->isIndirectBranch(inst)) { if (MIA->isCall(inst)) { - if (functions.find(jmptarget) == functions.end()) { + if (NULL == manager->getFunction(jmptarget)) disassembleFunctionAt(jmptarget); - } } else { current_block->setNextBlock(0, jmptarget); - if (blocks.find(jmptarget) == blocks.end()) { - BasicBlock * block = new BasicBlock(jmptarget, this, manager); - blocks.insert(std::make_pair(block->getStartAddress(), block)); + if (new_blocks.find(jmptarget) == new_blocks.end()) { + BasicBlock * block = manager->newBasicBlock(jmptarget); + assert(block); + new_blocks.insert(std::make_pair(block->getStartAddress(), block)); function->addBasicBlock(block); remaining_blocks.push(block); } else { - LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex << current_block->getStartAddress()); - function->addBasicBlock(blocks.find(jmptarget)->second); + 
LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex + << current_block->getStartAddress()); + function->addBasicBlock(new_blocks.find(jmptarget)->second); } if (MIA->isConditionalBranch(inst)) { jmptarget = base_address + current_address + inst_size; current_block->setNextBlock(1, jmptarget); - if (blocks.find(jmptarget) == blocks.end()) { - BasicBlock * block = new BasicBlock(jmptarget, this, manager); - blocks.insert(std::make_pair(block->getStartAddress(), block)); + if (new_blocks.find(jmptarget) == new_blocks.end()) { + BasicBlock * block = manager->newBasicBlock(jmptarget); + assert(block); + new_blocks.insert(std::make_pair(block->getStartAddress(), block)); function->addBasicBlock(block); remaining_blocks.push(block); } else { - LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex << current_block->getStartAddress()); - function->addBasicBlock(blocks.find(jmptarget)->second); + LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex + << current_block->getStartAddress()); + function->addBasicBlock(new_blocks.find(jmptarget)->second); } } } @@ -310,15 +311,16 @@ void LLVMDisassembler::disassemble() { continue; if (!x->second.getAddress(result)) { - Function * fun = new Function(x->first, result, manager); + Function * fun = manager->newFunction(result); + fun->setName(x->first); remaining_functions.push_back(fun); - functions.insert(std::make_pair(result, fun)); LOG4CXX_DEBUG(logger, "Disasembling " << x->first); } } for (Function* function : remaining_functions) { disassembleFunction(function); + manager->finishFunction(function); } if (binary->isELF()) { @@ -333,12 +335,10 @@ void LLVMDisassembler::disassemble() { disassembleFunctionAt(_entryAddress, s.str()); } - if (functions.empty()) { - uint64_t text_entry; - text_section.getAddress(text_entry); - LOG4CXX_INFO(logger, "No Symbols found, starting at the beginning of the text segment"); - disassembleFunctionAt(text_entry); - } + uint64_t text_entry; + 
text_section.getAddress(text_entry); + LOG4CXX_INFO(logger, "No Symbols found, starting at the beginning of the text segment"); + disassembleFunctionAt(text_entry); } template @@ -365,20 +365,22 @@ void LLVMDisassembler::splitBlocks(Function* function) { if(llvm::MCDisassembler::Success == DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) { // See if some other block starts here - auto other = blocks.find(current_address + inst_size + base_address); + BasicBlock* other = manager->getBasicBlock(current_address + + inst_size + + base_address); // Special case, other block starts here but we are at the end anyway - if (other != blocks.end()) { + if (other != NULL) { uint64_t endaddress = current_address + inst_size + base_address; if (endaddress != current_block->getEndAddress()) { LOG4CXX_DEBUG(logger, "Shortening block starting at " << std::hex << current_block->getStartAddress() << " now ending at " - << other->first); - function->addBasicBlock(other->second); + << other->getStartAddress()); + function->addBasicBlock(other); current_block->setEndAddress(endaddress); - current_block->setNextBlock(0, other->first); + current_block->setNextBlock(0, other->getStartAddress()); current_block->setNextBlock(1, 0); } } @@ -440,13 +442,13 @@ void LLVMDisassembler::readSections() { } -template -void LLVMDisassembler::forEachFunction(std::function callback) { - std::for_each(functions.begin(), functions.end(), - [&](std::pair x) { - callback(x.first, x.second); - }); -} +// template +// void LLVMDisassembler::forEachFunction(std::function callback) { +// // std::for_each(functions.begin(), functions.end(), +// // [&](std::pair x) { +// // callback(x.first, x.second); +// // }); +// } template void LLVMDisassembler::printEachInstruction(uint64_t start, uint64_t end, diff --git a/src/disassembler/llvm/LLVMDisassembler.hxx b/src/disassembler/llvm/LLVMDisassembler.hxx index 426cc89..d00381b 100644 --- a/src/disassembler/llvm/LLVMDisassembler.hxx +++ 
b/src/disassembler/llvm/LLVMDisassembler.hxx @@ -26,15 +26,10 @@ public: void getSymbols() {} uint64_t entryAddress() {return _entryAddress;} - void forEachFunction(std::function callback); void printEachInstruction(uint64_t start, uint64_t end, std::function fun); - BasicBlock * getBasicBlock(uint64_t address) { - return blocks[address]; - } - Function * disassembleFunctionAt(uint64_t address, const std::string& name = ""); protected: @@ -54,8 +49,6 @@ private: void readDynamicSymbols(); log4cxx::LoggerPtr logger; - std::map blocks; - std::map functions; llvm::Triple triple; std::shared_ptr binary; diff --git a/src/gui/Mainwindow.cxx b/src/gui/Mainwindow.cxx index b2f5b4d..47ab2a3 100644 --- a/src/gui/Mainwindow.cxx +++ b/src/gui/Mainwindow.cxx @@ -14,8 +14,8 @@ namespace { BasicBlockWidget * - local__add_basic_block(BasicBlock * block, Disassembler * dis, - Mainwindow * mainwindow, + local__add_basic_block(BasicBlock * block, + Mainwindow * mainwindow, InformationManager * manager, std::map& known_blocks, CFGScene * scene, uint64_t starty, uint64_t startx); } @@ -173,8 +173,7 @@ void Mainwindow::addFunction(Function* fun) { // CFG CFGScene * scene = new CFGScene; - Disassembler * dis = manager->getDisassembler(); - BasicBlock * block = dis->getBasicBlock(fun->getStartAddress()); + BasicBlock * block = manager->getBasicBlock(fun->getStartAddress()); uint64_t start_address(std::numeric_limits::max()); for (auto b : fun->blocks()) { @@ -182,8 +181,8 @@ void Mainwindow::addFunction(Function* fun) { start_address = b.first; } - local__add_basic_block(block, manager->getDisassembler(), this, - blocks, scene, start_address, 100); + local__add_basic_block(block, this, + manager, blocks, scene, start_address, 100); QGraphicsView * view = new QGraphicsView(scene); w->addTab(view, "CFG"); @@ -205,8 +204,8 @@ void Mainwindow::addFunction(Function* fun) { namespace { BasicBlockWidget * - local__add_basic_block(BasicBlock * block, Disassembler * dis, - Mainwindow * mainwindow, 
+ local__add_basic_block(BasicBlock * block, + Mainwindow * mainwindow, InformationManager * manager, std::map& known_blocks, CFGScene * scene, uint64_t starty, uint64_t startx) { @@ -226,16 +225,17 @@ namespace { widget->setFlag(QGraphicsItem::ItemIsMovable, true); widget->moveBy(100*startx, block->getStartAddress() - starty); - dis->printEachInstruction(block->getStartAddress(), - block->getEndAddress(), - [&](uint8_t* bytes, - size_t byte_count, - const std::string& line, - const std::string& ref) { - widget->addItem(bytes, byte_count, - line.c_str() + 1, // remove \t - ref.c_str()); - }); + manager->getDisassembler() + ->printEachInstruction(block->getStartAddress(), + block->getEndAddress(), + [&](uint8_t* bytes, + size_t byte_count, + const std::string& line, + const std::string& ref) { + widget->addItem(bytes, byte_count, + line.c_str() + 1, // remove \t + ref.c_str()); + }); BasicBlockWidget *tmp, *nextl(NULL), *nextr(NULL); BasicBlock * tmpblock; @@ -243,18 +243,16 @@ namespace { int xshift = 0; if (block->getNextBlock(1) != 0) xshift = 1; - tmpblock = dis->getBasicBlock(block->getNextBlock(0)); - tmp = local__add_basic_block(tmpblock, dis, - mainwindow, + tmpblock = manager->getBasicBlock(block->getNextBlock(0)); + tmp = local__add_basic_block(tmpblock, mainwindow, manager, known_blocks, scene, starty, startx+xshift); nextl = tmp; tmp->addPrevious(widget); } if (block->getNextBlock(1) != 0) { - tmpblock = dis->getBasicBlock(block->getNextBlock(1)); - tmp = local__add_basic_block(tmpblock, dis, - mainwindow, + tmpblock = manager->getBasicBlock(block->getNextBlock(1)); + tmp = local__add_basic_block(tmpblock, mainwindow, manager, known_blocks, scene, starty, startx-1); nextr = tmp; -- 2.39.5