Reorganize Function/BasicBlock creation
authorChristoph Egger <christoph@christoph-egger.org>
Tue, 24 Feb 2015 22:19:13 +0000 (23:19 +0100)
committerChristoph Egger <christoph@christoph-egger.org>
Tue, 24 Feb 2015 22:19:13 +0000 (23:19 +0100)
More control in the hands of the information manager. The mid-term goal
is to support loading of files, so the Disassembler shouldn't keep
separate storage of all BasicBlocks and Functions that we would also
have to populate when loading saves.

src/core/BasicBlock.hxx
src/core/Comment.hxx [new file with mode: 0644]
src/core/Function.cxx
src/core/Function.hxx
src/core/InformationManager.cxx
src/core/InformationManager.hxx
src/disassembler/Disassembler.hxx
src/disassembler/llvm/LLVMDisassembler.cxx
src/disassembler/llvm/LLVMDisassembler.hxx
src/gui/Mainwindow.cxx

index 4790b1c5626f2e92e02c012ab161d8912bb394da..69d4f37d9c85cb8643bccdd713bb40f9c9adf102 100644 (file)
@@ -10,15 +10,6 @@ class InformationManager;
 
 class BasicBlock {
 public:
-       BasicBlock(uint64_t start_address, Disassembler * disassembler,
-                  InformationManager* manager)
-               : start_address(start_address)
-               , disassembler(disassembler)
-               , manager(manager) {
-               next_blocks[0] = 0;
-               next_blocks[1] = 0;
-       }
-
        uint64_t getStartAddress() const {
                return start_address;
        }
@@ -51,19 +42,24 @@ public:
                return s.str();
        }
 
-       Disassembler * getDisassembler() const {
-               return disassembler;
-       }
-
        InformationManager* getManager() const {
                return manager;
        }
 private:
+       BasicBlock(uint64_t start_address, InformationManager* manager)
+               : start_address(start_address)
+               , manager(manager) {
+               next_blocks[0] = 0;
+               next_blocks[1] = 0;
+       }
+
        uint64_t start_address;
        uint64_t end_address;
        Disassembler* disassembler;
        InformationManager* manager;
        uint64_t next_blocks[2];
+
+       friend class InformationManager;
 };
 
 #endif
diff --git a/src/core/Comment.hxx b/src/core/Comment.hxx
new file mode 100644 (file)
index 0000000..e151f5a
--- /dev/null
@@ -0,0 +1,21 @@
+#ifndef INCLUDE__Comment_hxx
+#define INCLUDE__Comment_hxx
+
+#include <string>
+
+class Function;
+
+class Comment {
+public:
+       bool isLocal() const {return location == NULL;}
+
+private:
+       Comment(uint64_t address);
+       Comment(uint64_t address, Function* location);
+
+       uint64_t address;
+       Function* location;
+       std::string text;
+};
+
+#endif /* INCLUDE__Comment_hxx */
index e9bb173374301703bf0ef3e0c4a928dee0ad79ea..539192c5fd6f2fb2cb90e914c39acf17749d1042 100644 (file)
@@ -4,12 +4,9 @@
 
 #include <iostream>
 
-Function::Function(const std::string& name, uint64_t start_address,
-                   InformationManager* manager)
+Function::Function(uint64_t start_address, InformationManager* manager)
        : start_address(start_address)
-       , manager(manager) {
-       setName(name);
-}
+       , manager(manager) {}
 
 
 void Function::setName(const std::string& new_name) {
index b57dca2706edd31df2b31798c18d5ebedf301017..5497800969e57b8f12a2a2c8e24bdaf20688c675 100644 (file)
@@ -8,7 +8,6 @@ class InformationManager;
 
 class Function {
 public:
-       Function(const std::string& name, uint64_t start_address, InformationManager* manager);
 
        uint64_t getStartAddress() const {
                return start_address;
@@ -30,10 +29,14 @@ public:
                return _blocks;
        }
 private:
+       Function(uint64_t start_address, InformationManager* manager);
+
        std::string name;
        uint64_t start_address;
        InformationManager * manager;
        std::map<uint64_t, BasicBlock*> _blocks;
+
+       friend class InformationManager;
 };
 
 #endif
index a274c418091e40efcab87f1f29821cb67ab3c5f9..294dc2f53583b7e3a9c5ce6b8f33fd49427bf058 100644 (file)
@@ -2,6 +2,7 @@
 #include "disassembler/llvm/LLVMDisassembler.hxx"
 #include "core/Function.hxx"
 #include "core/BasicBlock.hxx"
+#include "core/Comment.hxx"
 
 #include "gui/qt.hxx"
 #include <quazip/quazip.h>
@@ -61,10 +62,68 @@ void InformationManager::save(const QString& filename) {
 }
 
 void InformationManager::signal_new_function(Function* fun) {
-       functions.insert(std::make_pair(fun->getStartAddress(), fun));
+}
+
+Function* InformationManager::getFunction(uint64_t address) {
+       auto it = functions.find(address);
+       if (it != functions.end())
+               return it->second;
+       else
+               return NULL;
+}
+
+BasicBlock* InformationManager::getBasicBlock(uint64_t address) {
+       auto it = blocks.find(address);
+       if (it != blocks.end())
+               return it->second;
+       else
+               return NULL;
+}
+
+Function* InformationManager::newFunction(uint64_t address) {
+       Function* fun = new Function(address, this);
+       functions.insert(std::make_pair(address, fun));
+       return fun;
+}
+
+BasicBlock* InformationManager::newBasicBlock(uint64_t address) {
+       BasicBlock* block = new BasicBlock(address, this);
+       blocks.insert(std::make_pair(address, block));
+       return block;
+}
+
+Comment* InformationManager::newGlobalComment(uint64_t address) {
+       return NULL;
+}
+
+Comment* InformationManager::newLocalComment(uint64_t address, Function* f) {
+       return NULL;
+}
+
+void InformationManager::finishFunction(Function* fun) {
        for (auto b : fun->blocks()) {
                BasicBlock* bl = b.second;
                blocks.insert(std::make_pair(bl->getStartAddress(), bl));
        }
        new_function_signal(fun);
 }
+
+void InformationManager::finishBasicBlock(BasicBlock* b) {
+}
+
+void InformationManager::finnishComment(Comment* c) {
+}
+
+void InformationManager::deleteFunction(Function* f) {
+       functions.erase(f->getStartAddress());
+       delete f;
+}
+
+void InformationManager::deleteBasicBlock(BasicBlock* b) {
+       blocks.erase(b->getStartAddress());
+       delete b;
+}
+
+void InformationManager::deleteComment(Comment* c) {
+       delete c;
+}
index dc781285f049de01b2200f6a2e852ee3f21d9353..03cb3b866ec139b875ffad8a7556820b3733c2e1 100644 (file)
@@ -9,6 +9,7 @@
 class Disassembler;
 class Function;
 class BasicBlock;
+class Comment;
 
 class QString;
 
@@ -47,12 +48,38 @@ public:
        void dispatch(RenameFunctionEvent* event)
                { rename_function_signal(event); }
 
-       Function* getFunction(uint64_t address)
-       { return functions[address]; }
-
-       BasicBlock* getBlock(uint64_t address)
-       { return blocks[address]; }
-
+       Function* getFunction(uint64_t address);
+       BasicBlock* getBasicBlock(uint64_t address);
+
+       /* Protocol:
+        *
+        * Users may allocate new Data containers with the new*()
+        * functions. Once they have populated the information they hand
+        * over the object to the information manager using the finish*()
+        * functions.
+        *
+        * If new*() returns NULL there already exists a function at the
+        * specified address. Users may then get the old object if they
+        * wish or (more likely) skip creating it. Uniqueness of the
+        * object is only guaranteed as compared to the finish()ed
+        * objects.
+        *
+        * Users are responsible for destroying functions iff they do not
+        * finish them using the delete*() functions. Once the objects are
+        * finished, the information manager is responsible for cleaning
+        * up the memory. If delete*() is called on a finished object, bad
+        * things may happen.
+        */
+       Function* newFunction(uint64_t address);
+       BasicBlock* newBasicBlock(uint64_t address);
+       Comment* newGlobalComment(uint64_t address);
+       Comment* newLocalComment(uint64_t address, Function* f);
+       void finishFunction(Function* f);
+       void finishBasicBlock(BasicBlock* b);
+       void finnishComment(Comment* c);
+       void deleteFunction(Function* f);
+       void deleteBasicBlock(BasicBlock* b);
+       void deleteComment(Comment* c);
 private:
        boost::signals2::signal<void ()> reset_signal;
        boost::signals2::signal<void (Function*)> new_function_signal;
index 4feeddf0e11e03ab97d5b84984daebcfde5af356..776796a8dc22e230201a2cc9f775a284d0cb756f 100644 (file)
@@ -16,9 +16,7 @@ public:
        virtual void start() = 0;
        virtual void getSymbols() = 0;
        virtual uint64_t entryAddress() = 0;
-       virtual BasicBlock * getBasicBlock(uint64_t address) = 0;
 
-       virtual void forEachFunction(std::function<void (uint64_t, Function*)> callback) = 0;
        virtual void printEachInstruction(uint64_t start, uint64_t end,
                                          std::function<void (uint8_t*, size_t, const std::string&,
                                                              const std::string&)> fun) = 0;
index b6f8dae75ae2d6cc33b2586bc2b45df68b899eec..773a5189a27a33d35085da9f698864970b3ac9ec 100644 (file)
@@ -5,6 +5,7 @@
 
 #include <stack>
 #include <algorithm>
+#include <cassert>
 
 using namespace llvm;
 using namespace llvm::object;
@@ -161,19 +162,11 @@ void LLVMDisassembler<ELFT>::start() {
 }
 
 template <typename ELFT>
-LLVMDisassembler<ELFT>::~LLVMDisassembler() {
-       // std::for_each(functions.begin(), functions.end(),
-       //               [](std::pair<uint64_t,LLVMFunction*> it) {
-       //                    delete it.second;
-       //               });
-       // std::for_each(blocks.begin(), blocks.end(),
-       //               [](std::pair<uint64_t, LLVMBasicBlock*> it) {
-       //                    delete it.second;
-       //               });
-}
+LLVMDisassembler<ELFT>::~LLVMDisassembler() {}
 
 template <typename ELFT>
 Function* LLVMDisassembler<ELFT>::disassembleFunctionAt(uint64_t address, const std::string& name) {
+       Function * function;
        SectionRef text_section = sections[".text"];
        uint64_t base_address, size;
        text_section.getAddress(base_address);
@@ -184,21 +177,20 @@ Function* LLVMDisassembler<ELFT>::disassembleFunctionAt(uint64_t address, const
                return NULL;
        }
 
-       if (functions.find(address) != functions.end()) {
-               return functions[address];
-       }
+       if (NULL == (function = manager->getFunction(address))) {
 
-       Function * function;
-       if (name == "") {
-               std::stringstream s;
-               s << "<Unnamed 0x" << std::hex << address << ">";
-               function = new Function(s.str(), address, manager);
-       } else {
-               function = new Function(name, address, manager);
+               if (name == "") {
+                       std::stringstream s;
+                       s << "<Unnamed 0x" << std::hex << address << ">";
+                       function = manager->newFunction(address);
+                       function->setName(s.str());
+               } else {
+                       function = manager->newFunction(address);
+                       function->setName(name);
+               }
+               disassembleFunction(function);
+               manager->finishFunction(function);
        }
-       functions.insert(std::make_pair(address, function));
-
-       disassembleFunction(function);
 
        return function;
 }
@@ -206,6 +198,11 @@ Function* LLVMDisassembler<ELFT>::disassembleFunctionAt(uint64_t address, const
 template <typename ELFT>
 void LLVMDisassembler<ELFT>::disassembleFunction(Function* function) {
        std::stack<BasicBlock*> remaining_blocks;
+       /* TODO:
+        * Do all blocks get added properly? We should take care to remove
+        * the other ones at the end of the function!
+        */
+       std::map<uint64_t, BasicBlock*> new_blocks;
        SectionRef text_section = sections[".text"];
        StringRef bytes;
        text_section.getContents(bytes);
@@ -213,16 +210,17 @@ void LLVMDisassembler<ELFT>::disassembleFunction(Function* function) {
 
        LOG4CXX_DEBUG(logger, "Handling function " << function->getName());
 
-       BasicBlock * block = new BasicBlock(function->getStartAddress(), this, manager);
+       BasicBlock * block = manager->newBasicBlock(function->getStartAddress());
        remaining_blocks.push(block);
-       blocks.insert(std::make_pair(block->getStartAddress(), block));
+       new_blocks.insert(std::make_pair(block->getStartAddress(), block));
        function->addBasicBlock(block);
 
        while (remaining_blocks.size()) {
                BasicBlock * current_block = remaining_blocks.top();
                remaining_blocks.pop();
 
-               LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex << current_block->getStartAddress());
+               LOG4CXX_DEBUG(logger, "Handling Block starting at " << std::hex
+                             << current_block->getStartAddress());
 
                uint64_t inst_size;
                uint64_t base_address;
@@ -241,31 +239,34 @@ void LLVMDisassembler<ELFT>::disassembleFunction(Function* function) {
                                        jmptarget += base_address;
                                        if (!MIA->isIndirectBranch(inst)) {
                                                if (MIA->isCall(inst)) {
-                                                       if (functions.find(jmptarget) == functions.end()) {
+                                                       if (NULL == manager->getFunction(jmptarget))
                                                                disassembleFunctionAt(jmptarget);
-                                                       }
                                                } else {
                                                        current_block->setNextBlock(0, jmptarget);
-                                                       if (blocks.find(jmptarget) == blocks.end()) {
-                                                               BasicBlock * block = new BasicBlock(jmptarget, this, manager);
-                                                               blocks.insert(std::make_pair(block->getStartAddress(), block));
+                                                       if (new_blocks.find(jmptarget) == new_blocks.end()) {
+                                                               BasicBlock * block = manager->newBasicBlock(jmptarget);
+                                                               assert(block);
+                                                               new_blocks.insert(std::make_pair(block->getStartAddress(), block));
                                                                function->addBasicBlock(block);
                                                                remaining_blocks.push(block);
                                                        } else {
-                                                               LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex << current_block->getStartAddress());
-                                                               function->addBasicBlock(blocks.find(jmptarget)->second);
+                                                               LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex
+                                                                             << current_block->getStartAddress());
+                                                               function->addBasicBlock(new_blocks.find(jmptarget)->second);
                                                        }
                                                        if (MIA->isConditionalBranch(inst)) {
                                                                jmptarget = base_address + current_address + inst_size;
                                                                current_block->setNextBlock(1, jmptarget);
-                                                               if (blocks.find(jmptarget) == blocks.end()) {
-                                                                       BasicBlock * block = new BasicBlock(jmptarget, this, manager);
-                                                                       blocks.insert(std::make_pair(block->getStartAddress(), block));
+                                                               if (new_blocks.find(jmptarget) == new_blocks.end()) {
+                                                                       BasicBlock * block = manager->newBasicBlock(jmptarget);
+                                                                       assert(block);
+                                                                       new_blocks.insert(std::make_pair(block->getStartAddress(), block));
                                                                        function->addBasicBlock(block);
                                                                        remaining_blocks.push(block);
                                                                } else {
-                                                                       LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex << current_block->getStartAddress());
-                                                                       function->addBasicBlock(blocks.find(jmptarget)->second);
+                                                                       LOG4CXX_DEBUG(logger, "Reusing Block starting at " << std::hex
+                                                                                     << current_block->getStartAddress());
+                                                                       function->addBasicBlock(new_blocks.find(jmptarget)->second);
                                                                }
                                                        }
                                                }
@@ -310,15 +311,16 @@ void LLVMDisassembler<ELFT>::disassemble() {
                        continue;
 
                if (!x->second.getAddress(result)) {
-                       Function * fun = new Function(x->first, result, manager);
+                       Function * fun = manager->newFunction(result);
+                       fun->setName(x->first);
                        remaining_functions.push_back(fun);
-                       functions.insert(std::make_pair(result, fun));
                        LOG4CXX_DEBUG(logger, "Disasembling " << x->first);
                }
        }
 
        for (Function* function : remaining_functions) {
                disassembleFunction(function);
+               manager->finishFunction(function);
        }
 
        if (binary->isELF()) {
@@ -333,12 +335,10 @@ void LLVMDisassembler<ELFT>::disassemble() {
                disassembleFunctionAt(_entryAddress, s.str());
        }
 
-       if (functions.empty()) {
-               uint64_t text_entry;
-               text_section.getAddress(text_entry);
-               LOG4CXX_INFO(logger, "No Symbols found, starting at the beginning of the text segment");
-               disassembleFunctionAt(text_entry);
-       }
+       uint64_t text_entry;
+       text_section.getAddress(text_entry);
+       LOG4CXX_INFO(logger, "No Symbols found, starting at the beginning of the text segment");
+       disassembleFunctionAt(text_entry);
 }
 
 template <typename ELFT>
@@ -365,20 +365,22 @@ void LLVMDisassembler<ELFT>::splitBlocks(Function* function) {
                        if(llvm::MCDisassembler::Success ==
                           DisAsm->getInstruction(inst, inst_size, ref, current_address, nulls(), nulls())) {
                                // See if some other block starts here
-                               auto other = blocks.find(current_address + inst_size + base_address);
+                               BasicBlock* other = manager->getBasicBlock(current_address
+                                                                          + inst_size
+                                                                          + base_address);
 
                                // Special case, other block starts here but we are at the end anyway
-                               if (other != blocks.end()) {
+                               if (other != NULL) {
                                        uint64_t endaddress = current_address + inst_size + base_address;
                                        if (endaddress != current_block->getEndAddress()) {
                                                LOG4CXX_DEBUG(logger, "Shortening block starting at "
                                                              << std::hex
                                                              << current_block->getStartAddress()
                                                              << " now ending at "
-                                                             << other->first);
-                                               function->addBasicBlock(other->second);
+                                                             << other->getStartAddress());
+                                               function->addBasicBlock(other);
                                                current_block->setEndAddress(endaddress);
-                                               current_block->setNextBlock(0, other->first);
+                                               current_block->setNextBlock(0, other->getStartAddress());
                                                current_block->setNextBlock(1, 0);
                                        }
                                }
@@ -440,13 +442,13 @@ void LLVMDisassembler<ELFT>::readSections() {
 
 }
 
-template <typename ELFT>
-void LLVMDisassembler<ELFT>::forEachFunction(std::function<void (uint64_t, Function*)> callback) {
-       std::for_each(functions.begin(), functions.end(),
-                     [&](std::pair<uint64_t, Function*> x) {
-                             callback(x.first, x.second);
-                     });
-}
+// template <typename ELFT>
+// void LLVMDisassembler<ELFT>::forEachFunction(std::function<void (uint64_t, Function*)> callback) {
+//     // std::for_each(functions.begin(), functions.end(),
+//     //               [&](std::pair<uint64_t, Function*> x) {
+//     //                    callback(x.first, x.second);
+//     //               });
+// }
 
 template <typename ELFT>
 void LLVMDisassembler<ELFT>::printEachInstruction(uint64_t start, uint64_t end,
index 426cc89053e932a9eeeb0ee3e6060ec87277336e..d00381b7ed962862c8be6e26008cdf94d06f9da8 100644 (file)
@@ -26,15 +26,10 @@ public:
        void getSymbols() {}
        uint64_t entryAddress() {return _entryAddress;}
 
-       void forEachFunction(std::function<void (uint64_t, Function*)> callback);
        void printEachInstruction(uint64_t start, uint64_t end,
                                  std::function<void (uint8_t*, size_t, const std::string&,
                                                      const std::string&)> fun);
 
-       BasicBlock * getBasicBlock(uint64_t address) {
-               return blocks[address];
-       }
-
        Function * disassembleFunctionAt(uint64_t address, const std::string& name = "");
 
 protected:
@@ -54,8 +49,6 @@ private:
        void readDynamicSymbols();
 
        log4cxx::LoggerPtr logger;
-       std::map<uint64_t, BasicBlock*> blocks;
-       std::map<uint64_t, Function*> functions;
 
        llvm::Triple triple;
        std::shared_ptr<llvm::object::Binary> binary;
index b2f5b4da2b1e273e1e3a0ed7065bb1e9b2ba3996..47ab2a3eda39b0447d26c3259e351546302e474e 100644 (file)
@@ -14,8 +14,8 @@
 
 namespace {
        BasicBlockWidget *
-       local__add_basic_block(BasicBlock * block, Disassembler * dis,
-                              Mainwindow * mainwindow,
+       local__add_basic_block(BasicBlock * block, 
+                              Mainwindow * mainwindow, InformationManager * manager,
                               std::map<uint64_t, BasicBlockWidget*>& known_blocks,
                               CFGScene * scene, uint64_t starty, uint64_t startx);
 }
@@ -173,8 +173,7 @@ void Mainwindow::addFunction(Function* fun) {
        // CFG
        CFGScene * scene = new CFGScene;
 
-       Disassembler * dis = manager->getDisassembler();
-       BasicBlock * block = dis->getBasicBlock(fun->getStartAddress());
+       BasicBlock * block = manager->getBasicBlock(fun->getStartAddress());
 
        uint64_t start_address(std::numeric_limits<uint64_t>::max());
        for (auto b : fun->blocks()) {
@@ -182,8 +181,8 @@ void Mainwindow::addFunction(Function* fun) {
                        start_address = b.first;
        }
 
-       local__add_basic_block(block, manager->getDisassembler(), this,
-                              blocks, scene, start_address, 100);
+       local__add_basic_block(block, this,
+                              manager, blocks, scene, start_address, 100);
 
        QGraphicsView * view = new QGraphicsView(scene);
        w->addTab(view, "CFG");
@@ -205,8 +204,8 @@ void Mainwindow::addFunction(Function* fun) {
 
 namespace {
        BasicBlockWidget *
-       local__add_basic_block(BasicBlock * block, Disassembler * dis,
-                              Mainwindow * mainwindow,
+       local__add_basic_block(BasicBlock * block,
+                              Mainwindow * mainwindow, InformationManager * manager,
                               std::map<uint64_t, BasicBlockWidget*>& known_blocks,
                               CFGScene * scene, uint64_t starty, uint64_t startx) {
 
@@ -226,16 +225,17 @@ namespace {
                widget->setFlag(QGraphicsItem::ItemIsMovable, true);
                widget->moveBy(100*startx, block->getStartAddress() - starty);
 
-               dis->printEachInstruction(block->getStartAddress(),
-                                         block->getEndAddress(),
-                                         [&](uint8_t* bytes,
-                                             size_t byte_count,
-                                             const std::string& line,
-                                             const std::string& ref) {
-                                                 widget->addItem(bytes, byte_count,
-                                                                 line.c_str() + 1, // remove \t
-                                                                 ref.c_str());
-                                         });
+               manager->getDisassembler()
+                       ->printEachInstruction(block->getStartAddress(),
+                                              block->getEndAddress(),
+                                              [&](uint8_t* bytes,
+                                                  size_t byte_count,
+                                                  const std::string& line,
+                                                  const std::string& ref) {
+                                                      widget->addItem(bytes, byte_count,
+                                                                      line.c_str() + 1, // remove \t
+                                                                      ref.c_str());
+                                              });
 
                BasicBlockWidget *tmp, *nextl(NULL), *nextr(NULL);
                BasicBlock * tmpblock;
@@ -243,18 +243,16 @@ namespace {
                        int xshift = 0;
                        if (block->getNextBlock(1) != 0)
                                xshift = 1;
-                       tmpblock = dis->getBasicBlock(block->getNextBlock(0));
-                       tmp = local__add_basic_block(tmpblock, dis,
-                                                    mainwindow,
+                       tmpblock = manager->getBasicBlock(block->getNextBlock(0));
+                       tmp = local__add_basic_block(tmpblock, mainwindow, manager,
                                                     known_blocks,
                                                     scene, starty, startx+xshift);
                        nextl = tmp;
                        tmp->addPrevious(widget);
                }
                if (block->getNextBlock(1) != 0) {
-                       tmpblock = dis->getBasicBlock(block->getNextBlock(1));
-                       tmp = local__add_basic_block(tmpblock, dis,
-                                                    mainwindow,
+                       tmpblock = manager->getBasicBlock(block->getNextBlock(1));
+                       tmp = local__add_basic_block(tmpblock, mainwindow, manager,
                                                     known_blocks,
                                                     scene, starty, startx-1);
                        nextr = tmp;