git.siccegge.de Git - tools.git/commitdiff
backup tool for zfs
author Christoph Egger <christoph@christoph-egger.org>
Tue, 30 Sep 2014 17:49:40 +0000 (19:49 +0200)
committer Christoph Egger <christoph@christoph-egger.org>
Tue, 30 Sep 2014 17:49:40 +0000 (19:49 +0200)
backup-zfs [new file with mode: 0755]

diff --git a/backup-zfs b/backup-zfs
new file mode 100755 (executable)
index 0000000..7ac2f39
--- /dev/null
@@ -0,0 +1,184 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
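+
+"""Back up ZFS snapshots to a remote host over SFTP.
+
+Each snapshot is piped through 'zfs send', signed and encrypted with
+GPG, and streamed to SFTP_HOST.  Snapshots already present on the
+remote side are skipped, and a SHA256 digest of every upload is
+appended to /tmp/SHA256SUM.
+"""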
+
+from __future__ import print_function
+
+####################
+# Config
+SFTP_HOST = 'botero.siccegge.de'
+SFTP_DIR  = '/srv/backup/mitoraj'
+SFTP_USER = 'root'
+ZPOOL     = 'base'
+GPGUSER   = '9FED5C6CE206B70A585770CA965522B9D49AE731'
+BUFFER    = 1024 * 1024   # 1 MiB chunk size for the zfs -> gpg -> sftp pipeline
+#
+####################
+
+import subprocess
+import os.path
+import sys
+import time
+import hashlib
+import paramiko
+
+term = {
+    'green':  "\033[0;32m",
+    'red':    "\033[0;31m",
+    'yellow': "\033[0;33m",
+    'purple': "\033[0;35m",
+    'none':   "\033[0m",
+    }
+
+sftp = None
+
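+# write 'data' to stdout in the given ANSI color and reset afterwards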
+def print_colored(data, color):
+    sys.stdout.write(term[color])
+    sys.stdout.write(data)
+    sys.stdout.write(term['none'])
+    sys.stdout.write('\n')
+    sys.stdout.flush()
+
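+# Group a flat list of 'dataset@snapshot' names into a dict mapping each
+# dataset to its sorted snapshot names, e.g. (hypothetical data)
+# ['home@2014-08', 'home@2014-09'] -> {'home': ['2014-08', '2014-09']}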
+def postprocess_datasets(datasets):
+    devices = set([entry.split('@')[0] for entry in datasets])
+
+    result = dict()
+    for device in devices:
+        # match on 'device@' so a dataset whose name is a prefix of another
+        # (e.g. 'home' vs. 'home2') does not pick up the other's snapshots
+        result[device] = sorted([ entry.split('@')[1] for entry in datasets
+                                    if entry.startswith(device + '@') ])
+
+    return result
+
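+# Open the module-global SFTP session: the server is verified against
+# ~/.ssh/known_hosts (it is assumed to have an entry there) and we
+# authenticate with the first ssh-agent key the server accepts.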
+def sftp_connect():
+    global sftp
+    paramiko.util.log_to_file("/tmp/paramiko.log")
+
+    host_keys = paramiko.util.load_host_keys(os.path.expanduser('~/.ssh/known_hosts'))
+    hostkeytype = host_keys[SFTP_HOST].keys()[0]
+    hostkey = host_keys[SFTP_HOST][hostkeytype]
+
+    agent = paramiko.Agent()
+    transport = paramiko.Transport((SFTP_HOST, 22),
+                                   default_window_size=1024*BUFFER,
+                                   default_max_packet_size=64*BUFFER)
+    #  transport.max_packet_size = BUFFER
+    #  transport.window_size = BUFFER * 64
+    transport.connect(hostkey=hostkey)
+
+    for key in agent.get_keys():
+        try:
+            transport.auth_publickey(SFTP_USER, key)
+            break
+        except paramiko.SSHException:
+            continue
+    else:
+        # no agent key was accepted -- fail early instead of letting the
+        # sftp setup below die with a less helpful error
+        print_colored("Error: no usable key found in the ssh agent", 'red')
+        sys.exit(1)
+
+    sftp = transport.open_sftp_client()
+    sftp.chdir(SFTP_DIR)
+
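+# Upload a single snapshot as 'zfs send | gpg | sftp'.  With a reference
+# snapshot an incremental stream is sent, otherwise a full one; the
+# remote file name records which of the two it was.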
+def sftp_send(dataset, reference=None):
+    if reference is None:
+        filename = '%s.full.zfs.gpg' % dataset
+    else:
+        filename = '%s.from.%s.zfs.gpg' % (dataset, reference)
+
+    # skip snapshots that were already uploaded
+    try:
+        sftp.stat(filename)
+        return
+    except IOError:
+        pass
+
+    # 'zfs send' parses options before the snapshot argument, so the
+    # incremental reference has to go in front of the dataset name
+    zfscommand = ['sudo', 'zfs', 'send', '-D']
+    if reference is not None:
+        zfscommand += ['-i', reference]
+    zfscommand.append('%s/%s' % (ZPOOL, dataset))
+
+    zfs = subprocess.Popen(zfscommand, stdout=subprocess.PIPE, bufsize=2*BUFFER)
+    print(zfscommand)
+
+    gpgcommand = [ 'gpg', '--batch', '--compress-algo', 'ZLIB',
+                   '--sign', '--encrypt', '--recipient', GPGUSER ]
+    gpg = subprocess.Popen(gpgcommand, bufsize=2*BUFFER,
+                                       stdout=subprocess.PIPE,
+                                       stdin=zfs.stdout,
+                                       stderr=subprocess.PIPE)
+    print(gpgcommand)
+
+    # read the first chunk; if gpg failed right away (bad key, missing
+    # agent, ...) report its stderr and give up on this snapshot
+    junk = gpg.stdout.read(BUFFER)
+    gpg.poll()
+    if gpg.returncode not in [None, 0]:
+        print_colored("Error:\n\n" + gpg.stderr.read(), 'red')
+        return
+
+    with open('/tmp/SHA256SUM', 'a') as digestfile:
+        # 'x' in the mode makes paramiko request exclusive creation, so an
+        # existing remote file is never overwritten
+        with sftp.open(filename, 'xw', BUFFER) as remotefile:
+            digest = hashlib.sha256()
+            sys.stdout.write(term['purple'])
+            done = 0
+            startt = time.time()
+            while junk:
+                #if len(junk) < BUFFER:
+                #    print_colored("short read: %d" % len(junk), 'yellow')
+                done = done + len(junk)
+#                sys.stdout.write('#')
+#                sys.stdout.flush()
+                sys.stdout.write("\r%s %.3f GB      (%.3f MB/s)          "
+                                 % (term['green'], (1.0 * done) / (1024 ** 3),
+                                    done / (1024 ** 2 * (time.time() - startt))))
+                sys.stdout.flush()
+                remotefile.write(junk)
+                digest.update(junk)
+                junk = gpg.stdout.read(BUFFER)
+
+            sys.stdout.write('\r')
+            print_colored(" %.3f GB DONE (%.3f MB/s)"
+                          % ((1.0 * done) / (1024 ** 3),
+                             done / (1024 ** 2 * (time.time() - startt))),
+                          'green')
+            digestfile.write("%s  %s\n" % (digest.hexdigest(), filename))
+
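+# Upload every local snapshot that is missing on the remote side: the
+# first snapshot of a dataset goes out as a full stream, later ones as
+# incrementals against their predecessor for as long as the (date-based)
+# name prefix matches, and as a fresh full stream once it changes.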
+def synchronize(local_datasets, remote_datasets):
+    for device in local_datasets:
+        current = ""
+        for dataset in local_datasets[device]:
+            last = current
+            current = dataset
+
+            if device in remote_datasets:
+                if dataset in remote_datasets[device]:
+                    print_colored("%s@%s -- found on remote server" % (device, dataset), 'yellow')
+                    continue
+
+            if last == '':
+                print_colored("Initial syncronization for device %s" % device, 'green')
+                sftp_send("%s@%s" % (device, dataset))
+                lastmonth = dataset
+                continue
+
+            # snapshot names apparently start with a YYYY-MM date stamp: a
+            # shared 7-character prefix means "same month", so send an
+            # incremental stream against the previous snapshot
+            if last[:7] == dataset[:7]:
+                print_colored("%s@%s -- incremental backup (reference: %s)" %
+                              (device, dataset, last), 'green')
+                sftp_send("%s@%s" % (device, dataset), last)
+            else:
+                print_colored("%s@%s -- full backup" % (device, dataset), 'green')
+                sftp_send("%s@%s" % (device, dataset))
+                #print_colored("%s@%s -- doing incremental backup" % (device, dataset), 'green')
+                #sftp_send("%s@%s" % (device, dataset), lastmonth)
+                #lastmonth = dataset
+
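+# List the snapshots already uploaded: everything before the first '.'
+# of a remote file name is its 'dataset@snapshot' pair.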
+def get_remote_datasets():
+    datasets = [ entry for entry in sftp.listdir() if '@' in entry ]
+
+    # everything up to the first '.' is the 'dataset@snapshot' part of the
+    # file name generated by sftp_send
+    datasets = [ entry.split('.')[0] for entry in datasets ]
+
+    return postprocess_datasets(datasets)
+
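+# List the local snapshots via 'zfs list' and strip the pool prefix.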
+def get_local_datasets():
+    datasets = subprocess.check_output(['sudo', 'zfs', 'list', '-t', 'snapshot', '-H', '-o', 'name'])
+    datasets = datasets.strip().split('\n')
+
+    # drop the leading '<ZPOOL>/' prefix instead of hard-coding its length
+    datasets = [ entry[len(ZPOOL) + 1:] for entry in datasets ]
+
+    return postprocess_datasets(datasets)
+
+def main():
+    sftp_connect()
+    synchronize(get_local_datasets(), get_remote_datasets())
+
+if __name__ == '__main__':
+    main()