#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Backup tool for ZFS.

Every snapshot of ZPOOL that is not yet stored on the SFTP server is
streamed through ``zfs send`` and ``gpg --sign --encrypt`` into a file in
SFTP_DIR.  The first snapshot of a dataset, and the first one whose name
differs from its predecessor in the first seven characters, is sent as a
full stream; all others are sent incrementally against their predecessor.
The SHA-256 digest of every uploaded file is appended to /tmp/SHA256SUM.
"""
# Imported from the patch "backup tool for zfs" (commit 6067d72fbe1f,
# Christoph Egger, 2014-09-30), which added this script as `backup-zfs`.

from __future__ import print_function

####################
# Config
SFTP_HOST = 'botero.siccegge.de'
SFTP_DIR = '/srv/backup/mitoraj'
SFTP_USER = 'root'
ZPOOL = 'base'
GPGUSER = '9FED5C6CE206B70A585770CA965522B9D49AE731'
BUFFER = 1024 * 1024
#
####################

import subprocess
import os.path
import sys
import time
import hashlib

term = {
    'green': "\033[0;32m",
    'red': "\033[0;31m",
    'yellow': "\033[0;33m",
    'purple': "\033[0;35m",
    'none': "\033[0m",
    }

# SFTP client shared by the functions below; set by sftp_connect().
sftp = None

# File name endings produced by sftp_send().
_BACKUP_SUFFIX = '.zfs.gpg'
_FULL_SUFFIX = '.full' + _BACKUP_SUFFIX
_INCREMENTAL_MARKER = '.from.'


def print_colored(data, color):
    """Write `data` to stdout in the `term` color `color`, then a newline."""
    sys.stdout.write(term[color])
    sys.stdout.write(data)
    sys.stdout.write(term['none'])
    sys.stdout.write('\n')
    sys.stdout.flush()


def postprocess_datasets(datasets):
    """Group ``device@snapshot`` names by device.

    Returns a dict mapping every device to the sorted list of its snapshot
    names.  Entries that contain no '@' are ignored.
    """
    result = dict()
    for entry in datasets:
        # Split on the separator instead of prefix-matching the device, so
        # that e.g. 'home' does not swallow the snapshots of 'home2'.
        device, separator, snapshot = entry.partition('@')
        if not separator:
            continue
        result.setdefault(device, []).append(snapshot)

    for snapshots in result.values():
        snapshots.sort()

    return result


def sftp_connect():
    """Open the SFTP session to SFTP_HOST and store it in the global `sftp`.

    The server is verified against ~/.ssh/known_hosts and the login uses
    the keys offered by the running SSH agent.

    Raises:
        KeyError: SFTP_HOST has no entry in known_hosts.
        paramiko.SSHException: none of the agent's keys was accepted.
    """
    global sftp
    # Imported here because only the connection setup needs paramiko; the
    # dataset helpers stay usable without it.
    import paramiko

    paramiko.util.log_to_file("/tmp/paramiko.log")

    host_keys = paramiko.util.load_host_keys(os.path.expanduser('~/.ssh/known_hosts'))
    known_keys = host_keys[SFTP_HOST]
    hostkeytype = list(known_keys.keys())[0]
    hostkey = known_keys[hostkeytype]

    agent = paramiko.Agent()
    transport = paramiko.Transport((SFTP_HOST, 22),
                                   default_window_size=1024*BUFFER,
                                   default_max_packet_size=64*BUFFER)
    transport.connect(hostkey=hostkey)

    for key in agent.get_keys():
        try:
            transport.auth_publickey(SFTP_USER, key)
            break
        except paramiko.SSHException:
            continue
    else:
        # No key worked: fail here with a clear message instead of later
        # while opening the SFTP channel.
        transport.close()
        raise paramiko.SSHException(
            "no key from the SSH agent was accepted for %s@%s"
            % (SFTP_USER, SFTP_HOST))

    sftp = transport.open_sftp_client()
    sftp.chdir(SFTP_DIR)


def _read_text(stream):
    """Return everything left in `stream` as a native string."""
    data = stream.read()
    if not isinstance(data, str):
        data = data.decode('utf-8', 'replace')
    return data


def _megabytes_per_second(done, started):
    """Return the average rate for `done` bytes sent since `started`."""
    elapsed = time.time() - started
    if elapsed <= 0:
        # Guard against a clock that has not advanced (or stepped back).
        return 0.0
    return done / (1024 ** 2 * elapsed)


def sftp_send(dataset, reference=None):
    """Upload snapshot `dataset` as an encrypted `zfs send` stream.

    Args:
        dataset: snapshot name relative to ZPOOL (``device@snapshot``).
        reference: snapshot to send incrementally against; None requests a
            full stream.

    Nothing is sent when the target file already exists on the server.  If
    `zfs send` or `gpg` fails, the incomplete remote file is removed again
    and no digest is recorded.

    NOTE(review): the remote file name contains `dataset` verbatim, so a
    nested dataset ('a/b@snap') ends up as a path below SFTP_DIR -- confirm
    this is intended.
    """
    if reference is None:
        filename = '%s.full.zfs.gpg' % dataset
    else:
        filename = '%s.from.%s.zfs.gpg' % (dataset, reference)

    try:
        sftp.stat(filename)
    except IOError:
        # The file is not on the server yet, so it has to be sent.
        pass
    else:
        return

    # Options go before the snapshot argument so this also works where
    # getopt does not permute arguments.
    zfscommand = ['sudo', 'zfs', 'send', '-D']
    if reference is not None:
        zfscommand = zfscommand + ['-i', reference]
    zfscommand.append('%s/%s' % (ZPOOL, dataset))

    zfs = subprocess.Popen(zfscommand, stdout=subprocess.PIPE, bufsize=2*BUFFER)
    print(zfscommand)

    gpgcommand = ['gpg', '--batch', '--compress-algo', 'ZLIB',
                  '--sign', '--encrypt', '--recipient', GPGUSER]
    gpg = subprocess.Popen(gpgcommand, bufsize=2*BUFFER,
                           stdout=subprocess.PIPE,
                           stdin=zfs.stdout,
                           stderr=subprocess.PIPE)
    print(gpgcommand)
    # Drop our copy of the pipe so that zfs receives SIGPIPE if gpg exits.
    zfs.stdout.close()

    # Read the first chunk before creating the remote file, so that an
    # immediate gpg failure leaves nothing behind on the server.
    junk = gpg.stdout.read(BUFFER)
    gpg.poll()
    if gpg.returncode not in [None, 0]:
        print_colored("Error:\n\n" + _read_text(gpg.stderr), 'red')
        zfs.wait()
        return

    with open('/tmp/SHA256SUM', 'a') as digestfile:
        with sftp.open(filename, 'xw', BUFFER) as remotefile:
            digest = hashlib.sha256()
            sys.stdout.write(term['purple'])
            done = 0
            startt = time.time()
            while junk:
                done = done + len(junk)
                sys.stdout.write("\r%s %.3f GB (%.3f MB/s) " % (
                    term['green'], (1.0 * done) / (1024 ** 3),
                    _megabytes_per_second(done, startt)))
                sys.stdout.flush()
                remotefile.write(junk)
                digest.update(junk)
                junk = gpg.stdout.read(BUFFER)

        sys.stdout.write('\r')

        # gpg's stdout is at EOF; drain stderr before waiting so that gpg
        # cannot block on a full pipe.
        errors = _read_text(gpg.stderr)
        gpg_status = gpg.wait()
        zfs_status = zfs.wait()
        if gpg_status != 0 or zfs_status != 0:
            print_colored("Error: zfs send exited with %d, gpg with %d\n\n%s"
                          % (zfs_status, gpg_status, errors), 'red')
            # A truncated file would be mistaken for a finished backup on
            # the next run, so take it away again.
            try:
                sftp.remove(filename)
            except IOError:
                print_colored("Could not remove incomplete file %s" % filename, 'red')
            return

        print_colored(" %.3f GB DONE (%.3f MB/s)" % (
            (1.0 * done) / (1024 ** 3),
            _megabytes_per_second(done, startt)), 'green')
        digestfile.write("%s %s\n" % (digest.hexdigest(), filename))


def syncronize(local_datasets, remote_datasets):
    """Send every local snapshot that is missing on the server.

    Both arguments map a device to its sorted snapshot names, as returned
    by postprocess_datasets().  A snapshot is sent incrementally against
    its local predecessor when both names share their first seven
    characters (presumably a YYYY-MM prefix -- confirm against the
    snapshot naming scheme); otherwise a full stream is sent.
    """
    for device in local_datasets:
        on_server = remote_datasets.get(device, ())
        current = ""
        for dataset in local_datasets[device]:
            last, current = current, dataset

            if dataset in on_server:
                print_colored("%s@%s -- found on remote server" % (device, dataset), 'yellow')
                continue

            if last == '':
                print_colored("Initial syncronization for device %s" % device, 'green')
                sftp_send("%s@%s" % (device, dataset))
                continue

            if last[:7] == dataset[:7]:
                print_colored("%s@%s -- incremental backup (reference: %s)" %
                              (device, dataset, last), 'green')
                sftp_send("%s@%s" % (device, dataset), last)
            else:
                print_colored("%s@%s -- full backup" % (device, dataset), 'green')
                sftp_send("%s@%s" % (device, dataset))


def _remote_snapshot_name(filename):
    """Return the ``device@snapshot`` part of a backup file name.

    Returns None for names that do not look like a file written by
    sftp_send().
    """
    if '@' not in filename or not filename.endswith(_BACKUP_SUFFIX):
        return None
    if filename.endswith(_FULL_SUFFIX):
        return filename[:-len(_FULL_SUFFIX)]
    # Cut at the marker rather than the first dot, so that names
    # containing dots survive intact.
    marker = filename.find(_INCREMENTAL_MARKER, filename.index('@'))
    if marker == -1:
        return None
    return filename[:marker]


def get_remote_datasets():
    """Return the snapshots stored in the current SFTP directory, by device."""
    names = [_remote_snapshot_name(entry) for entry in sftp.listdir()]
    return postprocess_datasets([name for name in names if name is not None])


def get_local_datasets():
    """Return the snapshots below ZPOOL, grouped by device.

    Snapshots of other pools and of the pool's root dataset are skipped.
    """
    output = subprocess.check_output(
        ['sudo', 'zfs', 'list', '-t', 'snapshot', '-H', '-o', 'name'],
        universal_newlines=True)

    prefix = ZPOOL + '/'
    datasets = [entry[len(prefix):] for entry in output.splitlines()
                if entry.startswith(prefix)]

    return postprocess_datasets(datasets)


def main():
    """Connect to the server and upload all missing snapshots."""
    sftp_connect()
    syncronize(get_local_datasets(), get_remote_datasets())


if __name__ == '__main__':
    main()