#!/usr/bin/python
# -*- coding: utf-8 -*-
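"""backup-zfs: replicate local ZFS snapshots to a remote host over SFTP.

Every snapshot is streamed through `zfs send`, signed and encrypted with
gpg, and stored on the backup server as one file per snapshot.  Snapshots
sharing their 7-character name prefix (the YYYY-MM part of a date) are
sent incrementally against their predecessor; otherwise a full send is
made.  SHA256 digests of all uploads are appended to /tmp/SHA256SUM.
"""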
from __future__ import print_function

####################
# Config
SFTP_HOST = 'botero.siccegge.de'
SFTP_DIR = '/srv/backup/mitoraj'
SFTP_USER = 'root'
ZPOOL = 'base'          # local pool holding the snapshots to back up
GPGUSER = '9FED5C6CE206B70A585770CA965522B9D49AE731'  # key the backups are encrypted to
BUFFER = 1024 * 1024    # 1 MiB chunk size for pipe and SFTP I/O
#
####################

import subprocess
import os.path
import sys
import time
import hashlib

import paramiko

# ANSI escape sequences for colored status output
term = {
    'green':  "\033[0;32m",
    'red':    "\033[0;31m",
    'yellow': "\033[0;33m",
    'purple': "\033[0;35m",
    'none':   "\033[0m",
}

# module-wide SFTP client, initialized by sftp_connect()
sftp = None

def print_colored(data, color):
    """Print data in the given terminal color and flush immediately."""
    sys.stdout.write(term[color])
    sys.stdout.write(data)
    sys.stdout.write(term['none'])
    sys.stdout.write('\n')
    sys.stdout.flush()

def postprocess_datasets(datasets):
    """Group 'device@snapshot' names into {device: sorted snapshot list}."""
    devices = set([entry.split('@')[0] for entry in datasets])

    result = dict()
    for device in devices:
        # Match on 'device@' so that one device name being a prefix of
        # another (e.g. 'root' and 'root2') cannot mix their snapshots.
        result[device] = sorted([entry.split('@')[1] for entry in datasets
                                 if entry.startswith(device + '@')])

    return result
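# Shape of the result, with hypothetical snapshot names for illustration:
#   postprocess_datasets(['root@2016-01-03', 'root@2016-01-10', 'home@2016-01-03'])
#   == {'home': ['2016-01-03'], 'root': ['2016-01-03', '2016-01-10']}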

def sftp_connect():
    """Open an SFTP session to SFTP_HOST, authenticating via the SSH agent."""
    global sftp
    paramiko.util.log_to_file("/tmp/paramiko.log")

    # Pin the host key from the user's known_hosts file.
    host_keys = paramiko.util.load_host_keys(os.path.expanduser('~/.ssh/known_hosts'))
    hostkeytype = list(host_keys[SFTP_HOST].keys())[0]
    hostkey = host_keys[SFTP_HOST][hostkeytype]

    agent = paramiko.Agent()
    # Large window/packet sizes to keep throughput up on big transfers.
    transport = paramiko.Transport((SFTP_HOST, 22),
                                   default_window_size=1024 * BUFFER,
                                   default_max_packet_size=64 * BUFFER)
    # transport.max_packet_size = BUFFER
    # transport.window_size = BUFFER * 64
    transport.connect(hostkey=hostkey)

    # Try each agent key until one is accepted.
    for key in agent.get_keys():
        try:
            transport.auth_publickey(SFTP_USER, key)
            break
        except paramiko.SSHException:
            continue

    sftp = transport.open_sftp_client()
    sftp.chdir(SFTP_DIR)

def sftp_send(dataset, reference=None):
    """Encrypt and upload one snapshot, incremental against reference if given."""
    if reference is None:
        filename = '%s.full.zfs.gpg' % dataset
    else:
        filename = '%s.from.%s.zfs.gpg' % (dataset, reference)

    # Skip snapshots that already exist on the remote side.
    try:
        sftp.stat(filename)
        return
    except IOError:
        pass

    zfscommand = ['sudo', 'zfs', 'send', '-D']
    if reference is not None:
        # Options must precede the snapshot argument.
        zfscommand += ['-i', reference]
    zfscommand.append('%s/%s' % (ZPOOL, dataset))

    zfs = subprocess.Popen(zfscommand, stdout=subprocess.PIPE, bufsize=2*BUFFER)
    print(zfscommand)

    gpgcommand = ['gpg', '--batch', '--compress-algo', 'ZLIB',
                  '--sign', '--encrypt', '--recipient', GPGUSER]
    gpg = subprocess.Popen(gpgcommand, bufsize=2*BUFFER,
                           stdout=subprocess.PIPE,
                           stdin=zfs.stdout,
                           stderr=subprocess.PIPE)
    print(gpgcommand)
    # Let zfs receive SIGPIPE if gpg exits early.
    zfs.stdout.close()

    # Read the first chunk so a gpg failure at startup is caught here.
    junk = gpg.stdout.read(BUFFER)
    gpg.poll()
    if gpg.returncode not in [None, 0]:
        print_colored("Error:\n\n" + gpg.stderr.read(), 'red')
        return

    with open('/tmp/SHA256SUM', 'a') as digestfile:
        # 'x' in the mode makes the upload fail rather than overwrite.
        with sftp.open(filename, 'xw', BUFFER) as remotefile:
            digest = hashlib.sha256()
            sys.stdout.write(term['purple'])
            done = 0
            startt = time.time()
            while True:
                if len(junk) == 0:
                    break
                #if len(junk) < BUFFER:
                #    print_colored("short read: %d" % len(junk), 'yellow')
                done = done + len(junk)
                sys.stdout.write("\r%s %.3f GB (%.3f MB/s) "
                                 % (term['green'],
                                    (1.0 * done) / (1024 ** 3),
                                    done / (1024 ** 2 * (time.time() - startt))))
                sys.stdout.flush()
                remotefile.write(junk)
                digest.update(junk)
                junk = gpg.stdout.read(BUFFER)

            sys.stdout.write('\r')
            timedelta = time.time() - startt
            print_colored(" %.3f GB DONE (%.3f MB/s) Total: %02d:%02d:%02d"
                          % ((1.0 * done) / (1024 ** 3),
                             done / (1024 ** 2 * timedelta),
                             timedelta / 3600, (timedelta / 60) % 60,
                             timedelta % 60), 'green')
            # Two spaces: the format `sha256sum -c` expects.
            digestfile.write("%s  %s\n" % (digest.hexdigest(), filename))
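# Restoring is the reverse pipeline (a sketch, not part of this tool;
# assumes the GPGUSER secret key is available for decryption):
#   gpg --decrypt root@2016-01-03.full.zfs.gpg | sudo zfs receive base/root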

def syncronize(local_datasets, remote_datasets):
    """Upload every local snapshot that is missing on the remote side."""
    for device in local_datasets.keys():
        current = ""
        for dataset in local_datasets[device]:
            last = current
            current = dataset

            if device in remote_datasets:
                if dataset in remote_datasets[device]:
                    print_colored("%s@%s -- found on remote server" % (device, dataset), 'yellow')
                    continue

            if last == '':
                print_colored("Initial synchronization for device %s" % device, 'green')
                sftp_send("%s@%s" % (device, dataset))
                lastmonth = dataset
                continue

            # Snapshot names are assumed to start with YYYY-MM, so comparing
            # the first seven characters sends incrementals within a month
            # and a fresh full backup whenever the month changes.
            if last[:7] == dataset[:7]:
                print_colored("%s@%s -- incremental backup (reference: %s)" %
                              (device, dataset, last), 'green')
                sftp_send("%s@%s" % (device, dataset), last)
            else:
                print_colored("%s@%s -- full backup" % (device, dataset), 'green')
                sftp_send("%s@%s" % (device, dataset))
                #print_colored("%s@%s -- doing incremental backup" % (device, dataset), 'green')
                #sftp_send("%s@%s" % (device, dataset), lastmonth)
                #lastmonth = dataset

def get_remote_datasets():
    """List 'device@snapshot' names already stored on the backup server."""
    datasets = sftp.listdir()
    datasets = filter(lambda x: '@' in x, datasets)

    # Strip the '.full.zfs.gpg' / '.from.<ref>.zfs.gpg' suffix.
    datasets = [entry.split('.')[0] for entry in datasets]

    return postprocess_datasets(datasets)
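# Filename-to-dataset mapping, with hypothetical names for illustration:
#   'root@2016-01-03.full.zfs.gpg'            -> 'root@2016-01-03'
#   'root@2016-01-10.from.2016-01-03.zfs.gpg' -> 'root@2016-01-10'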

def get_local_datasets():
    """List local 'device@snapshot' names under ZPOOL via `zfs list`."""
    datasets = subprocess.check_output(['sudo', 'zfs', 'list', '-t', 'snapshot',
                                        '-H', '-o', 'name'])
    datasets = datasets.strip().split('\n')

    # Keep only snapshots from our pool and drop the 'ZPOOL/' prefix.
    datasets = [entry[len(ZPOOL) + 1:] for entry in datasets
                if entry.startswith(ZPOOL + '/')]

    return postprocess_datasets(datasets)

def main():
    sftp_connect()
    syncronize(get_local_datasets(), get_remote_datasets())

if __name__ == '__main__':
    main()
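# Verifying uploads later (a sketch, assuming GNU coreutils on the backup
# host and that /tmp/SHA256SUM has been copied into SFTP_DIR):
#   sha256sum -c SHA256SUM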