#!/usr/bin/python
# -*- coding: utf-8 -*-
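# Push ZFS snapshots to an off-site backup host: every snapshot is
# exported with "zfs send", signed and encrypted with gpg2 and uploaded
# over SFTP (paramiko), using incremental streams where possible.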

from __future__ import print_function

####################
# Config
SFTP_HOST = 'botero.siccegge.de'
SFTP_DIR = '/srv/backup/mitoraj'
SFTP_USER = 'root'
ZPOOL = 'base'
GPGUSER = '9FED5C6CE206B70A585770CA965522B9D49AE731'
BUFFER = 4 * 1024 * 1024
#
####################

import subprocess
import os.path
import sys
import time
import hashlib
import paramiko

term = {
    'green': "\033[0;32m",
    'red': "\033[0;31m",
    'yellow': "\033[0;33m",
    'purple': "\033[0;35m",
    'none': "\033[0m",
    }

sftp = None

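# Write a line to stdout wrapped in the ANSI escape sequence for the
# requested color.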
def print_colored(data, color):
    sys.stdout.write(term[color])
    sys.stdout.write(data)
    sys.stdout.write(term['none'])
    sys.stdout.write('\n')
    sys.stdout.flush()

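# Turn a flat list of "dataset@snapshot" names into a dict mapping each
# dataset to the sorted list of its snapshot names.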
def postprocess_datasets(datasets):
    devices = set([entry.split('@')[0] for entry in datasets])

    result = dict()
    for device in devices:
        result[device] = sorted([ entry.split('@')[1] for entry in datasets
                                  if entry.startswith(device) ])

    return result

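# Open an SFTP session on SFTP_HOST: verify the server against
# ~/.ssh/known_hosts, authenticate with the first usable key from the
# ssh agent and change into the backup directory.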
def sftp_connect():
    global sftp
    paramiko.util.log_to_file("/tmp/paramiko.log")

    host_keys = paramiko.util.load_host_keys(os.path.expanduser('~/.ssh/known_hosts'))
    hostkeytype = host_keys[SFTP_HOST].keys()[0]
    hostkey = host_keys[SFTP_HOST][hostkeytype]

    agent = paramiko.Agent()
    transport = paramiko.Transport((SFTP_HOST, 22),
                                   default_window_size=128*BUFFER,
                                   default_max_packet_size=BUFFER)
    # transport.max_packet_size = BUFFER
    # transport.window_size = BUFFER * 64
    transport.connect(hostkey=hostkey)

    for key in agent.get_keys():
        try:
            transport.auth_publickey(SFTP_USER, key)
            break
        except paramiko.SSHException:
            continue

    sftp = transport.open_sftp_client()
    sftp.chdir(SFTP_DIR)

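# Upload a single snapshot (optionally as an incremental stream against
# "reference"): "zfs send" is piped through gpg2 (sign + encrypt) and
# streamed to the backup server, unless the target file already exists.
# A SHA256 digest of the uploaded data is appended to /tmp/SHA256SUM.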
def sftp_send(dataset, reference=None):
    if reference is None:
        filename = '%s.full.zfs.gpg' % dataset
    else:
        filename = '%s.from.%s.zfs.gpg' % (dataset, reference)

    # skip snapshots that have already been uploaded
    try:
        sftp.stat(filename)
        return
    except IOError:
        pass

    zfscommand = ['sudo', 'zfs', 'send', '-D', '%s/%s' % (ZPOOL, dataset)]
    if reference is not None:
        zfscommand = zfscommand + ['-i', reference]

    zfs = subprocess.Popen(zfscommand, stdout=subprocess.PIPE, bufsize=2*BUFFER)
    print(zfscommand)

    gpgcommand = [ 'gpg2', '--batch', '--compress-algo', 'ZLIB',
                   '--sign', '--encrypt', '--recipient', GPGUSER ]
    gpg = subprocess.Popen(gpgcommand, bufsize=2*BUFFER,
                           stdout=subprocess.PIPE,
                           stdin=zfs.stdout,
                           stderr=subprocess.PIPE)
    print(gpgcommand)

    junk = gpg.stdout.read(BUFFER)
    gpg.poll()
    if gpg.returncode not in [None, 0]:
        print_colored("Error:\n\n" + gpg.stderr.read(), 'red')
        return

    lastflush = 0
    with open('/tmp/SHA256SUM', 'a') as digestfile:
        with sftp.open(filename, 'xw', BUFFER) as remotefile:
            remotefile.set_pipelined()
            digest = hashlib.sha256()
            sys.stdout.write(term['purple'])
            done = 0
            startt = time.time()
            while True:
                if len(junk) == 0:
                    break
                #if len(junk) < BUFFER:
                #    print_colored("short read: %d" % len(junk), 'yellow')
                done = done + len(junk)
                # sys.stdout.write('#')
                # sys.stdout.flush()
                if done != len(junk):
                    sys.stdout.write(" \r%s %.3f GB (%.3f MB/s)" %
                                     (term['green'],
                                      (1.0 * done) / (1024 ** 3),
                                      (done / (1024 ** 2 * (time.time() - startt)))))
                    sys.stdout.flush()
                remotefile.write(junk)

                if done - lastflush > 128 * 1024**2:
                    remotefile.flush()
                    lastflush = done

                digest.update(junk)
                junk = gpg.stdout.read(BUFFER)

            sys.stdout.write('\r')
            timedelta = time.time() - startt
            print_colored(" %.3f GB DONE (%.3f MB/s) Total: %02d:%02d:%02d" %
                          ((1.0 * done) / (1024 ** 3),
                           (done / (1024 ** 2 * timedelta)),
                           timedelta / 3600, (timedelta / 60) % 60, timedelta % 60),
                          'green')
            digestfile.write("%s %s\n" % (digest.hexdigest(), filename))

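# Walk the local snapshots of every dataset in order and upload the ones
# missing on the backup server: the first snapshot is sent as a full
# stream, a snapshot sharing the first seven name characters (presumably
# the YYYY-MM part) with its predecessor is sent incrementally against
# it, otherwise a new full stream is started.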
def syncronize(local_datasets, remote_datasets):
    for device in local_datasets.keys():
        current = ""
        for dataset in local_datasets[device]:
            last = current
            current = dataset

            if device in remote_datasets:
                if dataset in remote_datasets[device]:
                    print_colored("%s@%s -- found on remote server" % (device, dataset), 'yellow')
                    continue

            if last == '':
                print_colored("Initial synchronization for device %s" % device, 'green')
                sftp_send("%s@%s" % (device, dataset))
                lastmonth = dataset
                continue

            if last[:7] == dataset[:7]:
                print_colored("%s@%s -- incremental backup (reference: %s)" %
                              (device, dataset, last), 'green')
                sftp_send("%s@%s" % (device, dataset), last)
            else:
                print_colored("%s@%s -- full backup" % (device, dataset), 'green')
                sftp_send("%s@%s" % (device, dataset))
                #print_colored("%s@%s -- doing incremental backup" % (device, dataset), 'green')
                #sftp_send("%s@%s" % (device, dataset), lastmonth)
                #lastmonth = dataset

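# List the encrypted dumps already present on the backup server and
# recover the "dataset@snapshot" part of their file names.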
def get_remote_datasets():
    datasets = sftp.listdir()
    datasets = filter(lambda x: '@' in x, datasets)

    datasets = [ entry.split('.')[0] for entry in datasets ]

    return postprocess_datasets(datasets)

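# Ask zfs for all local snapshot names and strip the leading pool prefix
# (entry[5:] assumes ZPOOL == 'base', i.e. a five character "base/" prefix).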
def get_local_datasets():
    datasets = subprocess.check_output(['sudo', 'zfs', 'list', '-t', 'snapshot', '-H', '-o', 'name'])
    datasets = datasets.strip().split('\n')

    datasets = [ entry[5:] for entry in datasets ]

    return postprocess_datasets(datasets)

def main():
    sftp_connect()
    syncronize(get_local_datasets(), get_remote_datasets())

if __name__ == '__main__':
    main()