Skip to content

Commit 89a9034

Browse files
committed
mongoexport for knowls and users, basic infrastructure like argument parsing and executing subprocesses
1 parent 6f35fdd commit 89a9034

File tree

4 files changed

+133
-3
lines changed

4 files changed

+133
-3
lines changed

.gitignore

+3
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
# backup directory
2+
backup
3+
14
*.py[cod]
25

36
# C extensions

README.md

+6-3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
lmfdb-data
2-
==========
1+
LMFDB DATA MANAGEMENT
2+
=====================
3+
4+
This is a collection of Python scripts for LMFDB data management.
5+
Goals: backup, restore, import, validation, merging and updating of all data.
6+
37

4-
data management: backup, restore, import, validation, merging and updating

backup.py

+104
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf8 -*-
3+
4+
# Names accepted by the positional ``what`` command line argument.
# Each entry is dispatched to a ``backup_<name>`` method of the backup class.
BACKUPS = [
    'knowl',
    'user',
]
6+
7+
import os
8+
9+
def timestamp():
    """Return the current UTC time as a compact ``YYYYMMDD-HHMM`` string."""
    from datetime import datetime
    now = datetime.utcnow()
    return now.strftime('%Y%m%d-%H%M')
13+
14+
15+
def execute(command):
    """Run an already tokenized command, bypassing the shell.

    The child's stdout and stderr are merged, echoed line by line to
    ``sys.stdout`` while the process is running, and collected at the
    same time.

    :param command: sequence of tokens (program name followed by its
        arguments); every token is converted with ``str``.
    :returns: the complete collected output if the process exits with
        status 0. On any other exit status a report (output, exit code
        and command line) is printed and ``None`` is returned.
    """
    from subprocess import Popen, PIPE, STDOUT
    import sys

    # In case an int slipped in or something like that. This must be a real
    # list: it is handed to Popen and joined again for the error report.
    command = [str(token) for token in command]
    process = Popen(command,
                    shell=False,
                    stdout=PIPE,
                    stderr=STDOUT,
                    # text mode, so lines compare against '' on Python 2 and 3
                    universal_newlines=True)

    # Stream the output while remembering it. Reading the pipe consumes it,
    # so a later communicate() call would have nothing left to return.
    captured = []
    for line in iter(process.stdout.readline, ''):
        captured.append(line)
        sys.stdout.write(line)
        sys.stdout.flush()
    process.stdout.close()

    exit_code = process.wait()
    output = ''.join(captured)
    if exit_code == 0:
        return output

    print('ERROR: %s' % output)
    print('Exit = %s' % exit_code)
    print('CMD: %s' % ' '.join(command))
    return None
41+
42+
43+
class LMFBDBackup(object):
    """Command line driven backup of LMFDB collections using ``mongoexport``.

    Creating an instance does all the work: the command line is parsed,
    the target directory is created if it is missing, and for every
    requested backup type the matching ``backup_<type>`` method is called.

    NOTE: the class name transposes two letters of "LMFDB"; it is left
    unchanged so that existing references keep working.
    """

    def __init__(self):
        self.args = self._parse_args()

        if not os.path.exists(self.args.dir):
            print("creating %s" % self.args.dir)
            os.makedirs(self.args.dir)

        # dispatch each requested type to its backup_<type> method
        for w in self.args.what:
            getattr(self, 'backup_%s' % w)()

    def _export_command(self, db, col, outfn):
        """Return the token list of the ``mongoexport`` call for one collection.

        :param db: name of the database
        :param col: name of the collection inside ``db``
        :param outfn: path of the file the export is written to
        """
        # mongoexport's long options take two dashes: --host, not -host
        return ['mongoexport',
                '--host', self.args.dbhost, '--port', self.args.dbport,
                '-d', db, '-c', col,
                '-o', outfn]

    def export(self, db, col, outfn):
        """Export the collection ``col`` of database ``db`` into the file ``outfn``.

        Builds the ``mongoexport`` command line and passes it to ``execute``.

        :returns: whatever ``execute`` returns for that command
        """
        return execute(self._export_command(db, col, outfn))

    def backup_knowl(self):
        """Export the ``knowls`` collection of the ``knowledge`` database."""
        print("backup of all knowls")
        outfn = os.path.join(self.args.dir, "knowls-%s.json" % timestamp())
        print(self.export('knowledge', 'knowls', outfn))

    def backup_user(self):
        """Export the ``users`` collection of the ``userdb`` database."""
        print("backup of all users")
        outfn = os.path.join(self.args.dir, "users-%s.json" % timestamp())
        print(self.export('userdb', 'users', outfn))

    def _parse_args(self):
        """Parse the command line and return the resulting namespace.

        The namespace carries ``what`` (list of backup types), ``dir``
        (target directory, by default ``backup`` next to this file),
        ``dbhost`` and ``dbport``.
        """
        from argparse import ArgumentParser

        curdir = os.path.dirname(os.path.abspath(__file__))
        dir_default = os.path.join(curdir, 'backup')
        descr = 'Data Backup for LMFDB'
        epilog = 'LMFDB: www.lmfdb.org – github.org/LMFDB'
        parser = ArgumentParser(description=descr, epilog=epilog)

        parser.add_argument('what',
                            nargs='+',
                            help='what should be backed up?',
                            choices=BACKUPS)

        parser.add_argument('-o', '--out',
                            dest="dir",
                            help='the target base directory for all backups. the default is "%(default)s",',
                            default=dir_default)

        parser.add_argument('--dbhost',
                            help='the hostname of the database, default: %(default)s',
                            default="localhost")

        # type=int rejects a non-numeric port before anything is executed
        parser.add_argument('--dbport',
                            help="the port number of the database, default: %(default)s",
                            type=int,
                            default=37010)

        return parser.parse_args()
102+
103+
if __name__ == "__main__":
    # Instantiating the class parses the command line and runs the backups.
    bkb = LMFBDBackup()

codestyle.sh

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
#!/usr/bin/env bash
# This script uses pep8 / autopep8 to find Python style errors
# or even fixes them.
# Usage: ./codestyle.sh                            report style errors below lmfdb/
#        ./codestyle.sh path/to/python/file ...    run autopep8 -i on the given files

# Note: When using this codestyle fixing utility, don't forget that it
# might introduce a lot of changes. That could be hard to merge!

# increased line length. might get shorter in time ... for now even ignored
# (kept as an array so every option stays a separate word without relying
# on unquoted word splitting)
ARGS=(--max-line-length=120 --ignore=E501)

# WARN: we set the aggressive flag
AUTOPEP=(autopep8 -i --aggressive "${ARGS[@]}")

if [ -n "$1" ]; then
    "${AUTOPEP[@]}" "$@"
else
    # Quote the script directory (it may contain spaces) and stop if it
    # cannot be entered, so find never runs in an unexpected directory.
    cd "$(dirname "$0")" || exit 1
    # -exec ... {} + hands the file names to pep8 directly, so names with
    # whitespace are not split the way they would be in a find | xargs pipe.
    find lmfdb -iname '*.py' -exec pep8 "${ARGS[@]}" {} +
fi

0 commit comments

Comments
 (0)