Skip to content

Commit 095b712

Browse files
authored
Merge pull request #383 from ipdgroup/master
Implementing incremental by files, safer version of incremental backup.
2 parents 3a4aebb + 0f34ecb commit 095b712

File tree

2 files changed

+32
-4
lines changed

2 files changed

+32
-4
lines changed

README.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ CLI Help output::
8080
log level to use (default: info, possible levels:
8181
debug, info, warning, error, critical)
8282
-i, --incremental incremental backup
83+
--incremental-by-files incremental backup based on modification date of files
8384
--starred include JSON output of starred repositories in backup
8485
--all-starred include starred repositories in backup [*]
8586
--watched include JSON output of watched repositories in backup
@@ -239,6 +240,12 @@ Using (``-i, --incremental``) will only request new data from the API **since th
239240

240241
This means any blocking errors on previous runs can cause a large amount of missing data in backups.
241242

243+
Using (``--incremental-by-files``) will request new data from the API **based on when the file was modified on the filesystem**. For example, if you modify a backup file yourself, later updates to that item may be skipped.
244+
245+
This is still safer than the previous version.
246+
247+
Specifically, issues and pull requests are handled like this.
248+
242249
Known blocking errors
243250
---------------------
244251

github_backup/github_backup.py

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,12 @@ def parse_args(args=None):
181181
dest="incremental",
182182
help="incremental backup",
183183
)
184+
parser.add_argument(
185+
"--incremental-by-files",
186+
action="store_true",
187+
dest="incremental_by_files",
188+
help="incremental backup based on modification date of files",
189+
)
184190
parser.add_argument(
185191
"--starred",
186192
action="store_true",
@@ -1114,16 +1120,24 @@ def backup_issues(args, repo_cwd, repository, repos_template):
11141120
comments_template = _issue_template + "/{0}/comments"
11151121
events_template = _issue_template + "/{0}/events"
11161122
for number, issue in list(issues.items()):
1123+
issue_file = "{0}/{1}.json".format(issue_cwd, number)
1124+
if args.incremental_by_files and os.path.isfile(issue_file):
1125+
modified = os.path.getmtime(issue_file)
1126+
modified = datetime.fromtimestamp(modified).strftime("%Y-%m-%dT%H:%M:%SZ")
1127+
if modified > issue["updated_at"]:
1128+
logger.info("Skipping issue {0} because it wasn't modified since last backup".format(number))
1129+
continue
1130+
11171131
if args.include_issue_comments or args.include_everything:
11181132
template = comments_template.format(number)
11191133
issues[number]["comment_data"] = retrieve_data(args, template)
11201134
if args.include_issue_events or args.include_everything:
11211135
template = events_template.format(number)
11221136
issues[number]["event_data"] = retrieve_data(args, template)
11231137

1124-
issue_file = "{0}/{1}.json".format(issue_cwd, number)
1125-
with codecs.open(issue_file, "w", encoding="utf-8") as f:
1138+
with codecs.open(issue_file + ".temp", "w", encoding="utf-8") as f:
11261139
json_dump(issue, f)
1140+
os.rename(issue_file + ".temp", issue_file) # Unlike json_dump, this is atomic
11271141

11281142

11291143
def backup_pulls(args, repo_cwd, repository, repos_template):
@@ -1176,6 +1190,13 @@ def backup_pulls(args, repo_cwd, repository, repos_template):
11761190
comments_template = _pulls_template + "/{0}/comments"
11771191
commits_template = _pulls_template + "/{0}/commits"
11781192
for number, pull in list(pulls.items()):
1193+
pull_file = "{0}/{1}.json".format(pulls_cwd, number)
1194+
if args.incremental_by_files and os.path.isfile(pull_file):
1195+
modified = os.path.getmtime(pull_file)
1196+
modified = datetime.fromtimestamp(modified).strftime("%Y-%m-%dT%H:%M:%SZ")
1197+
if modified > pull["updated_at"]:
1198+
logger.info("Skipping pull request {0} because it wasn't modified since last backup".format(number))
1199+
continue
11791200
if args.include_pull_comments or args.include_everything:
11801201
template = comments_regular_template.format(number)
11811202
pulls[number]["comment_regular_data"] = retrieve_data(args, template)
@@ -1185,9 +1206,9 @@ def backup_pulls(args, repo_cwd, repository, repos_template):
11851206
template = commits_template.format(number)
11861207
pulls[number]["commit_data"] = retrieve_data(args, template)
11871208

1188-
pull_file = "{0}/{1}.json".format(pulls_cwd, number)
1189-
with codecs.open(pull_file, "w", encoding="utf-8") as f:
1209+
with codecs.open(pull_file + ".temp", "w", encoding="utf-8") as f:
11901210
json_dump(pull, f)
1211+
os.rename(pull_file + ".temp", pull_file) # Unlike json_dump, this is atomic
11911212

11921213

11931214
def backup_milestones(args, repo_cwd, repository, repos_template):

0 commit comments

Comments
 (0)