Skip to content

Commit 6d76554

Browse files
committed
Added non-git source puller functionality
Handles non-git compressed source archives from Google Drive, Dropbox, and any publicly accessible web address.
1 parent 1e57904 commit 6d76554

File tree

11 files changed

+1721
-6
lines changed

11 files changed

+1721
-6
lines changed

MANIFEST.in

+1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
include *.md
22
include LICENSE
33
include setup.cfg
4+
recursive-include nbgitpuller/plugins *
45
recursive-include nbgitpuller/static *
56
recursive-include nbgitpuller/templates *

nbgitpuller/handlers.py

+34-4
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,11 @@
1111

1212
from .pull import GitPuller
1313
from .version import __version__
14+
from .hookspecs import handle_files
15+
from .plugins.zip_puller import ZipSourceGoogleDriveDownloader
16+
from .plugins.zip_puller import ZipSourceDropBoxDownloader
17+
from .plugins.zip_puller import ZipSourceWebDownloader
18+
import pluggy
1419

1520

1621
class SyncHandler(IPythonHandler):
@@ -38,6 +43,17 @@ def emit(self, data):
3843
self.write('data: {}\n\n'.format(serialized_data))
3944
yield self.flush()
4045

46+
def setup_plugins(self, repo):
    """
    Create a pluggy PluginManager wired up for the given source.

    :param str repo: download URL of the compressed sources; the host
        in the URL determines which downloader plugin is registered
    :return: the configured PluginManager, ready for
        ``pm.hook.handle_files(...)`` calls
    """
    pm = pluggy.PluginManager("nbgitpuller")
    # pluggy's add_hookspecs expects a module or class namespace and
    # scans it for @hookspec-marked functions; passing the bare
    # handle_files function object would make it find no specs at all.
    from . import hookspecs
    pm.add_hookspecs(hookspecs)
    if "drive.google.com" in repo:
        pm.register(ZipSourceGoogleDriveDownloader())
    elif "dropbox.com" in repo:
        pm.register(ZipSourceDropBoxDownloader())
    else:
        pm.register(ZipSourceWebDownloader())
    return pm
56+
4157
@web.authenticated
4258
@gen.coroutine
4359
def get(self):
@@ -53,6 +69,7 @@ def get(self):
5369
try:
5470
repo = self.get_argument('repo')
5571
branch = self.get_argument('branch', None)
72+
compressed = self.get_argument('compressed', "false")
5673
depth = self.get_argument('depth', None)
5774
if depth:
5875
depth = int(depth)
@@ -73,6 +90,12 @@ def get(self):
7390
self.set_header('content-type', 'text/event-stream')
7491
self.set_header('cache-control', 'no-cache')
7592

93+
if compressed == 'true':
94+
pm = self.setup_plugins(repo)
95+
results = pm.hook.handle_files(repo=repo, repo_parent_dir=repo_parent_dir)[0]
96+
repo_dir = repo_parent_dir + results["unzip_dir"]
97+
repo = "file://" + results["origin_repo_path"]
98+
7699
gp = GitPuller(repo, repo_dir, branch=branch, depth=depth, parent=self.settings['nbapp'])
77100

78101
q = Queue()
@@ -151,14 +174,15 @@ def get(self):
151174
repo = self.get_argument('repo')
152175
branch = self.get_argument('branch', None)
153176
depth = self.get_argument('depth', None)
177+
compressed = self.get_argument('compressed', "false")
154178
urlPath = self.get_argument('urlpath', None) or \
155-
self.get_argument('urlPath', None)
179+
self.get_argument('urlPath', None)
156180
subPath = self.get_argument('subpath', None) or \
157-
self.get_argument('subPath', '.')
181+
self.get_argument('subPath', '.')
158182
app = self.get_argument('app', app_env)
159183
parent_reldir = os.getenv('NBGITPULLER_PARENTPATH', '')
160184
targetpath = self.get_argument('targetpath', None) or \
161-
self.get_argument('targetPath', repo.split('/')[-1])
185+
self.get_argument('targetPath', repo.split('/')[-1])
162186

163187
if urlPath:
164188
path = urlPath
@@ -174,7 +198,13 @@ def get(self):
174198
self.write(
175199
self.render_template(
176200
'status.html',
177-
repo=repo, branch=branch, path=path, depth=depth, targetpath=targetpath, version=__version__
201+
repo=repo,
202+
branch=branch,
203+
compressed=compressed,
204+
path=path,
205+
depth=depth,
206+
targetpath=targetpath,
207+
version=__version__
178208
))
179209
self.flush()
180210

nbgitpuller/hookspecs.py

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
import pluggy

hookspec = pluggy.HookspecMarker("nbgitpuller")


@hookspec
def handle_files(repo, repo_parent_dir):
    """
    Download non-git sources into the user directory.

    This handles the downloading of non-git source
    files into the user directory. Once downloaded,
    the files are merged into a local git repository.

    Once the local git repository is updated (or created
    the first time), gitpuller can then handle this
    directory as it would sources coming from a
    git repository.

    :param str repo: download URL of the compressed sources
    :param str repo_parent_dir: where the downloaded repo is stored
    :return: dict with two keys, ``unzip_dir`` and ``origin_repo_path``
    :rtype: dict

    NOTE: the spec must not declare ``self`` -- the hook is called as
    ``pm.hook.handle_files(repo=..., repo_parent_dir=...)`` and pluggy
    binds ``self`` automatically for implementations registered as
    plugin instances.
    """

nbgitpuller/plugins/__init__.py

Whitespace-only changes.

nbgitpuller/plugins/plugin_helper.py

+118
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
import subprocess
2+
import os
3+
import stat
4+
import logging
5+
import requests
6+
from requests_file import FileAdapter
7+
import shutil
8+
import re
9+
10+
11+
# for large files from Google Drive
12+
def get_confirm_token(response):
    """Return Google Drive's large-file confirmation token, if present.

    Google Drive sets a cookie whose name begins with
    ``download_warning`` when a file is too large to virus-scan; its
    value has to be echoed back to confirm the download. Returns
    ``None`` when no such cookie exists.
    """
    return next(
        (val for name, val in response.cookies.items()
         if name.startswith('download_warning')),
        None,
    )
17+
18+
19+
# sets up a local bare repo that acts like a remote
20+
def initialize_local_repo(local_repo_path):
    """Create *local_repo_path* (if needed) and run ``git init --bare`` in it.

    The bare repo acts as the local "origin" that downloaded,
    non-git sources are pushed into.
    """
    logging.info(f"Creating local_repo_path: {local_repo_path}")
    os.makedirs(local_repo_path, exist_ok=True)
    cmd = ["git", "init", "--bare"]
    subprocess.check_output(cmd, cwd=local_repo_path)
25+
26+
27+
# local repo cloned from the "remote" which is in user drive
28+
def clone_local_origin_repo(origin_repo_path, temp_download_repo):
    """Clone the local "origin" bare repo into *temp_download_repo*.

    The clone is a working tree the downloaded files get unpacked and
    committed into before being pushed back to the origin.
    """
    logging.info(f"Creating temp_download_repo: {temp_download_repo}")
    os.makedirs(temp_download_repo, exist_ok=True)
    subprocess.check_output(
        ["git", "clone", f"file://{origin_repo_path}", temp_download_repo],
        cwd=temp_download_repo,
    )
34+
35+
36+
# this is needed to unarchive various formats (e.g. zip, tgz, etc.)
37+
def determine_file_extension(url, response):
    """Figure out the archive extension of a downloaded file.

    Checks, in order: the Content-Disposition filename, the
    Content-Type header, and finally the last path segment of *url*.

    :param str url: the URL the archive was fetched from
    :param response: the (requests) response object for the download
    :return: extension without the leading dot, e.g. ``zip`` or ``tar.gz``
    :rtype: str
    :raises Exception: when no extension can be determined
    """
    file_type = response.headers.get('content-type')
    content_disposition = response.headers.get('content-disposition')
    ext = None
    if content_disposition:
        fname = re.findall("filename\\*?=([^;]+)", content_disposition)
        fname = fname[0].strip().strip('"')
        ext = _archive_extension(fname)
    elif file_type and "/zip" in file_type:
        ext = "zip"
    else:
        url = url.split("/")[-1]
        if "?" in url:
            url = url[0:url.find('?')]
        ext = _archive_extension(url)

    if not ext:
        m = f"Could not determine the file extension for unarchiving: {url}"
        raise Exception(m)
    return ext


# maps a filename to its archive extension; keeps compound tar suffixes
# intact so shutil.unpack_archive can recognize the format later
def _archive_extension(name):
    lowered = name.lower()
    for compound in (".tar.gz", ".tar.bz2", ".tar.xz"):
        if lowered.endswith(compound):
            return compound[1:]
    if "." in name:
        # take the LAST dot-suffix; split(".")[1] broke on names
        # containing more than one dot (e.g. "lesson.one.zip" -> "one")
        return name.rsplit(".", 1)[1]
    return None
58+
59+
60+
# the downloaded content is in the response -- unarchive and save to the disk
61+
def save_response_content(url, response, temp_download_repo):
    """Stream the archive in *response* to disk and unpack it.

    :param str url: the URL the archive was fetched from (used for
        extension detection and error messages)
    :param response: streaming (requests) response holding the archive
    :param str temp_download_repo: directory to unpack into
    :raises Exception: wraps any download or unpack failure
    """
    try:
        ext = determine_file_extension(url, response)
        CHUNK_SIZE = 32768
        # name the temp file with the detected extension so
        # shutil.unpack_archive can infer the archive format; the
        # original fixed ".unk" suffix made every unpack attempt fail
        temp_download_file = f"{temp_download_repo}/download.{ext}"
        with open(temp_download_file, "wb") as f:
            for chunk in response.iter_content(CHUNK_SIZE):
                # filter out keep-alive new chunks
                if chunk:
                    f.write(chunk)

        shutil.unpack_archive(temp_download_file, temp_download_repo)

        os.remove(temp_download_file)
    except Exception as e:
        m = f"Problem handling file download: {str(e)}"
        raise Exception(m) from e
79+
80+
81+
# grab archive file from url
82+
def fetch_files(url, id=-1):
    """Fetch *url* as a streaming response.

    Handles Google Drive's large-file interstitial by re-requesting
    with the confirmation token when one is issued.

    :param str url: address of the archive to download
    :param id: Google Drive file id (``-1`` for non-Drive sources)
    :return: a streaming response object
    """
    session = requests.Session()
    session.mount('file://', FileAdapter())  # add adapter for pytests

    response = session.get(url, params={'id': id}, stream=True)
    confirm = get_confirm_token(response)
    if confirm:
        response = session.get(
            url, params={'id': id, 'confirm': confirm}, stream=True)
    return response
92+
93+
94+
# this drives the file handling -- called from zip_puller by all the
# handle_files implementations for GoogleDrive, Dropbox, and standard
# Web url
97+
def handle_files_helper(args):
    """Download, unpack and commit non-git sources into a local "origin" repo.

    :param dict args: needs keys ``repo`` (source URL),
        ``repo_parent_dir``, ``response`` (streaming download),
        ``origin_dir`` and ``download_dir``
    :return: dict with ``unzip_dir`` (name of the unpacked directory)
        and ``origin_repo_path`` (path of the local bare repo)
    :rtype: dict
    :raises ValueError: wraps any failure in the pipeline
    """
    try:
        origin_repo = args["repo_parent_dir"] + args["origin_dir"]
        temp_download_repo = args["repo_parent_dir"] + args["download_dir"]
        # always start from a clean working clone
        if os.path.exists(temp_download_repo):
            shutil.rmtree(temp_download_repo)

        if not os.path.exists(origin_repo):
            initialize_local_repo(origin_repo)

        clone_local_origin_repo(origin_repo, temp_download_repo)
        save_response_content(args["repo"], args["response"], temp_download_repo)
        subprocess.check_output(["git", "add", "."], cwd=temp_download_repo)
        # identity is supplied via -c so the user's global git config is
        # untouched; the original passed a bare email address without the
        # "user.email=" key, which is not a valid key=value -c argument
        # (email reconstructed -- TODO confirm the address upstream uses)
        subprocess.check_output(
            [
                "git",
                "-c", "user.email=nbgitpuller@nbgitpuller.link",
                "-c", "user.name=nbgitpuller",
                "commit", "-m", "test", "--allow-empty",
            ],
            cwd=temp_download_repo,
        )
        subprocess.check_output(["git", "push", "origin", "master"], cwd=temp_download_repo)
        unzipped_dirs = os.listdir(temp_download_repo)

        # everything except the .git bookkeeping dir is downloaded content
        dir_names = list(filter(lambda dir: ".git" not in dir, unzipped_dirs))
        return {"unzip_dir": dir_names[0], "origin_repo_path": origin_repo}
    except Exception as e:
        logging.exception(e)
        raise ValueError(e)

nbgitpuller/plugins/zip_puller.py

+79
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
from .plugin_helper import fetch_files
2+
from .plugin_helper import handle_files_helper
3+
import pluggy
4+
5+
hookimpl = pluggy.HookimplMarker("nbgitpuller")
6+
TEMP_DOWNLOAD_REPO_DIR = ".temp_download_repo"
7+
CACHED_ORIGIN_NON_GIT_REPO = ".origin_non_git_sources"
8+
9+
10+
# handles standard web addresses(not google drive or dropbox)
11+
class ZipSourceWebDownloader(object):
    @hookimpl
    def handle_files(self, repo, repo_parent_dir):
        """
        Download a compressed archive from a plain, publicly
        accessible web address and merge it into the local repo.

        :param str repo: publicly accessible url to compressed source files
        :param str repo_parent_dir: where we will store the downloaded repo
        :return: dict with keys ``unzip_dir`` and ``origin_repo_path``
        :rtype: dict
        """
        return handle_files_helper({
            "repo": repo,
            "repo_parent_dir": repo_parent_dir,
            "response": fetch_files(repo),
            "origin_dir": CACHED_ORIGIN_NON_GIT_REPO,
            "download_dir": TEMP_DOWNLOAD_REPO_DIR,
        })
29+
30+
31+
# handles downloads from google drive
32+
class ZipSourceGoogleDriveDownloader(object):
    def __init__(self):
        # Drive share links can not be fetched directly; the file id is
        # extracted and passed to this export endpoint instead.
        self.DOWNLOAD_URL = "https://docs.google.com/uc?export=download"

    def get_id(self, repo):
        """Extract the Drive file id from a share link.

        Handles links of the form
        ``https://drive.google.com/file/d/<id>/view?usp=sharing`` and is
        also tolerant of links that omit the trailing ``/view`` segment
        (the original ``repo.index("/view")`` raised ValueError there).
        """
        start_id_index = repo.index("d/") + 2
        rest = repo[start_id_index:]
        # the id runs up to the next path separator (or end of string)
        end = rest.find("/")
        return rest if end == -1 else rest[:end]

    @hookimpl
    def handle_files(self, repo, repo_parent_dir):
        """
        Download a compressed archive shared via Google Drive and
        merge it into the local repo.

        :param str repo: google drive share link to compressed source files
        :param str repo_parent_dir: where we will store the downloaded repo
        :return: dict with keys ``unzip_dir`` and ``origin_repo_path``
        :rtype: dict
        """
        response = fetch_files(self.DOWNLOAD_URL, self.get_id(repo))
        args = {
            "repo": repo,
            "repo_parent_dir": repo_parent_dir,
            "response": response,
            "origin_dir": CACHED_ORIGIN_NON_GIT_REPO,
            "download_dir": TEMP_DOWNLOAD_REPO_DIR
        }
        return handle_files_helper(args)
58+
59+
60+
# handles downloads from DropBox
61+
class ZipSourceDropBoxDownloader(object):
    @hookimpl
    def handle_files(self, repo, repo_parent_dir):
        """
        Download a compressed archive shared via Dropbox and merge it
        into the local repo.

        :param str repo: dropbox download link to compressed source files
        :param str repo_parent_dir: where we will store the downloaded repo
        :return: dict with keys ``unzip_dir`` and ``origin_repo_path``
        :rtype: dict
        """
        # flip dl=0 -> dl=1 so Dropbox serves the file, not a preview page
        direct_url = repo.replace("dl=0", "dl=1")
        return handle_files_helper({
            "repo": direct_url,
            "repo_parent_dir": repo_parent_dir,
            "response": fetch_files(direct_url),
            "origin_dir": CACHED_ORIGIN_NON_GIT_REPO,
            "download_dir": TEMP_DOWNLOAD_REPO_DIR,
        })

nbgitpuller/static/index.js

+6-1
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,13 @@ require([
1212

1313
Terminal.applyAddon(fit);
1414

15-
function GitSync(baseUrl, repo, branch, depth, targetpath, path) {
15+
function GitSync(baseUrl, repo, branch, depth, compressed, targetpath, path) {
1616
// Class that talks to the API backend & emits events as appropriate
1717
this.baseUrl = baseUrl;
1818
this.repo = repo;
1919
this.branch = branch;
2020
this.depth = depth;
21+
this.compressed = compressed;
2122
this.targetpath = targetpath;
2223
this.redirectUrl = baseUrl + path;
2324

@@ -52,6 +53,9 @@ require([
5253
if (typeof this.branch !== 'undefined' && this.branch != undefined) {
5354
syncUrlParams['branch'] = this.branch;
5455
}
56+
if (typeof this.compressed !== 'undefined' && this.compressed != undefined) {
57+
syncUrlParams['compressed'] = this.compressed;
58+
}
5559
var syncUrl = this.baseUrl + 'git-pull/api?' + $.param(syncUrlParams);
5660

5761
this.eventSource = new EventSource(syncUrl);
@@ -133,6 +137,7 @@ require([
133137
utils.get_body_data('repo'),
134138
utils.get_body_data('branch'),
135139
utils.get_body_data('depth'),
140+
utils.get_body_data('compressed'),
136141
utils.get_body_data('targetpath'),
137142
utils.get_body_data('path')
138143
);

nbgitpuller/templates/status.html

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
data-path="{{ path | urlencode }}"
88
{% if branch %}data-branch="{{ branch | urlencode }}"{% endif %}
99
{% if depth %}data-depth="{{ depth | urlencode }}"{% endif %}
10+
{% if compressed %}data-compressed="{{ compressed | urlencode }}"{% endif %}
1011
data-targetpath="{{ targetpath | urlencode }}"
1112
{% endblock %}
1213

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
packages=find_packages(),
2222
include_package_data=True,
2323
platforms='any',
24-
install_requires=['notebook>=5.5.0', 'tornado'],
24+
install_requires=['notebook>=5.5.0', 'tornado', 'requests', 'requests-file'],
2525
data_files=[
2626
('etc/jupyter/jupyter_notebook_config.d', ['nbgitpuller/etc/nbgitpuller.json'])
2727
],

0 commit comments

Comments
 (0)