Skip to content

Commit d02f6cd

Browse files
committed
Added non-git source puller functionality
Handles compressed non-git source archives from Google Drive, Dropbox, and any publicly available web address.
1 parent 1e57904 commit d02f6cd

File tree

11 files changed

+1738
-7
lines changed

11 files changed

+1738
-7
lines changed

MANIFEST.in

+1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
include *.md
22
include LICENSE
33
include setup.cfg
4+
recursive-include nbgitpuller/plugins *
45
recursive-include nbgitpuller/static *
56
recursive-include nbgitpuller/templates *

nbgitpuller/handlers.py

+38-5
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,12 @@
1111

1212
from .pull import GitPuller
1313
from .version import __version__
14+
from .hookspecs import NonGitSourceSpec
15+
from .plugins.zip_puller import ZipSourceGoogleDriveDownloader
16+
from .plugins.zip_puller import ZipSourceDropBoxDownloader
17+
from .plugins.zip_puller import ZipSourceWebDownloader
18+
import pluggy
19+
import distutils.util
1420

1521

1622
class SyncHandler(IPythonHandler):
@@ -38,6 +44,17 @@ def emit(self, data):
3844
self.write('data: {}\n\n'.format(serialized_data))
3945
yield self.flush()
4046

47+
def setup_plugins(self, repo):
48+
pm = pluggy.PluginManager("nbgitpuller")
49+
pm.add_hookspecs(NonGitSourceSpec)
50+
if "drive.google.com" in repo:
51+
pm.register(ZipSourceGoogleDriveDownloader())
52+
elif "dropbox.com" in repo:
53+
pm.register(ZipSourceDropBoxDownloader())
54+
else:
55+
pm.register(ZipSourceWebDownloader())
56+
return pm
57+
4158
@web.authenticated
4259
@gen.coroutine
4360
def get(self):
@@ -53,6 +70,7 @@ def get(self):
5370
try:
5471
repo = self.get_argument('repo')
5572
branch = self.get_argument('branch', None)
73+
compressed = self.get_argument('compressed', "false")
5674
depth = self.get_argument('depth', None)
5775
if depth:
5876
depth = int(depth)
@@ -73,6 +91,12 @@ def get(self):
7391
self.set_header('content-type', 'text/event-stream')
7492
self.set_header('cache-control', 'no-cache')
7593

94+
if distutils.util.strtobool(compressed):
95+
pm = self.setup_plugins(repo)
96+
results = pm.hook.handle_files(repo=repo, repo_parent_dir=repo_parent_dir)[0]
97+
repo_dir = repo_parent_dir + results["unzip_dir"]
98+
repo = "file://" + results["origin_repo_path"]
99+
76100
gp = GitPuller(repo, repo_dir, branch=branch, depth=depth, parent=self.settings['nbapp'])
77101

78102
q = Queue()
@@ -151,16 +175,19 @@ def get(self):
151175
repo = self.get_argument('repo')
152176
branch = self.get_argument('branch', None)
153177
depth = self.get_argument('depth', None)
178+
compressed = self.get_argument('compressed', "false")
154179
urlPath = self.get_argument('urlpath', None) or \
155-
self.get_argument('urlPath', None)
180+
self.get_argument('urlPath', None)
156181
subPath = self.get_argument('subpath', None) or \
157-
self.get_argument('subPath', '.')
182+
self.get_argument('subPath', '.')
158183
app = self.get_argument('app', app_env)
159184
parent_reldir = os.getenv('NBGITPULLER_PARENTPATH', '')
160185
targetpath = self.get_argument('targetpath', None) or \
161-
self.get_argument('targetPath', repo.split('/')[-1])
186+
self.get_argument('targetPath', repo.split('/')[-1])
162187

163-
if urlPath:
188+
if distutils.util.strtobool(compressed):
189+
path = 'tree/'
190+
elif urlPath:
164191
path = urlPath
165192
else:
166193
path = os.path.join(parent_reldir, targetpath, subPath)
@@ -174,7 +201,13 @@ def get(self):
174201
self.write(
175202
self.render_template(
176203
'status.html',
177-
repo=repo, branch=branch, path=path, depth=depth, targetpath=targetpath, version=__version__
204+
repo=repo,
205+
branch=branch,
206+
compressed=compressed,
207+
path=path,
208+
depth=depth,
209+
targetpath=targetpath,
210+
version=__version__
178211
))
179212
self.flush()
180213

nbgitpuller/hookspecs.py

+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
import pluggy
2+
3+
hookspec = pluggy.HookspecMarker("nbgitpuller")
4+
5+
6+
class NonGitSourceSpec(object):
    """Hook specification for plugins that pull non-git sources."""

    @hookspec
    def handle_files(self, repo, repo_parent_dir):
        """
        Download non-git source files into the user directory and merge
        them into a local git repository.

        Once that local repository has been updated (or created, on the
        first pull), git puller can treat the directory exactly as it
        would sources coming from a real git remote.
        """

nbgitpuller/plugins/__init__.py

Whitespace-only changes.

nbgitpuller/plugins/plugin_helper.py

+146
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
import subprocess
2+
import os
3+
import stat
4+
import logging
5+
import requests
6+
from requests_file import FileAdapter
7+
import shutil
8+
import re
9+
10+
11+
# for large files from Google Drive
def get_confirm_token(response):
    """Return Google Drive's large-file confirmation token, or None.

    Drive answers the first request for a large file with a
    'download_warning*' cookie instead of the content; its value must be
    echoed back as the ``confirm`` parameter on the retry.
    """
    return next(
        (value for key, value in response.cookies.items()
         if key.startswith('download_warning')),
        None,
    )
17+
18+
19+
# sets up a local bare repo that acts like a remote
def initialize_local_repo(local_repo_path):
    """Create (if needed) a bare git repo at *local_repo_path* and make it
    serveable over dumb HTTP by enabling the post-update hook.

    :param local_repo_path: directory for the bare repository
    """
    logging.info(f"Creating local_repo_path: {local_repo_path}")
    # exist_ok only tolerates an already-existing directory; real OS errors
    # (e.g. permissions) still propagate instead of being swallowed.
    os.makedirs(local_repo_path, exist_ok=True)

    subprocess_helper("git init --bare", local_repo_path)

    # Make our bare repository serveable over dumb HTTP
    hook_path = os.path.join(local_repo_path, 'hooks', 'post-update')
    sample_path = os.path.join(local_repo_path, 'hooks', 'post-update.sample')
    # On a re-initialization the sample hook has already been renamed;
    # renaming unconditionally would raise FileNotFoundError.
    if os.path.exists(sample_path):
        os.rename(sample_path, hook_path)
    os.chmod(hook_path, os.stat(hook_path).st_mode | stat.S_IEXEC)
36+
37+
38+
# local repo cloned from the "remote" which is in user drive
def clone_local_origin_repo(origin_repo_path, temp_download_repo):
    """Clone the cached bare 'origin' repo into a temporary working dir.

    :param origin_repo_path: path of the bare repository acting as origin
    :param temp_download_repo: working directory the clone lands in
    """
    logging.info(f"Creating temp_download_repo: {temp_download_repo}")
    # exist_ok only tolerates an already-existing directory; other OS
    # errors (e.g. permissions) still propagate instead of being swallowed.
    os.makedirs(temp_download_repo, exist_ok=True)

    cmd = f"git clone file://{origin_repo_path} {temp_download_repo}"
    subprocess_helper(cmd, temp_download_repo)
48+
49+
50+
# this is needed to unarchive various formats(eg. zip, tgz, etc)
def determine_file_extension(url, response):
    """Determine the archive extension so shutil can unpack it.

    Tries, in order: the Content-Disposition filename, a Content-Type of
    zip, and finally the last path segment of the URL.

    :param url: the download URL (fallback source of the extension)
    :param response: the HTTP response (headers are inspected)
    :raises Exception: if no extension can be determined
    """
    def _ext_from_name(name):
        # Honour compound tar suffixes; naively taking split(".")[1] breaks
        # both "src.tar.gz" (loses the .gz) and "my.data.zip" (returns "data").
        lowered = name.lower()
        for compound in ("tar.gz", "tar.bz2", "tar.xz"):
            if lowered.endswith("." + compound):
                return compound
        if "." in name:
            return name.rsplit(".", 1)[1]
        return None

    file_type = response.headers.get('content-type')
    content_disposition = response.headers.get('content-disposition')
    ext = None
    if content_disposition:
        fname = re.findall(r"filename\*?=([^;]+)", content_disposition)
        if fname:  # a malformed header no longer raises IndexError
            ext = _ext_from_name(fname[0].strip().strip('"'))
    if not ext and file_type and "/zip" in file_type:
        ext = "zip"
    if not ext:
        last_segment = url.split("/")[-1]
        if "?" in last_segment:
            last_segment = last_segment[:last_segment.find('?')]
        ext = _ext_from_name(last_segment)

    if not ext:
        m = f"Could not determine the file extension for unarchiving: {url}"
        raise Exception(m)
    return ext
72+
73+
74+
# the downloaded content is in the response -- unarchive and save to the disk
def save_response_content(url, response, temp_download_repo):
    """Stream the archive in the response to disk, unpack it into
    *temp_download_repo*, then delete the downloaded archive file.

    :param url: the download URL (used to determine the archive format)
    :param response: a streamed requests response
    :param temp_download_repo: directory the archive is unpacked into
    :raises Exception: wrapping any failure, with the cause chained
    """
    try:
        ext = determine_file_extension(url, response)
        CHUNK_SIZE = 32768
        temp_download_file = os.path.join(temp_download_repo, f"download.{ext}")
        with open(temp_download_file, "wb") as f:
            for chunk in response.iter_content(CHUNK_SIZE):
                # filter out keep-alive new chunks
                if chunk:
                    f.write(chunk)

        shutil.unpack_archive(temp_download_file, temp_download_repo)

        os.remove(temp_download_file)
    except Exception as e:
        m = f"Problem handling file download: {str(e)}"
        # chain the cause so the original traceback is not lost
        raise Exception(m) from e
92+
93+
94+
# grab archive file from url
def fetch_files(url, id=-1):
    """Fetch the archive at *url* as a streamed response, handling Google
    Drive's large-file confirmation round-trip when required.

    :param url: the download URL
    :param id: optional file id passed as a query parameter
    """
    session = requests.Session()
    session.mount('file://', FileAdapter())  # add adapter for pytests
    params = {'id': id}
    response = session.get(url, params=params, stream=True)
    confirm_token = get_confirm_token(response)
    if confirm_token:
        # retry, echoing back the confirmation cookie value
        params['confirm'] = confirm_token
        response = session.get(url, params=params, stream=True)

    return response
105+
106+
107+
# this drives the file handling -- called from zip_puller by all the
# handle_files implementations for GoogleDrive, Dropbox, and standard
# Web url
def handle_files_helper(args):
    """Download a non-git archive and merge it into the cached local
    'origin' repository so GitPuller can treat it like a git remote.

    :param args: dict with keys: repo (download url), repo_parent_dir,
        response (streamed requests response), origin_dir, download_dir
    :returns: dict with "unzip_dir" (top-level extracted directory name)
        and "origin_repo_path" (path of the bare origin repo)
    :raises ValueError: wrapping any failure, with the cause chained
    """
    try:
        # os.path.join is safe whether or not repo_parent_dir carries a
        # trailing slash; plain concatenation was not.
        origin_repo = os.path.join(args["repo_parent_dir"], args["origin_dir"])
        temp_download_repo = os.path.join(args["repo_parent_dir"], args["download_dir"])
        if os.path.exists(temp_download_repo):
            shutil.rmtree(temp_download_repo)

        if not os.path.exists(origin_repo):
            initialize_local_repo(origin_repo)

        clone_local_origin_repo(origin_repo, temp_download_repo)
        save_response_content(args["repo"], args["response"], temp_download_repo)
        subprocess_helper("git add .", temp_download_repo)
        # NOTE(review): the committer identity below was redacted in the
        # diff this was recovered from -- confirm the original address.
        subprocess_helper("git -c user.email=nbgitpuller@nbgitpuller.link -c user.name=nbgitpuller commit -m test --allow-empty", temp_download_repo)
        subprocess_helper("git push origin master", temp_download_repo)
        unzipped_dirs = os.listdir(temp_download_repo)

        dir_names = [d for d in unzipped_dirs if ".git" not in d]
        return {"unzip_dir": dir_names[0], "origin_repo_path": origin_repo}
    except Exception as e:
        logging.exception(e)
        # chain the cause so the original traceback is not lost
        raise ValueError(e) from e
132+
133+
134+
# executes git commands for us
def subprocess_helper(cmd, cwd):
    """Run *cmd* (split on spaces) in directory *cwd*.

    :param cmd: the command line to execute; must not rely on shell
        quoting since it is split on single spaces
    :param cwd: working directory for the command
    :raises Exception: on any failure, chaining the cause and including
        the captured stderr when the command itself failed
    """
    try:
        subprocess.run(
            cmd.split(" "),
            capture_output=True,
            text=True,
            check=True,
            cwd=cwd
        )
    except subprocess.CalledProcessError as e:
        # surface stderr -- the bare message made failures undiagnosable
        m = f"Problem executing git command: {cmd} -- {e.stderr}"
        raise Exception(m) from e
    except Exception as e:
        m = f"Problem executing git command: {cmd}"
        raise Exception(m) from e

nbgitpuller/plugins/zip_puller.py

+67
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
from .plugin_helper import fetch_files
2+
from .plugin_helper import handle_files_helper
3+
import pluggy
4+
5+
hookimpl = pluggy.HookimplMarker("nbgitpuller")
6+
TEMP_DOWNLOAD_REPO_DIR = ".temp_download_repo"
7+
CACHED_ORIGIN_NON_GIT_REPO = ".origin_non_git_sources"
8+
9+
10+
# handles standard web addresses(not google drive or dropbox)
class ZipSourceWebDownloader(object):
    @hookimpl
    def handle_files(self, repo, repo_parent_dir):
        """Download a compressed archive from a plain public URL.

        :param repo: the download url
        :param repo_parent_dir: where we will create the repo
        """
        response = fetch_files(repo)
        return handle_files_helper({
            "repo": repo,
            "repo_parent_dir": repo_parent_dir,
            "response": response,
            "origin_dir": CACHED_ORIGIN_NON_GIT_REPO,
            "download_dir": TEMP_DOWNLOAD_REPO_DIR,
        })
25+
26+
27+
# handles downloads from google drive
class ZipSourceGoogleDriveDownloader(object):
    def __init__(self):
        # Drive's direct-download endpoint; the file id goes in the params
        self.DOWNLOAD_URL = "https://docs.google.com/uc?export=download"

    def get_id(self, repo):
        """Extract the Drive file id from a sharing URL.

        Handles .../d/<id>/view links as before, and now also tolerates
        links without a trailing /view segment or with query fragments
        (the old ``repo.index("/view")`` raised ValueError on those).
        """
        file_id = repo[repo.index("d/") + 2:]
        for sep in ("/", "?", "#"):
            file_id = file_id.split(sep)[0]
        return file_id

    # repo --> this is download url - it has the id
    # repo_parent_dir --> where we will create the repo
    @hookimpl
    def handle_files(self, repo, repo_parent_dir):
        """Download a compressed archive shared via Google Drive.

        :param repo: the Drive sharing url containing the file id
        :param repo_parent_dir: where we will create the repo
        """
        response = fetch_files(self.DOWNLOAD_URL, self.get_id(repo))
        args = {
            "repo": repo,
            "repo_parent_dir": repo_parent_dir,
            "response": response,
            "origin_dir": CACHED_ORIGIN_NON_GIT_REPO,
            "download_dir": TEMP_DOWNLOAD_REPO_DIR
        }
        return handle_files_helper(args)
50+
51+
52+
# handles downloads from DropBox
class ZipSourceDropBoxDownloader(object):
    @hookimpl
    def handle_files(self, repo, repo_parent_dir):
        """Download a compressed archive shared via Dropbox.

        :param repo: the download url
        :param repo_parent_dir: where we will create the repo
        """
        # dl=1 turns a Dropbox preview link into a direct download
        download_url = repo.replace("dl=0", "dl=1")
        response = fetch_files(download_url)
        return handle_files_helper({
            "repo": download_url,
            "repo_parent_dir": repo_parent_dir,
            "response": response,
            "origin_dir": CACHED_ORIGIN_NON_GIT_REPO,
            "download_dir": TEMP_DOWNLOAD_REPO_DIR,
        })

nbgitpuller/static/index.js

+6-1
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,13 @@ require([
1212

1313
Terminal.applyAddon(fit);
1414

15-
function GitSync(baseUrl, repo, branch, depth, targetpath, path) {
15+
function GitSync(baseUrl, repo, branch, depth, compressed, targetpath, path) {
1616
// Class that talks to the API backend & emits events as appropriate
1717
this.baseUrl = baseUrl;
1818
this.repo = repo;
1919
this.branch = branch;
2020
this.depth = depth;
21+
this.compressed = compressed;
2122
this.targetpath = targetpath;
2223
this.redirectUrl = baseUrl + path;
2324

@@ -52,6 +53,9 @@ require([
5253
if (typeof this.branch !== 'undefined' && this.branch != undefined) {
5354
syncUrlParams['branch'] = this.branch;
5455
}
56+
if (typeof this.compressed !== 'undefined' && this.compressed != undefined) {
57+
syncUrlParams['compressed'] = this.compressed;
58+
}
5559
var syncUrl = this.baseUrl + 'git-pull/api?' + $.param(syncUrlParams);
5660

5761
this.eventSource = new EventSource(syncUrl);
@@ -133,6 +137,7 @@ require([
133137
utils.get_body_data('repo'),
134138
utils.get_body_data('branch'),
135139
utils.get_body_data('depth'),
140+
utils.get_body_data('compressed'),
136141
utils.get_body_data('targetpath'),
137142
utils.get_body_data('path')
138143
);

nbgitpuller/templates/status.html

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
data-path="{{ path | urlencode }}"
88
{% if branch %}data-branch="{{ branch | urlencode }}"{% endif %}
99
{% if depth %}data-depth="{{ depth | urlencode }}"{% endif %}
10+
{% if compressed %}data-compressed="{{ compressed | urlencode }}"{% endif %}
1011
data-targetpath="{{ targetpath | urlencode }}"
1112
{% endblock %}
1213

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
packages=find_packages(),
2222
include_package_data=True,
2323
platforms='any',
24-
install_requires=['notebook>=5.5.0', 'tornado'],
24+
install_requires=['notebook>=5.5.0', 'tornado', 'requests', 'requests-file'],
2525
data_files=[
2626
('etc/jupyter/jupyter_notebook_config.d', ['nbgitpuller/etc/nbgitpuller.json'])
2727
],

0 commit comments

Comments
 (0)