Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

improved detection of files and directories in GoogleStorageMixin #129

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
Open
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@
*.pyc
*.eggs/
*.egg-info
.idea
mezzanine-git/
2 changes: 1 addition & 1 deletion filebrowser_safe/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def __len__(self):

@cached_property
def filetype(self):
if self.is_folder:
if not default_storage.type_checks_slow and self.is_folder:
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the browse view performs certain checks – which with GCS are now network calls – over and over, for each object in the directory listing. this one in particular seemed unnecessary, (especially when GCS's concept of a "folder" is so uncertain as it is).

return 'Folder'
return get_file_type(self.filename)

Expand Down
2 changes: 1 addition & 1 deletion filebrowser_safe/locale/cs/LC_MESSAGES/django.po
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ msgstr[1] "%(counter) výsledky"
#: templates/filebrowser/include/toolbar.html:9
#, python-format
msgid "%(full_result_count)s total"
msgstr "%(full_result_count) celkem"
msgstr "%(full_result_count)s celkem"

#: templates/filebrowser/include/search.html:5
msgid "Clear Restrictions"
Expand Down
77 changes: 66 additions & 11 deletions filebrowser_safe/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@
# coding: utf-8

# PYTHON IMPORTS
import itertools
import os
import shutil
import posixpath

# DJANGO IMPORTS
from django.core.files.move import file_move_safe
Expand All @@ -14,6 +16,7 @@ class StorageMixin(object):
"""
Adds some useful methods to the Storage class.
"""
type_checks_slow = False

def isdir(self, name):
"""
Expand Down Expand Up @@ -112,15 +115,32 @@ def makedirs(self, name):

def rmtree(self, name):
name = self._normalize_name(self._clean_name(name))
dirlist = self.listdir(self._encode_name(name))
for item in dirlist:
item.delete()
directories, files = self.listdir(self._encode_name(name))

for key in files:
self.delete('/'.join([name, key]))

for dirname in directories:
self.rmtree('/'.join([name, dirname]))


class GoogleStorageMixin(StorageMixin):

type_checks_slow = True # indicate that isfile/isdir should be avoided,
# for performance reasons, as appropriate

def isfile(self, name):
return self.exists(name)
# Because GCS does (semi-arbitrarily) create empty blobs for
# "folders," it's not enough to check whether the path exists;
# and, there's not (yet) any good way to differentiate these from
# proper files.
#
# It is POSSIBLE that an actual file name endswith / ...
# HOWEVER, it is unlikely, (and kind of evil).
#
# (Until then), just exclude paths out-of-hand if they're empty
# (i.e. the bucket root) OR if they end in /:
return bool(name) and not name.endswith('/') and self.exists(name)

def isdir(self, name):
# That's some inefficient implementation...
Expand All @@ -132,16 +152,25 @@ def isdir(self, name):
if self.isfile(name):
return False

name = self._normalize_name(self._clean_name(name))
dirlist = self.bucket.list(self._encode_name(name))
# rather than ``listdir()``, which retrieves all results, retrieve
# blob iterator directly, and return as soon as ANY retrieved
name = self._normalize_name(clean_name(name))
# For bucket.list_blobs and logic below name needs to end in /
if not name.endswith('/'):
name += '/'

# Check whether the iterator is empty
for item in dirlist:
iterator = self.bucket.list_blobs(prefix=self._encode_name(name), delimiter='/')
dirlist = itertools.chain(iterator, iterator.prefixes)

# Check for contents
try:
next(dirlist)
except StopIteration:
return False
else:
return True
return False

def move(self, old_file_name, new_file_name, allow_overwrite=False):

if self.exists(new_file_name):
if allow_overwrite:
self.delete(new_file_name)
Expand All @@ -163,6 +192,32 @@ def makedirs(self, name):

def rmtree(self, name):
name = self._normalize_name(self._clean_name(name))
dirlist = self.bucket.list(self._encode_name(name))
dirlist = self.listdir(self._encode_name(name))
for item in dirlist:
item.delete()

def _clean_name(self, name):
"""
Cleans the name so that Windows style paths work
"""
return clean_name(name)


def clean_name(name):
"""
Cleans the name so that Windows style paths work
"""
# Normalize Windows style paths
clean_name = posixpath.normpath(name).replace('\\', '/')

# os.path.normpath() can strip trailing slashes so we implement
# a workaround here.
if name.endswith('/') and not clean_name.endswith('/'):
# Add a trailing slash as it was stripped.
clean_name = clean_name + '/'

# Given an empty string, os.path.normpath() will return ., which we don't want
if clean_name == '.':
clean_name = ''

return clean_name
6 changes: 4 additions & 2 deletions filebrowser_safe/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,9 @@ def browse(request):
raise ImproperlyConfigured(_("Error finding Upload-Folder. Maybe it does not exist?"))
redirect_url = reverse("fb_browse") + query_helper(query, "", "dir")
return HttpResponseRedirect(redirect_url)
abs_path = os.path.join(get_directory(), path)

base_dir = get_directory()
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

get_directory() also requires a network call, and simply holding onto its result, here, made a significant difference.

abs_path = os.path.join(base_dir, path)

# INITIAL VARIABLES
results_var = {'results_total': 0, 'results_current': 0, 'delete_total': 0, 'images_total': 0, 'select_total': 0}
Expand All @@ -121,7 +123,7 @@ def browse(request):

# CREATE FILEOBJECT
url_path = "/".join([s.strip("/") for s in
[get_directory(), path.replace("\\", "/"), file] if s.strip("/")])
[base_dir, path.replace("\\", "/"), file] if s.strip("/")])
fileobject = FileObject(url_path)


Expand Down