diff --git a/signbank/dictionary/views.py b/signbank/dictionary/views.py index ac4f5f63..126cf9f6 100644 --- a/signbank/dictionary/views.py +++ b/signbank/dictionary/views.py @@ -2222,13 +2222,13 @@ def package(request): video_urls = {os.path.splitext(os.path.basename(gv.videofile.name))[0]: reverse('dictionary:protected_media', args=[gv.small_video(use_name=True) or gv.videofile.name]) for gv in GlossVideo.objects.filter(gloss__in=available_glosses, glossvideonme=None, glossvideoperspective=None, version=0) - if os.path.exists(str(gv.videofile.path)) + if gv.videofile and gv.videofile.name and os.path.exists(str(gv.videofile.path)) and os.path.getmtime(str(gv.videofile.path)) > since_timestamp} image_urls = {os.path.splitext(os.path.basename(gv.videofile.name))[0]: reverse('dictionary:protected_media', args=[gv.poster_file()]) for gv in GlossVideo.objects.filter(gloss__in=available_glosses, glossvideonme=None, glossvideoperspective=None, version=0) - if os.path.exists(str(gv.videofile.path)) - and os.path.getmtime(str(gv.videofile.path)) > since_timestamp} + if gv.videofile and gv.videofile.name and os.path.exists(str(gv.videofile.path)) + and os.path.getmtime(str(gv.videofile.path)) > since_timestamp} interface_language_code = get_interface_language_api(request, request.user) diff --git a/signbank/settings/server_specific/default.py b/signbank/settings/server_specific/default.py index de8d3540..4c5c336e 100644 --- a/signbank/settings/server_specific/default.py +++ b/signbank/settings/server_specific/default.py @@ -23,6 +23,7 @@ DATASET_METADATA_DIRECTORY = 'metadata_eafs' TEST_DATA_DIRECTORY = 'test_data' BACKUP_VIDEOS_FOLDER = 'video_backups' +DELETED_FILES_FOLDER = 'prullenmand' #Tmp folder to use TMP_DIR = '/tmp' diff --git a/signbank/video/admin.py b/signbank/video/admin.py index 3c1888f3..bf151f53 100755 --- a/signbank/video/admin.py +++ b/signbank/video/admin.py @@ -1,17 +1,28 @@ + from django.contrib import admin -from django import forms -from django.db 
import models -from signbank.video.models import GlossVideo, GlossVideoHistory, AnnotatedVideo, ExampleVideoHistory -from signbank.dictionary.models import Dataset, AnnotatedGloss +from signbank.video.models import (GlossVideo, GlossVideoHistory, AnnotatedVideo, ExampleVideoHistory, + filename_matches_nme, filename_matches_perspective, filename_matches_video, filename_matches_backup_video, flattened_video_path) +from signbank.dictionary.models import Dataset, AnnotatedGloss, Gloss from django.contrib.auth.models import User from signbank.settings.base import * -from signbank.settings.server_specific import WRITABLE_FOLDER, FILESYSTEM_SIGNBANK_GROUPS -from django.utils.translation import override, gettext_lazy as _ -from django.db.models import Q, Count, CharField, TextField, Value as V +from signbank.settings.server_specific import WRITABLE_FOLDER, FILESYSTEM_SIGNBANK_GROUPS, DEBUG_VIDEOS, DELETED_FILES_FOLDER +from django.utils.translation import gettext_lazy as _ +from signbank.tools import get_two_letter_dir +from signbank.video.convertvideo import video_file_type_extension +from pathlib import Path +import os +import stat +import shutil +import datetime as DT class GlossVideoDatasetFilter(admin.SimpleListFilter): - + """ + Filter the GlossVideo objects on the Dataset of the gloss + The values of lookups show in the right-hand column of the admin under a heading "Dataset" + Called from GlossVideoAdmin + :model: GlossVideoAdmin + """ title = _('Dataset') parameter_name = 'videos_per_dataset' @@ -28,7 +39,12 @@ def queryset(self, request, queryset): class GlossVideoFileSystemGroupFilter(admin.SimpleListFilter): - + """ + Filter the GlossVideo objects on the file system group of the video file + The values of lookups show in the right-hand column of the admin under a heading "File System Group" + Called from GlossVideoAdmin + :model: GlossVideoAdmin + """ title = _('File System Group') parameter_name = 'filesystem_group' @@ -41,12 +57,10 @@ def queryset(self, 
request, queryset): def matching_file_group(videofile, key): if not key: return False - from pathlib import Path video_file_full_path = Path(WRITABLE_FOLDER, videofile) - if video_file_full_path.exists(): - return video_file_full_path.group() == key - else: + if not video_file_full_path.exists(): return False + return video_file_full_path.group() == key queryset_res = queryset.values('id', 'videofile') results = [qv['id'] for qv in queryset_res @@ -59,7 +73,12 @@ def matching_file_group(videofile, key): class GlossVideoExistenceFilter(admin.SimpleListFilter): - + """ + Filter the GlossVideo objects on whether the the video file exists + The values of lookups show in the right-hand column of the admin under a heading "File Exists" + Called from GlossVideoAdmin + :model: GlossVideoAdmin + """ title = _('File Exists') parameter_name = 'file_exists' @@ -72,7 +91,8 @@ def queryset(self, request, queryset): def matching_file_exists(videofile, key): if not key: return False - from pathlib import Path + if 'glossvideo' not in videofile: + return False video_file_full_path = Path(WRITABLE_FOLDER, videofile) if video_file_full_path.exists(): return key == 'True' @@ -82,6 +102,131 @@ def matching_file_exists(videofile, key): queryset_res = queryset.values('id', 'videofile') results = [qv['id'] for qv in queryset_res if matching_file_exists(qv['videofile'], self.value())] + if self.value(): + return queryset.filter(id__in=results) + else: + return queryset.all() + + +class GlossVideoFilenameFilter(admin.SimpleListFilter): + """ + Filter the GlossVideo objects on whether the filename is correct for the type of video + The values of lookups show in the right-hand column of the admin under a heading "Filename Correct" + Called from GlossVideoAdmin + :model: GlossVideoAdmin + """ + title = _('Filename Correct') + parameter_name = 'filename_correct' + + def lookups(self, request, model_admin): + file_exists = tuple((b, b) for b in ('True', 'False')) + return file_exists + + def 
queryset(self, request, queryset): + + def matching_filename(videofile, nmevideo, perspective, version, key): + if not key: + return False + video_file_full_path = Path(WRITABLE_FOLDER, videofile) + if nmevideo: + filename_is_correct = filename_matches_nme(video_file_full_path) is not None + return key == str(filename_is_correct) + elif perspective: + filename_is_correct = filename_matches_perspective(video_file_full_path) is not None + return key == str(filename_is_correct) + elif version > 0: + filename_is_correct = filename_matches_backup_video(video_file_full_path) is not None + return key == str(filename_is_correct) + else: + filename_is_correct = filename_matches_video(video_file_full_path) is not None + return key == str(filename_is_correct) + + queryset_res = queryset.values('id', 'videofile', 'glossvideonme', 'glossvideoperspective', 'version') + results = [qv['id'] for qv in queryset_res + if matching_filename(qv['videofile'], + qv['glossvideonme'], + qv['glossvideoperspective'], qv['version'], self.value())] + + if self.value(): + return queryset.filter(id__in=results) + else: + return queryset.all() + + +class GlossVideoNMEFilter(admin.SimpleListFilter): + """ + Filter the GlossVideo objects on whether the video is an NME Video + The values of lookups show in the right-hand column of the admin under a heading "NME Video" + Called from GlossVideoAdmin + :model: GlossVideoAdmin + """ + title = _('NME Video') + parameter_name = 'nme_videos' + + def lookups(self, request, model_admin): + nme_video = tuple((b, b) for b in ('True', 'False')) + return nme_video + + def queryset(self, request, queryset): + if self.value(): + if self.value() == 'True': + return queryset.filter(glossvideonme__isnull=False) + else: + return queryset.filter(glossvideonme__isnull=True) + return queryset.all() + + +class GlossVideoPerspectiveFilter(admin.SimpleListFilter): + """ + Filter the GlossVideo objects on whether the video is a Perspective Video + The values of lookups show 
in the right-hand column of the admin under a heading "Perspective Video" + Called from GlossVideoAdmin + :model: GlossVideoAdmin + """ + title = _('Perspective Video') + parameter_name = 'perspective_videos' + + def lookups(self, request, model_admin): + perspective_video = tuple((b, b) for b in ('True', 'False')) + return perspective_video + + def queryset(self, request, queryset): + if self.value(): + if self.value() == 'True': + return queryset.filter(glossvideoperspective__isnull=False) + else: + return queryset.filter(glossvideoperspective__isnull=True) + return queryset.all() + + +class GlossVideoFileTypeFilter(admin.SimpleListFilter): + """ + Filter the GlossVideo objects on whether the video is an MP4 video + The values of lookups show in the right-hand column of the admin under a heading "MP4 File" + Called from GlossVideoAdmin + :model: GlossVideoAdmin + """ + title = _('MP4 File') + parameter_name = 'file_type' + + def lookups(self, request, model_admin): + file_type = tuple((b, b) for b in ('True', 'False')) + return file_type + + def queryset(self, request, queryset): + + def matching_file_type(videofile, key): + if not key: + return False + video_file_full_path = Path(WRITABLE_FOLDER, videofile) + if not video_file_full_path.exists(): + return key == 'False' + file_extension = video_file_type_extension(video_file_full_path) + return key == str((file_extension == '.mp4')) + + queryset_res = queryset.values('id', 'videofile') + results = [qv['id'] for qv in queryset_res + if matching_file_type(qv['videofile'], self.value())] if self.value(): return queryset.filter(id__in=results) @@ -89,86 +234,314 @@ def matching_file_exists(videofile, key): return queryset.all() +class GlossVideoBackupFilter(admin.SimpleListFilter): + """ + Filter the GlossVideo objects on whether the video is a backup video + The values of lookups show in the right-hand column of the admin under a heading "Backup Video" + Called from GlossVideoAdmin + :model: GlossVideoAdmin + """ + 
title = _('Backup Video') + parameter_name = 'backup_videos' + + def lookups(self, request, model_admin): + is_backup = tuple((b, b) for b in ('True', 'False')) + return is_backup + + def queryset(self, request, queryset): + if self.value(): + if self.value() == 'True': + return queryset.filter(version__gt=0) + else: + return queryset.filter(version=0) + return queryset.all() + + +@admin.action(description="Rename normal video files to match type") +def rename_extension_videos(modeladmin, request, queryset): + """ + Command for the GlossVideo objects selected in queryset + The command appears in the admin pull-down list of commands for the selected gloss videos + The command determines which glosses are selected, then retrieves the normal video objects for those glosses + This allows the user to merely select one of the objects and hereby change them all instead of numerous selections + For those gloss video objects, it renames the file if the filename is not correct + This also applies to wrong video types in filenames, e.g., a webm video that has mp4 in its filename + This applies to backup videos as well as normal videos + Called from GlossVideoAdmin + :model: GlossVideoAdmin + """ + # retrieve glosses of selected GlossVideo objects + distinct_glosses = Gloss.objects.filter(glossvideo__in=queryset).distinct() + + for gloss in distinct_glosses: + for glossvideo in GlossVideo.objects.filter(gloss=gloss, glossvideonme=None, glossvideoperspective=None).order_by('version', 'id'): + current_relative_path = str(glossvideo.videofile) + if not current_relative_path: + # make sure the path is not empty + continue + + video_file_full_path = os.path.join(WRITABLE_FOLDER, current_relative_path) + + # use the file system command 'file' to determine the extension for the type of video file + desired_video_extension = video_file_type_extension(video_file_full_path) + if not desired_video_extension: + # if we get here, the file extension for the video type could not be 
determined + # either there is no file for this object or it has an unknown video type + continue + + # retrieve the actual filename stored in the gloss video object + # and also compute the name it should have + + base_filename = os.path.basename(video_file_full_path) + + idgloss = gloss.idgloss + two_letter_dir = get_two_letter_dir(idgloss) + dataset_dir = gloss.lemma.dataset.acronym + desired_filename_without_extension = f'{idgloss}-{gloss.id}' + + if glossvideo.version > 0: + desired_extension = f'{desired_video_extension}.bak{glossvideo.id}' + else: + desired_extension = desired_video_extension + + desired_filename = desired_filename_without_extension + desired_extension + if base_filename == desired_filename: + continue + + # if we get to here, the file has the wrong path + # the path needs to be fixed and the file renamed + source = os.path.join(WRITABLE_FOLDER, current_relative_path) + destination = os.path.join(WRITABLE_FOLDER, GLOSS_VIDEO_DIRECTORY, + dataset_dir, two_letter_dir, desired_filename) + desired_relative_path = os.path.join(GLOSS_VIDEO_DIRECTORY, + dataset_dir, two_letter_dir, desired_filename) + if DEBUG_VIDEOS: + print('video:admin:rename_extension_videos:os.rename: ', source, destination) + if os.path.exists(video_file_full_path): + os.rename(source, destination) + if DEBUG_VIDEOS: + print('video:admin:rename_extension_videos:videofile.name := ', desired_relative_path) + glossvideo.videofile.name = desired_relative_path + glossvideo.save() + + +@admin.action(description="Move selected backup files to trash") +def remove_backups(modeladmin, request, queryset): + """ + Command for the GlossVideo objects selected in queryset + The command appears in the admin pull-down list of commands for the selected gloss videos + The command moves the selected backup files to the DELETED_FILES_FOLDER location + To prevent the gloss video object from pointing to the deleted files folder location + the name stored in the object is set to empty before 
deleting the object + Other selected objects are ignored + This allows the user to keep a number of the backup files by not selecting everything + Called from GlossVideoAdmin + :model: GlossVideoAdmin + """ + # make sure the queryset only applies to backups for normal videos + for obj in queryset.filter(glossvideonme=None, glossvideoperspective=None, version__gt=0): + relative_path = str(obj.videofile) + if not relative_path: + continue + video_file_full_path = os.path.join(WRITABLE_FOLDER, relative_path) + if not os.path.exists(video_file_full_path): + if DEBUG_VIDEOS: + print('video:admin:remove_backups:delete object: ', relative_path) + obj.delete() + continue + # move the video file to DELETED_FILES_FOLDER and erase the videofile name in the object + # this is done to avoid the signals on GlossVideo delete + deleted_file_name = flattened_video_path(relative_path) + destination = os.path.join(WRITABLE_FOLDER, DELETED_FILES_FOLDER, deleted_file_name) + destination_dir = os.path.dirname(destination) + if not os.path.exists(destination_dir): + os.makedirs(destination_dir) + if os.path.isdir(destination_dir): + try: + obj.videofile.name = "" + obj.save() + shutil.move(video_file_full_path, destination) + if DEBUG_VIDEOS: + print('video:admin:remove_backups:shutil.move: ', video_file_full_path, destination) + except (OSError, PermissionError) as e: + print(e) + continue + # the object does not point to anything anymore, so it can be deleted + if DEBUG_VIDEOS: + print('video:admin:remove_backups:delete object: ', relative_path) + obj.delete() + + +@admin.action(description="Renumber selected backups") +def renumber_backups(modeladmin, request, queryset): + """ + Command for the GlossVideo objects selected in queryset + The command appears in the admin pull-down list of commands for the selected gloss videos + The command renumbers the backup video objects for the GlossVideo queryset + The command determines which glosses are selected, then retrieves the backup video 
objects for those glosses + This allows the user to merely select one of the objects and hereby renumber them all + Because the backup objects are numbered by version, all of the objects must be renumbered + Called from GlossVideoAdmin + :model: GlossVideoAdmin + """ + # retrieve glosses of selected GlossVideo objects for later step + distinct_glosses = Gloss.objects.filter(glossvideo__in=queryset).distinct() + # construct data structure for glosses and backup videos including those that are not selected + lookup_backup_files = dict() + for gloss in distinct_glosses: + lookup_backup_files[gloss] = GlossVideo.objects.filter(gloss=gloss, + glossvideonme=None, + glossvideoperspective=None, + version__gt=0).order_by('version', 'id') + for gloss, videos in lookup_backup_files.items(): + # enumerate over the backup videos and give them new version numbers + # the version of the gloss video object is updated since objects may have been deleted + for inx, video in enumerate(videos, 1): + if inx == video.version: + continue + video.version = inx + video.save() + + +@admin.action(description="Set incorrect NME/Perspective filenames to empty string") +def unlink_files(modeladmin, request, queryset): + """ + Command for the GlossVideo objects selected in queryset + The command appears in the admin pull-down list of commands for the selected gloss videos + The command only applies to Perspective and NME videos, other selected videos are ignored. 
+ Allow to erase the filename from an object if it has the wrong format + This is for the purpose of repairing doubly linked files where the subclass object points to the normal video + Once the filename has been cleared, the user can delete the object as normal with the Admin delete command + This prevents a normal video linked to by a subclass video from being deleted + Called from GlossVideoAdmin + :model: GlossVideoAdmin + """ + for obj in queryset: + if not hasattr(obj, 'glossvideonme') and not hasattr(obj, 'glossvideoperspective'): + # the selected gloss video is not a subclass video + continue + relative_path = str(obj.videofile) + if not relative_path: + continue + video_file_full_path = Path(WRITABLE_FOLDER, relative_path) + # ignore the files that have the correct filename + if hasattr(obj, 'glossvideonme') and filename_matches_nme(video_file_full_path): + continue + if hasattr(obj, 'glossvideoperspective') and filename_matches_perspective(video_file_full_path): + continue + + # erase the file path since it has the wrong format + if DEBUG_VIDEOS: + print('unlink_files: erase incorrect path from NME or Perspective object: ', obj, video_file_full_path) + obj.videofile.name = "" + obj.save() + + class GlossVideoAdmin(admin.ModelAdmin): - list_display = ['id', 'gloss', 'video_file', 'perspective', 'NME', 'file_timestamp', 'file_group', 'permissions', 'file_size', 'version'] - list_filter = (GlossVideoDatasetFilter, GlossVideoFileSystemGroupFilter, GlossVideoExistenceFilter) + list_display = ['id', 'gloss', 'video_file', 'perspective', 'NME', 'file_timestamp', 'file_group', 'permissions', 'file_size', 'video_type', 'version'] + list_filter = (GlossVideoDatasetFilter, GlossVideoFileSystemGroupFilter, GlossVideoExistenceFilter, + GlossVideoFileTypeFilter, GlossVideoNMEFilter, GlossVideoPerspectiveFilter, + GlossVideoFilenameFilter, GlossVideoBackupFilter) - search_fields = ['^gloss__annotationidglosstranslation__text'] + search_fields = 
['^gloss__annotationidglosstranslation__text', '^gloss__lemma__lemmaidglosstranslation__text'] + actions = [rename_extension_videos, remove_backups, renumber_backups, unlink_files] def video_file(self, obj=None): - # this will display the full path in the list view - if obj is None: + """ + column VIDEO FILE + this will display the full path in the list view, also for non-existent files + this allows to browse the file paths also on the development servers + """ + if obj is None or not str(obj.videofile): return "" - import os video_file_full_path = os.path.join(WRITABLE_FOLDER, str(obj.videofile)) return video_file_full_path def perspective(self, obj=None): + """ + column PERSPECTIVE + This will be True if the object is of subclass GlossVideoPerspective + """ if obj is None: return "" return obj.is_glossvideoperspective() is True def NME(self, obj=None): + """ + column NME + This will be True if the object is of subclass GlossVideoNME + """ if obj is None: return "" return obj.is_glossvideonme() is True def file_timestamp(self, obj=None): - # if the file exists, this will display its timestamp in the list view - if obj is None: + """ + column FILE TIMESTAMP + if the file exists, this will display its timestamp in the list view + """ + if obj is None or not str(obj.videofile): return "" - import os - import datetime video_file_full_path = os.path.join(WRITABLE_FOLDER, str(obj.videofile)) - if os.path.exists(video_file_full_path): - return datetime.datetime.fromtimestamp(os.path.getctime(video_file_full_path)) - else: + if not os.path.exists(video_file_full_path): return "" + return DT.datetime.fromtimestamp(os.path.getctime(video_file_full_path)) def file_group(self, obj=None): - # this will display a group in the list view - if obj is None: + """ + column FILE GROUP + if the file exists, this will display the file system group in the list view + """ + if obj is None or not str(obj.videofile): return "" - else: - from pathlib import Path - video_file_full_path = 
Path(WRITABLE_FOLDER, str(obj.videofile)) - if video_file_full_path.exists(): - group = video_file_full_path.group() - return group - else: - return "" + video_file_full_path = Path(WRITABLE_FOLDER, str(obj.videofile)) + if not video_file_full_path.exists(): + return "" + group = video_file_full_path.group() + return group def file_size(self, obj=None): - # this will display a group in the list view - if obj is None: + """ + column FILE SIZE + if the file exists, this will display the file size in the list view + """ + if obj is None or not str(obj.videofile): return "" - else: - from pathlib import Path - video_file_full_path = Path(WRITABLE_FOLDER, str(obj.videofile)) - if video_file_full_path.exists(): - size = str(video_file_full_path.stat().st_size) - return size - else: - return "" + video_file_full_path = Path(WRITABLE_FOLDER, str(obj.videofile)) + if not video_file_full_path.exists(): + return "" + size = str(video_file_full_path.stat().st_size) + return size def permissions(self, obj=None): - # this will display a group in the list view - if obj is None: + """ + column PERMISSIONS + if the file exists, this will display the file system permissions in the list view + """ + if obj is None or not str(obj.videofile): return "" - else: - from pathlib import Path - import stat - video_file_full_path = Path(WRITABLE_FOLDER, str(obj.videofile)) - if video_file_full_path.exists(): - stats = stat.filemode(video_file_full_path.stat().st_mode) - return stats - else: - return "" + video_file_full_path = Path(WRITABLE_FOLDER, str(obj.videofile)) + if not video_file_full_path.exists(): + return "" + stats = stat.filemode(video_file_full_path.stat().st_mode) + return stats + + def video_type(self, obj=None): + """ + column VIDEO TYPE + if the file exists, this will display the video type in file extension format + """ + if obj is None or not str(obj.videofile): + return "" + video_file_full_path = os.path.join(WRITABLE_FOLDER, str(obj.videofile)) + if not 
os.path.exists(video_file_full_path): + return "" + return video_file_type_extension(video_file_full_path) def get_list_display_links(self, request, list_display): - # do not allow the user to view individual revisions in list + # do not allow the user to click on data of individual elements in the list display self.list_display_links = (None, ) return self.list_display_links @@ -176,6 +549,7 @@ def has_add_permission(self, request): return False def has_delete_permission(self, request, obj=None): + # Only allow to delete objects without any file if not self.file_timestamp(obj): return True return False @@ -243,6 +617,7 @@ def queryset(self, request, queryset): return queryset.filter(annotatedsentence_id__in=annotated_sentences_ids) return queryset.all() + class AnnotatedVideoAdmin(admin.ModelAdmin): actions = None list_display = ('dataset', 'annotated_sentence', 'video_file', 'timestamp', 'eaf_file', 'eaf_timestamp', 'url', 'source') @@ -278,11 +653,9 @@ def timestamp(self, obj=None): # if the file exists, this will display its timestamp in the list view if obj is None: return "" - import os - import datetime video_file_full_path = os.path.join(WRITABLE_FOLDER, str(obj.videofile)) if os.path.exists(video_file_full_path): - return datetime.datetime.fromtimestamp(os.path.getctime(video_file_full_path)) + return DT.datetime.fromtimestamp(os.path.getctime(video_file_full_path)) else: return "" @@ -295,11 +668,9 @@ def eaf_timestamp(self, obj=None): # if the file exists, this will display its timestamp in the list view if obj is None: return "" - import os - import datetime eaf_file_full_path = os.path.join(WRITABLE_FOLDER, str(obj.eaffile)) if os.path.exists(eaf_file_full_path): - return datetime.datetime.fromtimestamp(os.path.getctime(eaf_file_full_path)) + return DT.datetime.fromtimestamp(os.path.getctime(eaf_file_full_path)) else: return "" diff --git a/signbank/video/convertvideo.py b/signbank/video/convertvideo.py index 97b17de9..5f29216e 100755 --- 
a/signbank/video/convertvideo.py +++ b/signbank/video/convertvideo.py @@ -213,10 +213,44 @@ def make_thumbnail_video(sourcefile, targetfile): os.remove(temp_target) +# this is only for documentation purposes in the patterns, it's not a setting +# these were found to work properly on Ubuntu and match older files and older code +ACCEPTABLE_VIDEO_EXTENSIONS = ['.mp4', '.mov', '.webm', '.m4v', '.mkv', '.m2v'] + + +def extension_on_filename(filename): + # used to retrieve a video type file extension from a filename where there is no file + # if this is a backup file, then the extension at the end is not the video file type extension + # otherwise, just retrieve the normal extension from the filename + # caveat, some video files in the database have weird backup sequences and have no video extension + # the .mp4 is for those + filename_with_extension = os.path.basename(filename) + filename_without_extension, ext = os.path.splitext(os.path.basename(filename)) + + if ext in ACCEPTABLE_VIDEO_EXTENSIONS: + return ext + + m = re.search(r".+-(\d+)\.(mp4|m4v|mov|webm|mkv|m2v)\.(bak\d+)$", filename_with_extension) + if m: + return m.group(2) + m = re.search(r".+-(\d+)\.(mp4|m4v|mov|webm|mkv|m2v)\.(bak\d+)$", filename_without_extension) + if m: + return m.group(2) + # the function is only called if there is no file + # if we get here, the filename does not match any correct pattern and the extension + # does not match any video file type + # this allows the function to work on the development servers + return '.mp4' + + def video_file_type_extension(video_file_full_path): + + if not video_file_full_path or 'glossvideo' not in video_file_full_path: + return '' + if not os.path.exists(video_file_full_path): - # the video file does not exist - return ".mp4" + return extension_on_filename(video_file_full_path) + filetype_output = subprocess.run(["file", video_file_full_path], stdout=subprocess.PIPE) filetype = str(filetype_output.stdout) if 'MOV' in filetype: @@ -232,10 +266,10 @@ 
def _basename_stem(filename):
    """Return the basename of filename with its last extension removed."""
    return os.path.splitext(os.path.basename(filename))[0]


def filename_matches_nme(filename):
    """Match a non-manual elements (NME) video filename.

    The basename without its last extension must end in "-<id>_nme_<n>",
    optionally followed by "_left" or "_right".

    :return: the re.Match object, or None if the name does not match
    """
    stem = _basename_stem(filename)
    return re.search(r".+-(\d+)_(nme_\d+|nme_\d+_left|nme_\d+_right)$", stem)


def filename_matches_perspective(filename):
    """Match a perspective video filename.

    The basename without its last extension must end in "-<id>_left" or
    "-<id>_right"; perspective NME names ("-<id>_nme_<n>_left|right") match too.

    :return: the re.Match object, or None if the name does not match
    """
    stem = _basename_stem(filename)
    return re.search(r".+-(\d+)_(left|right|nme_\d+_left|nme_\d+_right)$", stem)


def filename_matches_video(filename):
    """Match a primary video filename.

    The basename without its last extension must end in "-<id>".

    :return: the re.Match object, or None if the name does not match
    """
    stem = _basename_stem(filename)
    return re.search(r".+-(\d+)$", stem)


def filename_matches_backup_video(filename):
    """Match a backup video filename.

    The full basename must end in "-<id>.<video ext>.bak<n>".

    :return: the re.Match object, or None if the name does not match
    """
    name = os.path.basename(filename)
    return re.search(r".+-(\d+)\.(mp4|m4v|mov|webm|mkv|m2v)\.(bak\d+)$", name)


def flattened_video_path(relative_path):
    """Construct the filename to be used in the DELETED_FILES_FOLDER.

    A relative path of the form "glossvideo/<dataset folder>/<two chars>/<filename>"
    becomes "<dataset folder>_<two chars>_<filename>". Any other path yields just
    its filename.
    """
    folders, filename = os.path.split(relative_path)
    found = re.search(r"^glossvideo/(.+)/(..)$", folders)
    if found is None:
        return filename
    dataset_folder, two_char_folder = found.group(1), found.group(2)
    return f"{dataset_folder}_{two_char_folder}_{filename}"
def delete_files(self):
    """Delete the NME video file associated with this object.

    :return: True when nothing is left to delete (no filename stored, the stored
             name was not an NME filename and has been cleared, the file is absent,
             or the file was removed); False when the file exists but could not
             be removed
    """
    old_path = str(self.videofile)
    if not old_path:
        return True
    file_system_path = os.path.join(settings.WRITABLE_FOLDER, old_path)
    if filename_matches_nme(file_system_path) is None:
        # this points to the normal video file, just erase the name rather than delete file
        self.videofile.name = ""
        self.save()
        return True
    if not os.path.exists(file_system_path):
        return True
    try:
        os.unlink(file_system_path)
    except FileNotFoundError:
        # the file vanished between the existence check and the unlink;
        # treat it like the "file is absent" case above
        return True
    except OSError as e:
        # PermissionError is a subclass of OSError, so it is covered here
        print(e)
        return False
    return True


def reversion(self, revert=False):
    """Delete the video file of this object, then the object itself.

    The object is only deleted when delete_files() reports success; otherwise
    a message is printed and the object is kept.

    :param revert: not used by this implementation
    """
    status = self.delete_files()
    if not status:
        print("DELETE NME VIDEO FAILED: ", self.videofile.name)
    else:
        self.delete()
def delete_files(self):
    """Delete the perspective video file associated with this object.

    :return: True when nothing is left to delete (no filename stored, the stored
             name was not a perspective filename and has been cleared, the file
             is absent, or the file was removed); False when the file exists but
             could not be removed
    """
    old_path = str(self.videofile)
    if not old_path:
        return True
    file_system_path = os.path.join(settings.WRITABLE_FOLDER, old_path)
    if filename_matches_perspective(file_system_path) is None:
        # this points to the normal video file, just erase the name rather than delete file
        self.videofile.name = ""
        self.save()
        return True
    if not os.path.exists(file_system_path):
        return True
    try:
        os.unlink(file_system_path)
    except FileNotFoundError:
        # the file vanished between the existence check and the unlink;
        # treat it like the "file is absent" case above
        return True
    except OSError as e:
        # PermissionError is a subclass of OSError, so it is covered here
        print(e)
        return False
    return True


def reversion(self, revert=False):
    """Delete the video file of this object, then the object itself.

    The object is only deleted when delete_files() reports success; otherwise
    a message is printed and the object is kept.

    :param revert: not used by this implementation
    """
    status = self.delete_files()
    if not status:
        print("DELETE Perspective VIDEO FAILED: ", self.videofile.name)
    else:
        self.delete()
print('delete_files settings.DELETE_FILES_ON_GLOSSVIDEO_DELETE: ', settings.DELETE_FILES_ON_GLOSSVIDEO_DELETE) if hasattr(instance, 'glossvideonme'): # before deleting a GlossVideoNME object, delete the files - instance.delete_files() + status = instance.delete_files() elif hasattr(instance, 'glossvideoperspective'): # before deleting a GlossVideoPerspective object, delete the files - instance.delete_files() + status = instance.delete_files() elif settings.DELETE_FILES_ON_GLOSSVIDEO_DELETE: # before a GlossVideo object, only delete the files if the setting is True # default.py has this set to false so primary gloss video files are (never) deleted diff --git a/signbank/video/video_admin_wiki.txt b/signbank/video/video_admin_wiki.txt new file mode 100644 index 00000000..e98e4a7d --- /dev/null +++ b/signbank/video/video_admin_wiki.txt @@ -0,0 +1,131 @@ + +GLOSS VIDEO ADMIN + + +Gloss Video Admin allows to view the filenames and file properties of stored video objects. +The video files themselves are stored in location GLOSS_VIDEO_DIRECTORY in a folder for the dataset. +The dataset folder is organised in sub-folders based on the first two characters of the lemma text of the gloss. +The lemma text is that of the default language of the dataset. The dataset folder is the dataset acronym. + +Caution: Modification of either the dataset acronym or its default language will rename and move all of the video files for the dataset. + + +The Gloss Video Admin is described below. + + +VIDEO FILENAMES + +Gloss video filenames have the following structure, where attribute idgloss is the lemma text in the default language of the dataset. +The description uses pseudo-code patterns that include regular expression syntax. Video file type extensions are shown explicitly. 
+ +Primary video: {gloss.idgloss}-{gloss.id}.(mp4|m4v|mov|webm|mkv|m2v) + +Perspective video: {gloss.idgloss}-{gloss.id}_(left|right|nme_\d+_left|nme_\d+_right).(mp4|m4v|mov|webm|mkv|m2v) +The perspective video pattern also matches perspective NME video filenames. + +Non-manual elements video: {gloss.idgloss}-{gloss.id}_(nme_\d+|nme_\d+_left|nme_\d+_right).(mp4|m4v|mov|webm|mkv|m2v) +The digit string after the "nme_" in the pattern is the ordering index of the video, for identification and display. + +Backup video: {gloss.idgloss}-{gloss.id}.(mp4|m4v|mov|webm|mkv|m2v).(bak\d+) +The digit string after the "bak" is the ID of the backup video object. These are internal and not visible to users. + + + +COLUMNS + +The tabular display shows the following columns as indicated in list_display of the class GlossVideoAdmin: + + ID, GLOSS, VIDEO FILE, PERSPECTIVE, NME, FILE TIMESTAMP, FILE GROUP, PERMISSIONS, FILE SIZE, VIDEO TYPE, VERSION + +Some of these column values are computed based on the associated video file. +For these, there are corresponding methods in the class. Django displays the columns in upper case. +The version column indicates a backup file when it's greater than 0. + + +FILTERS + +Filters are available as shown in list_filter to reduce the amount of data shown in the table. +For each filter, a class model has been defined. The filters appear in the right-hand column of the admin. + +GlossVideoDatasetFilter (Dataset) filters on the dataset. A list of all the dataset acronyms is shown. + +GlossVideoFileSystemGroupFilter (File System Group) filters on the file system group. The selection in FILESYSTEM_SIGNBANK_GROUPS is shown.
+ +The remaining filters are all Boolean + +GlossVideoExistenceFilter (File Exists) filters on whether a video file exists for the gloss video object + +GlossVideoFileTypeFilter (MP4 File) filters on the type of the video file, MP4 File (True, False) + +GlossVideoNMEFilter (NME Video) filters on whether the gloss video object is a non-manual elements video + +GlossVideoPerspectiveFilter (Perspective Video) filters on whether the gloss video object is a perspective video + +GlossVideoFilenameFilter (Filename Correct) filters on whether the filename associated with the file is correct +Methods defined in video models.py are available for this. +They compare the filename to acceptable patterns for the type of file. +Some older files use an older pattern for the backup filenames that did not include the video file type extension in the filename. (See ACTIONS) + + +GlossVideoBackupFilter (Backup Video) filters on whether the gloss video object is a backup video + + +SEARCH + +There is one search field input area in the Gloss Video Admin. +Here you can search on the initial text (regular expression caret) that appears in the gloss annotation or lemma. +The Django query strings appear in search_fields: + +'^gloss__annotationidglosstranslation__text' search on the gloss annotation text, any language + +'^gloss__lemma__lemmaidglosstranslation__text' search on the lemma annotation text, any language + + +ACTIONS + +Django allows you to select objects from those displayed on the current page of admin results. +A command can then be applied to the selected objects (the queryset). +These are shown in the actions field of the class. The commands are shown in a pull-down list. +The primary default action is Delete, which also appears in the pull-down. +Delete does not appear in the actions list of the class, but as a method of the class. + +Signbank only allows gloss video objects to be deleted in the admin when there is no video file.
+This is because of the backup system, which is invoked via signals when an object is deleted. +As a safeguard, normal delete is thus not available if there is a video file. + +Due to legacy code, backup files have undergone name changes over the years. +Originally, the backup files had a sequence of ".bak" extensions appended to them (e.g. ".bak.bak"), where the number of extensions corresponded to the version. +The video file type was omitted because all videos were converted to mp4. +However, with the introduction of webcam capture and the API, the various video formats were no longer converted. +It's also browser-specific whether a file is a video file. + +The Gloss Video Admin queryset commands are as follows: + + +"Rename normal video files to match type" (rename_extension_videos) + +This command only applies to selected normal video file objects, including backup files. +The filenames are updated to match the acceptable pattern. +This has the result of repairing legacy backup video names to match the new format. +It also makes the file extension match the actual type of the video file. +This is necessary for legacy files where there was no video type in the filename. +It also applies to videos that were not converted to mp4 but include mp4 in the filename. + + +"Move selected backup files to trash" (remove_backups) + +This command moves the selected backup files to the DELETED_FILES_FOLDER location. +This only applies to objects in the query, allowing the user to keep a number of the backup files by not selecting everything. +The files are renamed as {dataset_acronym}_{two_char_lemma}_{filename}. This allows convenient sorting of the DELETED_FILES_FOLDER. + +"Renumber selected backups" (renumber_backups) + +If some backup files are missing or the objects have been deleted, the version numbers can be made sequential again with this command.
+The command renumbers all the backup objects of the gloss of each selected object, so it is enough to select just one of them to renumber them all. + +"Set incorrect NME/Perspective filenames to empty string" (unlink_files) + +For some files in the database, a subclass perspective or NME video may be pointing to the normal video file. +These can be found using the filter Filename Correct plus the subclass filter. +Using this command, the name stored in the object can be set to empty to allow deletion of the object without deleting the file. +This command is only applied on subclass objects and when the filename is not correct. Other objects in the queryset are ignored.