From d56fb9c98496c88f6ab8c0999cb300b551dc2129 Mon Sep 17 00:00:00 2001 From: Daniel Parks Date: Mon, 30 Dec 2024 09:31:59 -0800 Subject: [PATCH] Expand list of invalid characters in file names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are issues with certain characters in file names (see issue #8 — Check how Anki handles special characters in `[sound: ...]`). I’ve listed my best guess at which characters are a problem in `FILENAME_ILLEGAL_CHARS`, added escaping to prevent them, and added a check to ensure that none made it into the video ID, which is embedded directly in a file name. --- yanki/video.py | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/yanki/video.py b/yanki/video.py index e6c2dcb..5c7fc49 100644 --- a/yanki/video.py +++ b/yanki/video.py @@ -20,6 +20,10 @@ STILL_FORMATS = frozenset(['png', 'jpeg', 'jpg']) TIME_FORMAT = '%0.06f' +FILENAME_ILLEGAL_CHARS = '/"[]' + +def chars_in(chars, input): + return [char for char in chars if char in input] class BadURL(ValueError): pass @@ -72,7 +76,16 @@ def url_to_id(url_str): # Try to load the URL with yt_dlp and see what happens. pass - return url_str.replace('|', '||').replace('/', '|') + # FIXME check this against FILENAME_ILLEGAL_CHARS somehow + return ( + url_str + .replace('\\', '\\\\') + .replace('|', r'\|') + .replace('"', r"\'") + .replace('[', r"\(") + .replace(']', r"\)") + .replace('/', '|') + ) def file_url_to_path(url): parts = urlparse(url) @@ -102,15 +115,17 @@ def __init__(self, url, working_dir='.', cache_path='.', reprocess=False, logger self.cache_path = cache_path self.reprocess = reprocess self.logger = logger + + self.id = url_to_id(url) + invalid = chars_in(FILENAME_ILLEGAL_CHARS, self.id) + if invalid: + raise BadURL( + f'Invalid characters ({"".join(invalid)}) in video ID: {repr(self.id)}' + ) + self._info = None self._raw_metadata = None self._format = None - - # ffmpeg parameters. 
Every run with the same parameters should produce the - # same video (assuming the source hasn’t changed). - self.id = url_to_id(url) - if '/' in self.id: - raise BadURL(f"Invalid '/' in video ID: {repr(self.id)}") self._crop = None self._overlay_text = '' self._slow_filter = None @@ -133,7 +148,7 @@ def raw_metadata_cache_path(self): def processed_video_cache_path(self, prefix='processed_'): parameters = '_'.join(self.parameters()) - if '/' in parameters or len(parameters) > 60: + if len(parameters) > 60 or chars_in(FILENAME_ILLEGAL_CHARS, parameters): parameters = hashlib.blake2b( parameters.encode(encoding='utf-8'), digest_size=16,