From 2c3f6a1d57c6591be018d49978908d4fa47b0340 Mon Sep 17 00:00:00 2001 From: fladd Date: Fri, 14 Mar 2025 14:18:49 +0100 Subject: [PATCH 1/4] Audio improvements - cleaned up accessibility of Decoder attributes - added option to ask for custom audio_fps and audio_nbytes when loading media from file - added option to set MoviePy VideoFileClip directly (for custom external clip processing before playing with mediadecoder) - added parameter `pygame_buffersize` to SoundrendererPygame init (for using a Pygame mixer buffersize smaller than the size of one video frame, which will improve video/audio synchronization) - added conversion of audio to unsigned int if Pygame mixer uses this format - added conversion of 32 bit int to 32 bit float if Pygame mixer uses this format - fix/hack for 8-bit audio bug in MoviePy (see https://github.com/Zulko/moviepy/issues/2397) --- mediadecoder/decoder.py | 212 ++++++++++++------ mediadecoder/soundrenderers/pygamerenderer.py | 45 +++- 2 files changed, 182 insertions(+), 75 deletions(-) diff --git a/mediadecoder/decoder.py b/mediadecoder/decoder.py index 37ed0ff..a13de15 100644 --- a/mediadecoder/decoder.py +++ b/mediadecoder/decoder.py @@ -52,7 +52,8 @@ class Decoder(object): be passed a callback function to which decoded video frames should be passed. """ - def __init__(self, mediafile=None, videorenderfunc=None, play_audio=True): + def __init__(self, mediafile=None, videorenderfunc=None, play_audio=True, + audio_fps=44100, audio_nbytes=2): """ Constructor. @@ -68,30 +69,36 @@ def __init__(self, mediafile=None, videorenderfunc=None, play_audio=True): - frame (numpy.ndarray): the videoframe to be rendered play_audio : bool, optional Whether audio of the clip should be played. + audio_fps : int, optional + The requested sample rate of the audio stream (default=44100). + audio_nbytes : int, optional + The number of bytes to encode the audio with: 1 for 8bit audio, + 2 for 16bit audio, 4 for 32bit audio (default=2). 
+ """ # Create an internal timer - self.clock = Timer() + self._clock = Timer() # Load a video file if specified, but allow users to do this later # by initializing all variables to None - if not self.load_media(mediafile, play_audio): - self.reset() + self.reset() + self.load_media(mediafile, play_audio, audio_fps, audio_nbytes) # Set callback function if set self.set_videoframerender_callback(videorenderfunc) # Store instance variables - self.play_audio = play_audio + self._play_audio = play_audio @property def frame_interval(self): - """Duration in seconds of a single frame.""" - return self.clock.frame_interval + """Duration in seconds of a single video frame.""" + return self._clock.frame_interval @property def current_frame_no(self): """Current frame_no of video.""" - return self.clock.current_frame + return self._clock.current_frame @property def current_videoframe(self): @@ -101,7 +108,7 @@ def current_videoframe(self): @property def current_playtime(self): """Clocks current runtime in seconds.""" - return self.clock.time + return self._clock.time @property def loop(self): @@ -122,20 +129,97 @@ def loop(self, value): raise TypeError("can only be True or False") self._loop = value + @property + def clip(self): + """Currently loaded media clip.""" + return self._clip + + @clip.setter + def clip(self, value): + """Currently loaded Moviepy VideoFileClip. 
+ + Parameters + ---------- + + value : moviepy.video.io.VideoFileClip + the clip + + """ + + self._clip = value + + ## Timing variables + # Clip duration + self._clock.max_duration = self.clip.duration + logger.debug("Video clip duration: {}s".format(self.clip.duration)) + + # Frames per second of clip + self._clock.fps = self.clip.fps + logger.debug("Video clip FPS: {}".format(self.clip.fps)) + + if self.clip.audio: + logger.debug("Audio loaded: \n{}".format(self.audioformat)) + logger.debug( + "Creating audio buffer of length: {}".format(queue_length) + ) + self.audioqueue = Queue(queue_length) + + self._status = READY + + @property + def loaded_file(self): + """Name of loaded media file.""" + if self.clip is not None: + return os.path.split(self.clip.filename)[1] + + @property + def fps(self): + """Video frames per second.""" + if self.clip is not None: + return self.clip.fps + + @property + def duration(self): + """Total duration in seconds.""" + if self.clip is not None: + return self.clip.duration + + @property + def status(self): + """Decoder status.""" + return self._status + + @property + def audioformat(self): + """Audio stream parameters.""" + if self.clip is not None and self._play_audio and self.clip.audio: + if self._8bit_hack_applied: # see https://github.com/Zulko/moviepy/issues/2397 + nbytes = 1 + else: + nbytes = self.clip.audio.reader.nbytes + return { + "nbytes": nbytes, + "nchannels": self.clip.audio.nchannels, + "fps": self.clip.audio.fps, + "buffersize": int(self.frame_interval * self.clip.audio.fps) + } + def reset(self): """Resets the player and discards loaded data.""" - self.clip = None - self.loaded_file = None + self._clip = None + self._loaded_file = None - self.fps = None - self.duration = None + self._fps = None + self._duration = None - self.status = UNINITIALIZED - self.clock.reset() + self._status = UNINITIALIZED + self._clock.reset() - self.loop_count = 0 + self._loop_count = 0 + self._8bit_hack_applied = False - def 
load_media(self, mediafile, play_audio=True): + def load_media(self, mediafile, play_audio=True, audio_fps=44100, + audio_nbytes=2): """Loads a media file to decode. If an audiostream is detected, its parameters will be stored in a @@ -154,48 +238,32 @@ def load_media(self, mediafile, play_audio=True): mediafile : str The path to the media file to load. play_audio : bool, optional - Indicates whether the audio of a movie should be played. + Indicates whether the audio of a movie should be played + (default=True) + audio_fps : int, optional + The requested sample rate of the audio stream (default=44100). + audio_nbytes : int, optional + The number of bytes to encode the audio with: 1 for 8bit audio, + 2 for 16bit audio, 4 for 32bit audio (default=2). Raises ------ IOError When the file could not be found or loaded. """ + if not mediafile is None: if os.path.isfile(mediafile): - self.clip = VideoFileClip(mediafile, audio=play_audio) - - self.loaded_file = os.path.split(mediafile)[1] - - ## Timing variables - # Clip duration - self.duration = self.clip.duration - self.clock.max_duration = self.clip.duration - logger.debug("Video clip duration: {}s".format(self.duration)) - - # Frames per second of clip - self.fps = self.clip.fps - self.clock.fps = self.clip.fps - logger.debug("Video clip FPS: {}".format(self.fps)) - - if play_audio and self.clip.audio: - buffersize = int(self.frame_interval * self.clip.audio.fps) - self.audioformat = { - "nbytes": 2, - "nchannels": self.clip.audio.nchannels, - "fps": self.clip.audio.fps, - "buffersize": buffersize, - } - logger.debug("Audio loaded: \n{}".format(self.audioformat)) - logger.debug( - "Creating audio buffer of length: {}".format(queue_length) - ) - self.audioqueue = Queue(queue_length) - else: - self.audioformat = None + if audio_nbytes == 1: + self._8bit_hack_applied = True + audio_nbytes = 2 + self._play_audio = play_audio + self.clip = VideoFileClip(mediafile, audio=play_audio, + audio_fps=audio_fps, + 
audio_nbytes=audio_nbytes) + logger.debug("Loaded {0}".format(mediafile)) - self.status = READY return True else: raise IOError("File not found: {0}".format(mediafile)) @@ -269,7 +337,7 @@ def play(self): ### If all is in order start the general playing loop if self.status == READY: - self.status = PLAYING + self._status = PLAYING self.last_frame_no = 0 @@ -302,20 +370,20 @@ def pause(self): # Recalculate audio stream position to make sure it is not out of # sync with the video self.__calculate_audio_frames() - self.status = PLAYING - self.clock.pause() + self._status = PLAYING + self._clock.pause() elif self.status == PLAYING: - self.status = PAUSED - self.clock.pause() + self._status = PAUSED + self._clock.pause() def stop(self): """Stops the video stream and resets the clock.""" logger.debug("Stopping playback") # Stop the clock - self.clock.stop() - # Set plauyer status to ready - self.status = READY + self._clock.stop() + # Set player status to ready + self._status = READY def seek(self, value): """Seek to the specified time. @@ -335,10 +403,10 @@ def seek(self, value): # Pause the stream self.pause() # Make sure the movie starts at 1s as 0s gives trouble. 
- self.clock.time = max(0.5, value) + self._clock.time = max(0.5, value) logger.debug( "Seeking to {} seconds; frame {}".format( - self.clock.time, self.clock.current_frame + self._clock.time, self._clock.current_frame ) ) if self.audioformat: @@ -358,7 +426,7 @@ def __calculate_audio_frames(self): if self.audioformat is None: return - start_frame = self.clock.current_frame + start_frame = self._clock.current_frame totalsize = int(self.clip.audio.fps * self.clip.audio.duration) self.audio_times = list(range(0, totalsize, self.audioformat["buffersize"])) + [ totalsize @@ -377,24 +445,24 @@ def __render(self): self.__render_videoframe() # Start videoclock with start of this thread - self.clock.start() + self._clock.start() logger.debug("Started rendering loop.") # Main rendering loop while self.status in [PLAYING, PAUSED]: - current_frame_no = self.clock.current_frame + current_frame_no = self._clock.current_frame # Check if end of clip has been reached - if self.clock.time >= self.duration: - logger.debug("End of stream reached at {}".format(self.clock.time)) + if self._clock.time >= self.duration: + logger.debug("End of stream reached at {}".format(self._clock.time)) if self.loop: logger.debug("Looping: restarting stream") # Seek to the start self.rewind() - self.loop_count += 1 + self._loop_count += 1 else: # End of stream has been reached - self.status = EOS + self._status = EOS break if self.last_frame_no != current_frame_no: @@ -408,7 +476,7 @@ def __render(self): time.sleep(0.005) # Stop the clock. 
- self.clock.stop() + self._clock.stop() logger.debug("Rendering stopped.") def __render_videoframe(self): @@ -417,7 +485,7 @@ def __render_videoframe(self): Sets the frame as the __current_video_frame and passes it on to __videorenderfunc() if it is set.""" - new_videoframe = self.clip.get_frame(self.clock.time) + new_videoframe = self.clip.get_frame(self._clock.time) # Pass it to the callback function if this is set if callable(self.__videorenderfunc): self.__videorenderfunc(new_videoframe) @@ -430,6 +498,11 @@ def __audiorender_thread(self): new_audioframe = None logger.debug("Started audio rendering thread.") + if self._8bit_hack_applied: + nbytes = 1 + else: + nbytes = self.audioformat["nbytes"] + while self.status in [PLAYING, PAUSED]: # Retrieve audiochunk if self.status == PLAYING: @@ -453,8 +526,9 @@ def __audiorender_thread(self): # sure this doesn't crash the whole program. new_audioframe = self.clip.audio.to_soundarray( tt=chunk, - buffersize=self.frame_interval * self.clip.audio.fps, quantize=True, + nbytes=nbytes, + buffersize=self.frame_interval * self.clip.audio.fps, ) except OSError as e: logger.warning("Sound decoding error: {}".format(e)) diff --git a/mediadecoder/soundrenderers/pygamerenderer.py b/mediadecoder/soundrenderers/pygamerenderer.py index 8eb2335..ce65828 100644 --- a/mediadecoder/soundrenderers/pygamerenderer.py +++ b/mediadecoder/soundrenderers/pygamerenderer.py @@ -2,6 +2,7 @@ import threading import warnings + try: # Python 3 from queue import Queue, Empty @@ -17,7 +18,7 @@ class SoundrendererPygame(threading.Thread, SoundRenderer): """Uses pygame.mixer to play sound""" - def __init__(self, audioformat, queue=None): + def __init__(self, audioformat, queue=None, pygame_buffersize=None): """Constructor. Creates a pygame sound renderer using pygame.mixer. 
@@ -25,9 +26,12 @@ def __init__(self, audioformat, queue=None): ---------- audioformat : dict A dictionary containing the properties of the audiostream - queue : Queue.queue + queue : Queue.queue, optional A queue object which serves as a buffer on which the individual - audio frames are placed by the decoder. + audio frames are placed by the decoder (default=None). + pygame_buffersize : int, optional + The buffersize to be used in the Pygame mixer (default=None). + """ global pygame import pygame @@ -43,17 +47,31 @@ def __init__(self, audioformat, queue=None): fps = audioformat["fps"] nchannels = audioformat["nchannels"] - nbytes = audioformat["nbytes"] - buffersize = audioformat["buffersize"] + self._nbytes = nbytes = audioformat["nbytes"] + if pygame_buffersize: + buffersize = pygame_buffersize + else: + buffersize = audioformat["buffersize"] if pygame.mixer.get_init() is None: - pygame.mixer.init(fps, -8 * nbytes, nchannels, buffersize) + if nbytes in (1, 2): + fmt = -8 * nbytes + elif nbytes == 4: + fmt = 32 + pygame.mixer.init(fps, fmt, nchannels, buffersize) self._own_mixer = True else: self._own_mixer = False def run(self): """Main thread function.""" + global pygame + import pygame + + import numpy as np + + pygame_mixer_unsigned = pygame.mixer.get_init()[1] > 0 + if not hasattr(self, "queue"): raise RuntimeError("Audio queue is not intialized.") @@ -64,6 +82,21 @@ def run(self): if chunk is None: try: frame = self.queue.get(timeout=queue_timeout) + + # Moviepy only supports 8, 16 and 32 bit signed integer. + # Pygame also supports 8 and 16 bit unsigned integer, as + # well as 32 bit floating point. In case the Pygame mixer + # uses one of those formats, we need to convert each audio + # frame to that on the fly. 
+ if pygame_mixer_unsigned: # signed int --> unsigned int + if self._nbytes == 1: + frame = frame.astype(np.uint8) + 128 + if self._nbytes == 2: + frame = frame.astype(np.uint16) + 32768 + if self._nbytes == 4: # signed int --> float + frame = (frame.astype(np.float32) / + np.iinfo(np.int32).max) + chunk = pygame.sndarray.make_sound(frame) except Empty: continue From f665fa762c1eb0f7af1300bd799128fa9324b6bc Mon Sep 17 00:00:00 2001 From: fladd Date: Sun, 16 Mar 2025 03:21:02 +0100 Subject: [PATCH 2/4] New 'audio_nchannels' argument in Decoder init --- mediadecoder/decoder.py | 39 ++++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/mediadecoder/decoder.py b/mediadecoder/decoder.py index a13de15..9d8e1b1 100644 --- a/mediadecoder/decoder.py +++ b/mediadecoder/decoder.py @@ -7,6 +7,8 @@ # MoviePy try: from moviepy.video.io.VideoFileClip import VideoFileClip + from moviepy.audio.io.AudioFileClip import AudioFileClip + from moviepy.audio.io.readers import FFMPEG_AudioReader import numpy as np except ImportError as e: try: @@ -53,7 +55,7 @@ class Decoder(object): """ def __init__(self, mediafile=None, videorenderfunc=None, play_audio=True, - audio_fps=44100, audio_nbytes=2): + audio_fps=44100, audio_nbytes=2, audio_nchannels=2): """ Constructor. @@ -74,6 +76,8 @@ def __init__(self, mediafile=None, videorenderfunc=None, play_audio=True, audio_nbytes : int, optional The number of bytes to encode the audio with: 1 for 8bit audio, 2 for 16bit audio, 4 for 32bit audio (default=2). + audio_nchannels : int, optional + The number of channels to encode the audio with (default=2). 
""" # Create an internal timer @@ -82,7 +86,8 @@ def __init__(self, mediafile=None, videorenderfunc=None, play_audio=True, # Load a video file if specified, but allow users to do this later # by initializing all variables to None self.reset() - self.load_media(mediafile, play_audio, audio_fps, audio_nbytes) + self.load_media(mediafile, play_audio, audio_fps, audio_nbytes, + audio_nchannels) # Set callback function if set self.set_videoframerender_callback(videorenderfunc) @@ -219,7 +224,7 @@ def reset(self): self._8bit_hack_applied = False def load_media(self, mediafile, play_audio=True, audio_fps=44100, - audio_nbytes=2): + audio_nbytes=2, audio_nchannels=2): """Loads a media file to decode. If an audiostream is detected, its parameters will be stored in a @@ -236,20 +241,22 @@ def load_media(self, mediafile, play_audio=True, audio_fps=44100, Parameters ---------- mediafile : str - The path to the media file to load. + The path to the media file to load. play_audio : bool, optional - Indicates whether the audio of a movie should be played - (default=True) + Indicates whether the audio of a movie should be played + (default=True) audio_fps : int, optional - The requested sample rate of the audio stream (default=44100). + The requested sample rate of the audio stream (default=44100). audio_nbytes : int, optional - The number of bytes to encode the audio with: 1 for 8bit audio, - 2 for 16bit audio, 4 for 32bit audio (default=2). + The number of bytes to encode the audio with: 1 for 8bit audio, + 2 for 16bit audio, 4 for 32bit audio (default=2). + audio_nchannels : int, optional + The number of channels to encode the audio with (default=2). Raises ------ IOError - When the file could not be found or loaded. + When the file could not be found or loaded. 
""" if not mediafile is None: @@ -261,7 +268,17 @@ def load_media(self, mediafile, play_audio=True, audio_fps=44100, self.clip = VideoFileClip(mediafile, audio=play_audio, audio_fps=audio_fps, audio_nbytes=audio_nbytes) - + if play_audio and audio_nchannels !=2: + # Run FFMPEG_AudioReader again to set nchannels + self.clip.audio.reader = FFMPEG_AudioReader( + mediafile, self.clip.audio.reader.buffersize, + fps=audio_fps, nbytes=audio_nbytes, + nchannels=audio_nchannels) + self.clip.audio.nchannels=audio_nchannels + else: + self.clip = VideoFileClip(mediafile, audio=play_audio, + audio_fps=audio_fps, + audio_nbytes=audio_nbytes) logger.debug("Loaded {0}".format(mediafile)) return True From 19ed48a3854acf21006de5f3ae7afce7c508af1e Mon Sep 17 00:00:00 2001 From: Florian Krause Date: Mon, 17 Mar 2025 15:28:25 +0100 Subject: [PATCH 3/4] New 'target_resolution' argument in Decoder init This allows requesting a custom video resolution (as opposed to the one encoded in the video file). --- mediadecoder/decoder.py | 47 +++++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/mediadecoder/decoder.py b/mediadecoder/decoder.py index 9d8e1b1..db3dba0 100644 --- a/mediadecoder/decoder.py +++ b/mediadecoder/decoder.py @@ -55,22 +55,26 @@ class Decoder(object): """ def __init__(self, mediafile=None, videorenderfunc=None, play_audio=True, - audio_fps=44100, audio_nbytes=2, audio_nchannels=2): + target_resolution=None, audio_fps=44100, audio_nbytes=2, + audio_nchannels=2): """ Constructor. Parameters ---------- mediafile : str, optional - The path to the mediafile to be loaded (default: None) - videorenderfunc : callable (default: None) - Callback function that takes care of the actual - Rendering of the videoframe.\ - The specified renderfunc should be able to accept the following - arguments: + The path to the mediafile to be loaded. 
+ videorenderfunc : callable, optional + Callback function that takes care of the actual rendering of the + videoframe. The specified function should be able to accept the + following arguments: - frame (numpy.ndarray): the videoframe to be rendered play_audio : bool, optional - Whether audio of the clip should be played. + Whether audio of the clip should be played (default=True). + target_resolution : (int, int), optional + To request a specific video resolution (width, height) in pixels. + If either dimension is None, the frames are resized by keeping the + existing aspect ratio. audio_fps : int, optional The requested sample rate of the audio stream (default=44100). audio_nbytes : int, optional @@ -78,7 +82,6 @@ def __init__(self, mediafile=None, videorenderfunc=None, play_audio=True, 2 for 16bit audio, 4 for 32bit audio (default=2). audio_nchannels : int, optional The number of channels to encode the audio with (default=2). - """ # Create an internal timer self._clock = Timer() @@ -86,8 +89,8 @@ def __init__(self, mediafile=None, videorenderfunc=None, play_audio=True, # Load a video file if specified, but allow users to do this later # by initializing all variables to None self.reset() - self.load_media(mediafile, play_audio, audio_fps, audio_nbytes, - audio_nchannels) + self.load_media(mediafile, play_audio, target_resolution, audio_fps, + audio_nbytes, audio_nchannels) # Set callback function if set self.set_videoframerender_callback(videorenderfunc) @@ -209,6 +212,12 @@ def audioformat(self): "buffersize": int(self.frame_interval * self.clip.audio.fps) } + @property + def resolution(self): + """Video resolution in pixels.""" + if self.clip is not None: + return self.clip.size + def reset(self): """Resets the player and discards loaded data.""" self._clip = None @@ -223,8 +232,8 @@ def reset(self): self._loop_count = 0 self._8bit_hack_applied = False - def load_media(self, mediafile, play_audio=True, audio_fps=44100, - audio_nbytes=2, audio_nchannels=2): + 
def load_media(self, mediafile, play_audio=True, target_resolution=None, + audio_fps=44100, audio_nbytes=2, audio_nchannels=2): """Loads a media file to decode. If an audiostream is detected, its parameters will be stored in a @@ -245,6 +254,10 @@ def load_media(self, mediafile, play_audio=True, audio_fps=44100, play_audio : bool, optional Indicates whether the audio of a movie should be played (default=True) + target_resolution : (int, int), optional + To request a specific video resolution (width, height) in pixels. + If either dimension is None, the frames are resized by keeping the + existing aspect ratio. audio_fps : int, optional The requested sample rate of the audio stream (default=44100). audio_nbytes : int, optional @@ -266,6 +279,7 @@ def load_media(self, mediafile, play_audio=True, audio_fps=44100, audio_nbytes = 2 self._play_audio = play_audio self.clip = VideoFileClip(mediafile, audio=play_audio, + target_resolution=target_resolution, audio_fps=audio_fps, audio_nbytes=audio_nbytes) if play_audio and audio_nchannels !=2: @@ -276,9 +290,10 @@ def load_media(self, mediafile, play_audio=True, audio_fps=44100, nchannels=audio_nchannels) self.clip.audio.nchannels=audio_nchannels else: - self.clip = VideoFileClip(mediafile, audio=play_audio, - audio_fps=audio_fps, - audio_nbytes=audio_nbytes) + self.clip = VideoFileClip( + mediafile, audio=play_audio, + target_resolution=target_resolution, + audio_fps=audio_fps, audio_nbytes=audio_nbytes) logger.debug("Loaded {0}".format(mediafile)) return True From 06ae6d476bedffa2c568ff556d219ae9134a010b Mon Sep 17 00:00:00 2001 From: Florian Krause Date: Tue, 18 Mar 2025 15:01:44 +0100 Subject: [PATCH 4/4] Update decoder.py Fix for redundant reloading of file --- mediadecoder/decoder.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/mediadecoder/decoder.py b/mediadecoder/decoder.py index db3dba0..9099302 100644 --- a/mediadecoder/decoder.py +++ b/mediadecoder/decoder.py @@ -289,11 +289,6 @@ def load_media(self, 
mediafile, play_audio=True, target_resolution=None, fps=audio_fps, nbytes=audio_nbytes, nchannels=audio_nchannels) self.clip.audio.nchannels=audio_nchannels - else: - self.clip = VideoFileClip( - mediafile, audio=play_audio, - target_resolution=target_resolution, - audio_fps=audio_fps, audio_nbytes=audio_nbytes) logger.debug("Loaded {0}".format(mediafile)) return True