diff --git a/mediadecoder/decoder.py b/mediadecoder/decoder.py index 37ed0ff..9099302 100644 --- a/mediadecoder/decoder.py +++ b/mediadecoder/decoder.py @@ -7,6 +7,8 @@ # MoviePy try: from moviepy.video.io.VideoFileClip import VideoFileClip + from moviepy.audio.io.AudioFileClip import AudioFileClip + from moviepy.audio.io.readers import FFMPEG_AudioReader import numpy as np except ImportError as e: try: @@ -52,46 +54,59 @@ class Decoder(object): be passed a callback function to which decoded video frames should be passed. """ - def __init__(self, mediafile=None, videorenderfunc=None, play_audio=True): + def __init__(self, mediafile=None, videorenderfunc=None, play_audio=True, + target_resolution=None, audio_fps=44100, audio_nbytes=2, + audio_nchannels=2): """ Constructor. Parameters ---------- mediafile : str, optional - The path to the mediafile to be loaded (default: None) - videorenderfunc : callable (default: None) - Callback function that takes care of the actual - Rendering of the videoframe.\ - The specified renderfunc should be able to accept the following - arguments: + The path to the mediafile to be loaded. + videorenderfunc : callable, optional + Callback function that takes care of the actual rendering of the + videoframe. The specified function should be able to accept the + following arguments: - frame (numpy.ndarray): the videoframe to be rendered play_audio : bool, optional - Whether audio of the clip should be played. + Whether audio of the clip should be played (default=True). + target_resolution : (int, int), optional + To request a specific video resolution (width, height) in pixels. + If either dimension is None, the frames are resized by keeping the + existing aspect ratio. + audio_fps : int, optional + The requested sample rate of the audio stream (default=44100). + audio_nbytes : int, optional + The number of bytes to encode the audio with: 1 for 8bit audio, + 2 for 16bit audio, 4 for 32bit audio (default=2). + audio_nchannels : int, optional + The number of channels to encode the audio with (default=2). """ # Create an internal timer - self.clock = Timer() + self._clock = Timer() # Load a video file if specified, but allow users to do this later # by initializing all variables to None - if not self.load_media(mediafile, play_audio): - self.reset() + self.reset() + self.load_media(mediafile, play_audio, target_resolution, audio_fps, + audio_nbytes, audio_nchannels) # Set callback function if set self.set_videoframerender_callback(videorenderfunc) # Store instance variables - self.play_audio = play_audio + self._play_audio = play_audio @property def frame_interval(self): - """Duration in seconds of a single frame.""" - return self.clock.frame_interval + """Duration in seconds of a single video frame.""" + return self._clock.frame_interval @property def current_frame_no(self): """Current frame_no of video.""" - return self.clock.current_frame + return self._clock.current_frame @property def current_videoframe(self): @@ -101,7 +116,7 @@ def current_videoframe(self): @property def current_playtime(self): """Clocks current runtime in seconds.""" - return self.clock.time + return self._clock.time @property def loop(self): @@ -122,20 +137,103 @@ def loop(self, value): raise TypeError("can only be True or False") self._loop = value + @property + def clip(self): + """Currently loaded media clip.""" + return self._clip + + @clip.setter + def clip(self, value): + """Currently loaded Moviepy VideoFileClip. + + Parameters + ---------- + + value : moviepy.video.io.VideoFileClip + the clip + + """ + + self._clip = value + + ## Timing variables + # Clip duration + self._clock.max_duration = self.clip.duration + logger.debug("Video clip duration: {}s".format(self.clip.duration)) + + # Frames per second of clip + self._clock.fps = self.clip.fps + logger.debug("Video clip FPS: {}".format(self.clip.fps)) + + if self.clip.audio: + logger.debug("Audio loaded: \n{}".format(self.audioformat)) + logger.debug( + "Creating audio buffer of length: {}".format(queue_length) + ) + self.audioqueue = Queue(queue_length) + + self._status = READY + + @property + def loaded_file(self): + """Name of loaded media file.""" + if self.clip is not None: + return os.path.split(self.clip.filename)[1] + + @property + def fps(self): + """Video frames per second.""" + if self.clip is not None: + return self.clip.fps + + @property + def duration(self): + """Total duration in seconds.""" + if self.clip is not None: + return self.clip.duration + + @property + def status(self): + """Decoder status.""" + return self._status + + @property + def audioformat(self): + """Audio stream parameters.""" + if self.clip is not None and self._play_audio and self.clip.audio: + if self._8bit_hack_applied: # see https://github.com/Zulko/moviepy/issues/2397 + nbytes = 1 + else: + nbytes = self.clip.audio.reader.nbytes + return { + "nbytes": nbytes, + "nchannels": self.clip.audio.nchannels, + "fps": self.clip.audio.fps, + "buffersize": int(self.frame_interval * self.clip.audio.fps) + } + + @property + def resolution(self): + """Video resolution in pixels.""" + if self.clip is not None: + return self.clip.size + def reset(self): """Resets the player and discards loaded data.""" - self.clip = None - self.loaded_file = None + self._clip = None + self._loaded_file = None - self.fps = None - self.duration = None + self._fps = None + self._duration = None - self.status = UNINITIALIZED - self.clock.reset() + self._status = UNINITIALIZED + self._clock.reset() - self.loop_count = 0 + self._loop_count = 0 + self._8bit_hack_applied = False - def load_media(self, mediafile, play_audio=True): + def load_media(self, mediafile, play_audio=True, target_resolution=None, + audio_fps=44100, audio_nbytes=2, audio_nchannels=2): """Loads a media file to decode. If an audiostream is detected, its parameters will be stored in a @@ -152,50 +250,47 @@ def load_media(self, mediafile, play_audio=True): Parameters ---------- mediafile : str - The path to the media file to load. + The path to the media file to load. play_audio : bool, optional - Indicates whether the audio of a movie should be played. + Indicates whether the audio of a movie should be played + (default=True) + target_resolution : (int, int), optional + To request a specific video resolution (width, height) in pixels. + If either dimension is None, the frames are resized by keeping the + existing aspect ratio. + audio_fps : int, optional + The requested sample rate of the audio stream (default=44100). + audio_nbytes : int, optional + The number of bytes to encode the audio with: 1 for 8bit audio, + 2 for 16bit audio, 4 for 32bit audio (default=2). + audio_nchannels : int, optional + The number of channels to encode the audio with (default=2). Raises ------ IOError - When the file could not be found or loaded. + When the file could not be found or loaded. """ + if not mediafile is None: if os.path.isfile(mediafile): - self.clip = VideoFileClip(mediafile, audio=play_audio) - - self.loaded_file = os.path.split(mediafile)[1] - - ## Timing variables - # Clip duration - self.duration = self.clip.duration - self.clock.max_duration = self.clip.duration - logger.debug("Video clip duration: {}s".format(self.duration)) - - # Frames per second of clip - self.fps = self.clip.fps - self.clock.fps = self.clip.fps - logger.debug("Video clip FPS: {}".format(self.fps)) - - if play_audio and self.clip.audio: - buffersize = int(self.frame_interval * self.clip.audio.fps) - self.audioformat = { - "nbytes": 2, - "nchannels": self.clip.audio.nchannels, - "fps": self.clip.audio.fps, - "buffersize": buffersize, - } - logger.debug("Audio loaded: \n{}".format(self.audioformat)) - logger.debug( - "Creating audio buffer of length: {}".format(queue_length) - ) - self.audioqueue = Queue(queue_length) - else: - self.audioformat = None + if audio_nbytes == 1: + self._8bit_hack_applied = True + audio_nbytes = 2 + self._play_audio = play_audio + self.clip = VideoFileClip(mediafile, audio=play_audio, + target_resolution=target_resolution, + audio_fps=audio_fps, + audio_nbytes=audio_nbytes) + if play_audio and audio_nchannels !=2: + # Run FFMPEG_AudioReader again to set nchannels + self.clip.audio.reader = FFMPEG_AudioReader( + mediafile, self.clip.audio.reader.buffersize, + fps=audio_fps, nbytes=audio_nbytes, + nchannels=audio_nchannels) + self.clip.audio.nchannels=audio_nchannels logger.debug("Loaded {0}".format(mediafile)) - self.status = READY return True else: raise IOError("File not found: {0}".format(mediafile)) @@ -269,7 +364,7 @@ def play(self): ### If all is in order start the general playing loop if self.status == READY: - self.status = PLAYING + self._status = PLAYING self.last_frame_no = 0 @@ -302,20 +397,20 @@ def pause(self): # Recalculate audio stream position to make sure it is not out of # sync with the video self.__calculate_audio_frames() - self.status = PLAYING - self.clock.pause() + self._status = PLAYING + self._clock.pause() elif self.status == PLAYING: - self.status = PAUSED - self.clock.pause() + self._status = PAUSED + self._clock.pause() def stop(self): """Stops the video stream and resets the clock.""" logger.debug("Stopping playback") # Stop the clock - self.clock.stop() - # Set plauyer status to ready - self.status = READY + self._clock.stop() + # Set player status to ready + self._status = READY def seek(self, value): """Seek to the specified time. @@ -335,10 +430,10 @@ def seek(self, value): # Pause the stream self.pause() # Make sure the movie starts at 1s as 0s gives trouble. - self.clock.time = max(0.5, value) + self._clock.time = max(0.5, value) logger.debug( "Seeking to {} seconds; frame {}".format( - self.clock.time, self.clock.current_frame + self._clock.time, self._clock.current_frame ) ) if self.audioformat: @@ -358,7 +453,7 @@ def __calculate_audio_frames(self): if self.audioformat is None: return - start_frame = self.clock.current_frame + start_frame = self._clock.current_frame totalsize = int(self.clip.audio.fps * self.clip.audio.duration) self.audio_times = list(range(0, totalsize, self.audioformat["buffersize"])) + [ totalsize @@ -377,24 +472,24 @@ def __render(self): self.__render_videoframe() # Start videoclock with start of this thread - self.clock.start() + self._clock.start() logger.debug("Started rendering loop.") # Main rendering loop while self.status in [PLAYING, PAUSED]: - current_frame_no = self.clock.current_frame + current_frame_no = self._clock.current_frame # Check if end of clip has been reached - if self.clock.time >= self.duration: - logger.debug("End of stream reached at {}".format(self.clock.time)) + if self._clock.time >= self.duration: + logger.debug("End of stream reached at {}".format(self._clock.time)) if self.loop: logger.debug("Looping: restarting stream") # Seek to the start self.rewind() - self.loop_count += 1 + self._loop_count += 1 else: # End of stream has been reached - self.status = EOS + self._status = EOS break if self.last_frame_no != current_frame_no: @@ -408,7 +503,7 @@ def __render(self): time.sleep(0.005) # Stop the clock. - self.clock.stop() + self._clock.stop() logger.debug("Rendering stopped.") def __render_videoframe(self): @@ -417,7 +512,7 @@ def __render_videoframe(self): Sets the frame as the __current_video_frame and passes it on to __videorenderfunc() if it is set.""" - new_videoframe = self.clip.get_frame(self.clock.time) + new_videoframe = self.clip.get_frame(self._clock.time) # Pass it to the callback function if this is set if callable(self.__videorenderfunc): self.__videorenderfunc(new_videoframe) @@ -430,6 +525,11 @@ def __audiorender_thread(self): new_audioframe = None logger.debug("Started audio rendering thread.") + if self._8bit_hack_applied: + nbytes = 1 + else: + nbytes = self.audioformat["nbytes"] + while self.status in [PLAYING, PAUSED]: # Retrieve audiochunk if self.status == PLAYING: @@ -453,8 +553,9 @@ def __audiorender_thread(self): # sure this doesn't crash the whole program. new_audioframe = self.clip.audio.to_soundarray( tt=chunk, - buffersize=self.frame_interval * self.clip.audio.fps, quantize=True, + nbytes=nbytes, + buffersize=self.frame_interval * self.clip.audio.fps, ) except OSError as e: logger.warning("Sound decoding error: {}".format(e)) diff --git a/mediadecoder/soundrenderers/pygamerenderer.py b/mediadecoder/soundrenderers/pygamerenderer.py index 8eb2335..ce65828 100644 --- a/mediadecoder/soundrenderers/pygamerenderer.py +++ b/mediadecoder/soundrenderers/pygamerenderer.py @@ -2,6 +2,7 @@ import threading import warnings + try: # Python 3 from queue import Queue, Empty @@ -17,7 +18,7 @@ class SoundrendererPygame(threading.Thread, SoundRenderer): """Uses pygame.mixer to play sound""" - def __init__(self, audioformat, queue=None): + def __init__(self, audioformat, queue=None, pygame_buffersize=None): """Constructor. Creates a pygame sound renderer using pygame.mixer. @@ -25,9 +26,12 @@ def __init__(self, audioformat, queue=None): ---------- audioformat : dict A dictionary containing the properties of the audiostream - queue : Queue.queue + queue : Queue.queue, optional A queue object which serves as a buffer on which the individual - audio frames are placed by the decoder. + audio frames are placed by the decoder (default=None). + pygame_buffersize : int, optional + The buffersize to be used in the Pygame mixer (default=None). + """ global pygame import pygame @@ -43,17 +47,31 @@ def __init__(self, audioformat, queue=None): fps = audioformat["fps"] nchannels = audioformat["nchannels"] - nbytes = audioformat["nbytes"] - buffersize = audioformat["buffersize"] + self._nbytes = nbytes = audioformat["nbytes"] + if pygame_buffersize: + buffersize = pygame_buffersize + else: + buffersize = audioformat["buffersize"] if pygame.mixer.get_init() is None: - pygame.mixer.init(fps, -8 * nbytes, nchannels, buffersize) + if nbytes in (1, 2): + fmt = -8 * nbytes + elif nbytes == 4: + fmt = 32 + pygame.mixer.init(fps, fmt, nchannels, buffersize) self._own_mixer = True else: self._own_mixer = False def run(self): """Main thread function.""" + global pygame + import pygame + + import numpy as np + + pygame_mixer_unsigned = pygame.mixer.get_init()[1] > 0 + if not hasattr(self, "queue"): raise RuntimeError("Audio queue is not intialized.") @@ -64,6 +82,21 @@ def run(self): if chunk is None: try: frame = self.queue.get(timeout=queue_timeout) + + # Moviepy only supports 8, 16 and 32 bit signed integer. + # Pygame also supports 8 and 16 bit unsigned integer, as + # well as 32 bit floating point. In case the Pygame mixer + # uses one of those formats, we need to convert each audio + # frame to that on the fly. + if pygame_mixer_unsigned: # signed int --> unsigned int + if self._nbytes == 1: + frame = frame.astype(np.uint8) + 128 + if self._nbytes == 2: + frame = frame.astype(np.uint16) + 32768 + if self._nbytes == 4: # signed int --> float + frame = (frame.astype(np.float32) / + np.iinfo(np.int32).max) + chunk = pygame.sndarray.make_sound(frame) except Empty: continue