diff --git a/forge.config.js b/forge.config.js
index 3e5bfc86..c261015e 100644
--- a/forge.config.js
+++ b/forge.config.js
@@ -12,7 +12,7 @@ module.exports = {
   packagerConfig: {
     name: "Clapper",
     asar: true,
-    icon: "./public/logos/CL.png",
+    icon: "./public/images/logos/CL.png",
     osxSign: {},
 
     // One or more files to be copied directly into the app's
@@ -62,7 +62,7 @@ module.exports = {
       name: '@electron-forge/maker-deb',
       config: {
         options: {
-          icon: './public/logos/CL.png'
+          icon: './public/images/logos/CL.png'
         }
       },
     },
@@ -70,7 +70,7 @@ module.exports = {
       name: '@electron-forge/maker-dmg',
       config: {
         options: {
-          icon: './public/logos/CL.icns'
+          icon: './public/images/logos/CL.icns'
         }
       },
     },
diff --git a/package.json b/package.json
index 291cdd2e..bb62ae79 100644
--- a/package.json
+++ b/package.json
@@ -14,9 +14,10 @@
   },
   "scripts": {
     "dev": "npm i && npm run checks && next dev",
-    "build": "npm i && npm run checks && rm -Rf out && next build && npm run build:copyassets",
+    "build": "npm i && npm run prepare && npm run checks && rm -Rf out && next build && npm run build:copyassets",
+    "build:ci": "rm -Rf out && npm run prepare && next build && npm run build:copyassets",
     "build:copyassets": "cp -R public .next/standalone/public && cp -R .next/static .next/standalone/.next/static",
-    "build:ci": "rm -Rf out && next build && npm run build:copyassets",
+    "prepare": "cp -R node_modules/mediainfo.js/dist/MediaInfoModule.wasm public/wasm/",
     "start": "next start",
     "start:prod": "node .next/standalone/server.js",
     "checks": "npm run format:fix && npm run lint",
diff --git a/public/wasm/MediaInfoModule.wasm b/public/wasm/MediaInfoModule.wasm
new file mode 100644
index 00000000..39abb23b
Binary files /dev/null and b/public/wasm/MediaInfoModule.wasm differ
diff --git a/src/lib/utils/base64DataUriToFile.ts b/src/lib/utils/base64DataUriToFile.ts
new file mode 100644
index 00000000..404a737b
--- /dev/null
+++ b/src/lib/utils/base64DataUriToFile.ts
@@ -0,0 +1,12 @@
+/** Decodes a base64 data URI ("data:<mime>;base64,<payload>") into a File. */
+export function base64DataUriToFile(dataUrl: string, fileName: string) {
+  const parts = dataUrl.split(',')
+  const header = `${parts[0] || ''}`
+  // MIME type is whatever sits between the first ':' and the next ';'
+  const mimeType = `${header.match(/:(.*?);/)?.[1] || ''}`
+  // Everything after the last comma is treated as the base64 payload
+  const decoded = atob(parts[parts.length - 1])
+  const bytes = new Uint8Array(decoded.length)
+  for (let idx = 0; idx < decoded.length; idx++) bytes[idx] = decoded.charCodeAt(idx)
+  return new File([bytes], fileName, { type: mimeType })
+}
\ No newline at end of file
diff --git a/src/services/io/createFullVideo.ts b/src/services/io/createFullVideo.ts
index b6afdf93..01647f71 100644
--- a/src/services/io/createFullVideo.ts
+++ b/src/services/io/createFullVideo.ts
@@ -1,481 +1,15 @@
 import { UUID } from '@aitube/clap'
-import { FFmpeg } from '@ffmpeg/ffmpeg'
-import { toBlobURL } from '@ffmpeg/util'
-
-const TAG = 'io/createFullVideo'
-
-export type FFMPegVideoInput = {
-  data: Uint8Array | null
-  startTimeInMs: number
-  endTimeInMs: number
-  durationInSecs: number
-}
-
-export type FFMPegAudioInput = FFMPegVideoInput
-
-/**
- * Download and load single and multi-threading FFMPeg.
- * MT for video
- * ST for audio (as MT has issues with it)
- * toBlobURL is used to bypass CORS issues, urls with the same domain can be used directly.
- */
-async function initializeFFmpeg() {
-  const [ffmpegSt, ffmpegMt] = [new FFmpeg(), new FFmpeg()]
-  const baseStURL = 'https://unpkg.com/@ffmpeg/core@0.12.6/dist/umd'
-  const baseMtURL = 'https://unpkg.com/@ffmpeg/core-mt@0.12.6/dist/umd'
-
-  ffmpegSt.on('log', ({ message }) => {
-    console.log(TAG, 'FFmpeg Single-Thread:', message)
-  })
-
-  ffmpegMt.on('log', ({ message }) => {
-    console.log(TAG, 'FFmpeg Multi-Thread:', message)
-  })
-
-  await ffmpegSt.load({
-    coreURL: await toBlobURL(`${baseStURL}/ffmpeg-core.js`, 'text/javascript'),
-    wasmURL: await toBlobURL(
-      `${baseStURL}/ffmpeg-core.wasm`,
-      'application/wasm'
-    ),
-  })
-
-  await ffmpegMt.load({
-    coreURL: await toBlobURL(`${baseMtURL}/ffmpeg-core.js`, 'text/javascript'),
-    wasmURL: await toBlobURL(
-      `${baseMtURL}/ffmpeg-core.wasm`,
-      'application/wasm'
-    ),
-    workerURL: await toBlobURL(
-      `${baseMtURL}/ffmpeg-core.worker.js`,
-      'text/javascript'
-    ),
-  })
-
-  return [ffmpegSt, ffmpegMt] as [FFmpeg, FFmpeg]
-}
-
-/**
- * Get loaded FFmpeg.
- */
-let ffmpegInstance: [FFmpeg, FFmpeg]
-export async function loadFFmpegSt() {
-  if (!ffmpegInstance) ffmpegInstance = await initializeFFmpeg()
-  return ffmpegInstance[0]
-}
-
-export async function loadFFmpegMt() {
-  if (!ffmpegInstance) ffmpegInstance = await initializeFFmpeg()
-  return ffmpegInstance[1]
-}
-
-/**
- * Creates an exclusive logger for the FFmpeg calls inside the provided method,
- * it calculates the progress based on raw FFmpeg logs and the provided `totalTimeInMs`.
- *
- * @param totalTimeInMs
- * @param method
- * @param callback
- * @param {number} callback.progress - The progress of the FFmpeg process from 0 to 100.
- * @returns
- */
-async function captureFFmpegProgress(
-  ffmpeg: FFmpeg,
-  totalTimeInMs: number,
-  method: () => any,
-  callback: (progress: number) => void
-): Promise<any> {
-  const extractProgressTimeMsFromLogs = (log: string): number | null => {
-    // `frame` for videos, `size` for audios
-    if (!log.startsWith('frame') && !log.startsWith('size')) return null
-    const timeRegex = /time=(\d{2}):(\d{2}):(\d{2})\.(\d{2})/
-    const match = log.match(timeRegex)
-    if (match) {
-      const hours = parseInt(match[1])
-      const minutes = parseInt(match[2])
-      const seconds = parseInt(match[3])
-      const centiseconds = parseInt(match[4])
-      const totalMilliseconds =
-        hours * 3600000 + minutes * 60000 + seconds * 1000 + centiseconds * 10
-      return totalMilliseconds
-    }
-    return null
-  }
-  let ffmpegLog = true
-  ffmpeg.on('log', ({ message }) => {
-    if (!ffmpegLog) return
-    const timeInMs = extractProgressTimeMsFromLogs(message)
-    if (timeInMs) callback((timeInMs / totalTimeInMs) * 100)
-  })
-  const result = await method()
-  ffmpegLog = false
-  return result
-}
-
-/**
- * It will calculate a proportional progress between a targetProgress and a startProgress
- *
- * @param startProgress e.g. 50
- * @param progress e.g. 50
- * @param targetProgress e.g. 70
- * @returns e.g. 60, because 50% of progress between 70% and 50%, would result on 60%
- */
-function calculateProgress(
-  startProgress: number,
-  progress: number,
-  targetProgress: number
-): number {
-  return startProgress + (progress * (targetProgress - startProgress)) / 100
-}
-
-/**
- * Creates an empty black video and appends it to the
- * provided `fileListContentArray`.
- *
- * @param duration time in milliseconds
- * @param width
- * @param height
- * @param filename
- * @param fileListContentArray fileList.txt where to append the file name
- * @param onProgress callback to capture the progress of this method
- */
-export async function addEmptyVideo(
-  durationInSecs: number,
-  width: number,
-  height: number,
-  filename: string,
-  fileListContentArray: string[],
-  onProgress?: (progress: number, message?: string) => void
-) {
-  const ffmpeg = await loadFFmpegMt()
-  let targetPartialProgress = 0
-
-  // For some reason, creating empty video with silent audio
-  // in one exec doesn't work, we need to split it.
-
-  console.log(
-    TAG,
-    'Creating empty video',
-    filename,
-    width,
-    height,
-    durationInSecs
-  )
-  let currentProgress = 0
-  targetPartialProgress = 50
-
-  await captureFFmpegProgress(
-    ffmpeg,
-    durationInSecs * 1000,
-    async () => {
-      await ffmpeg.exec([
-        '-f',
-        'lavfi',
-        '-i',
-        `color=c=black:s=${width}x${height}:d=${durationInSecs}`,
-        '-c:v',
-        'libx264',
-        '-t',
-        `${durationInSecs}`,
-        '-loglevel',
-        'verbose',
-        `base_${filename}`,
-      ])
-    },
-    (progress) => {
-      onProgress?.((progress / 100) * targetPartialProgress)
-    }
-  )
-
-  console.log(
-    TAG,
-    'Adding silent audio to empty video',
-    filename,
-    width,
-    height,
-    durationInSecs
-  )
-  currentProgress = 50
-  targetPartialProgress = 100
-
-  const exitCode = await ffmpeg.exec([
-    '-i',
-    `base_${filename}`,
-    '-f',
-    'lavfi',
-    '-i',
-    'anullsrc',
-    '-c:v',
-    'copy',
-    '-c:a',
-    'aac',
-    '-t',
-    `${durationInSecs}`,
-    '-loglevel',
-    'verbose',
-    filename,
-  ])
-
-  if (exitCode) {
-    throw new Error(`${TAG}: Unexpect error while creating empty video`)
-  }
-
-  console.log(TAG, 'Empty video created', filename)
-  fileListContentArray.push(`file ${filename}`)
-}
-
-/**
- * Creates the full mixed audio including silence
- * segments and loads it into ffmpeg with the given `filename`.
- * @param onProgress callback to capture the progress of this method
- * @throws Error if ffmpeg returns exit code 1
- */
-export async function createFullAudio(
-  audios: FFMPegAudioInput[],
-  filename: string,
-  totalVideoDurationInMs: number,
-  onProgress?: (progress: number, message: string) => void
-): Promise<void> {
-  console.log(TAG, 'Creating full audio', filename)
-
-  const ffmpeg = await loadFFmpegSt()
-  const filterComplexParts = []
-  const baseFilename = `base_${filename}`
-  let currentProgress = 0
-  let targetProgress = 25
-
-  // To mix audios at given times, we need a first empty base audio track
-
-  await captureFFmpegProgress(
-    ffmpeg,
-    totalVideoDurationInMs,
-    async () => {
-      await ffmpeg.exec([
-        '-f',
-        'lavfi',
-        '-i',
-        'anullsrc',
-        '-t',
-        `${totalVideoDurationInMs / 1000}`,
-        '-loglevel',
-        'verbose',
-        !audios.length ? filename : baseFilename,
-      ])
-    },
-    (progress) => {
-      onProgress?.(
-        calculateProgress(currentProgress, progress, targetProgress),
-        'Creating base audio...'
-      )
-    }
-  )
-
-  // If there is no audios, the base audio is the final one
-  if (!audios.length) return onProgress?.(100, 'Prepared audios...')
-
-  currentProgress = targetProgress
-  targetProgress = 50
-
-  // Mix audios based on their start times
-
-  const audioInputFiles = ['-i', baseFilename]
-  for (let index = 0; index < audios.length; index++) {
-    onProgress?.(currentProgress, 'Creating base audio...')
-    console.log(TAG, `Processing audio #${index}`)
-    const audio = audios[index]
-    const expectedProgressForItem = ((1 / audios.length) * targetProgress) / 100
-    if (!audio.data) continue
-    const audioFilename = `audio_${UUID()}.mp3`
-    await ffmpeg.writeFile(audioFilename, audio.data)
-    audioInputFiles.push('-i', audioFilename)
-    const delay = audio.startTimeInMs
-    const durationInSecs = audio.endTimeInMs - audio.startTimeInMs / 1000
-    filterComplexParts.push(
-      `[${index + 1}:a]atrim=0:${durationInSecs},adelay=${delay}|${delay}[delayed${index}]`
-    )
-    currentProgress += expectedProgressForItem * 100
-  }
-
-  const amixInputs = `[0:a]${audios.map((_, index) => `[delayed${index}]`).join('')}amix=inputs=${audios.length + 1}:duration=longest`
-  filterComplexParts.push(`${amixInputs}[a]`)
-  const filterComplex = filterComplexParts.join('; ')
-
-  currentProgress = targetProgress
-  targetProgress = 100
-
-  const createFullAudioExitCode = await captureFFmpegProgress(
-    ffmpeg,
-    totalVideoDurationInMs,
-    async () => {
-      await ffmpeg.exec([
-        ...audioInputFiles,
-        '-filter_complex',
-        filterComplex,
-        '-map',
-        '[a]',
-        '-t',
-        `${totalVideoDurationInMs / 1000}`,
-        '-loglevel',
-        'verbose',
-        filename,
-      ])
-    },
-    (progress) => {
-      onProgress?.(
-        calculateProgress(currentProgress, progress, targetProgress),
-        'Mixing audios...'
-      )
-    }
-  )
-
-  if (createFullAudioExitCode) {
-    throw new Error(`${TAG}: Error while creating full audio!`)
-  }
-  onProgress?.(targetProgress, 'Prepared audios...')
-}
-
-/**
- * Creates the full silent video including empty black
- * segments and loads it into ffmpeg with the given `filename`.
- * @param onProgress callback to capture the progress of this method
- * @throws Error if ffmpeg returns exit code 1
- */
-export async function createFullSilentVideo(
-  videos: FFMPegVideoInput[],
-  filename: string,
-  totalVideoDurationInMs: number,
-  width: number,
-  height: number,
-  excludeEmptyContent = false,
-  onProgress?: (progress: number, message: string) => void
-) {
-  const ffmpeg = await loadFFmpegMt()
-  const fileList = 'fileList.txt'
-  const fileListContentArray = []
-
-  // Complete array of videos including concatenated empty segments
-  // This is helpful for cleaner progress log
-  let lastStartTimeVideoInMs = 0
-  let videosWithGaps: FFMPegVideoInput[]
-
-  if (!videos.length) {
-    videosWithGaps = [
-      {
-        startTimeInMs: 0,
-        endTimeInMs: totalVideoDurationInMs,
-        data: null,
-        durationInSecs: totalVideoDurationInMs / 1000,
-      },
-    ]
-  } else {
-    videosWithGaps = videos.reduce((arr: FFMPegVideoInput[], video, index) => {
-      const emptyVideoDurationInMs =
-        video.startTimeInMs - lastStartTimeVideoInMs
-      if (emptyVideoDurationInMs) {
-        arr.push({
-          startTimeInMs: lastStartTimeVideoInMs,
-          endTimeInMs: lastStartTimeVideoInMs + emptyVideoDurationInMs,
-          data: null,
-          durationInSecs: emptyVideoDurationInMs / 1000,
-        })
-      }
-      arr.push(video)
-      lastStartTimeVideoInMs = video.endTimeInMs
-      if (
-        index == videos.length - 1 &&
-        lastStartTimeVideoInMs < totalVideoDurationInMs
-      ) {
-        arr.push({
-          startTimeInMs: lastStartTimeVideoInMs,
-          endTimeInMs: totalVideoDurationInMs,
-          data: null,
-          durationInSecs:
-            (totalVideoDurationInMs - lastStartTimeVideoInMs) / 1000,
-        })
-      }
-      return arr
-    }, [])
-  }
-
-  onProgress?.(0, 'Preparing videos...')
-
-  // Arbitrary percentage, as `concat` is fast,
-  // then estimate the generation of gap videos
-  // as the 70% of the work
-  let currentProgress = 0
-  let targetProgress = 70
-
-  for (const video of videosWithGaps) {
-    const expectedProgressForItem =
-      (((video.durationInSecs * 1000) / totalVideoDurationInMs) *
-        targetProgress) /
-      100
-    if (!video.data) {
-      if (excludeEmptyContent) continue
-      let collectedProgress = 0
-      await addEmptyVideo(
-        video.durationInSecs,
-        width,
-        height,
-        `empty_video_${UUID()}.mp4`,
-        fileListContentArray,
-        (progress) => {
-          const subProgress = progress / 100
-          currentProgress +=
-            (expectedProgressForItem * subProgress - collectedProgress) * 100
-          console.log(TAG, 'Current progress', currentProgress)
-          onProgress?.(currentProgress, 'Preparing videos...')
-          collectedProgress = expectedProgressForItem * subProgress
-        }
-      )
-    } else {
-      const videoFilename = `video_${UUID()}.mp4`
-      await ffmpeg.writeFile(videoFilename, video.data)
-      fileListContentArray.push(`file ${videoFilename}`)
-      currentProgress += expectedProgressForItem * 100
-      console.log(TAG, 'Current progress', currentProgress)
-      onProgress?.(currentProgress, 'Preparing videos...')
-    }
-  }
-
-  onProgress?.(targetProgress, 'Concatenating videos...')
-  currentProgress = 70
-  targetProgress = 100
-
-  const fileListContent = fileListContentArray.join('\n')
-  await ffmpeg.writeFile(fileList, fileListContent)
-
-  const creatBaseFullVideoExitCode = await captureFFmpegProgress(
-    ffmpeg,
-    totalVideoDurationInMs,
-    async () => {
-      await ffmpeg.exec([
-        '-f',
-        'concat',
-        '-safe',
-        '0',
-        '-i',
-        fileList,
-        '-loglevel',
-        'verbose',
-        '-c',
-        'copy',
-        filename,
-      ])
-    },
-    (progress: number) => {
-      onProgress?.(
-        calculateProgress(currentProgress, progress, targetProgress),
-        'Merging audio and video...'
-      )
-    }
-  )
-
-  if (creatBaseFullVideoExitCode) {
-    throw new Error(`${TAG}: Error while creating base full video!`)
-  }
-  onProgress?.(targetProgress, 'Concatenating videos...')
-}
+import {
+  calculateProgress,
+  captureFFmpegProgress,
+  createFullAudio,
+  createFullSilentVideo,
+  FFMPegAudioInput,
+  FFMPegVideoInput,
+  loadFFmpegMt,
+  loadFFmpegSt,
+  TAG,
+} from './ffmpegUtils'
 
 /**
  * Creates full video with audio using `@ffmpeg/ffmpeg` multi-core,
diff --git a/src/services/io/extractCaptionFromFrame.ts b/src/services/io/extractCaptionFromFrameMoondream.ts
similarity index 92%
rename from src/services/io/extractCaptionFromFrame.ts
rename to src/services/io/extractCaptionFromFrameMoondream.ts
index 014124d0..d25eb23a 100644
--- a/src/services/io/extractCaptionFromFrame.ts
+++ b/src/services/io/extractCaptionFromFrameMoondream.ts
@@ -5,7 +5,7 @@ import {
   RawImage,
 } from '@xenova/transformers'
 
-export async function extractCaptionFromFrame(
+export async function extractCaptionFromFrameMoondream(
   imageInBase64DataUri: string
 ): Promise<string> {
   if (!(navigator as any).gpu) {
@@ -16,7 +16,8 @@ export async function extractCaptionFromFrame(
 2. You need to enable WebGPU (depends on your browser, see below)
 
 2.1 For Chrome: Perform the following operations in the Chrome / Microsoft Edge address bar
-The chrome://flags/#enable-unsafe-webgpu flag must be enabled (not enable-webgpu-developer-features). Linux experimental support also requires launching the browser with --enable-features=Vulkan.
+The chrome://flags/#enable-unsafe-webgpu flag must be enabled (not enable-webgpu-developer-features).
+Linux experimental support also requires launching the browser with --enable-features=Vulkan.
 
 2.2 For Safari 18 (macOS 15): WebGPU is enabled by default
 
diff --git a/src/services/io/extractCaptionsFromFrames.ts b/src/services/io/extractCaptionsFromFrames.ts
new file mode 100644
index 00000000..575ba547
--- /dev/null
+++ b/src/services/io/extractCaptionsFromFrames.ts
@@ -0,0 +1,92 @@
+import {
+  AutoProcessor,
+  AutoTokenizer,
+  Florence2ForConditionalGeneration,
+  RawImage,
+} from '@xenova/transformers'
+
+/**
+ * Captions video frames with Florence-2 (transformers.js); WebGPU is required.
+ *
+ * @param images frames as base64 data URIs, captioned one at a time, in order
+ * @param onProgress receives (progress, storyboardIndex, nbStoryboards):
+ *   progress goes 0 -> 15 while the model loads and ends near 90 after the last
+ *   frame; storyboardIndex is the 1-based count of frames captioned so far
+ * @returns one caption per input image, in input order
+ * @throws Error when `navigator.gpu` is not available
+ */
+export async function extractCaptionsFromFrames(
+  images: string[] = [],
+  onProgress: (progress: number, storyboardIndex: number, nbStoryboards: number) => void
+): Promise<string[]> {
+  if (!(navigator as any).gpu) {
+    throw new Error(`Please enable WebGPU to analyze video frames:
+    
+1. You need a modern browser such as Google Chrome 113+, Microsoft Edge 113+, Safari 18 (macOS 15), Firefox Nightly
+
+2. You need to enable WebGPU (depends on your browser, see below)
+
+2.1 For Chrome: Perform the following operations in the Chrome / Microsoft Edge address bar
+The chrome://flags/#enable-unsafe-webgpu flag must be enabled (not enable-webgpu-developer-features).
+Linux experimental support also requires launching the browser with --enable-features=Vulkan.
+
+2.2 For Safari 18 (macOS 15): WebGPU is enabled by default
+
+2.3 For Firefox Nightly: Type about:config in the address bar and set 'dom.webgpu.enabled' to true
+`)
+  }
+
+  let progress = 0
+  onProgress(progress, 0, images.length)
+
+  // for code example, see:
+  // https://github.com/xenova/transformers.js/pull/545#issuecomment-2183625876
+  const model_id = 'onnx-community/Florence-2-base-ft'
+  const model = await Florence2ForConditionalGeneration.from_pretrained(
+    model_id,
+    { dtype: 'fp32' }
+  )
+  onProgress((progress = 5), 0, images.length)
+
+  const processor = await AutoProcessor.from_pretrained(model_id)
+  onProgress((progress = 10), 0, images.length)
+
+  const tokenizer = await AutoTokenizer.from_pretrained(model_id)
+  onProgress((progress = 15), 0, images.length)
+
+  // not all prompts will work properly, see the official examples:
+  // https://huggingface.co/microsoft/Florence-2-base-ft/blob/e7a5acc73559546de6e12ec0319cd7cc1fa2437c/processing_florence2.py#L115-L117
+  const prompts = 'Describe with a paragraph what is shown in the image.'
+  const text_inputs = tokenizer(prompts)
+
+  // Captioning covers 75 progress points (15 -> 90), split evenly per frame.
+  const progressPerImage = images.length ? 75 / images.length : 0
+  const captions: string[] = []
+  for (const imageInBase64DataUri of images) {
+    console.log('analyzing image:', imageInBase64DataUri.slice(0, 64))
+    // Prepare vision inputs
+    const image = await RawImage.fromURL(imageInBase64DataUri)
+    const vision_inputs = await processor(image)
+    console.log(' - generating caption..')
+    // Generate text
+    const generated_ids = await model.generate({
+      ...text_inputs,
+      ...vision_inputs,
+      max_new_tokens: 100,
+    })
+
+    // Decode generated text
+    const generated_text = tokenizer.batch_decode(generated_ids, {
+      skip_special_tokens: true,
+    })
+
+    const caption = `${generated_text[0] || ''}`
+    console.log(' - caption:', caption)
+    captions.push(caption)
+
+    // captions.length is the 1-based index of the frame that just finished
+    progress += progressPerImage
+    onProgress(progress, captions.length, images.length)
+  }
+  return captions
+}
diff --git a/src/services/io/extractFramesFromVideo.ts b/src/services/io/extractFramesFromVideo.ts
index 78f22875..528ae7db 100644
--- a/src/services/io/extractFramesFromVideo.ts
+++ b/src/services/io/extractFramesFromVideo.ts
@@ -1,3 +1,5 @@
+'use client'
+
 import { FFmpeg } from '@ffmpeg/ffmpeg'
 import { toBlobURL } from '@ffmpeg/util'
 import mediaInfoFactory, {
@@ -17,14 +19,20 @@ interface FrameExtractorOptions {
   maxHeight: number
   sceneSamplingRate: number // Percentage of additional frames between scene changes (0-100)
   onProgress?: (progress: number) => void // Callback function for progress updates
+  debug?: boolean
 }
 
-async function extractFramesFromVideo(
+export async function extractFramesFromVideo(
   videoBlob: Blob,
   options: FrameExtractorOptions
 ): Promise<string[]> {
   // Initialize MediaInfo
-  const mediaInfo = await mediaInfoFactory({ format: 'object' })
+  const mediaInfo = await mediaInfoFactory({
+    format: 'object',
+    locateFile: () => {
+      return '/wasm/MediaInfoModule.wasm'
+    },
+  })
 
   // Get video duration using MediaInfo
   const getSize = () => videoBlob.size
@@ -42,19 +50,33 @@ async function extractFramesFromVideo(
       reader.readAsArrayBuffer(videoBlob.slice(offset, offset + chunkSize))
     })
 
+  if (options.debug) {
+    console.log('calling await mediaInfo.analyzeData(getSize, readChunk)')
+  }
+
   const result = await mediaInfo.analyzeData(getSize, readChunk)
+  if (options.debug) {
+    console.log('result = ', result)
+  }
 
   let duration: number = 0
 
   for (const track of result.media?.track || []) {
-    ///  '@type': "General" | "Video" | "Audio" | "Text" | "Image" | "Menu" | "Other"
+    if (options.debug) {
+      console.log('track = ', track)
+    }
+
     let maybeDuration: number = 0
     if (track['@type'] === 'Audio') {
       const audioTrack = track as AudioTrack
-      maybeDuration = audioTrack.Duration || 0
+      maybeDuration = audioTrack.Duration
+        ? parseFloat(`${audioTrack.Duration || 0}`)
+        : 0
     } else if (track['@type'] === 'Video') {
       const videoTrack = track as VideoTrack
-      maybeDuration = videoTrack.Duration || 0
+      maybeDuration = videoTrack.Duration
+        ? parseFloat(`${videoTrack.Duration || 0}`)
+        : 0
     }
     if (
       typeof maybeDuration === 'number' &&
@@ -69,6 +91,10 @@ async function extractFramesFromVideo(
     throw new Error('Could not determine video duration (or it is length 0)')
   }
 
+  if (options.debug) {
+    console.log('duration in seconds:', duration)
+  }
+
   // Initialize FFmpeg
   const ffmpeg = new FFmpeg()
   const baseURL = 'https://unpkg.com/@ffmpeg/core@0.12.6/dist/umd'
@@ -78,17 +104,26 @@ async function extractFramesFromVideo(
     wasmURL: await toBlobURL(`${baseURL}/ffmpeg-core.wasm`, 'application/wasm'),
   })
 
+  if (options.debug) {
+    console.log('FFmpeg loaded!')
+  }
+
   // Write video file to FFmpeg's file system
   const videoUint8Array = new Uint8Array(await videoBlob.arrayBuffer())
   await ffmpeg.writeFile('input.mp4', videoUint8Array)
-
+  if (options.debug) {
+    console.log('input.mp4 written!')
+  }
   // Prepare FFmpeg command
   const sceneFilter = `select='gt(scene,0.4)'`
   const additionalFramesFilter = `select='not(mod(n,${Math.floor(100 / options.sceneSamplingRate)}))'`
-  const scaleFilter = `scale=iw*min(${options.maxWidth}/iw\,${options.maxHeight}/ih):ih*min(${options.maxWidth}/iw\,${options.maxHeight}/ih)`
+  const scaleFilter = `scale='min(${options.maxWidth},iw)':'min(${options.maxHeight},ih)':force_original_aspect_ratio=decrease` // cap at maxWidth x maxHeight, never upscaling
 
   let lastProgress = 0
   ffmpeg.on('log', ({ message }) => {
+    if (options.debug) {
+      console.log('FFmpeg log:', message)
+    }
     const timeMatch = message.match(/time=(\d{2}):(\d{2}):(\d{2}\.\d{2})/)
     if (timeMatch) {
       const [, hours, minutes, seconds] = timeMatch
@@ -102,40 +137,82 @@ async function extractFramesFromVideo(
     }
   })
 
-  await ffmpeg.exec([
+  const ffmpegCommand = [
     '-i',
     'input.mp4',
+    '-loglevel',
+    'verbose',
     '-vf',
     `${sceneFilter},${additionalFramesFilter},${scaleFilter}`,
     '-vsync',
-    '0',
+    '2',
     '-q:v',
     '2',
+    '-f',
+    'image2',
+    '-frames:v',
+    '1000', // Limit the number of frames to extract
     `frames_%03d.${options.format}`,
-  ])
+  ]
+
+  if (options.debug) {
+    console.log('Executing FFmpeg command:', ffmpegCommand.join(' '))
+  }
+
+  try {
+    await ffmpeg.exec(ffmpegCommand)
+  } catch (error) {
+    console.error('FFmpeg execution error:', error)
+    throw error
+  }
 
   // Read generated frames
   const files = await ffmpeg.listDir('/')
+  if (options.debug) {
+    console.log('All files in FFmpeg filesystem:', files)
+  }
   const frameFiles = files.filter(
     (file) =>
       file.name.startsWith('frames_') &&
       file.name.endsWith(`.${options.format}`)
   )
+  if (options.debug) {
+    console.log('Frame files found:', frameFiles.length)
+  }
 
   const frames: string[] = []
+  const encoder = new TextEncoder()
+
   for (let i = 0; i < frameFiles.length; i++) {
     const file = frameFiles[i]
-    const frameData = await ffmpeg.readFile(file.name)
-    const base64Frame = btoa(
-      String.fromCharCode.apply(null, frameData as unknown as number[])
-    )
-    frames.push(`data:image/${options.format};base64,${base64Frame}`)
-
-    // Update progress for frame processing (from 90% to 100%)
-    options.onProgress?.(90 + Math.round(((i + 1) / frameFiles.length) * 10))
+    if (options.debug) {
+      console.log(`Processing frame file: ${file.name}`)
+    }
+    try {
+      const frameData = await ffmpeg.readFile(file.name)
+
+      // Build a binary string byte by byte (instead of one String.fromCharCode.apply call), then Base64-encode it
+      let binary = ''
+      const bytes = new Uint8Array(frameData as any)
+      const len = bytes.byteLength
+      for (let i = 0; i < len; i++) {
+        binary += String.fromCharCode(bytes[i])
+      }
+      const base64Frame = window.btoa(binary)
+
+      frames.push(`data:image/${options.format};base64,${base64Frame}`)
+
+      // Update progress for frame processing (from 90% to 100%)
+      options.onProgress?.(90 + Math.round(((i + 1) / frameFiles.length) * 10))
+    } catch (error) {
+      console.error(`Error processing frame ${file.name}:`, error)
+      // You can choose to either skip this frame or throw an error
+      // throw error; // Uncomment this line if you want to stop processing on any error
+    }
   }
 
+  if (options.debug) {
+    console.log(`Total frames processed: ${frames.length}`)
+  }
   return frames
 }
-
-export default extractFramesFromVideo
diff --git a/src/services/io/ffmpegUtils.ts b/src/services/io/ffmpegUtils.ts
new file mode 100644
index 00000000..40b85972
--- /dev/null
+++ b/src/services/io/ffmpegUtils.ts
@@ -0,0 +1,478 @@
+import { UUID } from '@aitube/clap'
+import { FFmpeg } from '@ffmpeg/ffmpeg'
+import { toBlobURL } from '@ffmpeg/util'
+
+export const TAG = 'io/createFullVideo' // prefix of the logs and errors emitted by the helpers below
+
+export type FFMPegVideoInput = {
+  data: Uint8Array | null // null when the entry carries no media, e.g. a gap to be generated
+  startTimeInMs: number
+  endTimeInMs: number
+  durationInSecs: number
+}
+
+export type FFMPegAudioInput = FFMPegVideoInput // audio inputs share the shape of video inputs
+
+/**
+ * Builds the two FFmpeg instances used by this module and loads their cores from unpkg.
+ * The multi-threaded (MT) core is meant for video,
+ * the single-threaded (ST) one for audio (as MT has issues with it).
+ * Assets go through toBlobURL to bypass CORS issues; same-domain urls could be used directly.
+ */
+export async function initializeFFmpeg() {
+  const singleThread = new FFmpeg()
+  const multiThread = new FFmpeg()
+  const stCdn = 'https://unpkg.com/@ffmpeg/core@0.12.6/dist/umd'
+  const mtCdn = 'https://unpkg.com/@ffmpeg/core-mt@0.12.6/dist/umd'
+
+  // Forward every FFmpeg log line to the console, labelled per instance
+  singleThread.on('log', (event) => {
+    console.log(TAG, 'FFmpeg Single-Thread:', event.message)
+  })
+  multiThread.on('log', (event) => {
+    console.log(TAG, 'FFmpeg Multi-Thread:', event.message)
+  })
+
+  // ST core: script first, then the wasm binary
+  const stCoreURL = await toBlobURL(`${stCdn}/ffmpeg-core.js`, 'text/javascript')
+  const stWasmURL = await toBlobURL(`${stCdn}/ffmpeg-core.wasm`, 'application/wasm')
+  await singleThread.load({ coreURL: stCoreURL, wasmURL: stWasmURL })
+
+  // MT core: same two assets plus the worker script
+  const mtCoreURL = await toBlobURL(`${mtCdn}/ffmpeg-core.js`, 'text/javascript')
+  const mtWasmURL = await toBlobURL(`${mtCdn}/ffmpeg-core.wasm`, 'application/wasm')
+  const mtWorkerURL = await toBlobURL(
+    `${mtCdn}/ffmpeg-core.worker.js`,
+    'text/javascript'
+  )
+  await multiThread.load({
+    coreURL: mtCoreURL,
+    wasmURL: mtWasmURL,
+    workerURL: mtWorkerURL,
+  })
+
+  const loaded: [FFmpeg, FFmpeg] = [singleThread, multiThread]
+  return loaded
+}
+
+/** In-flight FFmpeg initialization, shared by concurrent callers and dropped on failure so the next call retries. */
+let ffmpegInstance: Promise<[FFmpeg, FFmpeg]> | undefined
+function loadFFmpegPair(): Promise<[FFmpeg, FFmpeg]> {
+  if (!ffmpegInstance) {
+    ffmpegInstance = initializeFFmpeg().catch((error: unknown) => {
+      ffmpegInstance = undefined
+      throw error
+    })
+  }
+  return ffmpegInstance
+}
+export async function loadFFmpegSt() { return (await loadFFmpegPair())[0] } // single-thread instance
+export async function loadFFmpegMt() { return (await loadFFmpegPair())[1] } // multi-thread instance
+
+/**
+ * Runs `method` while listening to the logs of `ffmpeg`, and reports the progress
+ * computed from their `time=HH:MM:SS.cc` field and the provided `totalTimeInMs`.
+ *
+ * @param ffmpeg instance whose `log` events are observed until `method` settles
+ * @param totalTimeInMs duration that corresponds to a progress of 100
+ * @param method function performing the FFmpeg calls; its result is awaited and returned
+ * @param callback receives the progress as a percentage of `totalTimeInMs`
+ * @returns the value `method` resolves to
+ */
+export async function captureFFmpegProgress(
+  ffmpeg: FFmpeg,
+  totalTimeInMs: number,
+  method: () => any,
+  callback: (progress: number) => void
+): Promise<any> {
+  const extractProgressTimeMsFromLogs = (log: string): number | null => {
+    // `frame` for videos, `size` for audios
+    if (!log.startsWith('frame') && !log.startsWith('size')) return null
+    const match = log.match(/time=(\d{2}):(\d{2}):(\d{2})\.(\d{2})/)
+    if (!match) return null
+    const [hours, minutes, seconds, centiseconds] = match
+      .slice(1)
+      .map((part) => parseInt(part, 10))
+    return (
+      hours * 3600000 + minutes * 60000 + seconds * 1000 + centiseconds * 10
+    )
+  }
+  const onLog = ({ message }: { message: string }) => {
+    const timeInMs = extractProgressTimeMsFromLogs(message)
+    // Truthiness check: lines without a time, or with a time of 0 ms, are not reported
+    if (timeInMs) callback((timeInMs / totalTimeInMs) * 100)
+  }
+  ffmpeg.on('log', onLog)
+  try {
+    return await method()
+  } finally {
+    // Detach even when `method` throws, otherwise listeners pile up on the
+    // shared instance and keep feeding stale callbacks during later runs.
+    ffmpeg.off('log', onLog)
+  }
+}
+
+/**
+ * Linearly maps a sub-task's own `progress` (0-100) onto the range that runs from `startProgress` to `targetProgress`.
+ * @param startProgress overall progress when the sub-task begins, e.g. 50
+ * @param progress completion of the sub-task itself, e.g. 50
+ * @param targetProgress overall progress when the sub-task ends, e.g. 70
+ * @returns e.g. 60, which is half of the way from 50 to 70
+ */
+export function calculateProgress(
+  startProgress: number,
+  progress: number,
+  targetProgress: number
+): number {
+  const span = targetProgress - startProgress
+  return startProgress + (progress * span) / 100
+}
+
+/**
+ * Creates an empty black video with a silent audio track and appends it to the
+ * provided `fileListContentArray`.
+ *
+ * @param durationInSecs duration of the generated video, in seconds
+ * @param width frame width given to the `color` source
+ * @param height frame height given to the `color` source
+ * @param filename name of the generated file; `base_<filename>` holds the intermediate video-only file
+ * @param fileListContentArray fileList.txt where to append the file name
+ * @param onProgress callback to capture the progress of this method (up to 50 while encoding, 100 when done)
+ * @throws Error if FFmpeg returns a non-zero exit code for either step
+ */
+export async function addEmptyVideo(
+  durationInSecs: number,
+  width: number,
+  height: number,
+  filename: string,
+  fileListContentArray: string[],
+  onProgress?: (progress: number, message?: string) => void
+) {
+  const ffmpeg = await loadFFmpegMt()
+  const baseFilename = `base_${filename}`
+  const failure = `${TAG}: Unexpect error while creating empty video`
+  // Share of the reported progress assigned to the video-only encode
+  const videoStepProgress = 50
+
+  // For some reason, creating empty video with silent audio
+  // in one exec doesn't work, we need to split it.
+
+  console.log(
+    TAG,
+    'Creating empty video',
+    filename,
+    width,
+    height,
+    durationInSecs
+  )
+  // The exit code has to be returned by the wrapped call, otherwise
+  // `captureFFmpegProgress` resolves to undefined and failures go unnoticed.
+  const videoExitCode = await captureFFmpegProgress(
+    ffmpeg,
+    durationInSecs * 1000,
+    () =>
+      ffmpeg.exec([
+        '-f',
+        'lavfi',
+        '-i',
+        `color=c=black:s=${width}x${height}:d=${durationInSecs}`,
+        '-c:v',
+        'libx264',
+        '-t',
+        `${durationInSecs}`,
+        '-loglevel',
+        'verbose',
+        baseFilename,
+      ]),
+    (progress) => {
+      onProgress?.((progress / 100) * videoStepProgress)
+    }
+  )
+  if (videoExitCode) throw new Error(failure)
+
+  console.log(
+    TAG,
+    'Adding silent audio to empty video',
+    filename,
+    width,
+    height,
+    durationInSecs
+  )
+
+  // The video stream is copied as is, only the silent audio is encoded
+  const exitCode = await ffmpeg.exec([
+    '-i',
+    baseFilename,
+    '-f',
+    'lavfi',
+    '-i',
+    'anullsrc',
+    '-c:v',
+    'copy',
+    '-c:a',
+    'aac',
+    '-t',
+    `${durationInSecs}`,
+    '-loglevel',
+    'verbose',
+    filename,
+  ])
+  if (exitCode) throw new Error(failure)
+
+  console.log(TAG, 'Empty video created', filename)
+  fileListContentArray.push(`file ${filename}`)
+  onProgress?.(100)
+}
+
+/**
+ * Creates the full mixed audio including silence
+ * segments and loads it into ffmpeg with the given `filename`.
+ * @param audios clips mixed over a silent base track; entries without `data` are skipped
+ * @param filename name of the output; the base track is stored as `base_<filename>`
+ * @param totalVideoDurationInMs duration of the generated audio, in milliseconds
+ * @param onProgress callback to capture the progress of this method
+ * @throws Error if ffmpeg returns a non-zero exit code
+ */
+export async function createFullAudio(
+  audios: FFMPegAudioInput[],
+  filename: string,
+  totalVideoDurationInMs: number,
+  onProgress?: (progress: number, message: string) => void
+): Promise<void> {
+  console.log(TAG, 'Creating full audio', filename)
+
+  const ffmpeg = await loadFFmpegSt()
+  const filterComplexParts: string[] = []
+  const baseFilename = `base_${filename}`
+  const failure = `${TAG}: Error while creating full audio!`
+  let currentProgress = 0
+  let targetProgress = 25
+  // Reports the progress of the running step within the current range
+  const report = (progress: number, message: string) =>
+    onProgress?.(
+      calculateProgress(currentProgress, progress, targetProgress),
+      message
+    )
+
+  // To mix audios at given times, we need a first empty base audio track.
+  // `exec` is returned so that its exit code reaches the checks below.
+  const baseExitCode = await captureFFmpegProgress(
+    ffmpeg,
+    totalVideoDurationInMs,
+    () =>
+      ffmpeg.exec([
+        '-f',
+        'lavfi',
+        '-i',
+        'anullsrc',
+        '-t',
+        `${totalVideoDurationInMs / 1000}`,
+        '-loglevel',
+        'verbose',
+        !audios.length ? filename : baseFilename,
+      ]),
+    (progress) => report(progress, 'Creating base audio...')
+  )
+  if (baseExitCode) throw new Error(failure)
+
+  // If there is no audios, the base audio is the final one
+  if (!audios.length) return onProgress?.(100, 'Prepared audios...')
+
+  currentProgress = targetProgress
+  targetProgress = 50
+  // Mix audios based on their start times
+  const audioInputFiles = ['-i', baseFilename]
+  const delayedLabels: string[] = []
+  const progressPerAudio = (targetProgress - currentProgress) / audios.length
+  for (let index = 0; index < audios.length; index++) {
+    onProgress?.(currentProgress, 'Creating base audio...')
+    console.log(TAG, `Processing audio #${index}`)
+    const audio = audios[index]
+    currentProgress += progressPerAudio
+    if (!audio.data) continue
+    const audioFilename = `audio_${UUID()}.mp3`
+    await ffmpeg.writeFile(audioFilename, audio.data)
+    audioInputFiles.push('-i', audioFilename)
+    // Input #0 is the base track; skipped audios add no input, so count only the audios added so far
+    const inputIndex = delayedLabels.length + 1
+    const label = `[delayed${index}]`
+    const delay = audio.startTimeInMs
+    const durationInSecs = (audio.endTimeInMs - audio.startTimeInMs) / 1000
+    filterComplexParts.push(
+      `[${inputIndex}:a]atrim=0:${durationInSecs},adelay=${delay}|${delay}${label}`
+    )
+    delayedLabels.push(label)
+  }
+
+  // Only the labels that were actually produced are fed to `amix`
+  const amixInputs = `[0:a]${delayedLabels.join('')}amix=inputs=${delayedLabels.length + 1}:duration=longest`
+  filterComplexParts.push(`${amixInputs}[a]`)
+  const filterComplex = filterComplexParts.join('; ')
+
+  currentProgress = targetProgress
+  targetProgress = 100
+
+  const createFullAudioExitCode = await captureFFmpegProgress(
+    ffmpeg,
+    totalVideoDurationInMs,
+    () =>
+      ffmpeg.exec([
+        ...audioInputFiles,
+        '-filter_complex',
+        filterComplex,
+        '-map',
+        '[a]',
+        '-t',
+        `${totalVideoDurationInMs / 1000}`,
+        '-loglevel',
+        'verbose',
+        filename,
+      ]),
+    (progress) => report(progress, 'Mixing audios...')
+  )
+  if (createFullAudioExitCode) throw new Error(failure)
+  onProgress?.(targetProgress, 'Prepared audios...')
+}
+
+/**
+ * Creates the full silent video including empty black
+ * segments and loads it into ffmpeg with the given `filename`.
+ *
+ * @param videos clips to concatenate, assumed to be ordered by start time; the
+ *   time before, between and after them is filled with black segments
+ * @param filename name of the concatenated output file
+ * @param totalVideoDurationInMs duration that the result has to cover
+ * @param width width of the generated black segments
+ * @param height height of the generated black segments
+ * @param excludeEmptyContent when true, black segments are left out of the
+ *   result instead of being generated
+ * @param onProgress callback to capture the progress of this method
+ * @throws Error if ffmpeg returns a non-zero exit code
+ */
+export async function createFullSilentVideo(
+  videos: FFMPegVideoInput[],
+  filename: string,
+  totalVideoDurationInMs: number,
+  width: number,
+  height: number,
+  excludeEmptyContent = false,
+  onProgress?: (progress: number, message: string) => void
+) {
+  const ffmpeg = await loadFFmpegMt()
+  const fileList = 'fileList.txt'
+  const fileListContentArray: string[] = []
+
+  // Placeholder entry (no data) for a stretch of time without video
+  const createGap = (
+    startTimeInMs: number,
+    endTimeInMs: number
+  ): FFMPegVideoInput => ({
+    startTimeInMs,
+    endTimeInMs,
+    data: null,
+    durationInSecs: (endTimeInMs - startTimeInMs) / 1000,
+  })
+
+  // Complete array of videos including concatenated empty segments
+  // This is helpful for cleaner progress log
+  const videosWithGaps: FFMPegVideoInput[] = []
+  let lastEndTimeVideoInMs = 0
+
+  for (const video of videos) {
+    // Strictly positive only: when clips overlap the difference is negative
+    // and a segment with a negative duration must not be generated.
+    if (video.startTimeInMs > lastEndTimeVideoInMs) {
+      videosWithGaps.push(createGap(lastEndTimeVideoInMs, video.startTimeInMs))
+    }
+    videosWithGaps.push(video)
+    lastEndTimeVideoInMs = video.endTimeInMs
+  }
+
+  // Trailing gap up to the requested duration; without any video the
+  // whole duration is a single gap.
+  if (!videos.length || lastEndTimeVideoInMs < totalVideoDurationInMs) {
+    videosWithGaps.push(createGap(lastEndTimeVideoInMs, totalVideoDurationInMs))
+  }
+
+  onProgress?.(0, 'Preparing videos...')
+
+  // Arbitrary percentage, as `concat` is fast,
+  // then estimate the generation of gap videos
+  // as the 70% of the work
+  let currentProgress = 0
+  let targetProgress = 70
+
+  for (const video of videosWithGaps) {
+    const expectedProgressForItem =
+      (((video.durationInSecs * 1000) / totalVideoDurationInMs) *
+        targetProgress) /
+      100
+    if (!video.data) {
+      if (excludeEmptyContent) continue
+      let collectedProgress = 0
+      await addEmptyVideo(
+        video.durationInSecs,
+        width,
+        height,
+        `empty_video_${UUID()}.mp4`,
+        fileListContentArray,
+        (progress) => {
+          // `progress` is cumulative for this item: only add what is new
+          const subProgress = progress / 100
+          currentProgress +=
+            (expectedProgressForItem * subProgress - collectedProgress) * 100
+          console.log(TAG, 'Current progress', currentProgress)
+          onProgress?.(currentProgress, 'Preparing videos...')
+          collectedProgress = expectedProgressForItem * subProgress
+        }
+      )
+    } else {
+      const videoFilename = `video_${UUID()}.mp4`
+      await ffmpeg.writeFile(videoFilename, video.data)
+      fileListContentArray.push(`file ${videoFilename}`)
+      currentProgress += expectedProgressForItem * 100
+      console.log(TAG, 'Current progress', currentProgress)
+      onProgress?.(currentProgress, 'Preparing videos...')
+    }
+  }
+
+  onProgress?.(targetProgress, 'Concatenating videos...')
+  currentProgress = 70
+  targetProgress = 100
+
+  const fileListContent = fileListContentArray.join('\n')
+  await ffmpeg.writeFile(fileList, fileListContent)
+
+  // The wrapped call returns the exit code of `exec`; otherwise
+  // `captureFFmpegProgress` resolves to undefined and the check below
+  // can never detect a failure.
+  const creatBaseFullVideoExitCode = await captureFFmpegProgress(
+    ffmpeg,
+    totalVideoDurationInMs,
+    () =>
+      ffmpeg.exec([
+        '-f',
+        'concat',
+        '-safe',
+        '0',
+        '-i',
+        fileList,
+        '-loglevel',
+        'verbose',
+        '-c',
+        'copy',
+        filename,
+      ]),
+    (progress: number) => {
+      onProgress?.(
+        calculateProgress(currentProgress, progress, targetProgress),
+        'Merging audio and video...'
+      )
+    }
+  )
+
+  if (creatBaseFullVideoExitCode) {
+    throw new Error(`${TAG}: Error while creating base full video!`)
+  }
+  onProgress?.(targetProgress, 'Concatenating videos...')
+}
diff --git a/src/services/io/parseFileIntoSegments.ts b/src/services/io/parseFileIntoSegments.ts
index 8d6d2f1e..9c14daa3 100644
--- a/src/services/io/parseFileIntoSegments.ts
+++ b/src/services/io/parseFileIntoSegments.ts
@@ -13,6 +13,8 @@ import {
   SegmentEditionStatus,
   SegmentVisibility,
   TimelineSegment,
+  useTimeline,
+  TimelineStore
 } from '@aitube/timeline'
 
 import { blobToBase64DataUri } from '@/lib/utils/blobToBase64DataUri'
@@ -28,6 +30,7 @@ export async function parseFileIntoSegments({
    */
   file: File
 }): Promise<TimelineSegment[]> {
+  const timeline: TimelineStore = useTimeline.getState()
   // console.log(`parseFileIntoSegments(): filename = ${file.name}`)
   // console.log(`parseFileIntoSegments(): file size = ${file.size} bytes`)
   // console.log(`parseFileIntoSegments(): file type = ${file.type}`)
@@ -44,11 +47,61 @@ export async function parseFileIntoSegments({
   const newSegments: TimelineSegment[] = []
 
   switch (file.type) {
+    case 'image/jpeg':
+    case 'image/png':
+    case 'image/avif':
+    case 'image/heic':
     case 'image/webp':
       type = 'image'
       resourceCategory = 'control_image'
+      const startTimeInMs = cursorInSteps * DEFAULT_DURATION_IN_MS_PER_STEP
+      const durationInSteps = 4
+      const durationInMs = durationInSteps * DEFAULT_DURATION_IN_MS_PER_STEP
+      const endTimeInMs = startTimeInMs + durationInMs
+
+      // ok let's stop for a minute there:
+      // if someone drops a .mp3, and assuming we don't yet have the UI to select the category,
+      // do you think it should be a SOUND, a VOICE or a MUSIC by default?
+      // I expect people will use AI service providers for sound and voice,
+      // maybe in some case music too, but there are also many people
+      // who will want to use their own track eg. to create a music video
+      const category = ClapSegmentCategory.MUSIC
+
+      const assetUrl = await blobToBase64DataUri(file)
+
+      const newSegmentData: Partial<TimelineSegment> = {
+        prompt: 'audio track',
+        startTimeInMs, // start time of the segment
+        endTimeInMs, // end time of the segment (startTimeInMs + durationInMs)
+        status: ClapSegmentStatus.COMPLETED,
+        // track: findFreeTrack({ segments, startTimeInMs, endTimeInMs }), // track row index
+        label: `${file.name} (${Math.round(durationInMs / 1000)}s @ ${Math.round(bpm * 100) / 100} BPM)`, // a short label to name the segment (optional, can be human or LLM-defined)
+        category,
+        assetUrl,
+        assetDurationInMs: endTimeInMs,
+        assetSourceType: ClapAssetSource.DATA,
+        assetFileFormat: `${file.type}`,
+      }
+
+      const timelineSegment = await clapSegmentToTimelineSegment(
+        newSegment(newSegmentData)
+      )
+      timelineSegment.outputType = ClapOutputType.AUDIO
+      timelineSegment.outputGain = 1.0
+      timelineSegment.audioBuffer = audioBuffer
+
+      // we assume we want it to be immediately visible
+      timelineSegment.visibility = SegmentVisibility.VISIBLE
+
+      // console.log("newSegment:", audioSegment)
+
+      // poof! type disappears.. it's magic
+      newSegments.push(timelineSegment)
       break
 
+      break
+
+
     case 'audio/mpeg': // this is the "official" one
     case 'audio/mp3': // this is just an alias
     case 'audio/wav':
diff --git a/src/services/io/useIO.ts b/src/services/io/useIO.ts
index 9e11f667..e1d05f0b 100644
--- a/src/services/io/useIO.ts
+++ b/src/services/io/useIO.ts
@@ -40,11 +40,11 @@ import {
   formatSegmentForExport,
 } from '@/lib/utils/formatSegmentForExport'
 import { sleep } from '@/lib/utils/sleep'
-import {
-  FFMPegAudioInput,
-  FFMPegVideoInput,
-  createFullVideo,
-} from './createFullVideo'
+import { FFMPegAudioInput, FFMPegVideoInput } from './ffmpegUtils'
+import { createFullVideo } from './createFullVideo'
+import { extractFramesFromVideo } from './extractFramesFromVideo'
+import { extractCaptionsFromFrames } from './extractCaptionsFromFrames'
+import { base64DataUriToFile } from '@/lib/utils/base64DataUriToFile'
 
 export const useIO = create<IOStore>((set, get) => ({
   ...getDefaultIOState(),
@@ -107,6 +107,69 @@ export const useIO = create<IOStore>((set, get) => ({
       }
 
       const isVideoFile = fileType.startsWith('video/')
+      if (isVideoFile) {
+        const storyboardExtractionTask = useTasks.getState().add({
+          category: TaskCategory.IMPORT,
+          visibility: TaskVisibility.BLOCKER,
+          initialMessage: `Extracting storyboards..`,
+          successMessage: `Extracting storyboards.. 100% done`,
+          value: 0,
+        })
+
+        const frames = await extractFramesFromVideo(file, {
+          format: 'png', // in theory we could also use 'jpg', but this freezes FFmpeg
+          maxWidth: 1024,
+          maxHeight: 576,
+          sceneSamplingRate: 100,
+          onProgress: (progress: number) => {
+            storyboardExtractionTask.setProgress({
+              message: `Extracting storyboards.. ${progress}% done`,
+              value: progress,
+            })
+          },
+        })
+
+        let i = 0
+        for (const frame of frames) {
+          const frameFile = base64DataUriToFile(frame, `storyboard_${i++}.png`)
+          const newSegments = await parseFileIntoSegments({ file: frameFile })
+
+          console.log('calling timeline.addSegments with:', newSegments)
+          await timeline.addSegments({
+            segments: newSegments,
+          })
+        }
+
+        storyboardExtractionTask.success()
+
+        const enableCaptioning = false
+
+        if (enableCaptioning) {
+          const captioningTask = useTasks.getState().add({
+            category: TaskCategory.IMPORT,
+            // visibility: TaskVisibility.BLOCKER,
+
+            // since this is very long task, we can run it in the background
+            visibility: TaskVisibility.BACKGROUND,
+            initialMessage: `Analyzing storyboards..`,
+            successMessage: `Analyzing storyboards.. 100% done`,
+            value: 0,
+          })
+
+          console.log('calling extractCaptionsFromFrames() with:', frames)
+          const captions = await extractCaptionsFromFrames(frames, (progress: number, storyboardIndex: number, nbStoryboards: number) => {
+            captioningTask.setProgress({
+              message: `Analyzing storyboards (${progress}%)`,
+              value: progress,
+            })
+          })
+          console.log('captions:', captions)
+          // TODO: add 
+
+          captioningTask.success()
+        }
+
+      }
     }
   },
   openScreenplay: async (