From 3e6b235d25c0a82b4d64747933f14db0c10e59b0 Mon Sep 17 00:00:00 2001 From: Julian Bilcke Date: Sun, 25 Aug 2024 19:35:16 +0200 Subject: [PATCH] add face swap for character consistency --- .../app/api/resolve/providers/falai/index.ts | 66 +++++++++-- .../app/api/resolve/providers/falai/types.ts | 19 +++- packages/app/src/app/api/resolve/route.ts | 40 +++---- .../app/src/components/tasks/useTasks.tsx | 78 +++++++------ .../toolbars/top-menu/image/index.tsx | 2 + .../top-menu/lists/ImageFaceswapWorkflows.tsx | 95 ++++++++++++++++ packages/app/src/services/api/resolve.ts | 19 +++- .../getSegmentWorkflowProviderAndEngine.ts | 40 +++++++ .../workflows/common/defaultValues.ts | 29 +++++ .../workflows/falai/defaultWorkflows.ts | 50 ++++++++- .../workflow-editor/workflows/piapi/index.ts | 2 +- .../app/src/services/resolver/useResolver.ts | 105 ++++++++++++++---- .../settings/getDefaultSettingsState.ts | 2 + .../app/src/services/settings/useSettings.ts | 30 +++++ .../settings/workflows/parseWorkflow.ts | 23 +--- .../src/analysis/analyzeScreenplay.ts | 4 + packages/clap/src/types.ts | 15 +++ packages/clapper-services/src/settings.ts | 6 + .../src/components/cells/RedrawButton.tsx | 6 + 19 files changed, 518 insertions(+), 113 deletions(-) create mode 100644 packages/app/src/components/toolbars/top-menu/lists/ImageFaceswapWorkflows.tsx create mode 100644 packages/app/src/services/editors/workflow-editor/getSegmentWorkflowProviderAndEngine.ts diff --git a/packages/app/src/app/api/resolve/providers/falai/index.ts b/packages/app/src/app/api/resolve/providers/falai/index.ts index b6366746..9d8f162f 100644 --- a/packages/app/src/app/api/resolve/providers/falai/index.ts +++ b/packages/app/src/app/api/resolve/providers/falai/index.ts @@ -5,6 +5,7 @@ import { ClapMediaOrientation, ClapSegmentCategory } from '@aitube/clap' import { FalAiAudioResponse, FalAiImageResponse, + FalAiImagesResponse, FalAiSpeechResponse, FalAiVideoResponse, } from './types' @@ -40,7 +41,9 @@ export async 
function resolveSegment( return segment } - let result: FalAiImageResponse | undefined = undefined + let result: FalAiImagesResponse | undefined = undefined + + let isUsingIntegratedFaceId = false if (model === 'fal-ai/pulid') { if (!request.prompts.image.identity) { @@ -48,7 +51,9 @@ export async function resolveSegment( // console.log(`warning: user selected model ${request.settings.falAiModelForImage}, but no character was found. Falling back to fal-ai/flux-pro`) // dirty fix to fallback to a non-face model - model = 'fal-ai/flux-pro' + model = 'fal-ai/flux/schnell' + } else { + isUsingIntegratedFaceId = true } } @@ -100,7 +105,7 @@ export async function resolveSegment( enable_safety_checker: request.settings.censorNotForAllAudiencesContent, }, - })) as FalAiImageResponse + })) as FalAiImagesResponse } else if (model === 'fal-ai/flux-general') { // note: this isn't the right place to do this, because maybe the LoRAs are dynamic const loraModel = getWorkflowLora( @@ -135,7 +140,7 @@ export async function resolveSegment( enable_safety_checker: request.settings.censorNotForAllAudiencesContent, }, - })) as FalAiImageResponse + })) as FalAiImagesResponse } else { result = (await fal.run(model, { input: { @@ -150,11 +155,14 @@ export async function resolveSegment( enable_safety_checker: request.settings.censorNotForAllAudiencesContent, }, - })) as FalAiImageResponse + })) as FalAiImagesResponse } if (request.settings.censorNotForAllAudiencesContent) { - if (result.has_nsfw_concepts.includes(true)) { + if ( + Array.isArray(result.has_nsfw_concepts) && + result.has_nsfw_concepts.includes(true) + ) { throw new Error( `The generated content has been filtered according to your safety settings` ) @@ -162,6 +170,47 @@ export async function resolveSegment( } segment.assetUrl = result.images[0]?.url || '' + + const imageFaceswapWorkflowModel = + request.settings.imageFaceswapWorkflow.data || '' + + if (!isUsingIntegratedFaceId && imageFaceswapWorkflowModel) { + try { + const 
faceSwapResult = (await fal.run(imageFaceswapWorkflowModel, { + input: { + base_image_url: segment.assetUrl, + swap_image_url: request.prompts.image.identity, + + sync_mode: true, + num_images: 1, + enable_safety_checker: + request.settings.censorNotForAllAudiencesContent, + }, + })) as FalAiImageResponse + + // note how the face-swap response exposes a single `image` (not `images`), and how the safety check below must inspect faceSwapResult (the base image was already checked above) + const imageResult = faceSwapResult.image?.url || '' + + if (!imageResult) { + throw new Error(`the generate image is empty`) + } + + if (request.settings.censorNotForAllAudiencesContent) { + if ( + Array.isArray(faceSwapResult.has_nsfw_concepts) && + faceSwapResult.has_nsfw_concepts.includes(true) + ) { + throw new Error( + `The generated content has been filtered according to your safety settings` + ) + } + } + + segment.assetUrl = imageResult + } catch (err) { + console.error(`failed to run a face-swap using Fal.ai:`, err) + } + } } else if (request.segment.category === ClapSegmentCategory.VIDEO) { model = request.settings.videoGenerationWorkflow.data || '' @@ -190,7 +239,10 @@ export async function resolveSegment( })) as FalAiVideoResponse if (request.settings.censorNotForAllAudiencesContent) { - if (result.has_nsfw_concepts.includes(true)) { + if ( + Array.isArray(result.has_nsfw_concepts) && + result.has_nsfw_concepts.includes(true) + ) { throw new Error( `The generated content has been filtered according to your safety settings` ) diff --git a/packages/app/src/app/api/resolve/providers/falai/types.ts b/packages/app/src/app/api/resolve/providers/falai/types.ts index a6f1edbc..15f95445 100644 --- a/packages/app/src/app/api/resolve/providers/falai/types.ts +++ b/packages/app/src/app/api/resolve/providers/falai/types.ts @@ -1,4 +1,4 @@ -export type FalAiImageResponse = { +export type FalAiImagesResponse = { prompt: string timings: { inference: number } has_nsfw_concepts: boolean[] @@ -7,10 +7,27 @@ export type FalAiImageResponse = { url: string width: number height: number + file_name: string + file_size: string content_type: string }[] } +export type 
FalAiImageResponse = { + prompt: string + timings: { inference: number } + has_nsfw_concepts: boolean[] + seed: number + image: { + url: string + width: number + height: number + file_name: string + file_size: string + content_type: string + } +} + export type FalAiVideoResponse = { video: { url: string diff --git a/packages/app/src/app/api/resolve/route.ts b/packages/app/src/app/api/resolve/route.ts index d09f1bb3..b3dc6071 100644 --- a/packages/app/src/app/api/resolve/route.ts +++ b/packages/app/src/app/api/resolve/route.ts @@ -9,6 +9,7 @@ import { ClapAssetSource, ClapWorkflowEngine, } from '@aitube/clap' +import { TimelineSegment } from '@aitube/timeline' import { resolveSegmentUsingHuggingFace, @@ -32,7 +33,7 @@ import { ResolveRequest } from '@aitube/clapper-services' import { decodeOutput } from '@/lib/utils/decodeOutput' import { getTypeAndExtension } from '@/lib/utils/getTypeAndExtension' import { getMediaInfo } from '@/lib/ffmpeg/getMediaInfo' -import { TimelineSegment } from '@aitube/timeline' +import { getSegmentWorkflowProviderAndEngine } from '@/services/editors/workflow-editor/getSegmentWorkflowProviderAndEngine' type ProviderFn = (request: ResolveRequest) => Promise @@ -44,34 +45,27 @@ export async function POST(req: NextRequest) { // await throwIfInvalidToken(req.headers.get("Authorization")) const request = (await req.json()) as ResolveRequest - const workflow: ClapWorkflow | undefined = - request.segment.category === ClapSegmentCategory.STORYBOARD - ? request.settings.imageGenerationWorkflow - : request.segment.category === ClapSegmentCategory.VIDEO - ? request.settings.videoGenerationWorkflow - : request.segment.category === ClapSegmentCategory.DIALOGUE - ? request.settings.voiceGenerationWorkflow - : request.segment.category === ClapSegmentCategory.SOUND - ? request.settings.soundGenerationWorkflow - : request.segment.category === ClapSegmentCategory.MUSIC - ? 
request.settings.musicGenerationWorkflow - : undefined + const { workflow, provider, engine } = + getSegmentWorkflowProviderAndEngine(request) + + /* + console.log(`Resolving a ${request.segment.category} segment using:`, { + workflow, + provider, + engine, + }) + */ if (!workflow) { - throw new Error(`request to /api/resolve is missing the .workflow field`) + throw new Error(`cannot resolve a segment without a valid workflow`) } - const provider: ClapWorkflowProvider | undefined = - workflow.provider || undefined - - if (!provider) { - throw new Error(`request to /api/resolve is missing the .provider field`) + if (!provider || provider === ClapWorkflowProvider.NONE) { + throw new Error(`cannot resolve a segment without a valid provider`) } - const engine: ClapWorkflowEngine | undefined = workflow.engine || undefined - if (!engine) { - throw new Error(`request to /api/resolve is missing the .engine field`) + throw new Error(`cannot resolve a segment without a valid engine`) } const comfyProviders: Partial> = { @@ -102,6 +96,7 @@ export async function POST(req: NextRequest) { : providers[provider] || undefined if (!resolveSegment || typeof resolveSegment !== 'function') { + // console.log('invalid resolveSegment:', request) throw new Error( `Engine "${engine}" is not supported by "${provider}" yet. If you believe this is a mistake, please open a Pull Request (with working code) to fix it. 
Thank you!` ) @@ -110,6 +105,7 @@ export async function POST(req: NextRequest) { let segment = request.segment try { + // console.log('calling resolveSegment', request) segment = await resolveSegment(request) // we clean-up and parse the output from all the resolvers: diff --git a/packages/app/src/components/tasks/useTasks.tsx b/packages/app/src/components/tasks/useTasks.tsx index 58d6c682..ecf2d8b0 100644 --- a/packages/app/src/components/tasks/useTasks.tsx +++ b/packages/app/src/components/tasks/useTasks.tsx @@ -178,18 +178,18 @@ export const useTasks = create((set, get) => ({ const status = t.task.status || 'deleted' const progress = t.task.progress || 0 - console.log( - `useTasks[${id}]: checkStatus: checking task, current status is: "${status}"` - ) + // console.log( + // `useTasks[${id}]: checkStatus: checking task, current status is: "${status}"` + // ) if ( status === TaskStatus.ERROR || status === TaskStatus.SUCCESS || status === TaskStatus.DELETED || status === TaskStatus.CANCELLED ) { - console.log( - `useTasks[${id}]: checkStatus: status is "${status}", interrupting task loop..` - ) + // console.log( + // `useTasks[${id}]: checkStatus: status is "${status}", interrupting task loop..` + // ) // this call might be redundant if (status === TaskStatus.SUCCESS) { @@ -197,17 +197,17 @@ export const useTasks = create((set, get) => ({ } resolve(status) } else if (progress >= 100) { - console.log( - `useTasks[${id}]: checkStatus: task is completed at 100%, interrupting task loop..` - ) + // console.log( + // `useTasks[${id}]: checkStatus: task is completed at 100%, interrupting task loop..` + // ) // this call might be redundant get().setProgress(id, { isFinished: true }) // get().setStatus(TaskStatus.SUCCESS, id) resolve(TaskStatus.SUCCESS) } else { - console.log( - `useTasks[${id}]: checkStatus: status is "${status}", continuing task loop..` - ) + // console.log( + // `useTasks[${id}]: checkStatus: status is "${status}", continuing task loop..` + // ) 
setTimeout(checkStatus, 1000) } } catch (err) { @@ -218,15 +218,17 @@ export const useTasks = create((set, get) => ({ checkStatus() }) - toast.promise(task.promise, { - loading: , - success: (finalStatus) => { - return finalStatus === TaskStatus.SUCCESS - ? task.successMessage - : `Task ended` - }, - error: 'Task aborted', - }) + if (task.visibility === TaskVisibility.BACKGROUND) { + toast.promise(task.promise, { + loading: , + success: (finalStatus) => { + return finalStatus === TaskStatus.SUCCESS + ? task.successMessage + : `Task ended` + }, + error: 'Task aborted', + }) + } const { tasks } = get() set({ @@ -243,22 +245,22 @@ export const useTasks = create((set, get) => ({ } // oh, one last thing: let's launch-and-forget the actual task - console.log( - `useTasks[${id}]: launching the task runner in the background..` - ) + // console.log( + // `useTasks[${id}]: launching the task runner in the background..` + // ) // we provide to the task runner a wait to get the current status // that wait long-running jobs will know when they have been cancelled and no longer needed const result = await task.run(() => { const remoteControl = get().get(id)! 
const status = remoteControl?.task?.status - console.log( - `useTasks[${id}]: task runner asked for current status (which is: "${status || 'deleted'}")` - ) + // console.log( + // `useTasks[${id}]: task runner asked for current status (which is: "${status || 'deleted'}")` + // ) return status || 'deleted' }) - console.log(`useTasks[${id}]: task runner ended with status: "${result}"`) + // console.log(`useTasks[${id}]: task runner ended with status: "${result}"`) get().setProgress(id, { isFinished: true }) // get().setStatus(result, id) }, 100) @@ -280,11 +282,11 @@ export const useTasks = create((set, get) => ({ const { tasks } = get() const task = get().get(taskId)?.task - console.log(`useTasks[${taskId}]:setStatus("${status}")`) + // console.log(`useTasks[${taskId}]:setStatus("${status}")`) if (task) { - console.log( - `useTasks[${taskId}]:setStatus("${status}") -> setting one task to ${status}` - ) + // console.log( + // `useTasks[${taskId}]:setStatus("${status}") -> setting one task to ${status}` + // ) set({ tasks: { ...tasks, @@ -292,9 +294,9 @@ export const useTasks = create((set, get) => ({ }, }) } else { - console.log( - `useTasks[${taskId}]:setStatus("${status}") -> setting all tasks to ${status}` - ) + // console.log( + // `useTasks[${taskId}]:setStatus("${status}") -> setting all tasks to ${status}` + // ) const newTasks = {} as Record for (const [id, t] of Object.entries(tasks)) { newTasks[id] = { ...t, status: statusTransition(t.status, status) } @@ -379,12 +381,16 @@ export const useTasks = create((set, get) => ({ get().setStatus(TaskStatus.SUCCESS, taskId) }, fail: (taskId: string, reason?: string) => { + const message = reason || 'unknown failure' + get().setProgress(taskId, { - message: reason || 'unknown failure', + message, isFinished: true, hasFailed: true, }) get().setStatus(TaskStatus.ERROR, taskId) + + toast.error(message) }, cancel: (taskId?: string) => { get().setStatus(TaskStatus.CANCELLED, taskId) diff --git 
a/packages/app/src/components/toolbars/top-menu/image/index.tsx b/packages/app/src/components/toolbars/top-menu/image/index.tsx index 52b88183..ad3f27c1 100644 --- a/packages/app/src/components/toolbars/top-menu/image/index.tsx +++ b/packages/app/src/components/toolbars/top-menu/image/index.tsx @@ -23,6 +23,7 @@ import { SettingsCategory } from '@aitube/clapper-services' import { useResolver } from '@/services/resolver/useResolver' import { ImageDepthWorkflows } from '../lists/ImageDepthWorkflows' import { ImageSegmentationWorkflows } from '../lists/ImageSegmentationWorkflows' +import { ImageFaceswapWorkflows } from '../lists/ImageFaceswapWorkflows' export function TopMenuImage() { const hasBetaAccess = useUI((s) => s.hasBetaAccess) @@ -53,6 +54,7 @@ export function TopMenuImage() { + {hasBetaAccess && ( <> diff --git a/packages/app/src/components/toolbars/top-menu/lists/ImageFaceswapWorkflows.tsx b/packages/app/src/components/toolbars/top-menu/lists/ImageFaceswapWorkflows.tsx new file mode 100644 index 00000000..7c09f867 --- /dev/null +++ b/packages/app/src/components/toolbars/top-menu/lists/ImageFaceswapWorkflows.tsx @@ -0,0 +1,95 @@ +'use client' + +import { ClapWorkflowCategory, ClapWorkflowProvider } from '@aitube/clap' + +import { + MenubarCheckboxItem, + MenubarSub, + MenubarSubContent, + MenubarSubTrigger, +} from '@/components/ui/menubar' + +import { TagColor } from '@/components/tags/types' +import { Tag } from '@/components/tags/Tag' +import { useSettings } from '@/services/settings' +import { cn } from '@/lib/utils' + +import { hasNoPublicAPI } from './hasNoPublicAPI' +import { useWorkflowEditor } from '@/services/editors' +import { findWorkflows } from './getWorkflowProviders' +import { + ClapWorkflowProviderLogo, + ClapWorkflowProviderName, +} from '@/components/core/providers' +import { parseWorkflow } from '@/services/settings/workflows/parseWorkflow' + +const category = ClapWorkflowCategory.IMAGE_FACESWAP + +export function ImageFaceswapWorkflows() 
{ + const imageFaceswapWorkflow = useSettings((s) => s.imageFaceswapWorkflow) + const setImageFaceswapWorkflow = useSettings( + (s) => s.setImageFaceswapWorkflow + ) + const availableWorkflows = useWorkflowEditor((s) => s.availableWorkflows) + + const { providers, nbProviders } = findWorkflows(availableWorkflows, { + category, + }) + + const workflow = parseWorkflow(imageFaceswapWorkflow, category) + + if (!nbProviders) { + return null + } + + return ( + + + + face swap + +
+ +
{workflow?.label || 'None'}
+
+
+ + {Object.entries(providers).map(([p, workflows]) => ( + + + + {p as ClapWorkflowProvider} + + + + {workflows?.map((w) => ( + { + if (hasNoPublicAPI(w)) { + e.stopPropagation() + e.preventDefault() + return false + } + setImageFaceswapWorkflow(w) + e.stopPropagation() + e.preventDefault() + return false + }} + > + {w.label} + + ))} + + + ))} + +
+ ) +} diff --git a/packages/app/src/services/api/resolve.ts b/packages/app/src/services/api/resolve.ts index 1478e85a..921ed630 100644 --- a/packages/app/src/services/api/resolve.ts +++ b/packages/app/src/services/api/resolve.ts @@ -12,7 +12,12 @@ import { SettingsStore, } from '@aitube/clapper-services' import { useSettings } from '../settings' -import { ClapSegmentCategory, newSegment } from '@aitube/clap' +import { + ClapSegmentCategory, + ClapWorkflowEngine, + ClapWorkflowProvider, + newSegment, +} from '@aitube/clap' import { getDefaultResolveRequestPrompts } from '../resolver/getDefaultResolveRequestPrompts' export async function resolve( @@ -66,6 +71,18 @@ export async function resolve( prompts: getDefaultResolveRequestPrompts(req.prompts), } + if ( + request.settings.imageGenerationWorkflow.provider === + ClapWorkflowProvider.ANTHROPIC && + request.settings.imageGenerationWorkflow.engine === + ClapWorkflowEngine.REST_API + ) { + console.log( + `The request looks weird, as if your codebase just got re-generated using NextJS hot reload?` + ) + return segment + } + const res = await fetch('/api/resolve', { method: 'POST', headers: { diff --git a/packages/app/src/services/editors/workflow-editor/getSegmentWorkflowProviderAndEngine.ts b/packages/app/src/services/editors/workflow-editor/getSegmentWorkflowProviderAndEngine.ts new file mode 100644 index 00000000..c5812e9a --- /dev/null +++ b/packages/app/src/services/editors/workflow-editor/getSegmentWorkflowProviderAndEngine.ts @@ -0,0 +1,40 @@ +import { + ClapSegmentCategory, + ClapWorkflow, + ClapWorkflowEngine, + ClapWorkflowProvider, +} from '@aitube/clap' +import { RequestSettings } from '@aitube/clapper-services' +import { TimelineSegment } from '@aitube/timeline' + +export function getSegmentWorkflowProviderAndEngine({ + segment, + settings, +}: { + segment: TimelineSegment + settings: RequestSettings +}): { + workflow?: ClapWorkflow + provider?: ClapWorkflowProvider + engine?: ClapWorkflowEngine +} { + 
const workflow: ClapWorkflow | undefined = + segment.category === ClapSegmentCategory.STORYBOARD + ? settings.imageGenerationWorkflow + : segment.category === ClapSegmentCategory.VIDEO + ? settings.videoGenerationWorkflow + : segment.category === ClapSegmentCategory.DIALOGUE + ? settings.voiceGenerationWorkflow + : segment.category === ClapSegmentCategory.SOUND + ? settings.soundGenerationWorkflow + : segment.category === ClapSegmentCategory.MUSIC + ? settings.musicGenerationWorkflow + : undefined + + const provider: ClapWorkflowProvider | undefined = + workflow?.provider || undefined + + const engine: ClapWorkflowEngine | undefined = workflow?.engine || undefined + + return { workflow, provider, engine } +} diff --git a/packages/app/src/services/editors/workflow-editor/workflows/common/defaultValues.ts b/packages/app/src/services/editors/workflow-editor/workflows/common/defaultValues.ts index 997a6527..849bd698 100644 --- a/packages/app/src/services/editors/workflow-editor/workflows/common/defaultValues.ts +++ b/packages/app/src/services/editors/workflow-editor/workflows/common/defaultValues.ts @@ -97,6 +97,26 @@ export const genericImageUrl: ClapInputField = { defaultValue: '', } +export const genericBaseImageUrl: ClapInputField = { + id: 'base_image_url', + label: 'Base Image URL', + description: 'Base Image URL', + category: ClapInputCategory.IMAGE_URL, + type: 'string', + allowedValues: [], + defaultValue: '', +} + +export const genericSwapImageUrl: ClapInputField = { + id: 'swap_image_url', + label: 'Swap Image URL', + description: 'Swap Image URL', + category: ClapInputCategory.IMAGE_URL, + type: 'string', + allowedValues: [], + defaultValue: '', +} + export const genericVideo: ClapInputField = { id: 'video', label: 'Video', @@ -147,6 +167,15 @@ export const genericLoras: ClapInputField = { defaultValue: '', } +export const genericReferenceImages: ClapInputField = { + id: 'reference_images', + label: 'Reference Images', + description: 'List of images to use 
as reference', + category: ClapInputCategory.IMAGE_URLS, + type: 'string[]', + defaultValue: [], +} + export const genericInferenceSteps: ClapInputField = { id: 'num_inference_steps', label: 'Inference steps', diff --git a/packages/app/src/services/editors/workflow-editor/workflows/falai/defaultWorkflows.ts b/packages/app/src/services/editors/workflow-editor/workflows/falai/defaultWorkflows.ts index f88fc0a4..15a2b27f 100644 --- a/packages/app/src/services/editors/workflow-editor/workflows/falai/defaultWorkflows.ts +++ b/packages/app/src/services/editors/workflow-editor/workflows/falai/defaultWorkflows.ts @@ -24,6 +24,9 @@ import { genericImageUrl, genericLora, genericLoras, + genericReferenceImages, + genericBaseImageUrl, + genericSwapImageUrl, } from '../common/defaultValues' import { sampleDrivingVideo, sampleVoice } from '@/lib/core/constants' @@ -32,13 +35,54 @@ import { sampleDrivingVideo, sampleVoice } from '@/lib/core/constants' TODO: add those as well "fal-ai/photomaker", -"fal-ai/pulid", "fal-ai/image-to-image", "fal-ai/omni-zero", */ export const defaultWorkflows: ClapWorkflow[] = [ + { + id: 'falai://fal-ai/face-swap', + label: 'Face Swap', + description: '', + tags: ['image'], + author: '', + thumbnailUrl: '', + + // they are using Roop, no? is it why it's for "research only"? 
+ // https://fal.ai/models/fal-ai/face-swap + nonCommercial: true, + + engine: ClapWorkflowEngine.REST_API, + provider: ClapWorkflowProvider.FALAI, + category: ClapWorkflowCategory.IMAGE_FACESWAP, + data: 'fal-ai/face-swap', + schema: '', + inputFields: [genericBaseImageUrl, genericSwapImageUrl], + inputValues: { + [genericBaseImageUrl.id]: genericBaseImageUrl.defaultValue, + [genericSwapImageUrl.id]: genericSwapImageUrl.defaultValue, + }, + }, + { + id: 'falai://fal-ai/pulid', + label: 'PuLID (with Flux Schnell fallback)', + description: '', + tags: ['image'], + author: '', + thumbnailUrl: '', + nonCommercial: false, + engine: ClapWorkflowEngine.REST_API, + provider: ClapWorkflowProvider.FALAI, + category: ClapWorkflowCategory.IMAGE_GENERATION, + data: 'fal-ai/pulid', + schema: '', + inputFields: [genericPrompt, genericReferenceImages], + inputValues: { + [genericPrompt.id]: genericPrompt.defaultValue, + [genericReferenceImages.id]: genericReferenceImages.defaultValue, + }, + }, { id: 'falai://fal-ai/stable-video', label: 'Stable Video Diffusion', @@ -69,7 +113,7 @@ export const defaultWorkflows: ClapWorkflow[] = [ }, { id: 'falai://fal-ai/flux-general', - label: 'Flux.1-[DEV] with LoRAs', + label: 'Flux.1-[DEV] LoRA', description: '', tags: ['Flux', 'LoRA'], author: '', @@ -241,7 +285,7 @@ export const defaultWorkflows: ClapWorkflow[] = [ }, { id: 'falai://fal-ai/stable-diffusion-v3-medium', - label: 'Stable Diffusion 3 (Medium)', + label: 'SD3 (Medium)', description: '', tags: ['SD3'], author: 'Stability AI', diff --git a/packages/app/src/services/editors/workflow-editor/workflows/piapi/index.ts b/packages/app/src/services/editors/workflow-editor/workflows/piapi/index.ts index 941cbe8e..d0101ec0 100644 --- a/packages/app/src/services/editors/workflow-editor/workflows/piapi/index.ts +++ b/packages/app/src/services/editors/workflow-editor/workflows/piapi/index.ts @@ -9,7 +9,7 @@ import { genericImageUrl, genericPrompt } from '../common/defaultValues' export const 
piApiWorkflows: ClapWorkflow[] = [ { id: 'piapi://mj/v2/imagine', - label: 'Midjourne Imagine', + label: 'Midjourney Imagine', description: '', tags: ['Midjourney'], author: '', diff --git a/packages/app/src/services/resolver/useResolver.ts b/packages/app/src/services/resolver/useResolver.ts index 4cf6e769..6ee3bbec 100644 --- a/packages/app/src/services/resolver/useResolver.ts +++ b/packages/app/src/services/resolver/useResolver.ts @@ -8,6 +8,7 @@ import { ClapSegmentCategory, ClapSegmentFilteringMode, ClapSegmentStatus, + ClapWorkflowProvider, filterSegments, generateSeed, newSegment, @@ -36,6 +37,8 @@ import { RenderingBufferSizes, RenderingStrategies, ResolverStore, + TaskCategory, + TaskVisibility, } from '@aitube/clapper-services' import { getDefaultResolverState } from './getDefaultResolverState' @@ -46,6 +49,8 @@ import { useMonitor } from '../monitor/useMonitor' import { useRenderer } from '../renderer' import { getDefaultResolveRequestPrompts } from './getDefaultResolveRequestPrompts' import { resolve } from '../api/resolve' +import { useTasks } from '@/components/tasks/useTasks' +import { getSegmentWorkflowProviderAndEngine } from '../editors/workflow-editor/getSegmentWorkflowProviderAndEngine' export const useResolver = create((set, get) => ({ ...getDefaultResolverState(), @@ -484,7 +489,7 @@ export const useResolver = create((set, get) => ({ field?: 'face' | 'voice' ): Promise => { // note: if the entity has an image id or an audio id, we proceed anyway. - + // that way the parent function can decide to re-generate the entity at any time. 
if (!field || field === 'face') { @@ -509,8 +514,14 @@ export const useResolver = create((set, get) => ({ } } + /* + TODO @julian-hf finish character voice generation + This will have to be done using some specific providers which support + prompting a voice from settings like "old aged man" etc + if (!field || field === 'voice') { try { + throw new Error(`character voice generation isn't supported yet`) // we generate a random, novel voice // TODO use the gender const characterVoicePrompt = `A ${entity.age} years old ${entity.gender} is talking` @@ -541,6 +552,7 @@ wake of the euro-zone debt crisis.` ) } } + */ return entity }, @@ -562,36 +574,48 @@ wake of the euro-zone debt crisis.` resolveSegment: async ( segment: TimelineSegment ): Promise => { + // note: we only resolve segments that are still waiting to be generated + if (segment.status !== ClapSegmentStatus.TO_GENERATE) { + return segment + } + + const { resolveEntity } = get() const settings = useSettings.getState().getRequestSettings() const timeline: TimelineStore = useTimeline.getState() - // note: do NOT use the visibleSegments here - // that's because resolveSegment is 100% asynchronous, - // meaning it might be called on invisible segments too! const { entityIndex, - segments: allSegments, + segments: allSegments, // we read all segments, even the invisible ones trackSilentChangeInSegment, } = timeline - if (!allSegments.length) { + const { workflow, provider, engine } = getSegmentWorkflowProviderAndEngine({ + segment, + settings, + }) + + let isUnprocessable = + !workflow || + !provider || + provider === ClapWorkflowProvider.NONE || + !engine || + !allSegments.length + + if (isUnprocessable) { + Object.assign(segment, { status: ClapSegmentStatus.ERROR }) + trackSilentChangeInSegment(segment.id) return segment - // throw new Error(`please call setSegmentRender(...) 
first`) } + Object.assign(segment, { status: ClapSegmentStatus.IN_PROGRESS }) + trackSilentChangeInSegment(segment.id) + const segments: TimelineSegment[] = filterSegments( ClapSegmentFilteringMode.ANY, segment, allSegments ) - if (segment.status === ClapSegmentStatus.IN_PROGRESS) { - // console.log(`useResolver.resolveSegment(): warning: this segment is already being generated!`) - return segment - } - - segment.status = ClapSegmentStatus.IN_PROGRESS - const entities = entityIndex || {} const speakingCharactersIds = segments @@ -613,6 +637,37 @@ wake of the euro-zone debt crisis.` ? entities[mainCharacterId] || undefined : undefined + if (mainCharacterEntity) { + if ( + !mainCharacterEntity?.imageId + // || !mainCharacterEntity.audioId + ) { + // we create an invisible task, which means there won't be any visible toast + // however if there is a failure this will still show up as an error toast + const entityTask = useTasks.getState().add({ + category: TaskCategory.GENERIC, + visibility: TaskVisibility.INVISIBLE, + value: 0, + }) + + try { + console.log('calling resolveEntity', mainCharacterEntity) + await resolveEntity(mainCharacterEntity, 'face') + entityTask.success() + } catch (err) { + console.log(`failed to resolve entity (${err})`, mainCharacterEntity) + entityTask.fail(`failed to resolve entity (${err})`) + + // not the best way to handle this + // mainCharacterEntity.imageId = 'ERROR' + } + } else { + console.log('main character entity already has an imageId') + } + } else { + console.log('no main character entity') + } + const storyboard = segments.find( (s) => s.category === ClapSegmentCategory.STORYBOARD ) @@ -676,6 +731,14 @@ wake of the euro-zone debt crisis.` }, }) + // we create an invisible task, which means there won't be any visible toast + // however if there is a failure this will still show up as an error toast + const resolutionTask = useTasks.getState().add({ + category: TaskCategory.GENERIC, + visibility: TaskVisibility.INVISIBLE, + value: 
0, + }) + try { // note: this isn't really a "full" TimelineSegment, // it will miss some data that cannot be serialized @@ -765,15 +828,17 @@ wake of the euro-zone debt crisis.` await timeline.fitSegmentToAssetDuration(newSegment) } - newSegment.status = ClapSegmentStatus.COMPLETED - + Object.assign(newSegment, { status: ClapSegmentStatus.COMPLETED }) + resolutionTask.success() trackSilentChangeInSegment(newSegment.id) + return newSegment } catch (err) { - console.error(`useResolver.resolveSegment(): error: ${err}`) - segment.status = ClapSegmentStatus.TO_GENERATE - // we could do that in a future version to improve error tracking - // segment.status = ClapSegmentStatus.ERROR + const message = `Failed to resolve a segment (${err})` + console.error(message) + Object.assign(segment, { status: ClapSegmentStatus.ERROR }) + resolutionTask.fail(message) + trackSilentChangeInSegment(segment.id) } return segment }, diff --git a/packages/app/src/services/settings/getDefaultSettingsState.ts b/packages/app/src/services/settings/getDefaultSettingsState.ts index b89c4173..6058f810 100644 --- a/packages/app/src/services/settings/getDefaultSettingsState.ts +++ b/packages/app/src/services/settings/getDefaultSettingsState.ts @@ -51,10 +51,12 @@ export function getDefaultSettingsState(): SettingsState { assistantTurboWorkflow: '', imageGenerationWorkflow: '', imageGenerationTurboWorkflow: '', + imageFaceswapWorkflow: '', imageUpscalingWorkflow: '', imageDepthWorkflow: '', imageSegmentationWorkflow: '', videoGenerationWorkflow: '', + videoFaceswapWorkflow: '', videoUpscalingWorkflow: '', videoDepthWorkflow: '', videoSegmentationWorkflow: '', diff --git a/packages/app/src/services/settings/useSettings.ts b/packages/app/src/services/settings/useSettings.ts index 073d288d..60252521 100644 --- a/packages/app/src/services/settings/useSettings.ts +++ b/packages/app/src/services/settings/useSettings.ts @@ -360,6 +360,15 @@ export const useSettings = create()( : 
defaultImageGenerationTurboWorkflow, }) }, + setImageFaceswapWorkflow: (imageFaceswapWorkflow?: ClapWorkflow) => { + const { imageFaceswapWorkflow: defaultImageFaceswapWorkflow } = + getDefaultSettingsState() + set({ + imageFaceswapWorkflow: imageFaceswapWorkflow + ? JSON.stringify(imageFaceswapWorkflow) + : defaultImageFaceswapWorkflow, + }) + }, setImageUpscalingWorkflow: (imageUpscalingWorkflow?: ClapWorkflow) => { const { imageUpscalingWorkflow: defaultImageUpscalingWorkflow } = getDefaultSettingsState() @@ -398,6 +407,15 @@ export const useSettings = create()( : defaultVideoGenerationWorkflow, }) }, + setVideoFaceswapWorkflow: (videoFaceswapWorkflow?: ClapWorkflow) => { + const { videoFaceswapWorkflow: defaultVideoFaceswapWorkflow } = + getDefaultSettingsState() + set({ + videoFaceswapWorkflow: videoFaceswapWorkflow + ? JSON.stringify(videoFaceswapWorkflow) + : defaultVideoFaceswapWorkflow, + }) + }, setVideoUpscalingWorkflow: (videoUpscalingWorkflow?: ClapWorkflow) => { const { videoUpscalingWorkflow: defaultVideoUpscalingWorkflow } = getDefaultSettingsState() @@ -784,6 +802,11 @@ export const useSettings = create()( ClapWorkflowCategory.IMAGE_GENERATION ) + const imageFaceswapWorkflow = parseWorkflow( + state.imageFaceswapWorkflow || defaultSettings.imageFaceswapWorkflow, + ClapWorkflowCategory.IMAGE_FACESWAP + ) + const imageUpscalingWorkflow = parseWorkflow( state.imageUpscalingWorkflow || defaultSettings.imageUpscalingWorkflow, @@ -807,6 +830,11 @@ export const useSettings = create()( ClapWorkflowCategory.VIDEO_GENERATION ) + const videoFaceswapWorkflow = parseWorkflow( + state.videoFaceswapWorkflow || defaultSettings.videoFaceswapWorkflow, + ClapWorkflowCategory.VIDEO_FACESWAP + ) + const videoDepthWorkflow = parseWorkflow( state.videoDepthWorkflow || defaultSettings.videoDepthWorkflow, ClapWorkflowCategory.VIDEO_DEPTH_MAPPING @@ -921,10 +949,12 @@ export const useSettings = create()( assistantTurboWorkflow, imageGenerationWorkflow, 
imageGenerationTurboWorkflow, + imageFaceswapWorkflow, imageUpscalingWorkflow, imageDepthWorkflow, imageSegmentationWorkflow, videoGenerationWorkflow, + videoFaceswapWorkflow, videoDepthWorkflow, videoSegmentationWorkflow, videoUpscalingWorkflow, diff --git a/packages/app/src/services/settings/workflows/parseWorkflow.ts b/packages/app/src/services/settings/workflows/parseWorkflow.ts index 178a8bf3..d5f1ca62 100644 --- a/packages/app/src/services/settings/workflows/parseWorkflow.ts +++ b/packages/app/src/services/settings/workflows/parseWorkflow.ts @@ -1,16 +1,11 @@ import { convertComfyUiWorkflowApiToClapWorkflow } from '@/app/api/resolve/providers/comfyui/utils' -import { - findWorkflows, - WorkflowSearchResults, -} from '@/components/toolbars/top-menu/lists/getWorkflowProviders' -import { useWorkflowEditor } from '@/services/editors' + import { ClapWorkflow, ClapWorkflowCategory, ClapWorkflowEngine, ClapWorkflowProvider, } from '@aitube/clap' -import { WorkflowEditorStore } from '@aitube/clapper-services' export function parseWorkflow( input: string, @@ -61,22 +56,6 @@ export function parseWorkflow( } return maybeWorkflow } catch (err) { - // console.log("error:", err) - // MIGRATION OF OLDER SETTINGS - // in case the user has an old version of the settings, the "workflow" - // will be a simple ID. So we try to recover that - const results: WorkflowSearchResults = findWorkflows( - useWorkflowEditor.getState().availableWorkflows, - { workflowId: input } - ) - - if (results.workflow) { - return results.workflow - } - - // for now let's assume we ave two cases: - // 1. the user has an old version of the settings, and we need to migrate it - // 2. 
the user has an empty return noWorkflow } } diff --git a/packages/broadway/src/analysis/analyzeScreenplay.ts b/packages/broadway/src/analysis/analyzeScreenplay.ts index ebc1d9fc..df6a3b88 100644 --- a/packages/broadway/src/analysis/analyzeScreenplay.ts +++ b/packages/broadway/src/analysis/analyzeScreenplay.ts @@ -539,6 +539,10 @@ export async function analyzeScreenplay( // TODO: put any other info we can get from the script description: `${name} is a ${gender}`, + audioPrompt: `a ${age}yo american ${gender} called ${name}`, + + imagePrompt: `a ${age}yo american ${gender} called ${name}`, + category: ClapSegmentCategory.CHARACTER, age, gender, diff --git a/packages/clap/src/types.ts b/packages/clap/src/types.ts index b7d00a29..9a745b31 100644 --- a/packages/clap/src/types.ts +++ b/packages/clap/src/types.ts @@ -447,6 +447,7 @@ export enum ClapInputCategory { IMAGE_URL = "IMAGE_URL", SOUND_URL = "SOUND_URL", VIDEO_URL = "VIDEO_URL", + IMAGE_URLS = "IMAGE_URLS", WIDTH = "WIDTH", HEIGHT = "HEIGHT", SEED = "SEED", @@ -481,6 +482,11 @@ export type ClapInputFieldString = { defaultValue: string } +export type ClapInputFieldStrings = { + type: 'string[]' + defaultValue: string[] +} + export type ClapInputFieldBoolean = { type: 'boolean' defaultValue: boolean @@ -516,6 +522,12 @@ export type ClapInputField> = { */ category: ClapInputCategory + /** + * Whether the field is optional or not + */ + // note from Julian: do we need this now, or can we wait? 
+ // isOptional: boolean + /** * If the input is composed by other inputs, useful * for grouping the inputs @@ -531,6 +543,7 @@ export type ClapInputField> = { | ClapInputFieldNumber | ClapInputFieldInteger | ClapInputFieldString + | ClapInputFieldStrings | ClapInputFieldBoolean | ClapInputFieldAny ) @@ -598,6 +611,7 @@ export enum ClapWorkflowProvider { export enum ClapWorkflowCategory { ASSISTANT = "ASSISTANT", IMAGE_GENERATION = "IMAGE_GENERATION", + IMAGE_FACESWAP = "IMAGE_FACESWAP", IMAGE_FILTERING = "IMAGE_FILTERING", IMAGE_UPSCALING = "IMAGE_UPSCALING", IMAGE_DEPTH_MAPPING = "IMAGE_DEPTH_MAPPING", @@ -606,6 +620,7 @@ export enum ClapWorkflowCategory { SOUND_GENERATION = "SOUND_GENERATION", VOICE_GENERATION = "VOICE_GENERATION", VIDEO_GENERATION = "VIDEO_GENERATION", + VIDEO_FACESWAP = "VIDEO_FACESWAP", VIDEO_FILTERING = "VIDEO_FILTERING", VIDEO_UPSCALING = "VIDEO_UPSCALING", VIDEO_DEPTH_MAPPING = "VIDEO_DEPTH_MAPPING", diff --git a/packages/clapper-services/src/settings.ts b/packages/clapper-services/src/settings.ts index d72cf38a..0d6f19d2 100644 --- a/packages/clapper-services/src/settings.ts +++ b/packages/clapper-services/src/settings.ts @@ -92,10 +92,12 @@ export type SettingsState = BaseSettings & { assistantTurboWorkflow: string imageGenerationWorkflow: string imageGenerationTurboWorkflow: string + imageFaceswapWorkflow: string imageUpscalingWorkflow: string imageDepthWorkflow: string imageSegmentationWorkflow: string videoGenerationWorkflow: string + videoFaceswapWorkflow: string videoUpscalingWorkflow: string videoDepthWorkflow: string videoSegmentationWorkflow: string @@ -111,10 +113,12 @@ export type RequestSettings = BaseSettings & { assistantTurboWorkflow: ClapWorkflow imageGenerationWorkflow: ClapWorkflow imageGenerationTurboWorkflow: ClapWorkflow + imageFaceswapWorkflow: ClapWorkflow imageUpscalingWorkflow: ClapWorkflow imageDepthWorkflow: ClapWorkflow imageSegmentationWorkflow: ClapWorkflow videoGenerationWorkflow: ClapWorkflow +
videoFaceswapWorkflow: ClapWorkflow videoUpscalingWorkflow: ClapWorkflow videoDepthWorkflow: ClapWorkflow videoSegmentationWorkflow: ClapWorkflow @@ -163,10 +167,12 @@ export type SettingsControls = { setAssistantTurboWorkflow: (assistantTurboWorkflow?: ClapWorkflow) => void setImageGenerationWorkflow: (imageGenerationWorkflow?: ClapWorkflow) => void setImageGenerationTurboWorkflow: (imageGenerationTurboWorkflow?: ClapWorkflow) => void + setImageFaceswapWorkflow: (imageFaceswapWorkflow?: ClapWorkflow) => void setImageUpscalingWorkflow: (imageUpscalingWorkflow?: ClapWorkflow) => void setImageDepthWorkflow: (imageDepthWorkflow?: ClapWorkflow) => void setImageSegmentationWorkflow: (imageSegmentationWorkflow?: ClapWorkflow) => void setVideoGenerationWorkflow: (videoGenerationWorkflow?: ClapWorkflow) => void + setVideoFaceswapWorkflow: (videoFaceswapWorkflow?: ClapWorkflow) => void setVideoDepthWorkflow: (videoDepthWorkflow?: ClapWorkflow) => void setVideoSegmentationWorkflow: (videoSegmentationWorkflow?: ClapWorkflow) => void setVideoUpscalingWorkflow: (videoUpscalingWorkflow?: ClapWorkflow) => void diff --git a/packages/timeline/src/components/cells/RedrawButton.tsx b/packages/timeline/src/components/cells/RedrawButton.tsx index 6216c343..a2c319f1 100644 --- a/packages/timeline/src/components/cells/RedrawButton.tsx +++ b/packages/timeline/src/components/cells/RedrawButton.tsx @@ -1,5 +1,6 @@ import { useTimeline } from "@/hooks" import { TimelineSegment } from "@/types" +import { ClapSegmentStatus } from "@aitube/clap" import { Circle, Text } from "@react-three/drei" import { invalidate } from "@react-three/fiber" import { useState, useTransition } from "react" @@ -32,6 +33,11 @@ export function RedrawButton({ setInProgress(true) }) try { + // by default resolveSegment won't generate a segment twice, + // unless we force things like so: + if (segment.status !== ClapSegmentStatus.IN_PROGRESS) { + segment.status = ClapSegmentStatus.TO_GENERATE + } // console.log(`click 
on RedrawButton for segment ` + segment.id) const newSegment = await resolveSegment(segment) // if (ref.current) {