working on workflows
jbilcke-hf committed Aug 10, 2024
1 parent a8ff2d1 commit 5c344b3
Showing 33 changed files with 2,076 additions and 118 deletions.
10 changes: 9 additions & 1 deletion README.md
@@ -86,12 +86,20 @@ If you find that Clapper is working with a more recent (stable) version of Node,

### Installing and running the app

Install the dependencies.

`--include=optional` ensures that optional dependencies are installed (pre-built native modules compatible with your system)

```bash
npm i --include=optional
```

Then run the actual app. The first time you go to localhost:3000 after running this command, the app will compile, which can take a minute (literally: `Compiled / in 52.6s (6372 modules)`)

```bash
npm run dev
```

`--include=optional` ensures that optional dependencies are installed (pre-built native modules compatible with your system)

### Building the app

16 changes: 8 additions & 8 deletions package-lock.json


4 changes: 2 additions & 2 deletions package.json
@@ -38,8 +38,8 @@
"dependencies": {
"@aitube/broadway": "0.2.0",
"@aitube/clap": "0.2.0",
"@aitube/clapper-services": "0.2.0-2",
"@aitube/engine": "0.2.0",
"@aitube/clapper-services": "0.2.0-3",
"@aitube/engine": "0.2.0-1",
"@aitube/timeline": "0.2.0",
"@fal-ai/serverless-client": "^0.13.0",
"@ffmpeg/ffmpeg": "^0.12.10",
3 changes: 3 additions & 0 deletions public/carriers/README.md
@@ -0,0 +1,3 @@
# Carriers

Put carrier videos here (for Live Portrait and other similar tech)
3 changes: 3 additions & 0 deletions public/images/providers/comfyicu.png
1 change: 1 addition & 0 deletions public/voices/README.md
@@ -0,0 +1 @@
Voice files
71 changes: 61 additions & 10 deletions src/app/api/resolve/providers/comfy-comfyicu/index.ts
@@ -5,29 +5,80 @@ import {
getClapAssetSourceType,
} from '@aitube/clap'
import { TimelineSegment } from '@aitube/timeline'
import { getWorkflowInputValues } from '../getWorkflowInputValues'
import { ComfyIcuApiRequestRunWorkflow } from './types'

export async function resolveSegment(
request: ResolveRequest
): Promise<TimelineSegment> {
if (!request.settings.comfyIcuApiKey) {
throw new Error(`Missing API key for "Comfy.icu"`)
}
if (request.segment.category !== ClapSegmentCategory.STORYBOARD) {

if (request.segment.category === ClapSegmentCategory.STORYBOARD) {

const workflowId = request.settings.imageGenerationWorkflow.id.split('://').pop() || ''

if (!workflowId) {
throw new Error(`The ComfyICU workflow ID is missing`)
}

const inputFields = request.settings.imageGenerationWorkflow.inputFields || []

// since this is a random "wild" workflow, the prompt field may be
// named slightly differently, so we look through the workflow's
// input fields to find the best match
const promptFields = [
inputFields.find(f => f.id === 'prompt'), // exact match
inputFields.find(f => f.id.includes('prompt')), // similar name
inputFields.find(f => f.type === 'string') // similar type
].filter(x => typeof x !== 'undefined')

const promptField = promptFields[0]
if (!promptField) {
throw new Error(`this workflow doesn't seem to have a parameter called "prompt"`)
}

// TODO: modify the serialized workflow payload
// to inject our params:
// ...getWorkflowInputValues(request.settings.imageGenerationWorkflow),
// [promptField.id]: request.prompts.image.positive,

const payload: ComfyIcuApiRequestRunWorkflow = {
workflow_id: workflowId,
prompt: request.settings.imageGenerationWorkflow.data,
files: {},
}


const rawResponse = await fetch(`https://comfy.icu/api/v1/workflows/${workflowId}/runs`, {
headers: {
accept: "application/json",
"content-type": "application/json",
authorization: `Bearer ${request.settings.comfyIcuApiKey}`,
},
body: JSON.stringify(payload),
method: "POST",
});

const response = await rawResponse.json()

if (response.status === "error") {
throw new Error(response.message)
}

console.log('response:', response)

// TODO use the RUN ID to regularly check for status
// see https://comfy.icu/docs/api

throw new Error(
`Clapper doesn't support ${request.segment.category} generation for provider "Comfy.icu". Please open a pull request with (working code) to solve this!`
)
}

const segment: TimelineSegment = { ...request.segment }

try {
throw new Error(`Not Implemented!`)
} catch (err) {
console.error(`failed to call Comfy.icu: `, err)
segment.assetUrl = ''
segment.assetSourceType = getClapAssetSourceType(segment.assetUrl)
segment.status = ClapSegmentStatus.TO_GENERATE
}

return segment
}
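
A note on the TODO left in the middle of this function: injecting the workflow input values and the detected prompt field into the payload is not implemented yet. The sketch below is purely illustrative (the `collectInjectedValues` helper is hypothetical, not part of this commit) and only shows how those values could be gathered; writing them back into the serialized ComfyUI graph stored in `workflow.data` depends on the graph format and is left open, just as in the TODO.

```typescript
import { ClapInputValues, ClapWorkflow } from '@aitube/clap'
import { getWorkflowInputValues } from '../getWorkflowInputValues'

// Hypothetical helper (not part of this commit): gather the values the TODO
// wants to inject — workflow defaults, then user-provided values, then the
// positive image prompt mapped onto the prompt field detected above.
function collectInjectedValues(
  workflow: ClapWorkflow,
  promptFieldId: string,
  positivePrompt: string
): ClapInputValues {
  const { workflowDefaultValues, workflowValues } = getWorkflowInputValues(workflow)

  return {
    ...workflowDefaultValues, // defaults declared by the workflow's input fields
    ...workflowValues, // values explicitly set by the user
    [promptFieldId]: positivePrompt, // override the detected prompt field
  }
}

// usage (illustrative):
// const injected = collectInjectedValues(
//   request.settings.imageGenerationWorkflow,
//   promptField.id,
//   request.prompts.image.positive
// )
```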
24 changes: 24 additions & 0 deletions src/app/api/resolve/providers/comfy-comfyicu/types.ts
@@ -0,0 +1,24 @@
export type ComfyIcuApiRequestRunWorkflow = {
workflow_id: string
prompt: string
files: Record<string, any>
}

export type ComfyIcuApiResponseWorkflowStatus = {
id: string
run_time?: number
status: string
name?: string
created_at: string
output?: ComfyIcuWorkflowOutput[]
project_id: string
api_key_id: any
device?: string
}

export type ComfyIcuWorkflowOutput = {
filename: string
url: string
thumbnail_url: string
}
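
The resolver above also leaves a TODO to use the run ID to regularly check the run status. Below is a hedged sketch of what that polling could look like, reusing the `ComfyIcuApiResponseWorkflowStatus` type just added; the GET endpoint path and the terminal status handling are assumptions to verify against https://comfy.icu/docs/api, not confirmed API details.

```typescript
import { ComfyIcuApiResponseWorkflowStatus } from './types'

// Assumed base URL and read endpoint — verify against https://comfy.icu/docs/api
const COMFY_ICU_API_BASE_URL = 'https://comfy.icu/api/v1'

async function pollWorkflowRun({
  apiKey,
  workflowId,
  runId,
  intervalInMs = 2000,
  timeoutInMs = 120_000,
}: {
  apiKey: string
  workflowId: string
  runId: string
  intervalInMs?: number
  timeoutInMs?: number
}): Promise<ComfyIcuApiResponseWorkflowStatus> {
  const startedAt = Date.now()

  while (Date.now() - startedAt < timeoutInMs) {
    // assumption: a run can be read back at /workflows/:workflowId/runs/:runId
    const rawResponse = await fetch(
      `${COMFY_ICU_API_BASE_URL}/workflows/${workflowId}/runs/${runId}`,
      {
        headers: {
          accept: 'application/json',
          authorization: `Bearer ${apiKey}`,
        },
      }
    )

    const run = (await rawResponse.json()) as ComfyIcuApiResponseWorkflowStatus

    // once outputs are present, the run is considered done;
    // the exact terminal status strings are an assumption
    if (run.output?.length) {
      return run
    }
    if (run.status === 'error') {
      throw new Error(`ComfyICU run ${runId} failed`)
    }

    await new Promise((resolve) => setTimeout(resolve, intervalInMs))
  }

  throw new Error(`Timed out while waiting for ComfyICU run ${runId}`)
}
```

If the run completes, `output[0].url` could then be assigned to `segment.assetUrl`, similar to how the fal.ai resolver consumes its results.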

86 changes: 57 additions & 29 deletions src/app/api/resolve/providers/falai/index.ts
@@ -8,6 +8,7 @@ import {
FalAiSpeechResponse,
FalAiVideoResponse,
} from './types'
import { getWorkflowInputValues } from '../getWorkflowInputValues'

export async function resolveSegment(
request: ResolveRequest
@@ -37,13 +37,6 @@ export async function resolveSegment(
return segment
}

const imageSize =
request.meta.orientation === ClapMediaOrientation.SQUARE
? FalAiImageSize.SQUARE_HD
: request.meta.orientation === ClapMediaOrientation.PORTRAIT
? FalAiImageSize.PORTRAIT_16_9
: FalAiImageSize.LANDSCAPE_16_9

let result: FalAiImageResponse | undefined = undefined

if (model === 'fal-ai/pulid') {
@@ -56,6 +50,25 @@
}
}

const {
workflowDefaultValues,
workflowValues
} = getWorkflowInputValues(request.settings.imageGenerationWorkflow)

// for the moment let's use FAL's predefined sizes
const imageSize =
request.meta.orientation === ClapMediaOrientation.SQUARE
? FalAiImageSize.SQUARE_HD
: request.meta.orientation === ClapMediaOrientation.PORTRAIT
? FalAiImageSize.PORTRAIT_16_9
: FalAiImageSize.LANDSCAPE_16_9

// for the moment let's use FAL's predefined sizes
delete workflowDefaultValues.width
delete workflowDefaultValues.height
delete workflowValues.width
delete workflowValues.height

if (model === 'fal-ai/pulid') {
result = (await fal.run(model, {
input: {
@@ -74,11 +87,13 @@
} else {
result = (await fal.run(model, {
input: {
...workflowDefaultValues,
...workflowValues,

prompt: request.prompts.image.positive,

image_size: imageSize,
sync_mode: true,
num_inference_steps:
model === 'fal-ai/stable-diffusion-v3-medium' ? 40 : 25,
num_images: 1,
enable_safety_checker:
request.settings.censorNotForAllAudiencesContent,
@@ -113,18 +128,12 @@
`cannot generate a video without a storyboard (the concept of Clapper is to use storyboards)`
)
}

const result = (await fal.run(model, {
input: {
image_url: storyboard.assetUrl,
...getWorkflowInputValues(request.settings.videoGenerationWorkflow),

motion_bucket_id: 55,

// The conditoning augmentation determines the amount of noise that
// will be added to the conditioning frame. The higher the number,
// the more noise there will be, and the less the video will look
// like the initial image. Increase it for more motion.
// Default value: 0.02
cond_aug: 0.02,
image_url: storyboard.assetUrl,

sync_mode: true,
enable_safety_checker: request.settings.censorNotForAllAudiencesContent,
@@ -140,17 +149,29 @@
}

segment.assetUrl = result?.video?.url || ''
} else if (
request.segment.category === ClapSegmentCategory.SOUND ||
request.segment.category === ClapSegmentCategory.MUSIC
) {
model =
request.segment.category === ClapSegmentCategory.MUSIC
? request.settings.musicGenerationWorkflow.data
: request.settings.soundGenerationWorkflow.data
} else if (request.segment.category === ClapSegmentCategory.SOUND) {
model = request.settings.soundGenerationWorkflow.data

const result = (await fal.run(model, {
input: {
...getWorkflowInputValues(request.settings.soundGenerationWorkflow),

// note how we use the *segment* prompt for music or sound
prompt: request.segment.prompt,

sync_mode: true,
enable_safety_checker: request.settings.censorNotForAllAudiencesContent,
},
})) as FalAiAudioResponse

segment.assetUrl = result?.audio_file?.url || ''
} else if (request.segment.category === ClapSegmentCategory.MUSIC) {
model = request.settings.musicGenerationWorkflow.data

const result = (await fal.run(model, {
input: {
...getWorkflowInputValues(request.settings.musicGenerationWorkflow),

// note how we use the *segment* prompt for music or sound
prompt: request.segment.prompt,

@@ -163,12 +184,19 @@
} else if (request.segment.category === ClapSegmentCategory.DIALOGUE) {
model = request.settings.voiceGenerationWorkflow.data || ''


let voiceIdentity =
request.prompts.voice.identity ||
// TODO for the default we should use one of our own voices instead
// PS: are you implementing this task? please do a search in the code for speakers/bria.mp3
'https://cdn.themetavoice.xyz/speakers/bria.mp3'

const result = (await fal.run(model, {
input: {
text: request.segment.prompt,
...getWorkflowInputValues(request.settings.voiceGenerationWorkflow),

// todo use the entity audio id, if available
audio_url: 'https://cdn.themetavoice.xyz/speakers/bria.mp3',
text: request.segment.prompt, // <-- we are using the segment prompt
audio_url: voiceIdentity,

sync_mode: true,
enable_safety_checker: request.settings.censorNotForAllAudiencesContent,
23 changes: 23 additions & 0 deletions src/app/api/resolve/providers/getWorkflowInputValues.ts
@@ -0,0 +1,23 @@
import { ClapInputValues, ClapWorkflow } from "@aitube/clap"

export function getWorkflowInputValues(workflow: ClapWorkflow): {
workflowDefaultValues: ClapInputValues
workflowValues: ClapInputValues
} {
const workflowDefaultValues =
workflow.inputFields.reduce(
(acc, field) => ({
...acc,
[field.id]: field.defaultValue,
}),
{} as ClapInputValues
)

const workflowValues = workflow.inputValues as ClapInputValues

return {
workflowDefaultValues,
workflowValues
}
}
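
To make the two returned maps concrete, here is a small worked example (the field ids and values are made up for illustration): defaults come from each input field's `defaultValue`, while `workflowValues` only contains what the user explicitly set, so providers can spread defaults first and user values second, as the fal.ai resolver does.

```typescript
// Illustrative only — a made-up workflow fragment and what
// getWorkflowInputValues would derive from it.
const exampleInputFields = [
  { id: 'prompt', type: 'string', defaultValue: '' },
  { id: 'guidance_scale', type: 'number', defaultValue: 3.5 },
]
const exampleInputValues = { guidance_scale: 7 }

// getWorkflowInputValues(workflow) would then return:
// workflowDefaultValues = { prompt: '', guidance_scale: 3.5 }
// workflowValues        = { guidance_scale: 7 }
//
// spreading workflowDefaultValues first and workflowValues second
// lets user-provided values win over the declared defaults.
```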
