Skip to content

Commit 5c344b3

Browse files
committed
working on workflows
1 parent a8ff2d1 commit 5c344b3

File tree

33 files changed

+2076
-118
lines changed

33 files changed

+2076
-118
lines changed

README.md

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,12 +86,20 @@ If you find that Clapper is working with a more recent (stable) version of Node,
8686

8787
### Installing and running the app
8888

89+
Install the dependencies.
90+
91+
`--include=optional` is to make sure optional dependencies are installed (pre-built native modules compatible with your system)
92+
8993
```bash
9094
npm i --include=optional
95+
```
96+
97+
Then run the actual app. The first time you go to localhost:3000 after typing this command, the app will compile, which can take a minute (like, literally: `Compiled / in 52.6s (6372 modules)`)
98+
99+
```bash
91100
npm run dev
92101
```
93102

94-
`--include=optional` is to make sure optional dependencies are installed (pre-build native modules compatible with your system)
95103

96104
### Building the app
97105

package-lock.json

Lines changed: 8 additions & 8 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,8 @@
3838
"dependencies": {
3939
"@aitube/broadway": "0.2.0",
4040
"@aitube/clap": "0.2.0",
41-
"@aitube/clapper-services": "0.2.0-2",
42-
"@aitube/engine": "0.2.0",
41+
"@aitube/clapper-services": "0.2.0-3",
42+
"@aitube/engine": "0.2.0-1",
4343
"@aitube/timeline": "0.2.0",
4444
"@fal-ai/serverless-client": "^0.13.0",
4545
"@ffmpeg/ffmpeg": "^0.12.10",

public/carriers/README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Carriers
2+
3+
Put carrier videos here (for Live Portrait and other similar tech)

public/images/providers/comfyicu.png

Lines changed: 3 additions & 0 deletions
Loading

public/voices/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Voice files

src/app/api/resolve/providers/comfy-comfyicu/index.ts

Lines changed: 61 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,29 +5,80 @@ import {
55
getClapAssetSourceType,
66
} from '@aitube/clap'
77
import { TimelineSegment } from '@aitube/timeline'
8+
import { getWorkflowInputValues } from '../getWorkflowInputValues'
9+
import { ComfyIcuApiRequestRunWorkflow } from './types'
810

911
export async function resolveSegment(
1012
request: ResolveRequest
1113
): Promise<TimelineSegment> {
1214
if (!request.settings.comfyIcuApiKey) {
1315
throw new Error(`Missing API key for "Comfy.icu"`)
1416
}
15-
if (request.segment.category !== ClapSegmentCategory.STORYBOARD) {
17+
18+
if (request.segment.category === ClapSegmentCategory.STORYBOARD) {
19+
20+
const workflowId = request.settings.imageGenerationWorkflow.id.split('://').pop() || ''
21+
22+
if (!workflowId) {
23+
throw new Error(`The ComfyICU workflow ID is missing`)
24+
}
25+
26+
const inputFields = request.settings.imageGenerationWorkflow.inputFields || []
27+
28+
// since this is a random "wild" workflow, it is possible
29+
// that the field name is a bit different
30+
// we try to look into the workflow input fields
31+
// to find the best match
32+
const promptFields = [
33+
inputFields.find(f => f.id === 'prompt'),// exactMatch,
34+
inputFields.find(f => f.id.includes('prompt')), // similarName,
35+
inputFields.find(f => f.type === 'string') // similarType
36+
].filter(x => typeof x !== 'undefined')
37+
38+
const promptField = promptFields[0]
39+
if (!promptField) {
40+
throw new Error(`this workflow doesn't seem to have a parameter called "prompt"`)
41+
}
42+
43+
// TODO: modify the serialized workflow payload
44+
// to inject our params:
45+
// ...getWorkflowInputValues(request.settings.imageGenerationWorkflow),
46+
// [promptField.id]: request.prompts.image.positive,
47+
48+
const payload: ComfyIcuApiRequestRunWorkflow = {
49+
workflow_id: workflowId,
50+
prompt: request.settings.imageGenerationWorkflow.data,
51+
files: {},
52+
}
53+
54+
55+
const rawResponse = await fetch(`https://comfy.icu/api/v1/workflows/${workflowId}/runs`, {
56+
headers: {
57+
accept: "application/json",
58+
"content-type": "application/json",
59+
authorization: `Bearer ${request.settings.comfyIcuApiKey}`,
60+
},
61+
body: JSON.stringify(payload),
62+
method: "POST",
63+
});
64+
65+
const response = await rawResponse.json()
66+
67+
if (response.status === "error") {
68+
throw new Error(response.message)
69+
}
70+
71+
console.log('response:', response)
72+
73+
// TODO use the RUN ID to regularly check for status
74+
// see https://comfy.icu/docs/api
75+
1676
throw new Error(
1777
`Clapper doesn't support ${request.segment.category} generation for provider "Comfy.icu". Please open a pull request with (working code) to solve this!`
1878
)
1979
}
2080

2181
const segment: TimelineSegment = { ...request.segment }
2282

23-
try {
24-
throw new Error(`Not Implemented!`)
25-
} catch (err) {
26-
console.error(`failed to call Comfy.icu: `, err)
27-
segment.assetUrl = ''
28-
segment.assetSourceType = getClapAssetSourceType(segment.assetUrl)
29-
segment.status = ClapSegmentStatus.TO_GENERATE
30-
}
31-
3283
return segment
3384
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
export type ComfyIcuApiRequestRunWorkflow = {
2+
workflow_id: string
3+
prompt: string
4+
files: Record<string, any>
5+
}
6+
7+
export type ComfyIcuApiResponseWorkflowStatus = {
8+
id: string
9+
run_time?: number
10+
status: string
11+
name?: string
12+
created_at: string
13+
output?: ComfyIcuWorkflowOutput[]
14+
project_id: string
15+
api_key_id: any
16+
device?: string
17+
}
18+
19+
export type ComfyIcuWorkflowOutput = {
20+
filename: string
21+
url: string
22+
thumbnail_url: string
23+
}
24+

src/app/api/resolve/providers/falai/index.ts

Lines changed: 57 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import {
88
FalAiSpeechResponse,
99
FalAiVideoResponse,
1010
} from './types'
11+
import { getWorkflowInputValues } from '../getWorkflowInputValues'
1112

1213
export async function resolveSegment(
1314
request: ResolveRequest
@@ -37,13 +38,6 @@ export async function resolveSegment(
3738
return segment
3839
}
3940

40-
const imageSize =
41-
request.meta.orientation === ClapMediaOrientation.SQUARE
42-
? FalAiImageSize.SQUARE_HD
43-
: request.meta.orientation === ClapMediaOrientation.PORTRAIT
44-
? FalAiImageSize.PORTRAIT_16_9
45-
: FalAiImageSize.LANDSCAPE_16_9
46-
4741
let result: FalAiImageResponse | undefined = undefined
4842

4943
if (model === 'fal-ai/pulid') {
@@ -56,6 +50,25 @@ export async function resolveSegment(
5650
}
5751
}
5852

53+
const {
54+
workflowDefaultValues,
55+
workflowValues
56+
} = getWorkflowInputValues(request.settings.imageGenerationWorkflow)
57+
58+
// for the moment let's use FAL's predefined sizes
59+
const imageSize =
60+
request.meta.orientation === ClapMediaOrientation.SQUARE
61+
? FalAiImageSize.SQUARE_HD
62+
: request.meta.orientation === ClapMediaOrientation.PORTRAIT
63+
? FalAiImageSize.PORTRAIT_16_9
64+
: FalAiImageSize.LANDSCAPE_16_9
65+
66+
// for the moment let's use FAL's predefined sizes
67+
delete workflowDefaultValues.width
68+
delete workflowDefaultValues.height
69+
delete workflowValues.width
70+
delete workflowValues.height
71+
5972
if (model === 'fal-ai/pulid') {
6073
result = (await fal.run(model, {
6174
input: {
@@ -74,11 +87,13 @@ export async function resolveSegment(
7487
} else {
7588
result = (await fal.run(model, {
7689
input: {
90+
...workflowDefaultValues,
91+
...workflowValues,
92+
7793
prompt: request.prompts.image.positive,
94+
7895
image_size: imageSize,
7996
sync_mode: true,
80-
num_inference_steps:
81-
model === 'fal-ai/stable-diffusion-v3-medium' ? 40 : 25,
8297
num_images: 1,
8398
enable_safety_checker:
8499
request.settings.censorNotForAllAudiencesContent,
@@ -113,18 +128,12 @@ export async function resolveSegment(
113128
`cannot generate a video without a storyboard (the concept of Clapper is to use storyboards)`
114129
)
115130
}
131+
116132
const result = (await fal.run(model, {
117133
input: {
118-
image_url: storyboard.assetUrl,
134+
...getWorkflowInputValues(request.settings.videoGenerationWorkflow),
119135

120-
motion_bucket_id: 55,
121-
122-
// The conditoning augmentation determines the amount of noise that
123-
// will be added to the conditioning frame. The higher the number,
124-
// the more noise there will be, and the less the video will look
125-
// like the initial image. Increase it for more motion.
126-
// Default value: 0.02
127-
cond_aug: 0.02,
136+
image_url: storyboard.assetUrl,
128137

129138
sync_mode: true,
130139
enable_safety_checker: request.settings.censorNotForAllAudiencesContent,
@@ -140,17 +149,29 @@ export async function resolveSegment(
140149
}
141150

142151
segment.assetUrl = result?.video?.url || ''
143-
} else if (
144-
request.segment.category === ClapSegmentCategory.SOUND ||
145-
request.segment.category === ClapSegmentCategory.MUSIC
146-
) {
147-
model =
148-
request.segment.category === ClapSegmentCategory.MUSIC
149-
? request.settings.musicGenerationWorkflow.data
150-
: request.settings.soundGenerationWorkflow.data
152+
} else if (request.segment.category === ClapSegmentCategory.SOUND) {
153+
model = request.settings.musicGenerationWorkflow.data
154+
155+
const result = (await fal.run(model, {
156+
input: {
157+
...getWorkflowInputValues(request.settings.soundGenerationWorkflow),
158+
159+
// note how we use the *segment* prompt for music or sound
160+
prompt: request.segment.prompt,
161+
162+
sync_mode: true,
163+
enable_safety_checker: request.settings.censorNotForAllAudiencesContent,
164+
},
165+
})) as FalAiAudioResponse
166+
167+
segment.assetUrl = result?.audio_file?.url || ''
168+
} else if (request.segment.category === ClapSegmentCategory.MUSIC) {
169+
model = request.settings.musicGenerationWorkflow.data
151170

152171
const result = (await fal.run(model, {
153172
input: {
173+
...getWorkflowInputValues(request.settings.soundGenerationWorkflow),
174+
154175
// note how we use the *segment* prompt for music or sound
155176
prompt: request.segment.prompt,
156177

@@ -163,12 +184,19 @@ export async function resolveSegment(
163184
} else if (request.segment.category === ClapSegmentCategory.DIALOGUE) {
164185
model = request.settings.voiceGenerationWorkflow.data || ''
165186

187+
188+
let voiceIdentity =
189+
request.prompts.voice.identity ||
190+
// TODO for the default we should use one of our own voices instead
191+
// PS: are you implementing this task? please do a search in the code for speakers/bria.mp3
192+
'https://cdn.themetavoice.xyz/speakers/bria.mp3'
193+
166194
const result = (await fal.run(model, {
167195
input: {
168-
text: request.segment.prompt,
196+
...getWorkflowInputValues(request.settings.voiceGenerationWorkflow),
169197

170-
// todo use the entty audio id, if available
171-
audio_url: 'https://cdn.themetavoice.xyz/speakers/bria.mp3',
198+
text: request.segment.prompt, // <-- we are using the segment prompt
199+
audio_url: voiceIdentity,
172200

173201
sync_mode: true,
174202
enable_safety_checker: request.settings.censorNotForAllAudiencesContent,
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
import { ClapInputValues, ClapWorkflow } from "@aitube/clap"
2+
3+
export function getWorkflowInputValues(workflow: ClapWorkflow): {
4+
workflowDefaultValues: ClapInputValues
5+
workflowValues: ClapInputValues
6+
} {
7+
const workflowDefaultValues =
8+
workflow.inputFields.reduce(
9+
(acc, field) => ({
10+
...acc,
11+
[field.id]: field.defaultValue,
12+
}),
13+
{} as ClapInputValues
14+
)
15+
16+
const workflowValues = workflow
17+
.inputValues as ClapInputValues
18+
19+
return {
20+
workflowDefaultValues,
21+
workflowValues
22+
}
23+
}

0 commit comments

Comments
 (0)