movieseq.py
import time
import os
import json
import imageio
import requests
import argparse
from tqdm import tqdm
from moviepy.editor import VideoFileClip
from utils import encode_image
import openai
from openai import OpenAI


class MovieSeq:
    """Builds interleaved character-photo / keyframe-dialogue prompts and queries an OpenAI chat model."""

    def __init__(self,
                 model="gpt-4o", api_key=None, image_detail="auto",
                 system_text=None):
        self.api_key = api_key
        self.model = model
        self.image_detail = image_detail
        if system_text is None:
            system_text = """
            You will be provided with the following inputs:
            1. A sequence of photos of characters along with their names.
            2. Keyframes from a video clip and the corresponding dialogues, each associated with a speaker ID.
            Your task is to analyze and associate these inputs, understand the context of the video, and respond to the user's needs accordingly.
            """
        self.system_text = system_text
        # Raw endpoint/headers kept for direct HTTP use; the calls below go
        # through the official SDK client instead.
        self.headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.api_key}"
        }
        self.url = "https://api.openai.com/v1/chat/completions"
        self.client = OpenAI(api_key=api_key)
    def get_response(self, char_bank, frame_list, diag_list,
                     query,
                     resize=None, temperature=0, detail="auto"):
        # Note: `resize` and `detail` are accepted but not currently used below.
        messages = [{
            "role": "system",
            "content": [{"type": "text", "text": self.system_text}],
        }]

        # One user message per character: a short caption plus the reference
        # photo (encoded by utils.encode_image).
        for char_name, char_url in char_bank.items():
            char_image = encode_image(char_url)
            messages.append({
                "role": "user",
                "content": [
                    f"This is the photo of {char_name}.",
                    {"image": char_image},
                ],
            })

        # Interleave keyframes with their dialogues; the two lists must align.
        assert len(diag_list) == len(frame_list)
        for frame_i, diag_i in zip(frame_list, diag_list):
            frame_image = encode_image(frame_i)
            messages.append({
                "role": "user",
                "content": [
                    {"image": frame_image},
                    f"{diag_i}.",
                ],
            })

        # Finally, the user's query about the clip.
        messages.append({
            "role": "user",
            "content": [{"type": "text", "text": query}],
        })

        params = {
            "model": self.model,
            "messages": messages,
            "max_tokens": 2048,
            "temperature": temperature,
        }
        response = self.client.chat.completions.create(**params)
        return response.choices[0].message.content
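

# --- Usage sketch (not part of the original file) ---
# A minimal example of how MovieSeq might be driven, assuming:
#   * OPENAI_API_KEY is set in the environment,
#   * utils.encode_image accepts local image paths, and
#   * the character/frame paths and dialogue strings below are hypothetical
#     placeholders, not files shipped with the repository.
if __name__ == "__main__":
    seq = MovieSeq(model="gpt-4o", api_key=os.environ.get("OPENAI_API_KEY"))

    # Character bank: name -> reference photo path (hypothetical paths).
    char_bank = {
        "Alice": "chars/alice.jpg",
        "Bob": "chars/bob.jpg",
    }

    # Keyframes and their aligned dialogues (must be the same length).
    frame_list = ["frames/000.jpg", "frames/001.jpg"]
    diag_list = [
        "Alice: Where were you last night?",
        "Bob: I was at the theater.",
    ]

    answer = seq.get_response(
        char_bank, frame_list, diag_list,
        query="Summarize what happens in this clip and who is involved.",
    )
    print(answer)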