Skip to content

Commit 40260f7

Browse files
committed
refactor ChatGPT conversation to include outputFormat (voice, text), msgId, and commandPrefix; add talk voice command
1 parent 6516219 commit 40260f7

File tree

6 files changed

+106
-49
lines changed

6 files changed

+106
-49
lines changed

src/modules/open-ai/helpers.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ export enum SupportedCommands {
2121
dalleShorter = 'i',
2222
genImgEn = 'genImgEn',
2323
on = 'on',
24-
off = 'off'
24+
off = 'off',
25+
talk = 'talk'
2526
}
2627

2728
export const MAX_TRIES = 3

src/modules/open-ai/index.ts

Lines changed: 65 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import {
1313
} from '../types'
1414
import {
1515
alterGeneratedImg,
16+
chatCompletion,
1617
getChatModel,
1718
getDalleModel,
1819
getDalleModelPrice,
@@ -47,6 +48,7 @@ import { AxiosError } from 'axios'
4748
import { Callbacks } from '../types'
4849
import { LlmsBot } from '../llms'
4950
import { type PhotoSize } from 'grammy/types'
51+
import { responseWithVoice } from '../voice-to-voice-gpt/helpers'
5052

5153
const priceAdjustment = config.openAi.chatGpt.priceAdjustment
5254
export class OpenAIBot implements PayableBot {
@@ -174,16 +176,21 @@ export class OpenAIBot implements PayableBot {
174176
}
175177
break
176178
}
177-
case SupportedCommands.ask: {
179+
case SupportedCommands.ask:
180+
case SupportedCommands.talk: {
178181
if (this.botSuspended) {
179182
ctx.transient.analytics.sessionState = RequestState.Error
180183
await sendMessage(ctx, 'The bot is suspended').catch(async (e) => { await this.onError(ctx, e) })
181184
ctx.transient.analytics.actualResponseTime = now()
182185
return
183186
}
184-
ctx.session.openAi.chatGpt.requestQueue.push(
185-
await preparePrompt(ctx, prompt)
186-
)
187+
const adaptedPrompt = (SupportedCommands.talk === command
188+
? 'Keep it short, like a phone call'
189+
: '') + await preparePrompt(ctx, prompt)
190+
ctx.session.openAi.chatGpt.requestQueue.push({
191+
prompt: adaptedPrompt,
192+
outputFormat: SupportedCommands.ask === command ? 'text' : 'voice'
193+
})
187194
if (!ctx.session.openAi.chatGpt.isProcessingQueue) {
188195
ctx.session.openAi.chatGpt.isProcessingQueue = true
189196
await this.onChatRequestHandler(ctx).then(() => {
@@ -407,7 +414,7 @@ export class OpenAIBot implements PayableBot {
407414
)
408415
}
409416

410-
private async promptGen (data: ChatPayload, msgId?: number): Promise< { price: number, chat: ChatConversation[] }> {
417+
private async completionGen (data: ChatPayload, msgId?: number, outputFormat = 'text'): Promise< { price: number, chat: ChatConversation[] }> {
411418
const { conversation, ctx, model } = data
412419
try {
413420
if (!msgId) {
@@ -420,29 +427,42 @@ export class OpenAIBot implements PayableBot {
420427
})
421428
).message_id
422429
}
423-
const isTypingEnabled = config.openAi.chatGpt.isTypingEnabled
424-
if (isTypingEnabled) {
425-
ctx.chatAction = 'typing'
426-
}
427-
const completion = await streamChatCompletion(
428-
conversation,
429-
ctx,
430-
model,
431-
msgId,
432-
true // telegram messages have a character limit
433-
)
434-
if (isTypingEnabled) {
435-
ctx.chatAction = null
436-
}
437-
if (completion) {
438-
ctx.transient.analytics.sessionState = RequestState.Success
439-
ctx.transient.analytics.actualResponseTime = now()
440-
const price = getPromptPrice(completion, data)
441-
this.logger.info(
442-
`streamChatCompletion result = tokens: ${price.totalTokens} | ${model} | price: ${price.price}¢` // price.promptTokens + price.completionTokens }
430+
if (outputFormat === 'text') {
431+
const isTypingEnabled = config.openAi.chatGpt.isTypingEnabled
432+
if (isTypingEnabled) {
433+
ctx.chatAction = 'typing'
434+
}
435+
const completion = await streamChatCompletion(
436+
conversation,
437+
ctx,
438+
model,
439+
msgId,
440+
true // telegram messages have a character limit
443441
)
442+
if (isTypingEnabled) {
443+
ctx.chatAction = null
444+
}
445+
if (completion) {
446+
ctx.transient.analytics.sessionState = RequestState.Success
447+
ctx.transient.analytics.actualResponseTime = now()
448+
const price = getPromptPrice(completion, data)
449+
this.logger.info(
450+
`streamChatCompletion result = tokens: ${price.totalTokens} | ${model} | price: ${price.price}¢` // price.promptTokens + price.completionTokens }
451+
)
452+
return {
453+
price: price.price,
454+
chat: conversation
455+
}
456+
}
457+
} else {
458+
const response = await chatCompletion(conversation, ChatGPTModelsEnum.GPT_35_TURBO_16K)
459+
conversation.push({
460+
role: 'system',
461+
content: response.completion
462+
})
463+
await responseWithVoice(response.completion, ctx as OnMessageContext, msgId)
444464
return {
445-
price: price.price,
465+
price: response.price,
446466
chat: conversation
447467
}
448468
}
@@ -469,9 +489,10 @@ export class OpenAIBot implements PayableBot {
469489
}
470490
const { username } = ctx.me
471491
const prompt = ctx.message?.text?.slice(username.length + 1) ?? '' // @
472-
ctx.session.openAi.chatGpt.requestQueue.push(
473-
await preparePrompt(ctx, prompt)
474-
)
492+
ctx.session.openAi.chatGpt.requestQueue.push({
493+
prompt: await preparePrompt(ctx, prompt),
494+
outputFormat: 'text'
495+
})
475496
if (!ctx.session.openAi.chatGpt.isProcessingQueue) {
476497
ctx.session.openAi.chatGpt.isProcessingQueue = true
477498
await this.onChatRequestHandler(ctx).then(() => {
@@ -494,9 +515,10 @@ export class OpenAIBot implements PayableBot {
494515
return
495516
}
496517
const prompt = ctx.message?.text?.slice(prefix.length) ?? ''
497-
ctx.session.openAi.chatGpt.requestQueue.push(
498-
await preparePrompt(ctx, prompt)
499-
)
518+
ctx.session.openAi.chatGpt.requestQueue.push({
519+
prompt: await preparePrompt(ctx, prompt),
520+
outputFormat: 'text'
521+
})
500522
if (!ctx.session.openAi.chatGpt.isProcessingQueue) {
501523
ctx.session.openAi.chatGpt.isProcessingQueue = true
502524
await this.onChatRequestHandler(ctx).then(() => {
@@ -516,9 +538,10 @@ export class OpenAIBot implements PayableBot {
516538
ctx.transient.analytics.actualResponseTime = now()
517539
return
518540
}
519-
ctx.session.openAi.chatGpt.requestQueue.push(
520-
await preparePrompt(ctx, ctx.message?.text ?? '')
521-
)
541+
ctx.session.openAi.chatGpt.requestQueue.push({
542+
prompt: await preparePrompt(ctx, ctx.message?.text ?? ''),
543+
outputFormat: 'text'
544+
})
522545
if (!ctx.session.openAi.chatGpt.isProcessingQueue) {
523546
ctx.session.openAi.chatGpt.isProcessingQueue = true
524547
await this.onChatRequestHandler(ctx).then(() => {
@@ -556,9 +579,10 @@ export class OpenAIBot implements PayableBot {
556579
if (await this.freePromptChatGroup(ctx, prompt as string)) {
557580
return
558581
}
559-
ctx.session.openAi.chatGpt.requestQueue.push(
560-
await preparePrompt(ctx, prompt as string)
561-
)
582+
ctx.session.openAi.chatGpt.requestQueue.push({
583+
prompt: await preparePrompt(ctx, prompt as string),
584+
outputFormat: 'text'
585+
})
562586
if (!ctx.session.openAi.chatGpt.isProcessingQueue) {
563587
ctx.session.openAi.chatGpt.isProcessingQueue = true
564588
await this.onChatRequestHandler(ctx).then(() => {
@@ -588,7 +612,7 @@ export class OpenAIBot implements PayableBot {
588612
ctx.transient.analytics.actualResponseTime = now()
589613
return
590614
}
591-
const { url, newPrompt } = hasUrl(ctx, prompt)
615+
const { url, newPrompt } = hasUrl(ctx, prompt.prompt)
592616
const hasCode = hasCodeSnippet(ctx)
593617
if (chatConversation.length === 0 && (hasCode || !url)) {
594618
chatConversation.push({
@@ -601,14 +625,14 @@ export class OpenAIBot implements PayableBot {
601625
} else {
602626
chatConversation.push({
603627
role: 'user',
604-
content: prompt
628+
content: prompt.prompt
605629
})
606630
const payload = {
607631
conversation: chatConversation,
608632
model: model || config.openAi.chatGpt.model,
609633
ctx
610634
}
611-
const result = await this.promptGen(payload)
635+
const result = await this.completionGen(payload, prompt.msgId, prompt.outputFormat)
612636
ctx.session.openAi.chatGpt.chatConversation = [...result.chat]
613637
if (
614638
!(await this.payments.pay(ctx as OnMessageContext, result.price))

src/modules/types.ts

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,14 +59,21 @@ export interface ImageRequest {
5959
photo?: PhotoSize[] | undefined
6060
photoUrl?: string[]
6161
}
62+
63+
export interface promptRequest {
64+
prompt: string
65+
msgId?: number
66+
outputFormat?: 'text' | 'voice'
67+
commandPrefix?: string
68+
}
6269
export interface ChatGptSessionData {
6370
model: string
6471
isEnabled: boolean
6572
isFreePromptChatGroups: boolean
6673
chatConversation: ChatConversation[]
6774
usage: number
6875
price: number
69-
requestQueue: string[]
76+
requestQueue: promptRequest[]
7077
isProcessingQueue: boolean
7178
}
7279

src/modules/voice-command/index.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@ import { SupportedCommands as OpenAISupportedCommands } from '../open-ai/helpers
1111
const VOICE_COMMAND_LIST = [
1212
OpenAISupportedCommands.vision,
1313
OpenAISupportedCommands.ask,
14-
OpenAISupportedCommands.dalleImg
14+
OpenAISupportedCommands.dalleImg,
15+
OpenAISupportedCommands.talk
1516
]
1617
export class VoiceCommand implements PayableBot {
1718
public readonly module = 'VoiceCommand'
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
import { InputFile } from 'grammy'
2+
import config from '../../config'
3+
import { ElevenlabsClient } from '../../elevenlabs/elevenlabsClient'
4+
import { type OnMessageContext } from '../types'
5+
6+
export const generateVoiceFromText = async (text: string, voiceId = '21m00Tcm4TlvDq8ikWAM'): Promise<string | Uint8Array | null | undefined> => {
7+
const elevenlabsClient = new ElevenlabsClient(config.elevenlabs.apiKey)
8+
9+
const voiceResult = await elevenlabsClient.textToSpeech({ text, voiceId })
10+
11+
return voiceResult
12+
}
13+
14+
export const responseWithVoice = async (text: string, ctx: OnMessageContext, msgId: number, voiceId = '21m00Tcm4TlvDq8ikWAM'): Promise<void> => {
15+
const voiceResult = await generateVoiceFromText(text, voiceId)
16+
17+
if (!voiceResult) {
18+
await ctx.reply('voice generation error')
19+
return
20+
}
21+
22+
await ctx.api.deleteMessage(ctx.chat.id, msgId)
23+
24+
const inputFile = new InputFile(voiceResult)
25+
26+
await ctx.replyWithVoice(inputFile)
27+
}

src/modules/voice-to-voice-gpt/index.ts

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,8 @@ import type { Logger } from 'pino'
55
import type { BotPayments } from '../payment'
66
import { chatCompletion, speechToText } from '../open-ai/api/openAi'
77
import type { OnMessageContext, PayableBot } from '../types'
8-
import config from '../../config'
98
import { ChatGPTModelsEnum } from '../open-ai/types'
10-
import { ElevenlabsClient } from '../../elevenlabs/elevenlabsClient'
9+
import { generateVoiceFromText } from './helpers'
1110

1211
export class VoiceToVoiceGPTBot implements PayableBot {
1312
private readonly payments: BotPayments
@@ -67,9 +66,7 @@ export class VoiceToVoiceGPTBot implements PayableBot {
6766
const conversation = [{ role: 'user', content: resultText }]
6867
const response = await chatCompletion(conversation, ChatGPTModelsEnum.GPT_35_TURBO_16K)
6968

70-
const elevenlabsClient = new ElevenlabsClient(config.elevenlabs.apiKey)
71-
72-
const voiceResult = await elevenlabsClient.textToSpeech({ text: response.completion, voiceId: '21m00Tcm4TlvDq8ikWAM' })
69+
const voiceResult = await generateVoiceFromText(response.completion)
7370
// const voiceResult = await gcTextToSpeedClient.ssmlTextToSpeech({ text: response.completion, ssmlGender: 'MALE', languageCode: 'en-US' })
7471

7572
if (!voiceResult) {

0 commit comments

Comments
 (0)