Skip to content

Commit 40260f7

Browse files
committed
refactor ChatGPT conversation to include outputFormat (voice, text), msgId, and commandPrefix; add talk voice command
1 parent 6516219 commit 40260f7

File tree

6 files changed

+106
-49
lines changed

6 files changed

+106
-49
lines changed

src/modules/open-ai/helpers.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ export enum SupportedCommands {
2121
dalleShorter = 'i',
2222
genImgEn = 'genImgEn',
2323
on = 'on',
24-
off = 'off'
24+
off = 'off',
25+
talk = 'talk'
2526
}
2627

2728
export const MAX_TRIES = 3

src/modules/open-ai/index.ts

Lines changed: 65 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import {
1313
} from '../types'
1414
import {
1515
alterGeneratedImg,
16+
chatCompletion,
1617
getChatModel,
1718
getDalleModel,
1819
getDalleModelPrice,
@@ -47,6 +48,7 @@ import { AxiosError } from 'axios'
4748
import { Callbacks } from '../types'
4849
import { LlmsBot } from '../llms'
4950
import { type PhotoSize } from 'grammy/types'
51+
import { responseWithVoice } from '../voice-to-voice-gpt/helpers'
5052

5153
const priceAdjustment = config.openAi.chatGpt.priceAdjustment
5254
export class OpenAIBot implements PayableBot {
@@ -174,16 +176,21 @@ export class OpenAIBot implements PayableBot {
174176
}
175177
break
176178
}
177-
case SupportedCommands.ask: {
179+
case SupportedCommands.ask:
180+
case SupportedCommands.talk: {
178181
if (this.botSuspended) {
179182
ctx.transient.analytics.sessionState = RequestState.Error
180183
await sendMessage(ctx, 'The bot is suspended').catch(async (e) => { await this.onError(ctx, e) })
181184
ctx.transient.analytics.actualResponseTime = now()
182185
return
183186
}
184-
ctx.session.openAi.chatGpt.requestQueue.push(
185-
await preparePrompt(ctx, prompt)
186-
)
187+
const adaptedPrompt = (SupportedCommands.talk === command
188+
? 'Keep it short, like a phone call'
189+
: '') + await preparePrompt(ctx, prompt)
190+
ctx.session.openAi.chatGpt.requestQueue.push({
191+
prompt: adaptedPrompt,
192+
outputFormat: SupportedCommands.ask === command ? 'text' : 'voice'
193+
})
187194
if (!ctx.session.openAi.chatGpt.isProcessingQueue) {
188195
ctx.session.openAi.chatGpt.isProcessingQueue = true
189196
await this.onChatRequestHandler(ctx).then(() => {
@@ -407,7 +414,7 @@ export class OpenAIBot implements PayableBot {
407414
)
408415
}
409416

410-
private async promptGen (data: ChatPayload, msgId?: number): Promise< { price: number, chat: ChatConversation[] }> {
417+
private async completionGen (data: ChatPayload, msgId?: number, outputFormat = 'text'): Promise< { price: number, chat: ChatConversation[] }> {
411418
const { conversation, ctx, model } = data
412419
try {
413420
if (!msgId) {
@@ -420,29 +427,42 @@ export class OpenAIBot implements PayableBot {
420427
})
421428
).message_id
422429
}
423-
const isTypingEnabled = config.openAi.chatGpt.isTypingEnabled
424-
if (isTypingEnabled) {
425-
ctx.chatAction = 'typing'
426-
}
427-
const completion = await streamChatCompletion(
428-
conversation,
429-
ctx,
430-
model,
431-
msgId,
432-
true // telegram messages have a character limit
433-
)
434-
if (isTypingEnabled) {
435-
ctx.chatAction = null
436-
}
437-
if (completion) {
438-
ctx.transient.analytics.sessionState = RequestState.Success
439-
ctx.transient.analytics.actualResponseTime = now()
440-
const price = getPromptPrice(completion, data)
441-
this.logger.info(
442-
`streamChatCompletion result = tokens: ${price.totalTokens} | ${model} | price: ${price.price}¢` // price.promptTokens + price.completionTokens }
430+
if (outputFormat === 'text') {
431+
const isTypingEnabled = config.openAi.chatGpt.isTypingEnabled
432+
if (isTypingEnabled) {
433+
ctx.chatAction = 'typing'
434+
}
435+
const completion = await streamChatCompletion(
436+
conversation,
437+
ctx,
438+
model,
439+
msgId,
440+
true // telegram messages have a character limit
443441
)
442+
if (isTypingEnabled) {
443+
ctx.chatAction = null
444+
}
445+
if (completion) {
446+
ctx.transient.analytics.sessionState = RequestState.Success
447+
ctx.transient.analytics.actualResponseTime = now()
448+
const price = getPromptPrice(completion, data)
449+
this.logger.info(
450+
`streamChatCompletion result = tokens: ${price.totalTokens} | ${model} | price: ${price.price}¢` // price.promptTokens + price.completionTokens }
451+
)
452+
return {
453+
price: price.price,
454+
chat: conversation
455+
}
456+
}
457+
} else {
458+
const response = await chatCompletion(conversation, ChatGPTModelsEnum.GPT_35_TURBO_16K)
459+
conversation.push({
460+
role: 'system',
461+
content: response.completion
462+
})
463+
await responseWithVoice(response.completion, ctx as OnMessageContext, msgId)
444464
return {
445-
price: price.price,
465+
price: response.price,
446466
chat: conversation
447467
}
448468
}
@@ -469,9 +489,10 @@ export class OpenAIBot implements PayableBot {
469489
}
470490
const { username } = ctx.me
471491
const prompt = ctx.message?.text?.slice(username.length + 1) ?? '' // @
472-
ctx.session.openAi.chatGpt.requestQueue.push(
473-
await preparePrompt(ctx, prompt)
474-
)
492+
ctx.session.openAi.chatGpt.requestQueue.push({
493+
prompt: await preparePrompt(ctx, prompt),
494+
outputFormat: 'text'
495+
})
475496
if (!ctx.session.openAi.chatGpt.isProcessingQueue) {
476497
ctx.session.openAi.chatGpt.isProcessingQueue = true
477498
await this.onChatRequestHandler(ctx).then(() => {
@@ -494,9 +515,10 @@ export class OpenAIBot implements PayableBot {
494515
return
495516
}
496517
const prompt = ctx.message?.text?.slice(prefix.length) ?? ''
497-
ctx.session.openAi.chatGpt.requestQueue.push(
498-
await preparePrompt(ctx, prompt)
499-
)
518+
ctx.session.openAi.chatGpt.requestQueue.push({
519+
prompt: await preparePrompt(ctx, prompt),
520+
outputFormat: 'text'
521+
})
500522
if (!ctx.session.openAi.chatGpt.isProcessingQueue) {
501523
ctx.session.openAi.chatGpt.isProcessingQueue = true
502524
await this.onChatRequestHandler(ctx).then(() => {
@@ -516,9 +538,10 @@ export class OpenAIBot implements PayableBot {
516538
ctx.transient.analytics.actualResponseTime = now()
517539
return
518540
}
519-
ctx.session.openAi.chatGpt.requestQueue.push(
520-
await preparePrompt(ctx, ctx.message?.text ?? '')
521-
)
541+
ctx.session.openAi.chatGpt.requestQueue.push({
542+
prompt: await preparePrompt(ctx, ctx.message?.text ?? ''),
543+
outputFormat: 'text'
544+
})
522545
if (!ctx.session.openAi.chatGpt.isProcessingQueue) {
523546
ctx.session.openAi.chatGpt.isProcessingQueue = true
524547
await this.onChatRequestHandler(ctx).then(() => {
@@ -556,9 +579,10 @@ export class OpenAIBot implements PayableBot {
556579
if (await this.freePromptChatGroup(ctx, prompt as string)) {
557580
return
558581
}
559-
ctx.session.openAi.chatGpt.requestQueue.push(
560-
await preparePrompt(ctx, prompt as string)
561-
)
582+
ctx.session.openAi.chatGpt.requestQueue.push({
583+
prompt: await preparePrompt(ctx, prompt as string),
584+
outputFormat: 'text'
585+
})
562586
if (!ctx.session.openAi.chatGpt.isProcessingQueue) {
563587
ctx.session.openAi.chatGpt.isProcessingQueue = true
564588
await this.onChatRequestHandler(ctx).then(() => {
@@ -588,7 +612,7 @@ export class OpenAIBot implements PayableBot {
588612
ctx.transient.analytics.actualResponseTime = now()
589613
return
590614
}
591-
const { url, newPrompt } = hasUrl(ctx, prompt)
615+
const { url, newPrompt } = hasUrl(ctx, prompt.prompt)
592616
const hasCode = hasCodeSnippet(ctx)
593617
if (chatConversation.length === 0 && (hasCode || !url)) {
594618
chatConversation.push({
@@ -601,14 +625,14 @@ export class OpenAIBot implements PayableBot {
601625
} else {
602626
chatConversation.push({
603627
role: 'user',
604-
content: prompt
628+
content: prompt.prompt
605629
})
606630
const payload = {
607631
conversation: chatConversation,
608632
model: model || config.openAi.chatGpt.model,
609633
ctx
610634
}
611-
const result = await this.promptGen(payload)
635+
const result = await this.completionGen(payload, prompt.msgId, prompt.outputFormat)
612636
ctx.session.openAi.chatGpt.chatConversation = [...result.chat]
613637
if (
614638
!(await this.payments.pay(ctx as OnMessageContext, result.price))

src/modules/types.ts

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,14 +59,21 @@ export interface ImageRequest {
5959
photo?: PhotoSize[] | undefined
6060
photoUrl?: string[]
6161
}
62+
63+
export interface promptRequest {
64+
prompt: string
65+
msgId?: number
66+
outputFormat?: 'text' | 'voice'
67+
commandPrefix?: string
68+
}
6269
export interface ChatGptSessionData {
6370
model: string
6471
isEnabled: boolean
6572
isFreePromptChatGroups: boolean
6673
chatConversation: ChatConversation[]
6774
usage: number
6875
price: number
69-
requestQueue: string[]
76+
requestQueue: promptRequest[]
7077
isProcessingQueue: boolean
7178
}
7279

src/modules/voice-command/index.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@ import { SupportedCommands as OpenAISupportedCommands } from '../open-ai/helpers
1111
const VOICE_COMMAND_LIST = [
1212
OpenAISupportedCommands.vision,
1313
OpenAISupportedCommands.ask,
14-
OpenAISupportedCommands.dalleImg
14+
OpenAISupportedCommands.dalleImg,
15+
OpenAISupportedCommands.talk
1516
]
1617
export class VoiceCommand implements PayableBot {
1718
public readonly module = 'VoiceCommand'
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
import { InputFile } from 'grammy'
2+
import config from '../../config'
3+
import { ElevenlabsClient } from '../../elevenlabs/elevenlabsClient'
4+
import { type OnMessageContext } from '../types'
5+
6+
export const generateVoiceFromText = async (text: string, voiceId = '21m00Tcm4TlvDq8ikWAM'): Promise<string | Uint8Array | null | undefined> => {
7+
const elevenlabsClient = new ElevenlabsClient(config.elevenlabs.apiKey)
8+
9+
const voiceResult = await elevenlabsClient.textToSpeech({ text, voiceId })
10+
11+
return voiceResult
12+
}
13+
14+
export const responseWithVoice = async (text: string, ctx: OnMessageContext, msgId: number, voiceId = '21m00Tcm4TlvDq8ikWAM'): Promise<void> => {
15+
const voiceResult = await generateVoiceFromText(text, voiceId)
16+
17+
if (!voiceResult) {
18+
await ctx.reply('voice generation error')
19+
return
20+
}
21+
22+
await ctx.api.deleteMessage(ctx.chat.id, msgId)
23+
24+
const inputFile = new InputFile(voiceResult)
25+
26+
await ctx.replyWithVoice(inputFile)
27+
}

src/modules/voice-to-voice-gpt/index.ts

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,8 @@ import type { Logger } from 'pino'
55
import type { BotPayments } from '../payment'
66
import { chatCompletion, speechToText } from '../open-ai/api/openAi'
77
import type { OnMessageContext, PayableBot } from '../types'
8-
import config from '../../config'
98
import { ChatGPTModelsEnum } from '../open-ai/types'
10-
import { ElevenlabsClient } from '../../elevenlabs/elevenlabsClient'
9+
import { generateVoiceFromText } from './helpers'
1110

1211
export class VoiceToVoiceGPTBot implements PayableBot {
1312
private readonly payments: BotPayments
@@ -67,9 +66,7 @@ export class VoiceToVoiceGPTBot implements PayableBot {
6766
const conversation = [{ role: 'user', content: resultText }]
6867
const response = await chatCompletion(conversation, ChatGPTModelsEnum.GPT_35_TURBO_16K)
6968

70-
const elevenlabsClient = new ElevenlabsClient(config.elevenlabs.apiKey)
71-
72-
const voiceResult = await elevenlabsClient.textToSpeech({ text: response.completion, voiceId: '21m00Tcm4TlvDq8ikWAM' })
69+
const voiceResult = await generateVoiceFromText(response.completion)
7370
// const voiceResult = await gcTextToSpeedClient.ssmlTextToSpeech({ text: response.completion, ssmlGender: 'MALE', languageCode: 'en-US' })
7471

7572
if (!voiceResult) {

0 commit comments

Comments
 (0)