From 9d70e31993d4e7e46420bfd5b8573c4a90a889df Mon Sep 17 00:00:00 2001
From: fegloff <fegloff@gmail.com>
Date: Thu, 14 Mar 2024 11:01:30 -0500
Subject: [PATCH] add sonnet model, opus model + fix chat action status
 infinite loop after error

---
 src/config.ts                    |  2 +-
 src/modules/llms/api/athropic.ts | 43 ++++++++++++++++++++++++++++++++
 src/modules/llms/api/llmApi.ts   |  3 ++-
 src/modules/llms/helpers.ts      |  9 +++++++
 src/modules/llms/index.ts        | 23 +++++++++++++++--
 src/modules/llms/types.ts        | 18 ++++++++++++-
 src/modules/open-ai/index.ts     |  1 +
 7 files changed, 94 insertions(+), 5 deletions(-)
 create mode 100644 src/modules/llms/api/athropic.ts
diff --git a/src/config.ts b/src/config.ts
index 34eaae3e..26153832 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -33,7 +33,7 @@ export default {
     ? parseInt(process.env.SESSION_TIMEOUT)
     : 48, // in hours
   llms: {
-    apiEndpoint: process.env.LLMS_ENDPOINT, // 'http://127.0.0.1:5000', // process.env.LLMS_ENDPOINT, //
+    apiEndpoint: process.env.LLMS_ENDPOINT, // 'http://127.0.0.1:5000',
     wordLimit: 50,
     model: 'chat-bison',
     minimumBalance: 0,
diff --git a/src/modules/llms/api/athropic.ts b/src/modules/llms/api/athropic.ts
new file mode 100644
index 00000000..b57755c8
--- /dev/null
+++ b/src/modules/llms/api/athropic.ts
@@ -0,0 +1,43 @@
+import axios from 'axios'
+import config from '../../../config'
+import { type ChatConversation } from '../../types'
+import { type LlmCompletion } from './llmApi'
+import { LlmsModelsEnum } from '../types'
+
+const API_ENDPOINT = config.llms.apiEndpoint
+
+/**
+ * Requests a non-streaming completion from `${API_ENDPOINT}/anthropic/completions`.
+ * Resolves with an undefined completion when the reply has no text content block.
+ */
+export const anthropicCompletion = async (
+  conversation: ChatConversation[],
+  model = LlmsModelsEnum.CLAUDE_OPUS
+): Promise<LlmCompletion> => {
+  const data = {
+    model,
+    stream: false,
+    system: config.openAi.chatGpt.chatCompletionContext,
+    max_tokens: +config.openAi.chatGpt.maxTokens,
+    messages: conversation
+  }
+  const url = `${API_ENDPOINT}/anthropic/completions`
+  const response = await axios.post(url, data)
+  // axios already decodes JSON bodies, so only parse when a raw string comes back.
+  const body = response.data
+  const respJson = typeof body === 'string' ? JSON.parse(body) : body
+  const text = respJson?.content?.[0]?.text
+  if (typeof text === 'string') {
+    const usage = respJson.usage
+    return {
+      completion: { content: text, role: 'assistant', model },
+      usage: (usage?.input_tokens ?? 0) + (usage?.output_tokens ?? 0),
+      price: 0
+    }
+  }
+  return {
+    completion: undefined,
+    usage: 0,
+    price: 0
+  }
+}
diff --git a/src/modules/llms/api/llmApi.ts b/src/modules/llms/api/llmApi.ts
index 2348a93f..85aa70f2 100644
--- a/src/modules/llms/api/llmApi.ts
+++ b/src/modules/llms/api/llmApi.ts
@@ -2,6 +2,7 @@ import axios from 'axios'
 import config from '../../../config'
 import { type ChatConversation } from '../../types'
 import pino from 'pino'
+import { LlmsModelsEnum } from '../types'
 
 const API_ENDPOINT = config.llms.apiEndpoint // config.llms.apiEndpoint // 'http://localhost:8080' // http://127.0.0.1:5000' // config.llms.apiEndpoint
 
@@ -86,7 +87,7 @@ export const deleteCollection = async (collectionName: string): Promise<void> =>
 
 export const llmCompletion = async (
   conversation: ChatConversation[],
-  model = config.llms.model
+  model = LlmsModelsEnum.BISON
 ): Promise<LlmCompletion> => {
   const data = {
     model, // chat-bison@001 'chat-bison', //'gpt-3.5-turbo',
diff --git a/src/modules/llms/helpers.ts b/src/modules/llms/helpers.ts
index 0ea4e4a4..a636748b 100644
--- a/src/modules/llms/helpers.ts
+++ b/src/modules/llms/helpers.ts
@@ -12,6 +12,10 @@ import { llmAddUrlDocument } from './api/llmApi'
 
 export enum SupportedCommands {
   bardF = 'bard',
+  claudeOpus = 'claude',
+  opus = 'opus',
+  claudeSonnet = 'claudes',
+  sonnet = 'sonnet',
   bard = 'b',
   j2Ultra = 'j2-ultra',
   sum = 'sum',
@@ -19,6 +23,11 @@ export enum SupportedCommands {
   pdf = 'pdf'
 }
 
+export enum SupportedModels { // short aliases for model-id strings also declared in LlmsModelsEnum
+  bison = 'chat-bison', // same string as LlmsModelsEnum.BISON
+  claude = 'claude-3-opus-20240229' // same string as LlmsModelsEnum.CLAUDE_OPUS
+}
+
 export const MAX_TRIES = 3
 const LLAMA_PREFIX_LIST = ['* ']
 const BARD_PREFIX_LIST = ['b. ', 'B. ']
diff --git a/src/modules/llms/index.ts b/src/modules/llms/index.ts
index f8d8bda1..ae8a1767 100644
--- a/src/modules/llms/index.ts
+++ b/src/modules/llms/index.ts
@@ -37,6 +37,7 @@ import * as Sentry from '@sentry/node'
 import { now } from '../../utils/perf'
 import { AxiosError } from 'axios'
 import OpenAI from 'openai'
+import { anthropicCompletion } from './api/athropic'
 export class LlmsBot implements PayableBot {
   public readonly module = 'LlmsBot'
   private readonly logger: Logger
@@ -122,6 +123,18 @@ export class LlmsBot implements PayableBot {
       return
     }
 
+    if (ctx.hasCommand(SupportedCommands.bard) || ctx.hasCommand(SupportedCommands.bardF)) {
+      await this.onChat(ctx, LlmsModelsEnum.BISON)
+      return
+    }
+    if (ctx.hasCommand([SupportedCommands.claudeOpus, SupportedCommands.opus])) {
+      await this.onChat(ctx, LlmsModelsEnum.CLAUDE_OPUS)
+      return
+    }
+    if (ctx.hasCommand([SupportedCommands.claudeSonnet, SupportedCommands.sonnet])) {
+      await this.onChat(ctx, LlmsModelsEnum.CLAUDE_SONNET)
+      return
+    }
     if (ctx.hasCommand(SupportedCommands.bard) || ctx.hasCommand(SupportedCommands.bardF)) {
       await this.onChat(ctx, LlmsModelsEnum.BISON)
       return
@@ -547,8 +560,10 @@ export class LlmsBot implements PayableBot {
     const chat = prepareConversation(conversation, model)
     if (model === LlmsModelsEnum.BISON) {
       response = await vertexCompletion(chat, model) // "chat-bison@001");
+    } else if (model === LlmsModelsEnum.CLAUDE_OPUS || model === LlmsModelsEnum.CLAUDE_SONNET) {
+      response = await anthropicCompletion(chat, model)
     } else {
-      response = await llmCompletion(chat, model)
+      response = await llmCompletion(chat, model as LlmsModelsEnum)
     }
     if (response.completion) {
       await ctx.api.editMessageText(
@@ -568,7 +583,7 @@ export class LlmsBot implements PayableBot {
         chat: conversation
       }
     }
-    ctx.chatAction = null
+    // ctx.chatAction = null
     ctx.transient.analytics.actualResponseTime = now()
     return {
       price: 0,
@@ -678,6 +693,9 @@ export class LlmsBot implements PayableBot {
           await this.onNotBalanceMessage(ctx)
         }
       } catch (e: any) {
+        console.log('HERE FCO')
+        ctx.chatAction = null
+        ctx.session.llms.chatConversation = []
         await this.onError(ctx, e)
       }
     }
@@ -722,6 +740,7 @@ export class LlmsBot implements PayableBot {
     ctx.transient.analytics.sessionState = RequestState.Error
     Sentry.setContext('llms', { retryCount, msg })
     Sentry.captureException(e)
+    ctx.chatAction = null
     if (retryCount === 0) {
       // Retry limit reached, log an error or take alternative action
       this.logger.error(`Retry limit reached for error: ${e}`)
diff --git a/src/modules/llms/types.ts b/src/modules/llms/types.ts
index 1e12c90f..82548375 100644
--- a/src/modules/llms/types.ts
+++ b/src/modules/llms/types.ts
@@ -3,7 +3,9 @@ import { type ChatModel } from '../open-ai/types'
 export enum LlmsModelsEnum {
   GPT_4_32K = 'gpt-4-32k',
   BISON = 'chat-bison',
-  J2_ULTRA = 'j2-ultra'
+  J2_ULTRA = 'j2-ultra',
+  CLAUDE_OPUS = 'claude-3-opus-20240229',
+  CLAUDE_SONNET = 'claude-3-sonnet-20240229'
 }
 
 export const LlmsModels: Record<string, ChatModel> = {
@@ -27,5 +29,19 @@ export const LlmsModels: Record<string, ChatModel> = {
     outputPrice: 0.12,
     maxContextTokens: 32000,
     chargeType: 'TOKEN'
+  },
+  'claude-3-opus-20240229': {
+    name: 'claude-3-opus-20240229',
+    inputPrice: 0.03,
+    outputPrice: 0.06,
+    maxContextTokens: 8192,
+    chargeType: 'TOKEN'
+  },
+  'claude-3-sonnet-20240229': {
+    name: 'claude-3-sonnet-20240229',
+    inputPrice: 0.03,
+    outputPrice: 0.06,
+    maxContextTokens: 8192,
+    chargeType: 'TOKEN'
   }
 }
diff --git a/src/modules/open-ai/index.ts b/src/modules/open-ai/index.ts
index 6ba115f0..572ac4fd 100644
--- a/src/modules/open-ai/index.ts
+++ b/src/modules/open-ai/index.ts
@@ -963,6 +963,7 @@ export class OpenAIBot implements PayableBot {
     ctx.transient.analytics.sessionState = RequestState.Error
     Sentry.setContext('open-ai', { retryCount, msg })
     Sentry.captureException(ex)
+    ctx.chatAction = null
     if (retryCount === 0) {
       // Retry limit reached, log an error or take alternative action
       this.logger.error(`Retry limit reached for error: ${ex}`)