From 9d70e31993d4e7e46420bfd5b8573c4a90a889df Mon Sep 17 00:00:00 2001 From: fegloff Date: Thu, 14 Mar 2024 11:01:30 -0500 Subject: [PATCH] add sonnet model, opus model + fix chat action status infinite loop after error --- src/config.ts | 2 +- src/modules/llms/api/athropic.ts | 43 ++++++++++++++++++++++++++++++++ src/modules/llms/api/llmApi.ts | 3 ++- src/modules/llms/helpers.ts | 9 +++++++ src/modules/llms/index.ts | 23 +++++++++++++++-- src/modules/llms/types.ts | 18 ++++++++++++- src/modules/open-ai/index.ts | 1 + 7 files changed, 94 insertions(+), 5 deletions(-) create mode 100644 src/modules/llms/api/athropic.ts diff --git a/src/config.ts b/src/config.ts index 34eaae3e..26153832 100644 --- a/src/config.ts +++ b/src/config.ts @@ -33,7 +33,7 @@ export default { ? parseInt(process.env.SESSION_TIMEOUT) : 48, // in hours llms: { - apiEndpoint: process.env.LLMS_ENDPOINT, // 'http://127.0.0.1:5000', // process.env.LLMS_ENDPOINT, // + apiEndpoint: process.env.LLMS_ENDPOINT, // 'http://127.0.0.1:5000', wordLimit: 50, model: 'chat-bison', minimumBalance: 0, diff --git a/src/modules/llms/api/athropic.ts b/src/modules/llms/api/athropic.ts new file mode 100644 index 00000000..b57755c8 --- /dev/null +++ b/src/modules/llms/api/athropic.ts @@ -0,0 +1,43 @@ +import axios from 'axios' +import config from '../../../config' +import { type ChatConversation } from '../../types' +import { type LlmCompletion } from './llmApi' +import { LlmsModelsEnum } from '../types' + +const API_ENDPOINT = config.llms.apiEndpoint + +export const anthropicCompletion = async ( + conversation: ChatConversation[], + model = LlmsModelsEnum.CLAUDE_OPUS +): Promise => { + const data = { + model, + stream: false, + system: config.openAi.chatGpt.chatCompletionContext, + max_tokens: +config.openAi.chatGpt.maxTokens, + messages: conversation + } + const url = `${API_ENDPOINT}/anthropic/completions` + const response = await axios.post(url, data) + const respJson = JSON.parse(response.data) + if (response) { + const totalInputTokens = respJson.usage.input_tokens + const totalOutputTokens = respJson.usage.output_tokens + const completion = respJson.content + + return { + completion: { + content: completion[0].text, + role: 'assistant', + model + }, + usage: totalOutputTokens + totalInputTokens, + price: 0 + } + } + return { + completion: undefined, + usage: 0, + price: 0 + } +} diff --git a/src/modules/llms/api/llmApi.ts b/src/modules/llms/api/llmApi.ts index 2348a93f..85aa70f2 100644 --- a/src/modules/llms/api/llmApi.ts +++ b/src/modules/llms/api/llmApi.ts @@ -2,6 +2,7 @@ import axios from 'axios' import config from '../../../config' import { type ChatConversation } from '../../types' import pino from 'pino' +import { LlmsModelsEnum } from '../types' const API_ENDPOINT = config.llms.apiEndpoint // config.llms.apiEndpoint // 'http://localhost:8080' // http://127.0.0.1:5000' // config.llms.apiEndpoint @@ -86,7 +87,7 @@ export const deleteCollection = async (collectionName: string): Promise => export const llmCompletion = async ( conversation: ChatConversation[], - model = config.llms.model + model = LlmsModelsEnum.BISON ): Promise => { const data = { model, // chat-bison@001 'chat-bison', //'gpt-3.5-turbo', diff --git a/src/modules/llms/helpers.ts b/src/modules/llms/helpers.ts index 0ea4e4a4..a636748b 100644 --- a/src/modules/llms/helpers.ts +++ b/src/modules/llms/helpers.ts @@ -12,6 +12,10 @@ import { llmAddUrlDocument } from './api/llmApi' export enum SupportedCommands { bardF = 'bard', + claudeOpus = 'claude', + opus = 'opus', + claudeSonnet = 'claudes', + sonnet = 'sonnet', bard = 'b', j2Ultra = 'j2-ultra', sum = 'sum', @@ -19,6 +23,11 @@ export enum SupportedCommands { pdf = 'pdf' } +export enum SupportedModels { + bison = 'chat-bison', + claude = 'claude-3-opus-20240229' +} + export const MAX_TRIES = 3 const LLAMA_PREFIX_LIST = ['* '] const BARD_PREFIX_LIST = ['b. ', 'B. '] diff --git a/src/modules/llms/index.ts b/src/modules/llms/index.ts index f8d8bda1..ae8a1767 100644 --- a/src/modules/llms/index.ts +++ b/src/modules/llms/index.ts @@ -37,6 +37,7 @@ import * as Sentry from '@sentry/node' import { now } from '../../utils/perf' import { AxiosError } from 'axios' import OpenAI from 'openai' +import { anthropicCompletion } from './api/athropic' export class LlmsBot implements PayableBot { public readonly module = 'LlmsBot' private readonly logger: Logger @@ -122,6 +123,18 @@ export class LlmsBot implements PayableBot { return } + if (ctx.hasCommand(SupportedCommands.bard) || ctx.hasCommand(SupportedCommands.bardF)) { + await this.onChat(ctx, LlmsModelsEnum.BISON) + return + } + if (ctx.hasCommand([SupportedCommands.claudeOpus, SupportedCommands.opus])) { + await this.onChat(ctx, LlmsModelsEnum.CLAUDE_OPUS) + return + } + if (ctx.hasCommand([SupportedCommands.claudeSonnet, SupportedCommands.sonnet])) { + await this.onChat(ctx, LlmsModelsEnum.CLAUDE_SONNET) + return + } if (ctx.hasCommand(SupportedCommands.bard) || ctx.hasCommand(SupportedCommands.bardF)) { await this.onChat(ctx, LlmsModelsEnum.BISON) return @@ -547,8 +560,10 @@ export class LlmsBot implements PayableBot { const chat = prepareConversation(conversation, model) if (model === LlmsModelsEnum.BISON) { response = await vertexCompletion(chat, model) // "chat-bison@001"); + } else if (model === LlmsModelsEnum.CLAUDE_OPUS || model === LlmsModelsEnum.CLAUDE_SONNET) { + response = await anthropicCompletion(chat, model) } else { - response = await llmCompletion(chat, model) + response = await llmCompletion(chat, model as LlmsModelsEnum) } if (response.completion) { await ctx.api.editMessageText( @@ -568,7 +583,7 @@ export class LlmsBot implements PayableBot { chat: conversation } } - ctx.chatAction = null + // ctx.chatAction = null ctx.transient.analytics.actualResponseTime = now() return { price: 0, @@ -678,6 +693,9 @@ export class LlmsBot implements PayableBot { await this.onNotBalanceMessage(ctx) } } catch (e: any) { + console.log('HERE FCO') + ctx.chatAction = null + ctx.session.llms.chatConversation = [] await this.onError(ctx, e) } } @@ -722,6 +740,7 @@ export class LlmsBot implements PayableBot { ctx.transient.analytics.sessionState = RequestState.Error Sentry.setContext('llms', { retryCount, msg }) Sentry.captureException(e) + ctx.chatAction = null if (retryCount === 0) { // Retry limit reached, log an error or take alternative action this.logger.error(`Retry limit reached for error: ${e}`) diff --git a/src/modules/llms/types.ts b/src/modules/llms/types.ts index 1e12c90f..82548375 100644 --- a/src/modules/llms/types.ts +++ b/src/modules/llms/types.ts @@ -3,7 +3,9 @@ import { type ChatModel } from '../open-ai/types' export enum LlmsModelsEnum { GPT_4_32K = 'gpt-4-32k', BISON = 'chat-bison', - J2_ULTRA = 'j2-ultra' + J2_ULTRA = 'j2-ultra', + CLAUDE_OPUS = 'claude-3-opus-20240229', + CLAUDE_SONNET = 'claude-3-sonnet-20240229' } export const LlmsModels: Record = { @@ -27,5 +29,19 @@ export const LlmsModels: Record = { outputPrice: 0.12, maxContextTokens: 32000, chargeType: 'TOKEN' + }, + 'claude-3-opus-20240229': { + name: 'claude-3-opus-20240229', + inputPrice: 0.03, + outputPrice: 0.06, + maxContextTokens: 8192, + chargeType: 'TOKEN' + }, + 'claude-3-sonnet-20240229': { + name: 'claude-3-sonnet-20240229', + inputPrice: 0.03, + outputPrice: 0.06, + maxContextTokens: 8192, + chargeType: 'TOKEN' } } diff --git a/src/modules/open-ai/index.ts b/src/modules/open-ai/index.ts index 6ba115f0..572ac4fd 100644 --- a/src/modules/open-ai/index.ts +++ b/src/modules/open-ai/index.ts @@ -963,6 +963,7 @@ export class OpenAIBot implements PayableBot { ctx.transient.analytics.sessionState = RequestState.Error Sentry.setContext('open-ai', { retryCount, msg }) Sentry.captureException(ex) + ctx.chatAction = null if (retryCount === 0) { // Retry limit reached, log an error or take alternative action this.logger.error(`Retry limit reached for error: ${ex}`)