
Commit 5fc762c

add sliding window logic + add timestamp field

1 parent 36955f5

File tree

10 files changed: +93 additions, -26 deletions


src/modules/llms/api/athropic.ts

Lines changed: 10 additions & 5 deletions
@@ -49,7 +49,8 @@ export const anthropicCompletion = async (
     completion: {
       content: completion[0].text,
       role: 'assistant',
-      model
+      model,
+      timestamp: Date.now()
     },
     usage: totalOutputTokens + totalInputTokens,
     price: 0,
@@ -92,7 +93,8 @@ export const xaiCompletion = async (
     completion: {
       content: completion[0].text,
       role: 'assistant',
-      model
+      model,
+      timestamp: Date.now()
     },
     usage: totalOutputTokens + totalInputTokens,
     price: 0,
@@ -202,7 +204,8 @@ export const anthropicStreamCompletion = async (
     completion: {
       content: completion,
       role: 'assistant',
-      model
+      model,
+      timestamp: Date.now()
     },
     usage: parseInt(totalOutputTokens, 10) + parseInt(totalInputTokens, 10),
     price: 0,
@@ -252,7 +255,8 @@ export const toolsChatCompletion = async (
     completion: {
       content: completion[0].text,
       role: 'assistant',
-      model
+      model,
+      timestamp: Date.now()
     },
     usage: totalOutputTokens + totalInputTokens,
     price: 0,
@@ -264,7 +268,8 @@ export const toolsChatCompletion = async (
     completion: {
       content: 'Timeout error',
       role: 'assistant',
-      model
+      model,
+      timestamp: Date.now()
     },
     usage: 0,
     price: 0

src/modules/llms/api/llmApi.ts

Lines changed: 4 additions & 3 deletions
@@ -1,6 +1,6 @@
 import axios from 'axios'
 import config from '../../../config'
-import { type ChatConversation } from '../../types'
+import { type ChatConversationWithoutTimestamp, type ChatConversation } from '../../types'
 import pino from 'pino'
 import { type ChatModel } from '../utils/types'
 import { headers } from './helper'
@@ -36,7 +36,7 @@ interface LlmAddUrlDocument {
 interface QueryUrlDocument {
   collectioName: string
   prompt: string
-  conversation?: ChatConversation[]
+  conversation?: ChatConversationWithoutTimestamp[]
 }

 export const getChatModel = (modelName: string): ChatModel | undefined => {
@@ -130,7 +130,8 @@ export const llmCompletion = async (
     completion: {
       content: completion[0].message?.content,
       role: 'system',
-      model
+      model,
+      timestamp: Date.now()
     },
     usage: totalOutputTokens + totalInputTokens,
     price: 0

src/modules/llms/api/openai.ts

Lines changed: 10 additions & 5 deletions
@@ -79,7 +79,9 @@ export async function alterGeneratedImg (
   }
 }

-const prepareConversation = (conversation: ChatConversation[], model: string): ChatConversation[] => {
+type ConversationOutput = Omit<ChatConversation, 'timestamp' | 'model' | 'id' | 'author' | 'numSubAgents'>
+
+const prepareConversation = (conversation: ChatConversation[], model: string): ConversationOutput[] => {
   const messages = conversation.filter(c => c.model === model).map(m => { return { content: m.content, role: m.role } })
   if (messages.length !== 1 || model === LlmModelsEnum.O1) {
     return messages
@@ -125,7 +127,8 @@ export async function chatCompletion (
   return {
     completion: {
       content: response.choices[0].message?.content ?? 'Error - no completion available',
-      role: 'assistant'
+      role: 'assistant',
+      timestamp: Date.now()
     },
     usage: response.usage?.total_tokens, // 2010
     price: price * config.openAi.chatGpt.priceAdjustment,
@@ -215,7 +218,8 @@ export const streamChatCompletion = async (
   return {
     completion: {
       content: completion,
-      role: 'assistant'
+      role: 'assistant',
+      timestamp: Date.now()
     },
     usage: outputTokens + inputTokens,
     price: 0,
@@ -308,7 +312,8 @@ export const streamChatVisionCompletion = async (
   return {
     completion: {
       content: completion,
-      role: 'assistant'
+      role: 'assistant',
+      timestamp: Date.now()
     },
     usage: outputTokens + inputTokens,
     price: 0,
@@ -319,7 +324,7 @@

 export async function improvePrompt (promptText: string, model: string): Promise<string> {
   const prompt = `Improve this picture description using max 100 words and don't add additional text to the image: ${promptText} `
-  const conversation = [{ role: 'user', content: prompt }]
+  const conversation = [{ role: 'user', content: prompt, timestamp: Date.now() }]
   const response = await chatCompletion(conversation, model)
   return response.completion?.content as string ?? ''
 }
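Note that prepareConversation now returns ConversationOutput, i.e. messages reduced to content and role: the new timestamp (along with model, id, author, and numSubAgents) is stripped before the payload is handed to the OpenAI client, presumably because the upstream chat API rejects extra fields. A minimal sketch of that mapping, using a hypothetical two-message history:

// Hypothetical mixed-model history; only messages for the requested model
// survive, each reduced to { content, role } exactly as prepareConversation does.
const history = [
  { role: 'user', content: 'hi', model: 'gpt-4', timestamp: Date.now() },
  { role: 'user', content: 'hey', model: 'o1', timestamp: Date.now() }
]
const messages = history.filter(c => c.model === 'gpt-4').map(m => ({ content: m.content, role: m.role }))
// messages => [{ content: 'hi', role: 'user' }]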

src/modules/llms/api/pdfHandler.ts

Lines changed: 2 additions & 1 deletion
@@ -19,7 +19,8 @@ export const handlePdf = async (prompt: string): Promise<PdfCompletion> => {
   return {
     completion: {
       content: response.data.response,
-      role: 'system'
+      role: 'system',
+      timestamp: Date.now()
     },
     prompt,
     price: response.data.cost

src/modules/llms/api/vertex.ts

Lines changed: 6 additions & 4 deletions
@@ -1,6 +1,6 @@
 import axios, { type AxiosResponse } from 'axios'
 import config from '../../../config'
-import { type OnMessageContext, type ChatConversation, type OnCallBackQueryData } from '../../types'
+import { type OnMessageContext, type ChatConversation, type OnCallBackQueryData, type ChatConversationWithoutTimestamp } from '../../types'
 import { type LlmCompletion } from './llmApi'
 import { type Readable } from 'stream'
 import { GrammyError } from 'grammy'
@@ -29,7 +29,7 @@ export const vertexCompletion = async (
     stream: false,
     messages: conversation.filter(c => c.model === model)
       .map((msg) => {
-        const msgFiltered: ChatConversation = { content: msg.content, model: msg.model }
+        const msgFiltered: ChatConversationWithoutTimestamp = { content: msg.content, model: msg.model }
         if (model === LlmModelsEnum.CHAT_BISON) {
           msgFiltered.author = msg.role
         } else {
@@ -48,7 +48,8 @@
     completion: {
       content: response.data._prediction_response[0][0].candidates[0].content,
       role: 'bot', // role replace to author attribute will be done later
-      model
+      model,
+      timestamp: Date.now()
     },
     usage: totalOutputTokens + totalInputTokens,
     price: 0
@@ -145,7 +146,8 @@ export const vertexStreamCompletion = async (
     completion: {
       content: completion,
       role: 'assistant',
-      model
+      model,
+      timestamp: Date.now()
     },
     usage: parseInt(totalOutputTokens, 10) + parseInt(totalInputTokens, 10),
     price: 0,

src/modules/llms/llmsBase.ts

Lines changed: 13 additions & 5 deletions
@@ -37,6 +37,7 @@ import {
   type LLMModelsManager,
   type ModelVersion
 } from './utils/llmModelsManager'
+import { conversationManager } from './utils/conversationManager'

 export abstract class LlmsBase implements PayableBot {
   public module: string
@@ -205,7 +206,8 @@
         id: ctx.message?.message_id,
         model,
         content: await preparePrompt(ctx, prompt as string),
-        numSubAgents: 0
+        numSubAgents: 0,
+        timestamp: Date.now()
       })
       if (!session.isProcessingQueue) {
         session.isProcessingQueue = true
@@ -218,7 +220,8 @@
         id: ctx.message?.message_id ?? ctx.message?.message_thread_id ?? 0,
         model,
         content: prompt as string ?? '', // await preparePrompt(ctx, prompt as string),
-        numSubAgents: supportedAgents
+        numSubAgents: supportedAgents,
+        timestamp: Date.now()
       }
       await this.runSubagents(ctx, msg, stream, usesTools) // prompt as string)
     }
@@ -230,6 +233,8 @@

   async onChatRequestHandler (ctx: OnMessageContext | OnCallBackQueryData, stream: boolean, usesTools: boolean): Promise<void> {
     const session = this.getSession(ctx)
+    session.chatConversation = conversationManager.manageConversationWindow(session.chatConversation)
+
     while (session.requestQueue.length > 0) {
       try {
         const msg = session.requestQueue.shift()
@@ -272,7 +277,8 @@
       const chat: ChatConversation = {
         content: enhancedPrompt || prompt,
         role: 'user',
-        model: modelVersion
+        model: modelVersion,
+        timestamp: Date.now()
       }
       chatConversation.push(chat)
       const payload = {
@@ -358,7 +364,8 @@
       conversation.push({
         role: 'assistant',
         content: completion.completion?.content ?? '',
-        model
+        model,
+        timestamp: Date.now()
       })
       return {
         price: price.price,
@@ -371,7 +378,8 @@
       conversation.push({
         role: 'assistant',
         content: response.completion?.content ?? '',
-        model
+        model,
+        timestamp: Date.now()
       })
       return {
         price: response.price,
src/modules/llms/utils/conversationManager.ts

Lines changed: 41 additions & 0 deletions
@@ -0,0 +1,41 @@
+import { type VisionContent, type ChatConversation } from '../../types'
+
+const MINUTE_IN_MS = 60000 // 1 minute in milliseconds
+const INACTIVE_THRESHOLD = 5 * MINUTE_IN_MS // 5 minutes
+const IDLE_THRESHOLD = MINUTE_IN_MS // 1 minute
+const IDLE_MESSAGE_LIMIT = 5
+
+// const HOUR_IN_MS = 3600000 // 1 hour in milliseconds
+// const INACTIVE_THRESHOLD = 12 * HOUR_IN_MS // 12 hours
+// const IDLE_THRESHOLD = HOUR_IN_MS // 1 hour
+// const IDLE_MESSAGE_LIMIT = 5
+
+// Utility functions
+export const conversationManager = {
+  manageConversationWindow (conversation: ChatConversation[]): ChatConversation[] {
+    console.log('fco::::::: here', conversation.length)
+    if (conversation.length === 0) return conversation
+    const now = Date.now()
+    const lastMessageTime = conversation[conversation.length - 1].timestamp
+    const timeDifference = now - lastMessageTime
+    // Case 1: Inactive conversation (older than INACTIVE_THRESHOLD, currently 5 minutes) - reset history
+    if (timeDifference > INACTIVE_THRESHOLD) {
+      return []
+    }
+
+    // Case 2: Idle conversation (older than IDLE_THRESHOLD, currently 1 minute) - keep last IDLE_MESSAGE_LIMIT messages
+    if (timeDifference > IDLE_THRESHOLD) {
+      return conversation.slice(-IDLE_MESSAGE_LIMIT)
+    }
+
+    // Case 3: Active conversation - keep full history
+    return conversation
+  },

+  addMessageWithTimestamp (message: Omit<ChatConversation, 'timestamp'> | Partial<Omit<ChatConversation, 'content' | 'timestamp'>> & { content: string | VisionContent[] }): ChatConversation {
+    return {
+      ...message,
+      timestamp: Date.now()
+    }
+  }
+}
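Taken together, manageConversationWindow implements the commit's sliding window: a conversation idle past INACTIVE_THRESHOLD is reset, one past IDLE_THRESHOLD is trimmed to the last IDLE_MESSAGE_LIMIT messages, and an active one is kept whole. Below is a minimal sketch of the behavior under the thresholds above; the two-message history and its ages are hypothetical:

import { conversationManager } from './src/modules/llms/utils/conversationManager'
import { type ChatConversation } from './src/modules/types'

// Hypothetical history whose newest message is six minutes old.
const history: ChatConversation[] = [
  { role: 'user', content: 'hello', timestamp: Date.now() - 7 * 60000 },
  { role: 'assistant', content: 'hi there', timestamp: Date.now() - 6 * 60000 }
]

// Six minutes exceeds INACTIVE_THRESHOLD (5 minutes), so the history resets.
const pruned = conversationManager.manageConversationWindow(history)
console.log(pruned.length) // 0

// Had the newest message been 90 seconds old instead (past IDLE_THRESHOLD but
// not inactive), only the last IDLE_MESSAGE_LIMIT (5) messages would be kept.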

src/modules/subagents/llamaSubagent.ts

Lines changed: 3 additions & 2 deletions
@@ -11,7 +11,8 @@ import {
   type Collection, type OnCallBackQueryData,
   type OnMessageContext,
   type SubagentResult,
-  SubagentStatus
+  SubagentStatus,
+  type ChatConversationWithoutTimestamp
 } from '../types'
 import config from '../../config'
 import { appText } from '../../utils/text'
@@ -305,7 +306,7 @@ export class LlamaAgent extends SubagentBase {
     const session = this.getSession(ctx)
     const collection = ctx.session.collections.activeCollections.find(c => c.url === url)
     if (collection) {
-      const conversation = this.getCollectionConversation(ctx, collection)
+      const conversation = this.getCollectionConversation(ctx, collection) as unknown as ChatConversationWithoutTimestamp[]
       if (conversation.length === 0) {
         conversation.push({
           role: 'system',

src/modules/types.ts

Lines changed: 3 additions & 0 deletions
@@ -58,8 +58,11 @@ export interface ChatConversation {
   content: string | VisionContent[]
   model?: string
   numSubAgents?: number
+  timestamp: number
 }

+export type ChatConversationWithoutTimestamp = Omit<ChatConversation, 'timestamp'>
+
 export interface ImageRequest {
   command?: 'dalle' | 'alter' | 'vision'
   prompt?: string
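Since timestamp is now required on ChatConversation, call sites that build messages before the send time is known can use the ChatConversationWithoutTimestamp alias and let the conversationManager helper stamp them. A small sketch under that assumption (the draft message and import paths are illustrative):

import { conversationManager } from '../llms/utils/conversationManager'
import { type ChatConversationWithoutTimestamp } from './types'

// A hypothetical draft built ahead of time; no timestamp yet.
const draft: ChatConversationWithoutTimestamp = { role: 'user', content: 'summarize this' }

// addMessageWithTimestamp spreads the draft and adds timestamp: Date.now(),
// returning a complete ChatConversation.
const stamped = conversationManager.addMessageWithTimestamp(draft)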

src/modules/voice-to-voice-gpt/index.ts

Lines changed: 1 addition & 1 deletion
@@ -63,7 +63,7 @@ export class VoiceToVoiceGPTBot implements PayableBot {
     const resultText = await speechToText(fs.createReadStream(filename))
     fs.rmSync(filename)

-    const conversation = [{ role: 'user', content: resultText }]
+    const conversation = [{ role: 'user', content: resultText, timestamp: Date.now() }]
     const response = await chatCompletion(conversation, LlmModelsEnum.GPT_35_TURBO)

     const voiceResult = await generateVoiceFromText(response.completion?.content as string)
