Commit 472bc7c

some formalization of streaming
1 parent 241e5c4 commit 472bc7c

File tree

5 files changed: +43 / -29 lines

tale/llm_config.yaml

Lines changed: 3 additions & 1 deletion

```diff
@@ -1,6 +1,8 @@
 URL: "http://localhost:5001"
 ENDPOINT: "/api/v1/generate"
-WORD_LIMIT: 300
+STREAM_ENDPOINT: "/api/extra/generate/stream"
+DATA_ENDPOINT: "/api/extra/generate/check"
+WORD_LIMIT: 500
 DEFAULT_BODY: '{"stop_sequence": "", "max_length":300, "max_context_length":4096, "temperature":1.0, "top_k":120, "top_a":0.0, "top_p":0.85, "typical_p":1.0, "tfs":1.0, "rep_pen":1.2, "rep_pen_range":256, "mirostat":2, "mirostat_tau":5.0, "mirostat_eta":0.1, "sampler_order":[6,0,1,3,4,2,5], "seed":-1}'
 ANALYSIS_BODY: '{"stop_sequence": "\n\n", "max_length":300, "max_context_length":4096, "temperature":0.15, "top_k":120, "top_a":0.0, "top_p":0.85, "typical_p":1.0, "tfs":1.0, "rep_pen":1.2, "rep_pen_range":256, "mirostat":2, "mirostat_tau":5.0, "mirostat_eta":0.1, "sampler_order":[6,0,1,3,4,2,5], "seed":-1}'
 MEMORY_SIZE: 1024
```
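
For context: the two new endpoints follow the KoboldAI/KoboldCpp-style HTTP API that this commit targets. A minimal sketch of a plain non-streaming request using the config above (assumes a compatible backend running at URL; the "prompt" key and the response shape match what synchronous_request in tale/llm_io.py expects):

```python
import json
import requests
import yaml

# Load the config shown above (path assumed relative to the repo root).
with open("tale/llm_config.yaml") as f:
    config = yaml.safe_load(f)

body = json.loads(config["DEFAULT_BODY"])
body["prompt"] = "Describe a rainy village square."  # illustrative prompt

# Non-streaming request against the classic generate endpoint.
response = requests.post(config["URL"] + config["ENDPOINT"], data=json.dumps(body))
print(json.loads(response.text)["results"][0]["text"])
```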

tale/llm_ext.py

Lines changed: 2 additions & 0 deletions

```diff
@@ -4,6 +4,8 @@
 from tale.player import Player
 
 class LivingNpc(Living):
+    """An NPC with extra fields to define personality and help LLM generate dialogue"""
+
     def __init__(self, name: str, gender: str, *,
                  title: str="", descr: str="", short_descr: str="", age: int, personality: str, occupation: str=""):
         super(LivingNpc, self).__init__(name=name, gender=gender, title=title, descr=descr, short_descr=short_descr)
```
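
A usage sketch of the documented class; the NPC values below are made up for illustration, only the keyword fields mirror the constructor in the diff:

```python
from tale.llm_ext import LivingNpc

# Hypothetical NPC built with the extra personality fields.
barkeep = LivingNpc(
    "norhardt", "m",
    title="Norhardt the barkeep",
    descr="A stout man with a greying beard.",
    age=54,
    personality="Gruff but fair; talkative when business is slow.",
    occupation="barkeep",
)
```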

tale/llm_io.py

Lines changed: 22 additions & 19 deletions

```diff
@@ -2,47 +2,50 @@
 import time
 import aiohttp
 import asyncio
-import threading
 import json
 import tale.parse_utils as parse_utils
 from tale.player_utils import TextBuffer
-from .tio.iobase import IoAdapterBase
+
 class IoUtil():
+    """ Handles connection and data retrieval from backend """
 
     def synchronous_request(self, url: str, request_body: dict):
+        """ Send request to backend and return the result """
         response = requests.post(url, data=json.dumps(request_body))
         text = parse_utils.trim_response(json.loads(response.text)['results'][0]['text'])
         return text
 
-    def stream_request(self, player_io: TextBuffer, url: str, request_body: dict, io: IoAdapterBase) -> str:
-        result = asyncio.run(self._do_stream_request(url, request_body))
+    def stream_request(self, stream_url: str, data_url: str, request_body: dict, player_io: TextBuffer, io) -> str:
+        result = asyncio.run(self._do_stream_request(stream_url, request_body))
         if result:
-            return self._do_process_result(url, player_io, io)
+            return self._do_process_result(data_url, player_io, io)
         return ''
 
     async def _do_stream_request(self, url: str, request_body: dict,) -> bool:
-        sub_endpt = "http://localhost:5001/api/extra/generate/stream"
-
+        """ Send request to stream endpoint async to not block the main thread"""
         async with aiohttp.ClientSession() as session:
-            async with session.post(sub_endpt, data=json.dumps(request_body)) as response:
+            async with session.post(url, data=json.dumps(request_body)) as response:
                 if response.status == 200:
                     return True
-
                 else:
                     # Handle errors
                     print("Error occurred:", response.status)
 
-    def _do_process_result(self, url, player_io: TextBuffer, io: IoAdapterBase) -> str:
+    def _do_process_result(self, url, player_io: TextBuffer, io) -> str:
+        """ Process the result from the stream endpoint """
         tries = 0
-        old_data = ''
+        old_text = ''
         while tries < 2:
-            data = requests.post("http://localhost:5001/api/extra/generate/check")
+            time.sleep(0.5)
+            data = requests.post(url)
             text = json.loads(data.text)['results'][0]['text']
-            new_text = text[len(old_data):]
-            player_io.print(new_text, end=False, format=False, line_breaks=False)
-            io.write_output()
-            if len(text) == len(old_data):
+
+            if len(text) == len(old_text):
                 tries += 1
-            old_data = text
-            time.sleep(1)
-        return old_data
+                continue
+            new_text = text[len(old_text):]
+            player_io.print(new_text, end=False, format=True, line_breaks=False)
+            io.write_output()
+            old_text = text
+
+        return old_text
```
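
The core of the change is the poll-and-diff loop in _do_process_result: fetch the accumulated text, print only the suffix that is new, and stop once the response has been seen unchanged twice. A standalone sketch of that pattern (check_url and the response shape are assumed to match the KoboldCpp-style check endpoint used above):

```python
import time
import requests

def poll_stream(check_url: str, interval: float = 0.5, max_idle: int = 2) -> str:
    """Poll an endpoint that returns all text generated so far,
    emitting only the newly added suffix each round."""
    old_text = ""
    idle = 0
    while idle < max_idle:
        time.sleep(interval)
        text = requests.post(check_url).json()["results"][0]["text"]
        if len(text) == len(old_text):
            idle += 1  # unchanged response; count toward completion
            continue
        print(text[len(old_text):], end="", flush=True)  # emit only the delta
        old_text = text
    return old_text
```

Note that, as in the commit's loop, the idle counter is never reset, so two stalled polls anywhere in the stream terminate it.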

tale/llm_utils.py

Lines changed: 9 additions & 4 deletions

```diff
@@ -6,16 +6,20 @@
 from tale.llm_io import IoUtil
 import tale.parse_utils as parse_utils
 from tale.player_utils import TextBuffer
-from .tio.iobase import IoAdapterBase
 
 class LlmUtil():
+    """ Prepares prompts for various LLM requests"""
+
     def __init__(self):
         with open(os.path.realpath(os.path.join(os.path.dirname(__file__), "llm_config.yaml")), "r") as stream:
             try:
                 config_file = yaml.safe_load(stream)
             except yaml.YAMLError as exc:
                 print(exc)
-        self.url = config_file['URL'] + config_file['ENDPOINT']
+        self.url = config_file['URL']
+        self.endpoint = config_file['ENDPOINT']
+        self.stream_endpoint = config_file['STREAM_ENDPOINT']
+        self.data_endpoint = config_file['DATA_ENDPOINT']
         self.default_body = json.loads(config_file['DEFAULT_BODY'])
         self.analysis_body = json.loads(config_file['ANALYSIS_BODY'])
         self.memory_size = config_file['MEMORY_SIZE']
@@ -46,11 +50,12 @@ def evoke(self, player_io: TextBuffer, message: str, max_length : bool=False, ro
         request_body['max_length'] = amount
 
         if not self.stream:
-            text = self.io_util.synchronous_request(self.url, request_body)
+            text = self.io_util.synchronous_request(self.url + self.endpoint, request_body)
             rolling_prompt = self.update_memory(rolling_prompt, text)
            return f'Original:[ {message} ]\nGenerated:\n{text}', rolling_prompt
         else:
-            text = self.io_util.stream_request(player_io, self.url, request_body, self.connection)
+            player_io.print(f'Original:[ {message} ]\nGenerated:\n', end=False, format=True, line_breaks=False)
+            text = self.io_util.stream_request(self.url + self.stream_endpoint, self.url + self.data_endpoint, request_body, player_io, self.connection)
             rolling_prompt = self.update_memory(rolling_prompt, text)
             return '\n', rolling_prompt
         return str(message), rolling_prompt
```
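
With the base URL now stored separately from the endpoints, each call site composes the route it needs. A quick sketch of the resulting URLs, given the config shipped in this commit:

```python
from tale.llm_utils import LlmUtil

llm = LlmUtil()
print(llm.url + llm.endpoint)         # http://localhost:5001/api/v1/generate
print(llm.url + llm.stream_endpoint)  # http://localhost:5001/api/extra/generate/stream
print(llm.url + llm.data_endpoint)    # http://localhost:5001/api/extra/generate/check
```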

tale/player_utils.py

Lines changed: 7 additions & 5 deletions

```diff
@@ -13,14 +13,16 @@ def __init__(self, format: bool=True, line_breaks=True) -> None:
         self.line_breaks = line_breaks
 
     def add(self, line: str) -> None:
-        self.lines.append(line)
-
-    def text(self) -> str:
         if self.line_breaks:
-            return "\n".join(self.lines) + "\n"
+            self.lines.append(line)
+        elif len(self.lines) > 0:
+            self.lines[-1] += line
         else:
-            return "".join(self.lines)
+            self.lines.append(line)
 
+    def text(self) -> str:
+        return "\n".join(self.lines) + "\n"
+
     def __init__(self) -> None:
         self.init()
 
```
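
The effect of the change: with line_breaks disabled, add() appends to the last buffered line instead of opening a new one, which is what lets streamed fragments join into continuous prose while text() keeps a single join path. A standalone re-creation of just the changed pair, since the real TextBuffer carries more state:

```python
class _Buffer:
    """Illustrative re-creation of the add()/text() pair from the diff."""
    def __init__(self, line_breaks: bool = True) -> None:
        self.lines: list[str] = []
        self.line_breaks = line_breaks

    def add(self, line: str) -> None:
        if self.line_breaks:
            self.lines.append(line)   # one buffered line per add()
        elif len(self.lines) > 0:
            self.lines[-1] += line    # glue streamed fragments together
        else:
            self.lines.append(line)

    def text(self) -> str:
        return "\n".join(self.lines) + "\n"

buf = _Buffer(line_breaks=False)
for chunk in ("The barkeep ", "nods ", "slowly."):
    buf.add(chunk)
print(buf.text(), end="")  # -> The barkeep nods slowly.
```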
