# import os
# os.environ["TRANSFORMERS_CACHE"] = "/media/samuel/UDISK1/transformers_cache"
import os
import time

import torch
from dotenv import load_dotenv
from langchain.llms.base import LLM
from llama_index import (
    GPTListIndex,
    LLMPredictor,
    PromptHelper,
    ServiceContext,
    SimpleDirectoryReader,
)
from transformers import pipeline

# load_dotenv()
# "random" is only a placeholder: the local OPT model below handles all
# generation, but llama_index still expects OPENAI_API_KEY to be set.
os.environ["OPENAI_API_KEY"] = "random"


def timeit():
    """
    a utility decorator to measure a function's running time
    """

    def decorator(func):
        def wrapper(*args, **kwargs):
            start = time.time()
            result = func(*args, **kwargs)
            end = time.time()

            print(f"[{(end - start):.8f} seconds]: f({args}) -> {result}")
            return result

        return wrapper

    return decorator
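# timeit is a decorator *factory*, so it is applied with parentheses (@timeit(),
# as on create_index/execute_query below); each decorated call prints a line of
# the form "[<seconds> seconds]: f(<args>) -> <result>" before returning the result.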


prompt_helper = PromptHelper(
    # maximum input size
    max_input_size=2048,
    # number of output tokens
    num_output=256,
    # the maximum overlap between chunks
    max_chunk_overlap=20,
)
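# Rough budget implied by these settings (my reading, not stated in the original
# code): with max_input_size=2048 and num_output=256 reserved for generation,
# roughly 2048 - 256 = 1792 tokens remain per LLM call for the prompt template
# plus retrieved context.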


class LocalOPT(LLM):
    # model_name = "facebook/opt-iml-max-30b" (this is a 60gb model)
    model_name = "facebook/opt-iml-1.3b"  # ~2.63gb model
    # https://huggingface.co/docs/transformers/main_classes/pipelines
    pipeline = pipeline(
        "text-generation",
        model=model_name,
        device="cuda:0",
        model_kwargs={"torch_dtype": torch.bfloat16},
    )

    def _call(self, prompt: str, stop=None) -> str:
        response = self.pipeline(prompt, max_new_tokens=256)[0]["generated_text"]
        # only return newly generated tokens
        return response[len(prompt) :]

    @property
    def _identifying_params(self):
        return {"name_of_model": self.model_name}

    @property
    def _llm_type(self):
        return "custom"
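# Hypothetical sanity check, not part of the original script: calling the custom
# LLM on its own should return only freshly generated text, because _call()
# slices the prompt off the front of the pipeline output, e.g.:
#
#   opt = LocalOPT()
#   print(opt._call("Indonesia exports most of its coal to"))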


@timeit()
def create_index():
    print("Creating index")
    # wrap the local model so llama_index can drive it through the langchain LLM interface
    llm = LLMPredictor(llm=LocalOPT())
    # Service Context: a container for your llamaindex index and query
    # https://gpt-index.readthedocs.io/en/latest/reference/service_context.html
    service_context = ServiceContext.from_defaults(
        llm_predictor=llm, prompt_helper=prompt_helper
    )
    docs = SimpleDirectoryReader("news").load_data()
    index = GPTListIndex.from_documents(docs, service_context=service_context)
    print("Done creating index", index)
    return index
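# Note: GPTListIndex keeps the document chunks as a flat list and walks through
# them at query time, so the keyword filters used in execute_query() below are
# the main lever for keeping the number of LLM calls (and hence runtime) down.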


@timeit()
def execute_query():
    # note: queries the module-level `index` created in the __main__ block below
    response = index.query(
        "Who does Indonesia export its coal to in 2023?",
        # This will preemptively filter out nodes that do not contain required_keywords
        # or that contain exclude_keywords, reducing the search space and hence the
        # time, number of LLM calls, and cost.
        exclude_keywords=["petroleum"],
        # required_keywords=["coal"],
        # exclude_keywords=["oil", "gas", "petroleum"],
    )
    return response


if __name__ == "__main__":
    """
    Check whether a cached index already exists on disk;
    if not, build it from the documents (the first run also downloads
    the OPT weights from huggingface).
    """
    if not os.path.exists("7_custom_opt.json"):
        # no cached index yet: build it and persist it for later runs
        index = create_index()
        index.save_to_disk("7_custom_opt.json")
    else:
        print("Loading local cache of model")
        llm = LLMPredictor(llm=LocalOPT())
        service_context = ServiceContext.from_defaults(
            llm_predictor=llm, prompt_helper=prompt_helper
        )
        index = GPTListIndex.load_from_disk(
            "7_custom_opt.json", service_context=service_context
        )

    response = execute_query()
    print(response)
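# Assumed usage, pieced together from the code above: put the news documents you
# want to query in a ./news folder relative to the working directory and run the
# script on a machine with a CUDA-capable GPU (the pipeline is pinned to
# device="cuda:0"). The first run builds the index and saves it to
# 7_custom_opt.json; later runs reload it from disk.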