Merge pull request #47 from codefuse-ai/modelcache_dev

peng3307165 · web-flow · commit c0a45e62d00b · 2024-09-13T15:18:50.000+08:00
Modelcache dev
diff --git a/modelcache/adapter/adapter.py b/modelcache/adapter/adapter.py
@@ -1,14 +1,12 @@
 # -*- coding: utf-8 -*-
 import logging
-
-import openai
 from modelcache.adapter.adapter_query import adapt_query
 from modelcache.adapter.adapter_insert import adapt_insert
 from modelcache.adapter.adapter_remove import adapt_remove
 from modelcache.adapter.adapter_register import adapt_register
 
 
-class ChatCompletion(openai.ChatCompletion):
+class ChatCompletion(object):
     """Openai ChatCompletion Wrapper"""
 
     @classmethod
diff --git a/modelcache/adapter/adapter_query.py b/modelcache/adapter/adapter_query.py
@@ -22,14 +22,12 @@ def adapt_query(cache_data_convert, *args, **kwargs):
         extra_param=context.get("pre_embedding_func", None),
         prompts=chat_cache.config.prompts,
     )
-
     if cache_enable:
         embedding_data = time_cal(
             chat_cache.embedding_func,
             func_name="embedding",
             report_func=chat_cache.report.embedding,
         )(pre_embedding_data)
-
     if cache_enable:
         cache_data_list = time_cal(
             chat_cache.data_manager.search,
@@ -64,7 +62,6 @@ def adapt_query(cache_data_convert, *args, **kwargs):
             if rank_threshold_long < min_rank
             else rank_threshold_long
         )
-
         if cache_data_list is None or len(cache_data_list) == 0:
             rank_pre = -1.0
         else:
diff --git a/modelcache/embedding/data2vec.py b/modelcache/embedding/data2vec.py
@@ -14,24 +14,23 @@ def mean_pooling(model_output, attention_mask):
 
 
 class Data2VecAudio(BaseEmbedding):
-    def __init__(self, model: str = "model/text2vec-base-chinese/"):
+    def __init__(self, model):
         current_dir = os.path.dirname(os.path.abspath(__file__))
         parent_dir = os.path.dirname(current_dir)
         model_dir = os.path.dirname(parent_dir)
-        model = os.path.join(model_dir, model)
+        model_path = os.path.join(model_dir, model)
+
+        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
+        self.tokenizer = BertTokenizer.from_pretrained(model_path, local_files_only=True)
+        self.model = BertModel.from_pretrained(model_path, local_files_only=True)
 
         try:
             self.__dimension = self.model.config.hidden_size
         except Exception:
             from transformers import AutoConfig
-
             config = AutoConfig.from_pretrained(model)
             self.__dimension = config.hidden_size
 
-        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
-        self.tokenizer = BertTokenizer.from_pretrained(model, local_files_only=True)
-        self.model = BertModel.from_pretrained(model, local_files_only=True)
-
     def to_embeddings(self, data, **_):
         encoded_input = self.tokenizer(data, padding=True, truncation=True, return_tensors='pt')
         num_tokens = sum(map(len, encoded_input['input_ids']))
diff --git a/modelcache_mm/manager/vector_data/faiss.py b/modelcache_mm/manager/vector_data/faiss.py
@@ -25,8 +25,6 @@ def add(self, datas: List[VectorData], model=None, mm_type=None):
         data_array, id_array = map(list, zip(*((data.data, data.id) for data in datas)))
         np_data = np.array(data_array).astype("float32")
         ids = np.array(id_array)
-        print('insert_np_data: {}'.format(np_data))
-        print('insert_np_data: {}'.format(np_data.shape))
         self._index.add_with_ids(np_data, ids)
 
     def search(self, data: np.ndarray, top_k: int, model, mm_type='mm'):
diff --git a/modelcache_mm/manager/vector_data/manager.py b/modelcache_mm/manager/vector_data/manager.py
@@ -2,7 +2,7 @@
 from modelcache_mm.utils.error import NotFoundError, ParamError
 
 TOP_K = 1
-FAISS_INDEX_PATH = "faiss.index"
+FAISS_INDEX_PATH = "mm_faiss.index"
 DIMENSION = 0
 MILVUS_HOST = "localhost"
 MILVUS_PORT = 19530