
Commit e7eb8c8

Merge pull request #164 from gomate-community/pipeline
Pipeline
2 parents 926ed71 + 7d7f245 commit e7eb8c8

File tree

6 files changed (+294, -107 lines)

app_local_model.py

Lines changed: 82 additions & 28 deletions
@@ -10,60 +10,114 @@
 import sys
 
 sys.path.append(".")
-import os
 import shutil
 import time
 import gradio as gr
 import loguru
 import pandas as pd
 from datetime import datetime
 import pytz
-from trustrag.config.config_loader import ConfigLoader
-from trustrag.applications.rag import RagApplication, ApplicationConfig
 from trustrag.modules.reranker.bge_reranker import BgeRerankerConfig
 from trustrag.modules.retrieval.dense_retriever import DenseRetrieverConfig
+import os
+from trustrag.modules.citation.match_citation import MatchCitation
+from trustrag.modules.document.common_parser import CommonParser
+from trustrag.modules.generator.llm import Qwen3Chat
+from trustrag.modules.reranker.bge_reranker import BgeReranker
+from trustrag.modules.retrieval.dense_retriever import DenseRetriever
+from trustrag.modules.document.chunk import TextChunker
+from trustrag.modules.vector.embedding import SentenceTransformerEmbedding
+
+
+class ApplicationConfig():
+    def __init__(self):
+        self.retriever_config = None
+        self.rerank_config = None
+
+
+class RagApplication():
+    def __init__(self, config):
+        self.config = config
+        self.parser = CommonParser()
+        self.embedding_generator = SentenceTransformerEmbedding(self.config.retriever_config.model_name_or_path)
+        self.retriever = DenseRetriever(self.config.retriever_config, self.embedding_generator)
+        self.reranker = BgeReranker(self.config.rerank_config)
+        self.llm = Qwen3Chat(self.config.llm_model_path)
+        self.mc = MatchCitation()
+        self.tc = TextChunker()
+
+    def init_vector_store(self):
+        """
+
+        """
+        print("init_vector_store ... ")
+        all_paragraphs = []
+        all_chunks = []
+        for filename in os.listdir(self.config.docs_path):
+            file_path = os.path.join(self.config.docs_path, filename)
+            try:
+                paragraphs = self.parser.parse(file_path)
+                all_paragraphs.append(paragraphs)
+            except:
+                pass
+        print("chunking for paragraphs")
+        for paragraphs in all_paragraphs:
+            # Make sure paragraphs is a list and normalize its elements
+            if isinstance(paragraphs, list) and paragraphs:
+                if isinstance(paragraphs[0], dict):
+                    # list[dict] -> list[str]
+                    text_list = [' '.join(str(value) for value in item.values()) for item in paragraphs]
+                else:
+                    # already list[str]
+                    text_list = [str(item) for item in paragraphs]
+            else:
+                # handle any other case
+                text_list = [str(paragraphs)] if paragraphs else []
+
+            chunks = self.tc.get_chunks(text_list, 256)
+            all_chunks.extend(chunks)
+
+        self.retriever.build_from_texts(all_chunks)
+        print("init_vector_store done! ")
+        self.retriever.save_index(self.config.retriever_config.index_path)
+
+    def load_vector_store(self):
+        self.retriever.load_index(self.config.retriever_config.index_path)
+
+    def add_document(self, file_path):
+        chunks = self.parser.parse(file_path)
+        for chunk in chunks:
+            self.retriever.add_text(chunk)
+        print("add_document done!")
+
+    def chat(self, question: str = '', top_k: int = 5):
+        contents = self.retriever.retrieve(query=question, top_k=top_k)
+        contents = self.reranker.rerank(query=question, documents=[content['text'] for content in contents])
+        content = '\n'.join([content['text'] for content in contents])
+        result, history = self.llm.chat(question, [], content)
+        return result, history, contents, question
 
 
 # ========================== Config Start====================
-# # Create the global config instance
-# config = ConfigLoader(config_path="config_local.json")
-# app_config = ApplicationConfig()
-#
-# llm_model = config.get_config('models.llm')
-# embedding_model = config.get_config('models.embedding')
-# reranker_model = config.get_config('models.reranker')
-#
-# # Load the configuration
-# app_config.docs_path = config.get_config('paths.docs')
-# retriever_config = DenseRetrieverConfig(
-#     model_name_or_path=embedding_model["path"],
-#     dim=1024,
-#     index_path=config.get_config('index')
-# )
-# rerank_config = BgeRerankerConfig(
-#     model_name_or_path=reranker_model["path"],
-# )
 app_config = ApplicationConfig()
-app_config.docs_path = r"G:\Projects\TrustRAG\data\docs"
-app_config.llm_model_path = r"G:\pretrained_models\llm\glm-4-9b-chat"
+app_config.docs_path = r"/data/users/searchgpt/yq/TrustRAG/data/docs"
+app_config.llm_model_path = r"/data/users/searchgpt/pretrained_models/Qwen3-4B"
 retriever_config = DenseRetrieverConfig(
-    model_name_or_path=r"G:\pretrained_models\mteb\bge-large-zh-v1.5",
+    model_name_or_path=r"/data/users/searchgpt/pretrained_models/bge-large-zh-v1.5",
     dim=1024,
-    index_path=r'G:\Projects\TrustRAG\examples\retrievers\dense_cache'
+    index_path=r'/data/users/searchgpt/yq/TrustRAG/examples/retrievers/dense_cache'
 )
 rerank_config = BgeRerankerConfig(
-    model_name_or_path=r"G:\pretrained_models\mteb\bge-reranker-large"
+    model_name_or_path=r"/data/users/searchgpt/pretrained_models/bge-reranker-large"
 )
 
 app_config.retriever_config = retriever_config
 app_config.rerank_config = rerank_config
 application = RagApplication(app_config)
 application.init_vector_store()
 
-
 # ========================== Config End====================
 
-
 # Create a Beijing timezone variable
 beijing_tz = pytz.timezone("Asia/Shanghai")
 IGNORE_FILE_LIST = [".DS_Store"]
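
Usage note (not part of the diff): once application.init_vector_store() has built and saved the index, the Gradio layer presumably drives the pipeline through the RagApplication.chat method defined above. A minimal sketch of that call, with a hypothetical question and the default top_k:

# Sketch only: retrieve, rerank, and generate an answer for one question.
result, history, contents, question = application.chat(
    question="What is TrustRAG?",  # hypothetical query
    top_k=5,                       # passages to retrieve before reranking
)
print(result)    # answer from the Qwen3 model, grounded in the reranked passages
print(contents)  # the reranked passages used as context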

examples/generator/vllm_curl.sh

Lines changed: 9 additions & 0 deletions
@@ -0,0 +1,9 @@
+curl http://localhost:8002/v1/chat/completions -H "Content-Type: application/json" -d '{
+  "model": "Qwen3-32B",
+  "messages": [
+    {"role": "user", "content": "Give me a short introduction to large language models."}
+  ],
+  "temperature": 0.6,
+  "top_p": 0.95,
+  "top_k": 20
+}'
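
Usage note (not part of the diff): the same request can be issued from Python through the OpenAI-compatible client that llm.py already imports. This is a hedged sketch assuming a vLLM server exposing Qwen3-32B on localhost:8002; the api_key is a placeholder (vLLM servers typically ignore it), and top_k is passed via extra_body because the OpenAI client has no native top_k argument:

# Sketch: Python equivalent of the curl request above.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8002/v1", api_key="EMPTY")  # placeholder key
response = client.chat.completions.create(
    model="Qwen3-32B",
    messages=[{"role": "user", "content": "Give me a short introduction to large language models."}],
    temperature=0.6,
    top_p=0.95,
    extra_body={"top_k": 20},  # forwarded to the server alongside the standard fields
)
print(response.choices[0].message.content)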

requirements.txt

Lines changed: 0 additions & 1 deletion
@@ -50,7 +50,6 @@ xgboost
 bm25s
 jieba
 accelerate
-FlagEmbedding
 chardet
 openpyxl
 protobuf

trustrag/applications/rag.py

Lines changed: 2 additions & 2 deletions
@@ -15,7 +15,7 @@
 from trustrag.modules.reranker.bge_reranker import BgeReranker
 from trustrag.modules.retrieval.dense_retriever import DenseRetriever
 from trustrag.modules.document.chunk import TextChunker
-from trustrag.modules.vector.embedding import FlagModelEmbedding
+from trustrag.modules.vector.embedding import SentenceTransformerEmbedding
 class ApplicationConfig():
     def __init__(self):
         self.retriever_config = None
@@ -26,7 +26,7 @@ class RagApplication():
     def __init__(self, config):
         self.config = config
         self.parser = CommonParser()
-        self.embedding_generator = FlagModelEmbedding(self.config.retriever_config.model_name_or_path)
+        self.embedding_generator = SentenceTransformerEmbedding(self.config.retriever_config.model_name_or_path)
         self.retriever = DenseRetriever(self.config.retriever_config,self.embedding_generator)
         self.reranker = BgeReranker(self.config.rerank_config)
         self.llm = GLM4Chat(self.config.llm_model_path)
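
Note (not part of the diff): this change swaps FlagModelEmbedding for SentenceTransformerEmbedding, matching the removal of FlagEmbedding from requirements.txt. A wrapper of this kind usually just delegates to sentence-transformers; the following is a hypothetical sketch of that pattern, not the actual trustrag.modules.vector.embedding implementation:

# Hypothetical sketch of a sentence-transformers-backed embedding generator.
from sentence_transformers import SentenceTransformer
import numpy as np

class SentenceTransformerEmbeddingSketch:
    def __init__(self, model_name_or_path: str):
        self.model = SentenceTransformer(model_name_or_path)

    def encode(self, texts: list) -> np.ndarray:
        # Normalized embeddings suit inner-product / cosine retrieval indexes.
        return self.model.encode(texts, normalize_embeddings=True, convert_to_numpy=True)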

trustrag/modules/generator/llm.py

Lines changed: 184 additions & 0 deletions
@@ -180,3 +180,187 @@ def load_model(self):
         print("load model success")
 
 
+# !/usr/bin/env python
+# -*- coding:utf-8 _*-
+"""
+@author:quincy qiang
+@license: Apache Licence
+@file: llm.py
+@time: 2024/05/16
+@contact: yanqiangmiffy@gamil.com
+@software: PyCharm
+@description: coding..
+"""
+import os
+from typing import Dict, List, Any
+
+import torch
+from openai import OpenAI
+from transformers import AutoTokenizer, AutoModelForCausalLM
+from trustrag.modules.prompt.templates import SYSTEM_PROMPT, CHAT_PROMPT_TEMPLATES
+
+
+class BaseModel:
+    def __init__(self, path: str = '') -> None:
+        self.path = path
+
+    def chat(self, prompt: str, history: List[dict], content: str) -> str:
+        pass
+
+    def load_model(self):
+        pass
+
+
+class OpenAIChat(BaseModel):
+    def __init__(self, path: str = '', model: str = "gpt-3.5-turbo-1106") -> None:
+        super().__init__(path)
+        self.model = model
+
+    def chat(self, prompt: str, history: List[dict], content: str) -> str:
+        client = OpenAI()
+        client.api_key = os.getenv("OPENAI_API_KEY")
+        client.base_url = os.getenv("OPENAI_BASE_URL")
+        history.append({'role': 'user',
+                        'content': CHAT_PROMPT_TEMPLATES['RAG_PROMPT_TEMPALTE'].format(question=prompt,
+                                                                                        context=content)})
+        response = client.chat.completions.create(
+            model=self.model,
+            messages=history,
+            max_tokens=150,
+            temperature=0.1
+        )
+        return response.choices[0].message.content
+
+
+class InternLMChat(BaseModel):
+    def __init__(self, path: str = '') -> None:
+        super().__init__(path)
+        self.load_model()
+
+    def chat(self, prompt: str, history: List = [], content: str = '') -> str:
+        prompt = CHAT_PROMPT_TEMPLATES['InternLM_PROMPT_TEMPALTE'].format(question=prompt, context=content)
+        response, history = self.model.chat(self.tokenizer, prompt, history)
+        return response
+
+    def load_model(self):
+        self.tokenizer = AutoTokenizer.from_pretrained(self.path, trust_remote_code=True)
+        self.model = AutoModelForCausalLM.from_pretrained(self.path, torch_dtype=torch.float16,
+                                                          trust_remote_code=True).cuda()
+
+
+class GLM3Chat(BaseModel):
+    def __init__(self, path: str = '') -> None:
+        super().__init__(path)
+        self.load_model()
+
+    def chat(self, prompt: str, history=None, content: str = '', llm_only: bool = False) -> tuple[Any, Any]:
+        if history is None:
+            history = []
+        if llm_only:
+            prompt = prompt
+        else:
+            prompt = CHAT_PROMPT_TEMPLATES['GLM_PROMPT_TEMPALTE'].format(question=prompt, context=content)
+        response, history = self.model.chat(self.tokenizer, prompt, history, max_length=32000, num_beams=1,
+                                            do_sample=True, top_p=0.8, temperature=0.2)
+        return response, history
+
+    def load_model(self):
+        self.tokenizer = AutoTokenizer.from_pretrained(self.path, trust_remote_code=True)
+        self.model = AutoModelForCausalLM.from_pretrained(self.path, torch_dtype=torch.float16,
+                                                          trust_remote_code=True).cuda()
+
+
+class GLM4Chat(BaseModel):
+    def __init__(self, path: str = '') -> None:
+        super().__init__(path)
+        self.load_model()
+
+    def chat(self, prompt: str, history=None, content: str = '', llm_only: bool = False) -> tuple[Any, Any]:
+        if llm_only:
+            prompt = prompt
+        else:
+            prompt = CHAT_PROMPT_TEMPLATES['GLM_PROMPT_TEMPALTE'].format(system_prompt=SYSTEM_PROMPT, question=prompt,
+                                                                         context=content)
+        prompt = prompt.encode("utf-8", 'ignore').decode('utf-8', 'ignore')
+        print(prompt)
+
+        inputs = self.tokenizer.apply_chat_template([{"role": "user", "content": prompt}],
+                                                    add_generation_prompt=True,
+                                                    tokenize=True,
+                                                    return_tensors="pt",
+                                                    return_dict=True
+                                                    )
+
+        inputs = inputs.to('cuda')
+        gen_kwargs = {"max_length": 5120, "do_sample": False, "top_k": 1}
+        with torch.no_grad():
+            outputs = self.model.generate(**inputs, **gen_kwargs)
+            outputs = outputs[:, inputs['input_ids'].shape[1]:]
+            output = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+            response, history = output, []
+        return response, history
+
+    def load_model(self):
+
+        self.tokenizer = AutoTokenizer.from_pretrained(self.path, trust_remote_code=True)
+        self.model = AutoModelForCausalLM.from_pretrained(
+            self.path,
+            torch_dtype=torch.bfloat16,
+            low_cpu_mem_usage=True,
+            trust_remote_code=True
+        ).cuda().eval()
+
+
+class Qwen3Chat(BaseModel):
+    def __init__(self, path: str = '') -> None:
+        super().__init__(path)
+        self.load_model()
+        self.device = 'cuda'
+
+    def chat(self, prompt: str, history: List = [], content: str = '', llm_only: bool = False,
+             enable_thinking: bool = True) -> tuple[Any, Any]:
+        if llm_only:
+            prompt = prompt
+        else:
+            # Use an appropriate prompt template; adjust as needed
+            prompt = CHAT_PROMPT_TEMPLATES.get('DF_QWEN_PROMPT_TEMPLATE2', '{question}\n\n上下文:{context}').format(
+                question=prompt, context=content)
+
+        messages = [
+            {"role": "user", "content": prompt}
+        ]
+
+        text = self.tokenizer.apply_chat_template(
+            messages,
+            tokenize=False,
+            add_generation_prompt=True,
+            enable_thinking=enable_thinking  # supports Qwen3 thinking mode
+        )
+
+        model_inputs = self.tokenizer([text], return_tensors="pt").to(self.model.device)
+
+        # Generate text, allowing a large output budget
+        generated_ids = self.model.generate(
+            **model_inputs,
+            max_new_tokens=32768,  # allow long generations
+            do_sample=False,
+            top_k=10
+        )
+
+        # Keep only the newly generated tokens
+        output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
+        response = self.tokenizer.decode(output_ids, skip_special_tokens=True)
+
+        return response, history
+
+    def load_model(self):
+        self.tokenizer = AutoTokenizer.from_pretrained(self.path, trust_remote_code=True)
+        self.model = AutoModelForCausalLM.from_pretrained(
+            self.path,
+            torch_dtype="auto",  # let transformers pick the best dtype
+            device_map="auto",  # automatic device placement
+            trust_remote_code=True
+        )
+        print("Qwen3 model loaded successfully")
+
+
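Usage note (not part of the diff): all of these classes share the BaseModel interface, so swapping backends is a matter of constructing a different class. A hedged sketch with a placeholder checkpoint path and illustrative arguments:

from trustrag.modules.generator.llm import Qwen3Chat

llm = Qwen3Chat("/path/to/Qwen3-4B")  # placeholder local checkpoint
answer, history = llm.chat(
    prompt="What is RAG?",                                    # user question
    history=[],
    content="RAG combines retrieval with text generation.",  # retrieved context
    enable_thinking=False,                                    # skip Qwen3 thinking mode for short answers
)
print(answer)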
