
Commit 72dbe85
Add support for Volcengine online LLMs (#2165)

* use oai adaptive bridge function to handle vol engine
* add vol engine deepseek v3

Co-authored-by: binary-husky <[email protected]>
1 parent 4a79aa6 commit 72dbe85

4 files changed: +143 -48 lines changed

config.py

Lines changed: 6 additions & 1 deletion
@@ -43,7 +43,8 @@
     "gpt-3.5-turbo-1106", "gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt-3.5",
     "gpt-4", "gpt-4-32k", "azure-gpt-4", "glm-4", "glm-4v", "glm-3-turbo",
     "gemini-1.5-pro", "chatglm3", "chatglm4",
-    "deepseek-chat", "deepseek-coder", "deepseek-reasoner"
+    "deepseek-chat", "deepseek-coder", "deepseek-reasoner",
+    "volcengine-deepseek-r1-250120", "volcengine-deepseek-v3-241226",
 ]

 EMBEDDING_MODEL = "text-embedding-3-small"
@@ -267,6 +268,10 @@
 YIMODEL_API_KEY = ""


+# Volcengine online LLMs; get an api-key at https://console.volcengine.com/ark/region:ark+cn-beijing/endpoint
+ARK_API_KEY = "00000000-0000-0000-0000-000000000000" # Volcengine API KEY
+
+
 # Zidong Taichu LLM, https://ai-maas.wair.ac.cn
 TAICHU_API_KEY = ""
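Read together, the two hunks suggest the following sketch of a user configuration for the new provider. The key is the placeholder from the diff, and the "(max_token=...)" suffix is the optional override documented in bridge_all.py below; both lines are illustrative, not prescribed defaults.

```python
# Sketch of a config.py setup enabling the new Volcengine models (values illustrative).
AVAIL_LLM_MODELS = [
    "volcengine-deepseek-r1-250120",
    "volcengine-deepseek-v3-241226(max_token=6666)",  # optional per-model max_token override
]
ARK_API_KEY = "00000000-0000-0000-0000-000000000000"  # placeholder; substitute a real Ark key
```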

request_llms/bridge_all.py

Lines changed: 60 additions & 2 deletions
@@ -80,6 +80,7 @@ def decode(self, *args, **kwargs):
 yimodel_endpoint = "https://api.lingyiwanwu.com/v1/chat/completions"
 deepseekapi_endpoint = "https://api.deepseek.com/v1/chat/completions"
 grok_model_endpoint = "https://api.x.ai/v1/chat/completions"
+volcengine_endpoint = "https://ark.cn-beijing.volces.com/api/v3/chat/completions"

 if not AZURE_ENDPOINT.endswith('/'): AZURE_ENDPOINT += '/'
 azure_endpoint = AZURE_ENDPOINT + f'openai/deployments/{AZURE_ENGINE}/chat/completions?api-version=2023-05-15'
@@ -102,6 +103,7 @@ def decode(self, *args, **kwargs):
 if yimodel_endpoint in API_URL_REDIRECT: yimodel_endpoint = API_URL_REDIRECT[yimodel_endpoint]
 if deepseekapi_endpoint in API_URL_REDIRECT: deepseekapi_endpoint = API_URL_REDIRECT[deepseekapi_endpoint]
 if grok_model_endpoint in API_URL_REDIRECT: grok_model_endpoint = API_URL_REDIRECT[grok_model_endpoint]
+if volcengine_endpoint in API_URL_REDIRECT: volcengine_endpoint = API_URL_REDIRECT[volcengine_endpoint]

 # fetch the tokenizer
 tokenizer_gpt35 = LazyloadTiktoken("gpt-3.5-turbo")
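Because the new endpoint goes through the same API_URL_REDIRECT hook as the others, it can be rerouted from config.py like any existing endpoint. A minimal sketch, assuming a self-hosted gateway; the gateway URL is made up:

```python
# Hypothetical config.py excerpt: reroute the Volcengine endpoint through a gateway.
# The dict key must match the default endpoint URL exactly.
API_URL_REDIRECT = {
    "https://ark.cn-beijing.volces.com/api/v3/chat/completions":
        "https://my-gateway.example.com/ark/v3/chat/completions",  # made-up gateway URL
}
```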
@@ -954,7 +956,7 @@ def decode(self, *args, **kwargs):
 try:
     grok_beta_128k_noui, grok_beta_128k_ui = get_predict_function(
         api_key_conf_name="GROK_API_KEY", max_output_token=8192, disable_proxy=False
-        )
+    )

     model_info.update({
         "grok-beta": {
@@ -1089,8 +1091,10 @@ def decode(self, *args, **kwargs):
     })
 except:
     logger.error(trimmed_format_exc())
+
 # -=-=-=-=-=-=- HighFlyer / DeepSeek online LLM API -=-=-=-=-=-=-
-if "deepseek-chat" in AVAIL_LLM_MODELS or "deepseek-coder" in AVAIL_LLM_MODELS or "deepseek-reasoner" in AVAIL_LLM_MODELS:
+deepseek_models = ["deepseek-chat", "deepseek-coder", "deepseek-reasoner"]
+if any(item in deepseek_models for item in AVAIL_LLM_MODELS):
     try:
         deepseekapi_noui, deepseekapi_ui = get_predict_function(
             api_key_conf_name="DEEPSEEK_API_KEY", max_output_token=4096, disable_proxy=False
@@ -1127,6 +1131,60 @@ def decode(self, *args, **kwargs):
     })
 except:
     logger.error(trimmed_format_exc())
+
+# -=-=-=-=-=-=- Volcengine alignment support -=-=-=-=-=-=-
+for model in [m for m in AVAIL_LLM_MODELS if m.startswith("volcengine-")]:
+    # This interface was designed for more flexible access to the Volcengine multi-model
+    # management console, e.g. AVAIL_LLM_MODELS = ["volcengine-deepseek-r1-250120(max_token=6666)"], where
+    #   "volcengine-"        is the prefix (required)
+    #   "deepseek-r1-250120" is the model name (required)
+    #   "(max_token=6666)"   is the configuration (optional)
+    model_info_extend = dict(model_info)  # work on a copy, so the unprefixed names below do not leak into model_info
+    model_info_extend.update({
+        "deepseek-r1-250120": {
+            "max_token": 16384,
+            "enable_reasoning": True,
+            "can_multi_thread": True,
+            "endpoint": volcengine_endpoint,
+            "tokenizer": tokenizer_gpt35,
+            "token_cnt": get_token_num_gpt35,
+        },
+        "deepseek-v3-241226": {
+            "max_token": 16384,
+            "enable_reasoning": False,
+            "can_multi_thread": True,
+            "endpoint": volcengine_endpoint,
+            "tokenizer": tokenizer_gpt35,
+            "token_cnt": get_token_num_gpt35,
+        },
+    })
+    try:
+        origin_model_name, max_token_tmp = read_one_api_model_name(model)
+        # if this is a known model, try to fetch its info
+        original_model_info = model_info_extend.get(origin_model_name.replace("volcengine-", "", 1), None)
+    except:
+        logger.error(f"The max_token configuration of volcengine model {model} is not an integer; please check the config file.")
+        continue
+
+    volcengine_noui, volcengine_ui = get_predict_function(api_key_conf_name="ARK_API_KEY", max_output_token=8192, disable_proxy=True, model_remove_prefix=["volcengine-"])
+
+    this_model_info = {
+        "fn_with_ui": volcengine_ui,
+        "fn_without_ui": volcengine_noui,
+        "endpoint": volcengine_endpoint,
+        "can_multi_thread": True,
+        "max_token": 64000,
+        "tokenizer": tokenizer_gpt35,
+        "token_cnt": get_token_num_gpt35,
+    }
+
+    # carry over other known attributes of the model
+    for attribute in ("has_multimodal_capacity", "enable_reasoning"):
+        if original_model_info is not None and original_model_info.get(attribute, None) is not None:
+            this_model_info.update({attribute: original_model_info.get(attribute, None)})
+    model_info.update({model: this_model_info})
+
 # -=-=-=-=-=-=- one-api alignment support -=-=-=-=-=-=-
 for model in [m for m in AVAIL_LLM_MODELS if m.startswith("one-api-")]:
     # This interface was designed for more flexible access to the one-api multi-model management console, e.g. AVAIL_LLM_MODELS = ["one-api-mixtral-8x7b(max_token=6666)"]
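read_one_api_model_name is the same suffix parser the one-api path uses: it splits the optional "(max_token=...)" decoration off the model string, and it raises when the number is malformed, which the except/continue above absorbs. A hypothetical stand-in showing that contract (not the repo's implementation):

```python
import re

def parse_model_spec(model: str, default_max_token: int = 4096) -> tuple[str, int]:
    """Hypothetical stand-in for read_one_api_model_name (not the repo's code).

    "volcengine-deepseek-r1-250120(max_token=6666)" -> ("volcengine-deepseek-r1-250120", 6666)
    "volcengine-deepseek-r1-250120"                 -> ("volcengine-deepseek-r1-250120", 4096)
    """
    match = re.match(r"^(.+?)\(max_token=(.+)\)$", model)
    if match is None:
        return model, default_max_token
    # int() raises on a malformed suffix, mirroring the except branch above
    return match.group(1), int(match.group(2))

assert parse_model_spec("volcengine-deepseek-r1-250120(max_token=6666)") == \
    ("volcengine-deepseek-r1-250120", 6666)
```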

request_llms/oai_std_model_template.py

Lines changed: 17 additions & 4 deletions
@@ -57,7 +57,7 @@ def decode_chunk(chunk):
             finish_reason = chunk["error"]["code"]
         except:
             finish_reason = "API_ERROR"
-        return response, reasoning_content, finish_reason
+        return response, reasoning_content, finish_reason, str(chunk)

     try:
         if chunk["choices"][0]["delta"]["content"] is not None:
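decode_chunk now also returns the stringified chunk, so every call site has to unpack four values; the extra element is mainly useful for logging when finish_reason signals an error. A minimal caller sketch; the consume_chunk wrapper and the loguru import are assumptions for illustration, not code from this commit:

```python
from loguru import logger  # assumed: the diff uses a logger object elsewhere

def consume_chunk(chunk, decode_chunk):
    # decode_chunk is the patched helper above; it now returns a 4-tuple
    response, reasoning_content, finish_reason, decoded_chunk = decode_chunk(chunk)
    if finish_reason == "API_ERROR":
        # the new fourth element preserves the raw payload for diagnostics
        logger.error(f"Abnormal chunk received: {decoded_chunk}")
    return response, reasoning_content, finish_reason
```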
@@ -122,7 +122,8 @@ def generate_message(input, model, key, history, max_output_token, system_prompt
 def get_predict_function(
     api_key_conf_name,
     max_output_token,
-    disable_proxy = False
+    disable_proxy = False,
+    model_remove_prefix = [],
 ):
     """
     Generate response functions for an OpenAI-style API, taking the parameters:
@@ -137,6 +138,16 @@

     APIKEY = get_conf(api_key_conf_name)

+    def remove_prefix(model_name):
+        # Strip the model-name prefix: "volcengine-deepseek-r1-250120" becomes "deepseek-r1-250120"
+        if not model_remove_prefix:
+            return model_name
+        model_without_prefix = model_name
+        for prefix in model_remove_prefix:
+            if model_without_prefix.startswith(prefix):
+                model_without_prefix = model_without_prefix[len(prefix):]
+        return model_without_prefix
+
     def predict_no_ui_long_connection(
         inputs,
         llm_kwargs,
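remove_prefix is a closure over model_remove_prefix and cannot be imported on its own, so here is a standalone, runnable copy of the same logic for illustration (the default prefix tuple is an assumption):

```python
# Standalone copy of the helper above; in the repo it is a closure inside get_predict_function.
def remove_prefix(model_name, model_remove_prefix=("volcengine-",)):
    if not model_remove_prefix:
        return model_name
    model_without_prefix = model_name
    for prefix in model_remove_prefix:
        if model_without_prefix.startswith(prefix):
            model_without_prefix = model_without_prefix[len(prefix):]
    return model_without_prefix

assert remove_prefix("volcengine-deepseek-r1-250120") == "deepseek-r1-250120"
assert remove_prefix("deepseek-chat") == "deepseek-chat"  # unprefixed names pass through unchanged
```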
@@ -164,9 +175,11 @@ def predict_no_ui_long_connection(
         raise RuntimeError(f"APIKEY is empty; please check {APIKEY} in the config file")
     if inputs == "":
         inputs = "你好👋"
+
+
     headers, payload = generate_message(
         input=inputs,
-        model=llm_kwargs["llm_model"],
+        model=remove_prefix(llm_kwargs["llm_model"]),
         key=APIKEY,
         history=history,
         max_output_token=max_output_token,
@@ -302,7 +315,7 @@ def predict(

     headers, payload = generate_message(
         input=inputs,
-        model=llm_kwargs["llm_model"],
+        model=remove_prefix(llm_kwargs["llm_model"]),
         key=APIKEY,
         history=history,
         max_output_token=max_output_token,
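The only caller that passes the new parameter in this commit is the Volcengine loop in bridge_all.py; a usage sketch mirroring that hunk:

```python
from request_llms.oai_std_model_template import get_predict_function

# Build the two predict functions for the Volcengine bridge; the prefix list makes
# the HTTP payload carry "deepseek-r1-250120" while the UI keeps the full name.
volcengine_noui, volcengine_ui = get_predict_function(
    api_key_conf_name="ARK_API_KEY",      # config entry added by this commit
    max_output_token=8192,
    disable_proxy=True,                   # the Ark endpoint is reached directly
    model_remove_prefix=["volcengine-"],  # stripped by remove_prefix before each request
)
```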

tests/test_llms.py

Lines changed: 60 additions & 41 deletions
@@ -11,46 +11,65 @@ def validate_path():


 validate_path()  # validate path so you can run from base directory
+if __name__ == "__main__":
+    # from request_llms.bridge_taichu import predict_no_ui_long_connection
+    from request_llms.bridge_volcengine import predict_no_ui_long_connection
+    # from request_llms.bridge_cohere import predict_no_ui_long_connection
+    # from request_llms.bridge_spark import predict_no_ui_long_connection
+    # from request_llms.bridge_zhipu import predict_no_ui_long_connection
+    # from request_llms.bridge_chatglm3 import predict_no_ui_long_connection
+    llm_kwargs = {
+        "llm_model": "volcengine",
+        "max_length": 4096,
+        "top_p": 1,
+        "temperature": 1,
+    }

-if "在线模型":
-    if __name__ == "__main__":
-        from request_llms.bridge_taichu import predict_no_ui_long_connection
-        # from request_llms.bridge_cohere import predict_no_ui_long_connection
-        # from request_llms.bridge_spark import predict_no_ui_long_connection
-        # from request_llms.bridge_zhipu import predict_no_ui_long_connection
-        # from request_llms.bridge_chatglm3 import predict_no_ui_long_connection
-        llm_kwargs = {
-            "llm_model": "taichu",
-            "max_length": 4096,
-            "top_p": 1,
-            "temperature": 1,
-        }
-
-        result = predict_no_ui_long_connection(
-            inputs="请问什么是质子?", llm_kwargs=llm_kwargs, history=["你好", "我好!"], sys_prompt="系统"
-        )
-        print("final result:", result)
-
-
-if "本地模型":
-    if __name__ == "__main__":
-        # from request_llms.bridge_newbingfree import predict_no_ui_long_connection
-        # from request_llms.bridge_moss import predict_no_ui_long_connection
-        # from request_llms.bridge_jittorllms_pangualpha import predict_no_ui_long_connection
-        # from request_llms.bridge_jittorllms_llama import predict_no_ui_long_connection
-        # from request_llms.bridge_claude import predict_no_ui_long_connection
-        # from request_llms.bridge_internlm import predict_no_ui_long_connection
-        # from request_llms.bridge_deepseekcoder import predict_no_ui_long_connection
-        # from request_llms.bridge_qwen_7B import predict_no_ui_long_connection
-        # from request_llms.bridge_qwen_local import predict_no_ui_long_connection
-        llm_kwargs = {
-            "max_length": 4096,
-            "top_p": 1,
-            "temperature": 1,
-        }
-        result = predict_no_ui_long_connection(
-            inputs="请问什么是质子?", llm_kwargs=llm_kwargs, history=["你好", "我好!"], sys_prompt=""
-        )
-        print("final result:", result)
+    result = predict_no_ui_long_connection(
+        inputs="请问什么是质子?", llm_kwargs=llm_kwargs, history=["你好", "我好!"], sys_prompt="系统"
+    )
+    print("final result:", result)
+
+# if "在线模型":
+#     if __name__ == "__main__":
+#         # from request_llms.bridge_taichu import predict_no_ui_long_connection
+#         from request_llms.bridge_volcengine import predict_no_ui_long_connection
+#         # from request_llms.bridge_cohere import predict_no_ui_long_connection
+#         # from request_llms.bridge_spark import predict_no_ui_long_connection
+#         # from request_llms.bridge_zhipu import predict_no_ui_long_connection
+#         # from request_llms.bridge_chatglm3 import predict_no_ui_long_connection
+#         llm_kwargs = {
+#             "llm_model": "ep-20250222011816-5cq8z",
+#             "max_length": 4096,
+#             "top_p": 1,
+#             "temperature": 1,
+#         }
+
+#         result = predict_no_ui_long_connection(
+#             inputs="请问什么是质子?", llm_kwargs=llm_kwargs, history=["你好", "我好!"], sys_prompt="系统"
+#         )
+#         print("final result:", result)


+# if "本地模型":
+#     if __name__ == "__main__":
+#         # from request_llms.bridge_newbingfree import predict_no_ui_long_connection
+#         # from request_llms.bridge_moss import predict_no_ui_long_connection
+#         # from request_llms.bridge_jittorllms_pangualpha import predict_no_ui_long_connection
+#         # from request_llms.bridge_jittorllms_llama import predict_no_ui_long_connection
+#         # from request_llms.bridge_claude import predict_no_ui_long_connection
+#         # from request_llms.bridge_internlm import predict_no_ui_long_connection
+#         # from request_llms.bridge_deepseekcoder import predict_no_ui_long_connection
+#         # from request_llms.bridge_qwen_7B import predict_no_ui_long_connection
+#         # from request_llms.bridge_qwen_local import predict_no_ui_long_connection
+#         llm_kwargs = {
+#             "max_length": 4096,
+#             "top_p": 1,
+#             "temperature": 1,
+#         }
+#         result = predict_no_ui_long_connection(
+#             inputs="请问什么是质子?", llm_kwargs=llm_kwargs, history=["你好", "我好!"], sys_prompt=""
+#         )
+#         print("final result:", result)
