@@ -80,6 +80,7 @@ def decode(self, *args, **kwargs):
 yimodel_endpoint = "https://api.lingyiwanwu.com/v1/chat/completions"
 deepseekapi_endpoint = "https://api.deepseek.com/v1/chat/completions"
 grok_model_endpoint = "https://api.x.ai/v1/chat/completions"
+volcengine_endpoint = "https://ark.cn-beijing.volces.com/api/v3/chat/completions"

 if not AZURE_ENDPOINT.endswith('/'): AZURE_ENDPOINT += '/'
 azure_endpoint = AZURE_ENDPOINT + f'openai/deployments/{AZURE_ENGINE}/chat/completions?api-version=2023-05-15'
@@ -102,6 +103,7 @@ def decode(self, *args, **kwargs):
 if yimodel_endpoint in API_URL_REDIRECT: yimodel_endpoint = API_URL_REDIRECT[yimodel_endpoint]
 if deepseekapi_endpoint in API_URL_REDIRECT: deepseekapi_endpoint = API_URL_REDIRECT[deepseekapi_endpoint]
 if grok_model_endpoint in API_URL_REDIRECT: grok_model_endpoint = API_URL_REDIRECT[grok_model_endpoint]
+if volcengine_endpoint in API_URL_REDIRECT: volcengine_endpoint = API_URL_REDIRECT[volcengine_endpoint]

 # Get the tokenizer
 tokenizer_gpt35 = LazyloadTiktoken("gpt-3.5-turbo")
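The `API_URL_REDIRECT` lookups above let a deployment transparently re-point any built-in endpoint at a mirror or proxy. A minimal sketch of the mechanism, assuming `API_URL_REDIRECT` is the dict loaded from the user's configuration (the loading code is outside this diff) and using a made-up proxy URL:

```python
# Hypothetical redirect entry; the key must match the built-in endpoint string exactly.
API_URL_REDIRECT = {
    "https://ark.cn-beijing.volces.com/api/v3/chat/completions":
        "http://my-proxy.internal:8000/api/v3/chat/completions",
}

volcengine_endpoint = "https://ark.cn-beijing.volces.com/api/v3/chat/completions"
# Same pattern as the lines above: swap in the redirected URL when one is configured.
if volcengine_endpoint in API_URL_REDIRECT:
    volcengine_endpoint = API_URL_REDIRECT[volcengine_endpoint]
assert volcengine_endpoint.startswith("http://my-proxy.internal")
```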
@@ -954,7 +956,7 @@ def decode(self, *args, **kwargs):
     try:
         grok_beta_128k_noui, grok_beta_128k_ui = get_predict_function(
             api_key_conf_name="GROK_API_KEY", max_output_token=8192, disable_proxy=False
-        )
+        )

         model_info.update({
             "grok-beta": {
@@ -1089,8 +1091,10 @@ def decode(self, *args, **kwargs):
         })
     except:
         logger.error(trimmed_format_exc())
+
 # -=-=-=-=-=-=- HighFlyer DeepSeek online LLM API -=-=-=-=-=-=-
-if "deepseek-chat" in AVAIL_LLM_MODELS or "deepseek-coder" in AVAIL_LLM_MODELS or "deepseek-reasoner" in AVAIL_LLM_MODELS:
+deepseek_models = ["deepseek-chat", "deepseek-coder", "deepseek-reasoner"]
+if any(item in deepseek_models for item in AVAIL_LLM_MODELS):
     try:
         deepseekapi_noui, deepseekapi_ui = get_predict_function(
             api_key_conf_name="DEEPSEEK_API_KEY", max_output_token=4096, disable_proxy=False
@@ -1127,6 +1131,60 @@ def decode(self, *args, **kwargs):
         })
     except:
         logger.error(trimmed_format_exc())
+
+# -=-=-=-=-=-=- Volcengine alignment support -=-=-=-=-=-=-
+for model in [m for m in AVAIL_LLM_MODELS if m.startswith("volcengine-")]:
+    # This interface is designed for more flexible access to the volcengine multi-model management console.
+    # Example: AVAIL_LLM_MODELS = ["volcengine-deepseek-r1-250120(max_token=6666)"]
+    # where
+    #     "volcengine-"          is the prefix (required)
+    #     "deepseek-r1-250120"   is the model name (required)
+    #     "(max_token=6666)"     is the configuration (optional)
+    model_info_extend = model_info.copy()  # copy, so the lookup defaults below do not leak into the global table
+    model_info_extend.update({
+        "deepseek-r1-250120": {
+            "max_token": 16384,
+            "enable_reasoning": True,
+            "can_multi_thread": True,
+            "endpoint": volcengine_endpoint,
+            "tokenizer": tokenizer_gpt35,
+            "token_cnt": get_token_num_gpt35,
+        },
+        "deepseek-v3-241226": {
+            "max_token": 16384,
+            "enable_reasoning": False,
+            "can_multi_thread": True,
+            "endpoint": volcengine_endpoint,
+            "tokenizer": tokenizer_gpt35,
+            "token_cnt": get_token_num_gpt35,
+        },
+    })
+    try:
+        origin_model_name, max_token_tmp = read_one_api_model_name(model)
+        # If this is a known model, try to fetch its default info
+        original_model_info = model_info_extend.get(origin_model_name.replace("volcengine-", "", 1), None)
+    except:
+        logger.error(f"The max_token configuration of volcengine model {model} is not an integer; please check the config file.")
+        continue
+
+    volcengine_noui, volcengine_ui = get_predict_function(api_key_conf_name="ARK_API_KEY", max_output_token=8192, disable_proxy=True, model_remove_prefix=["volcengine-"])
+
+    this_model_info = {
+        "fn_with_ui": volcengine_ui,
+        "fn_without_ui": volcengine_noui,
+        "endpoint": volcengine_endpoint,
+        "can_multi_thread": True,
+        "max_token": max_token_tmp if "(max_token=" in model else 64000,  # honour an explicit "(max_token=N)" suffix, else keep the 64k default
+        "tokenizer": tokenizer_gpt35,
+        "token_cnt": get_token_num_gpt35,
+    }
+
+    # Sync the remaining attributes of known models
+    for attribute in ("has_multimodal_capacity", "enable_reasoning"):
+        if original_model_info is not None and original_model_info.get(attribute, None) is not None:
+            this_model_info.update({attribute: original_model_info.get(attribute, None)})
+    model_info.update({model: this_model_info})
+
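The `volcengine-<model>(max_token=N)` naming scheme above is decoded by `read_one_api_model_name`, the same helper the one-api section below relies on. A sketch of the behaviour the loop assumes (this re-implementation is hypothetical, for illustration; the suffix is optional and a default applies when it is absent):

```python
import re

def read_one_api_model_name_sketch(model: str):
    """Hypothetical stand-in for the project's read_one_api_model_name."""
    pattern = r"\(max_token=(\d+)\)"
    match = re.search(pattern, model)
    if match:
        return re.sub(pattern, "", model), int(match.group(1))
    return model, 4096  # assumed default when no suffix is given

name, max_token = read_one_api_model_name_sketch("volcengine-deepseek-r1-250120(max_token=6666)")
assert (name, max_token) == ("volcengine-deepseek-r1-250120", 6666)
# Stripping the required "volcengine-" prefix yields the key for the model_info lookup:
assert name.replace("volcengine-", "", 1) == "deepseek-r1-250120"
```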
 # -=-=-=-=-=-=- one-api alignment support -=-=-=-=-=-=-
 for model in [m for m in AVAIL_LLM_MODELS if m.startswith("one-api-")]:
     # This interface is designed for more flexible access to the one-api multi-model management console, e.g. AVAIL_LLM_MODELS = ["one-api-mixtral-8x7b(max_token=6666)"]