File tree 2 files changed +33
-1
lines changed
2 files changed +33
-1
lines changed Original file line number Diff line number Diff line change 7
7
paddlenlp = LazyImport ("paddlenlp" , globals (), "modelcache.embedding.paddlenlp" )
8
8
timm = LazyImport ("timm" , globals (), "modelcache.embedding.timm" )
9
9
huggingface_tei = LazyImport ("huggingface_tei" , globals (), "modelcache.embedding.huggingface_tei" )
10
+ bge_m3 = LazyImport ("bge_m3" , globals (), "modelcache.embedding.bge_m3" )
10
11
11
12
12
13
def Huggingface (model = "sentence-transformers/all-mpnet-base-v2" ):
@@ -33,4 +34,7 @@ def Timm(model="resnet50", device="default"):
33
34
return timm .Timm (model , device )
34
35
35
36
def HuggingfaceTEI(base_url, model):
    """Build a HuggingfaceTEI embedding backend.

    Delegates to the lazily-imported ``modelcache.embedding.huggingface_tei``
    module (see the ``LazyImport`` registration at the top of this file), so
    the heavy dependency is only loaded when this factory is actually called.

    :param base_url: endpoint URL of the Text-Embeddings-Inference server.
    :param model: model identifier passed through to the backend.
    :return: a ``huggingface_tei.HuggingfaceTEI`` instance.
    """
    return huggingface_tei.HuggingfaceTEI(base_url, model)
38
+
39
def BgeM3Embedding(model_path="model/bge-m3"):
    """Build a BGE-M3 embedding backend.

    Delegates to the lazily-imported ``modelcache.embedding.bge_m3`` module
    registered via ``LazyImport`` above, mirroring the other factories in
    this file.

    :param model_path: local path (or model id) of the BGE-M3 checkpoint.
    :return: a ``bge_m3.BgeM3Embedding`` instance.
    """
    return bge_m3.BgeM3Embedding(model_path)
Original file line number Diff line number Diff line change
1
+ # -*- coding: utf-8 -*-
2
+ import numpy as np
3
+ from modelcache .embedding .base import BaseEmbedding
4
+ from transformers import AutoTokenizer , AutoModel
5
+ from FlagEmbedding import BGEM3FlagModel
6
+
7
class BgeM3Embedding(BaseEmbedding):
    """Dense sentence embeddings backed by the BGE-M3 model.

    Loads a Hugging Face tokenizer/model from ``model_path`` and wraps them
    in FlagEmbedding's ``BGEM3FlagModel`` (reusing the already-loaded objects
    so the weights are not loaded twice).
    """

    def __init__(self, model_path: str = "model/bge-m3"):
        """Load the tokenizer, the model weights, and the FlagEmbedding wrapper.

        :param model_path: local path (or model id) of the BGE-M3 checkpoint.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        self.model = AutoModel.from_pretrained(model_path)

        # Pass the pre-loaded model/tokenizer so BGEM3FlagModel does not
        # load a second copy of the weights.
        self.bge_model = BGEM3FlagModel(model_name_or_path=model_path,
                                        model=self.model,
                                        tokenizer=self.tokenizer,
                                        use_fp16=False)

        # BUG FIX: the hard-coded 768 did not match the vectors actually
        # produced -- BGE-M3's dense vectors are sized by the backbone's
        # hidden size (1024 for the published bge-m3 checkpoint, which is
        # XLM-RoBERTa-large based).  Reporting 768 while returning 1024-wide
        # vectors would mis-size any vector index built from `dimension`.
        # Derive the width from the loaded config; fall back to the old
        # value only if the config lacks `hidden_size`.
        self.__dimension = getattr(self.model.config, "hidden_size", 768)

    def to_embeddings(self, data, **_):
        """Encode ``data`` (a string or a list of strings) into dense vectors.

        :param data: text or list of texts to embed.
        :return: float32 ``numpy`` array of shape ``(len(data), dimension)``.
        """
        if not isinstance(data, list):
            data = [data]

        # Only the dense representation is used; BGE-M3's sparse/colbert
        # outputs are discarded.
        embeddings = self.bge_model.encode(data, batch_size=12, max_length=8192)['dense_vecs']
        return np.array(embeddings).astype("float32")

    @property
    def dimension(self):
        # Embedding width reported to callers (e.g. for sizing a vector index).
        return self.__dimension
You can’t perform that action at this time.
0 commit comments