Skip to content

Commit 27f6b78

Browse files
authored
Merge pull request #39 from liwenshipro/main
feat: support huggingface/text-embeddings-inference for faster embedding inference
2 parents 5e8400b + 7605a82 commit 27f6b78

File tree

3 files changed

+53
-0
lines changed

3 files changed

+53
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# -*- coding: utf-8 -*-
2+
import sys
3+
sys.path.append(".")
4+
from modelcache.embedding.huggingface_tei import HuggingfaceTEI
5+
6+
'''
7+
run tei server:
8+
text-embeddings-router --model-id BAAI/bge-large-zh-v1.5 --port 8080
9+
'''
10+
11+
def run(base_url='http://127.0.0.1:8080/v1/embeddings', model='BAAI/bge-large-zh-v1.5'):
    """Smoke-test a running text-embeddings-inference (TEI) server.

    Builds a ``HuggingfaceTEI`` client and prints the embedding dimension it
    exposes, followed by the embedding returned for the text ``'hello'``.

    :param base_url: URL of the embeddings endpoint handed to ``HuggingfaceTEI``.
    :param model: model identifier handed to ``HuggingfaceTEI``.
    """
    tei_instance = HuggingfaceTEI(base_url, model)
    print('dimension', tei_instance.dimension)
    print('embedding', tei_instance.to_embeddings('hello'))


if __name__ == '__main__':
    run()

modelcache/embedding/__init__.py

+4
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
fasttext = LazyImport("fasttext", globals(), "modelcache.embedding.fasttext")
77
paddlenlp = LazyImport("paddlenlp", globals(), "modelcache.embedding.paddlenlp")
88
timm = LazyImport("timm", globals(), "modelcache.embedding.timm")
9+
huggingface_tei = LazyImport("huggingface_tei", globals(), "modelcache.embedding.huggingface_tei")
910

1011

1112
def Huggingface(model="sentence-transformers/all-mpnet-base-v2"):
@@ -30,3 +31,6 @@ def PaddleNLP(model="ernie-3.0-medium-zh"):
3031

3132
def Timm(model="resnet50", device="default"):
    """Build a Timm embedding through the lazily imported ``timm`` module.

    Both arguments are forwarded positionally, unchanged, to ``timm.Timm``.

    :param model: forwarded as the first argument.
    :param device: forwarded as the second argument.
    :return: the object created by ``timm.Timm``.
    """
    embedder = timm.Timm(model, device)
    return embedder
34+
35+
def HuggingfaceTEI(base_url, model):
    """Build a HuggingfaceTEI embedding through the lazily imported module.

    Both arguments are forwarded positionally, unchanged, to
    ``huggingface_tei.HuggingfaceTEI``.

    :param base_url: forwarded as the first argument.
    :param model: forwarded as the second argument.
    :return: the object created by ``huggingface_tei.HuggingfaceTEI``.
    """
    embedder = huggingface_tei.HuggingfaceTEI(base_url, model)
    return embedder
+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# -*- coding: utf-8 -*-
2+
import requests
3+
import numpy as np
4+
from modelcache.embedding.base import BaseEmbedding
5+
6+
class HuggingfaceTEI(BaseEmbedding):
    """Embedding client backed by a remote HTTP embeddings endpoint.

    Every call POSTs ``{"input": ..., "model": ...}`` as JSON to ``base_url``
    and reads the vector from ``data[0].embedding`` in the JSON reply.

    :param base_url: full URL of the embeddings endpoint.
    :param model: model name sent in every request payload.
    :param timeout: seconds to wait for the server, passed straight to
        ``requests.post``; ``None`` waits indefinitely.
    :raises requests.exceptions.RequestException: if the probe request made
        during construction fails, times out, or returns an HTTP error status.
    """

    def __init__(self, base_url: str, model: str, timeout: float = 60.0):
        self.base_url = base_url
        self.model = model
        self.timeout = timeout
        self.headers = {
            'accept': 'application/json',
            'Content-Type': 'application/json',
        }
        # The dimension is not configured anywhere, so embed a throwaway
        # string once and measure the length of the vector that comes back.
        self.__dimension = self.to_embeddings('test').shape[0]

    def to_embeddings(self, data, **_):
        """Request an embedding for ``data`` from the remote endpoint.

        :param data: value sent as the ``input`` field of the request.
        :return: ``numpy`` array built from the first embedding in the reply.
        :raises requests.exceptions.RequestException: on connection failure,
            timeout, or an HTTP error status.
        """
        json_data = {
            'input': data,
            'model': self.model,
        }

        # Without a timeout a stalled server would block the caller forever.
        response = requests.post(
            self.base_url,
            headers=self.headers,
            json=json_data,
            timeout=self.timeout,
        )
        # Fail with the HTTP status instead of an opaque KeyError/JSON error
        # when the server answers with an error body.
        response.raise_for_status()
        embedding = response.json()['data'][0]['embedding']
        return np.array(embedding)

    @property
    def dimension(self):
        """Embedding dimension.

        :return: embedding dimension
        """
        return self.__dimension

0 commit comments

Comments
 (0)