Commit 877dd74

refactor: Removed type ignore

1 parent b5c6b62
File tree

5 files changed: +25 -25 lines changed

fastembed/common/onnx_model.py
fastembed/late_interaction/colbert.py
fastembed/rerank/cross_encoder/onnx_text_model.py
fastembed/sparse/bm42.py
fastembed/text/onnx_text_model.py

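The commit removes "# type: ignore" comments across fastembed's ONNX text models. A plausible reason, assumed here rather than stated in the commit, is that the tokenizer and model attributes now carry precise type hints, so a type checker can resolve calls like encode_batch() and run() without per-call suppressions. A minimal sketch of that idea (class name and annotations are hypothetical):

import onnxruntime as ort
from tokenizers import Tokenizer

class OnnxTextModelSketch:
    # Hypothetical annotations for illustration: once these attributes
    # are typed precisely (instead of Any/Optional), mypy can check the
    # calls below without "# type: ignore".
    tokenizer: Tokenizer
    model: ort.InferenceSession

    def tokenize(self, documents: list[str]):
        return self.tokenizer.encode_batch(documents)  # type-checks cleanly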

fastembed/common/onnx_model.py

Lines changed: 1 addition & 1 deletion
@@ -90,7 +90,7 @@ def _load_onnx_model(
             str(model_path), providers=onnx_providers, sess_options=so
         )
         if "CUDAExecutionProvider" in requested_provider_names:
-            current_providers = self.model.get_providers()  # type: ignore
+            current_providers = self.model.get_providers()
             if "CUDAExecutionProvider" not in current_providers:
                 warnings.warn(
                     f"Attempt to set CUDAExecutionProvider failed. Current providers: {current_providers}."

fastembed/late_interaction/colbert.py

Lines changed: 12 additions & 12 deletions
@@ -88,26 +88,26 @@ def tokenize(self, documents: list[str], is_doc: bool = True, **kwargs: Any) ->
         )
 
     def _tokenize_query(self, query: str) -> list[Encoding]:
-        encoded = self.tokenizer.encode_batch([query])  # type: ignore
+        encoded = self.tokenizer.encode_batch([query])
         # colbert authors recommend to pad queries with [MASK] tokens for query augmentation to improve performance
         if len(encoded[0].ids) < self.MIN_QUERY_LENGTH:
             prev_padding = None
-            if self.tokenizer.padding:  # type: ignore
-                prev_padding = self.tokenizer.padding  # type: ignore
-            self.tokenizer.enable_padding(  # type: ignore
+            if self.tokenizer.padding:
+                prev_padding = self.tokenizer.padding
+            self.tokenizer.enable_padding(
                 pad_token=self.MASK_TOKEN,
                 pad_id=self.mask_token_id,
                 length=self.MIN_QUERY_LENGTH,
             )
-            encoded = self.tokenizer.encode_batch([query])  # type: ignore
+            encoded = self.tokenizer.encode_batch([query])
             if prev_padding is None:
-                self.tokenizer.no_padding()  # type: ignore
+                self.tokenizer.no_padding()
             else:
-                self.tokenizer.enable_padding(**prev_padding)  # type: ignore
+                self.tokenizer.enable_padding(**prev_padding)
         return encoded
 
     def _tokenize_documents(self, documents: list[str]) -> list[Encoding]:
-        encoded = self.tokenizer.encode_batch(documents)  # type: ignore
+        encoded = self.tokenizer.encode_batch(documents)
         return encoded
 
     @classmethod
@@ -195,14 +195,14 @@ def load_onnx_model(self) -> None:
             device_id=self.device_id,
         )
         self.mask_token_id = self.special_token_to_id[self.MASK_TOKEN]
-        self.pad_token_id = self.tokenizer.padding["pad_id"]  # type: ignore
+        self.pad_token_id = self.tokenizer.padding["pad_id"]
         self.skip_list = {
-            self.tokenizer.encode(symbol, add_special_tokens=False).ids[0]  # type: ignore
+            self.tokenizer.encode(symbol, add_special_tokens=False).ids[0]
             for symbol in string.punctuation
         }
-        current_max_length = self.tokenizer.truncation["max_length"]  # type: ignore
+        current_max_length = self.tokenizer.truncation["max_length"]
         # ensure not to overflow after adding document-marker
-        self.tokenizer.enable_truncation(max_length=current_max_length - 1)  # type: ignore
+        self.tokenizer.enable_truncation(max_length=current_max_length - 1)
 
     def embed(
         self,
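
The save/restore dance around enable_padding uses the Hugging Face tokenizers API: the padding property returns the current configuration as a dict (or None when padding is off), and that dict can be fed straight back into enable_padding(**prev_padding). A standalone sketch of the [MASK]-padding query augmentation, assuming a BERT-style tokenizer from the hub; MIN_QUERY_LENGTH = 32 is illustrative, not taken from this commit:

from tokenizers import Tokenizer

tokenizer = Tokenizer.from_pretrained("bert-base-uncased")

MIN_QUERY_LENGTH = 32
mask_token_id = tokenizer.token_to_id("[MASK]")

encoded = tokenizer.encode_batch(["what is late interaction?"])
if len(encoded[0].ids) < MIN_QUERY_LENGTH:
    prev_padding = tokenizer.padding  # current settings as a dict, or None
    # pad short queries with [MASK] instead of [PAD] (query augmentation)
    tokenizer.enable_padding(
        pad_token="[MASK]",
        pad_id=mask_token_id,
        length=MIN_QUERY_LENGTH,
    )
    encoded = tokenizer.encode_batch(["what is late interaction?"])
    # restore whatever padding configuration was active before
    if prev_padding is None:
        tokenizer.no_padding()
    else:
        tokenizer.enable_padding(**prev_padding)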

fastembed/rerank/cross_encoder/onnx_text_model.py

Lines changed: 4 additions & 4 deletions
@@ -45,10 +45,10 @@ def _load_onnx_model(
         self.tokenizer, _ = load_tokenizer(model_dir=model_dir)
 
     def tokenize(self, pairs: list[tuple[str, str]], **_: Any) -> list[Encoding]:
-        return self.tokenizer.encode_batch(pairs)  # type: ignore
+        return self.tokenizer.encode_batch(pairs)
 
     def _build_onnx_input(self, tokenized_input: list[Encoding]) -> dict[str, NumpyArray]:
-        input_names: set[str] = {node.name for node in self.model.get_inputs()}  # type: ignore
+        input_names: set[str] = {node.name for node in self.model.get_inputs()}
         inputs: dict[str, NumpyArray] = {
             "input_ids": np.array([enc.ids for enc in tokenized_input], dtype=np.int64),
         }
@@ -70,7 +70,7 @@ def onnx_embed_pairs(self, pairs: list[tuple[str, str]], **kwargs: Any) -> OnnxO
         tokenized_input = self.tokenize(pairs, **kwargs)
         inputs = self._build_onnx_input(tokenized_input)
         onnx_input = self._preprocess_onnx_input(inputs, **kwargs)
-        outputs = self.model.run(self.ONNX_OUTPUT_NAMES, onnx_input)  # type: ignore
+        outputs = self.model.run(self.ONNX_OUTPUT_NAMES, onnx_input)
         relevant_output = outputs[0]
         scores: NumpyArray = relevant_output[:, 0]
         return OnnxOutputContext(model_output=scores)
@@ -98,7 +98,7 @@ def _rerank_pairs(
         is_small = False
 
         if isinstance(pairs, tuple):
-            pairs = [pairs]  # type: ignore
+            pairs = [pairs]
             is_small = True
 
         if isinstance(pairs, list):
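
The pair-scoring path above can be exercised standalone: tokenize (query, document) pairs, feed only the inputs the ONNX graph declares, run the session, and read the first logit column as the relevance score. A sketch with placeholder file paths:

import numpy as np
import onnxruntime as ort
from tokenizers import Tokenizer

session = ort.InferenceSession("reranker.onnx")    # placeholder path
tokenizer = Tokenizer.from_file("tokenizer.json")  # placeholder path

pairs = [("what is a cross encoder?", "A cross encoder scores text pairs.")]
encodings = tokenizer.encode_batch(pairs)

# only pass inputs the graph actually declares
input_names = {node.name for node in session.get_inputs()}
onnx_input = {"input_ids": np.array([e.ids for e in encodings], dtype=np.int64)}
if "attention_mask" in input_names:
    onnx_input["attention_mask"] = np.array(
        [e.attention_mask for e in encodings], dtype=np.int64
    )
if "token_type_ids" in input_names:
    onnx_input["token_type_ids"] = np.array(
        [e.type_ids for e in encodings], dtype=np.int64
    )

outputs = session.run(None, onnx_input)  # None requests all outputs
scores = outputs[0][:, 0]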

fastembed/sparse/bm42.py

Lines changed: 3 additions & 3 deletions
@@ -139,7 +139,7 @@ def load_onnx_model(self) -> None:
             cuda=self.cuda,
             device_id=self.device_id,
         )
-        for token, idx in self.tokenizer.get_vocab().items():  # type: ignore
+        for token, idx in self.tokenizer.get_vocab().items():
             self.invert_vocab[idx] = token
         self.special_tokens = set(self.special_token_to_id.keys())
         self.special_tokens_ids = set(self.special_token_to_id.values())
@@ -177,7 +177,7 @@ def _reconstruct_bpe(
         acc: str = ""
         acc_idx: list[int] = []
 
-        continuing_subword_prefix = self.tokenizer.model.continuing_subword_prefix  # type: ignore
+        continuing_subword_prefix = self.tokenizer.model.continuing_subword_prefix
         continuing_subword_prefix_len = len(continuing_subword_prefix)
 
         for idx, token in bpe_tokens:
@@ -324,7 +324,7 @@ def query_embed(
         self.load_onnx_model()
 
         for text in query:
-            encoded = self.tokenizer.encode(text)  # type: ignore
+            encoded = self.tokenizer.encode(text)
             document_tokens_with_ids = enumerate(encoded.tokens)
             reconstructed = self._reconstruct_bpe(document_tokens_with_ids)
             filtered = self._filter_pair_tokens(reconstructed)
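
_reconstruct_bpe leans on the tokenizer model's continuing_subword_prefix (typically "##" for WordPiece) to merge subword pieces back into whole words before BM42 scoring. A simplified standalone sketch of the idea; the real method also carries token indices along:

def reconstruct_words(tokens: list[str], prefix: str = "##") -> list[str]:
    # merge tokens that continue the previous word, e.g.
    # ["emb", "##ed", "##ding", "model"] -> ["embedding", "model"]
    words: list[str] = []
    for token in tokens:
        if token.startswith(prefix) and words:
            words[-1] += token[len(prefix):]
        else:
            words.append(token)
    return words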

fastembed/text/onnx_text_model.py

Lines changed: 5 additions & 5 deletions
@@ -60,17 +60,17 @@ def load_onnx_model(self) -> None:
         raise NotImplementedError("Subclasses must implement this method")
 
     def tokenize(self, documents: list[str], **kwargs: Any) -> list[Encoding]:
-        return self.tokenizer.encode_batch(documents)  # type: ignore
+        return self.tokenizer.encode_batch(documents)
 
     def onnx_embed(
         self,
         documents: list[str],
         **kwargs: Any,
     ) -> OnnxOutputContext:
         encoded = self.tokenize(documents, **kwargs)
-        input_ids = np.array([e.ids for e in encoded])  # type: ignore
-        attention_mask = np.array([e.attention_mask for e in encoded])  # type: ignore
-        input_names = {node.name for node in self.model.get_inputs()}  # type: ignore
+        input_ids = np.array([e.ids for e in encoded])
+        attention_mask = np.array([e.attention_mask for e in encoded])
+        input_names = {node.name for node in self.model.get_inputs()}
         onnx_input: dict[str, NumpyArray] = {
             "input_ids": np.array(input_ids, dtype=np.int64),
         }
@@ -82,7 +82,7 @@ def onnx_embed(
             )
         onnx_input = self._preprocess_onnx_input(onnx_input, **kwargs)
 
-        model_output = self.model.run(self.ONNX_OUTPUT_NAMES, onnx_input)  # type: ignore
+        model_output = self.model.run(self.ONNX_OUTPUT_NAMES, onnx_input)
         return OnnxOutputContext(
             model_output=model_output[0],
             attention_mask=onnx_input.get("attention_mask", attention_mask),
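
The OnnxOutputContext returned here carries the attention mask next to the raw token states, which downstream pooling needs. A sketch of the common mean-pooling step over padded positions (an assumption about downstream use, not part of this commit), with illustrative shapes:

import numpy as np

model_output = np.random.rand(2, 4, 3).astype(np.float32)  # (batch, seq, hidden)
attention_mask = np.array([[1, 1, 1, 0], [1, 1, 0, 0]], dtype=np.int64)

mask = attention_mask[:, :, None]           # broadcast over the hidden dim
summed = (model_output * mask).sum(axis=1)  # zero out padded positions
counts = mask.sum(axis=1)                   # real-token count per row
embeddings = summed / counts                # (batch, hidden) mean-pooled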
