Skip to content

Commit 248dccb

Browse files
chore: fixing pylint issues (#8610)
* initial import
* fixing internal methods
* fixing some internal methods
* modify _preprocess
* fixed internal methods

---------

Co-authored-by: anakin87 <[email protected]>
1 parent 6f983a2 commit 248dccb

20 files changed

+58
-33
lines changed

Diff for: haystack/components/classifiers/zero_shot_document_classifier.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ class TransformersZeroShotDocumentClassifier:
7373
```
7474
"""
7575

76-
def __init__(
76+
def __init__( # pylint: disable=too-many-positional-arguments
7777
self,
7878
model: str,
7979
labels: List[str],

Diff for: haystack/components/embedders/azure_document_embedder.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ class AzureOpenAIDocumentEmbedder:
3434
```
3535
"""
3636

37-
def __init__( # noqa: PLR0913 (too-many-arguments)
37+
def __init__( # noqa: PLR0913 (too-many-arguments) # pylint: disable=too-many-positional-arguments
3838
self,
3939
azure_endpoint: Optional[str] = None,
4040
api_version: Optional[str] = "2023-05-15",

Diff for: haystack/components/embedders/azure_text_embedder.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ class AzureOpenAITextEmbedder:
3333
```
3434
"""
3535

36-
def __init__(
36+
def __init__( # pylint: disable=too-many-positional-arguments
3737
self,
3838
azure_endpoint: Optional[str] = None,
3939
api_version: Optional[str] = "2023-05-15",

Diff for: haystack/components/embedders/openai_text_embedder.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ class OpenAITextEmbedder:
3838
```
3939
"""
4040

41-
def __init__(
41+
def __init__( # pylint: disable=too-many-positional-arguments
4242
self,
4343
api_key: Secret = Secret.from_env_var("OPENAI_API_KEY"),
4444
model: str = "text-embedding-ada-002",

Diff for: haystack/components/evaluators/context_relevance.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ class ContextRelevanceEvaluator(LLMEvaluator):
9595
```
9696
"""
9797

98-
def __init__(
98+
def __init__( # pylint: disable=too-many-positional-arguments
9999
self,
100100
examples: Optional[List[Dict[str, Any]]] = None,
101101
progress_bar: bool = True,

Diff for: haystack/components/evaluators/faithfulness.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ class FaithfulnessEvaluator(LLMEvaluator):
8282
```
8383
"""
8484

85-
def __init__(
85+
def __init__( # pylint: disable=too-many-positional-arguments
8686
self,
8787
examples: Optional[List[Dict[str, Any]]] = None,
8888
progress_bar: bool = True,

Diff for: haystack/components/evaluators/llm_evaluator.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ class LLMEvaluator:
4747
```
4848
"""
4949

50-
def __init__(
50+
def __init__( # pylint: disable=too-many-positional-arguments
5151
self,
5252
instructions: str,
5353
inputs: List[Tuple[str, Type[List]]],

Diff for: haystack/components/generators/azure.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ class AzureOpenAIGenerator(OpenAIGenerator):
5555
"""
5656

5757
# pylint: disable=super-init-not-called
58-
def __init__(
58+
def __init__( # pylint: disable=too-many-positional-arguments
5959
self,
6060
azure_endpoint: Optional[str] = None,
6161
api_version: Optional[str] = "2023-05-15",

Diff for: haystack/components/generators/chat/azure.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ class AzureOpenAIChatGenerator(OpenAIChatGenerator):
6262
"""
6363

6464
# pylint: disable=super-init-not-called
65-
def __init__(
65+
def __init__( # pylint: disable=too-many-positional-arguments
6666
self,
6767
azure_endpoint: Optional[str] = None,
6868
api_version: Optional[str] = "2023-05-15",

Diff for: haystack/components/generators/chat/hugging_face_local.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ class HuggingFaceLocalChatGenerator:
7171
```
7272
"""
7373

74-
def __init__(
74+
def __init__( # pylint: disable=too-many-positional-arguments
7575
self,
7676
model: str = "HuggingFaceH4/zephyr-7b-beta",
7777
task: Optional[Literal["text-generation", "text2text-generation"]] = None,
@@ -295,7 +295,7 @@ def run(self, messages: List[ChatMessage], generation_kwargs: Optional[Dict[str,
295295
]
296296
return {"replies": chat_messages}
297297

298-
def create_message(
298+
def create_message( # pylint: disable=too-many-positional-arguments
299299
self,
300300
text: str,
301301
index: int,

Diff for: haystack/components/rankers/meta_field.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ class MetaFieldRanker:
3838
```
3939
"""
4040

41-
def __init__(
41+
def __init__( # pylint: disable=too-many-positional-arguments
4242
self,
4343
meta_field: str,
4444
weight: float = 1.0,
@@ -106,6 +106,7 @@ def __init__(
106106

107107
def _validate_params(
108108
self,
109+
*,
109110
weight: float,
110111
top_k: Optional[int],
111112
ranking_mode: Literal["reciprocal_rank_fusion", "linear_score"],
@@ -156,7 +157,7 @@ def _validate_params(
156157
)
157158

158159
@component.output_types(documents=List[Document])
159-
def run(
160+
def run( # pylint: disable=too-many-positional-arguments
160161
self,
161162
documents: List[Document],
162163
top_k: Optional[int] = None,

Diff for: haystack/components/readers/extractive.py

+24-10
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ class ExtractiveReader:
5151
```
5252
"""
5353

54-
def __init__(
54+
def __init__( # pylint: disable=too-many-positional-arguments
5555
self,
5656
model: Union[Path, str] = "deepset/roberta-base-squad2-distilled",
5757
device: Optional[ComponentDevice] = None,
@@ -192,8 +192,9 @@ def warm_up(self):
192192
)
193193
self.device = ComponentDevice.from_multiple(device_map=DeviceMap.from_hf(self.model.hf_device_map))
194194

195+
@staticmethod
195196
def _flatten_documents(
196-
self, queries: List[str], documents: List[List[Document]]
197+
queries: List[str], documents: List[List[Document]]
197198
) -> Tuple[List[str], List[Document], List[int]]:
198199
"""
199200
Flattens queries and Documents so all query-document pairs are arranged along one batch axis.
@@ -203,8 +204,8 @@ def _flatten_documents(
203204
query_ids = [i for i, documents_ in enumerate(documents) for _ in documents_]
204205
return flattened_queries, flattened_documents, query_ids
205206

206-
def _preprocess(
207-
self, queries: List[str], documents: List[Document], max_seq_length: int, query_ids: List[int], stride: int
207+
def _preprocess( # pylint: disable=too-many-positional-arguments
208+
self, *, queries: List[str], documents: List[Document], max_seq_length: int, query_ids: List[int], stride: int
208209
) -> Tuple["torch.Tensor", "torch.Tensor", "torch.Tensor", List["Encoding"], List[int], List[int]]:
209210
"""
210211
Splits and tokenizes Documents and preserves structures by returning mappings to query and Document IDs.
@@ -256,6 +257,7 @@ def _preprocess(
256257

257258
def _postprocess(
258259
self,
260+
*,
259261
start: "torch.Tensor",
260262
end: "torch.Tensor",
261263
sequence_ids: "torch.Tensor",
@@ -285,9 +287,9 @@ def _postprocess(
285287
masked_logits = torch.where(mask, logits, -torch.inf)
286288
probabilities = torch.sigmoid(masked_logits * self.calibration_factor)
287289

288-
flat_probabilities = probabilities.flatten(-2, -1) # necessary for topk
290+
flat_probabilities = probabilities.flatten(-2, -1) # necessary for top-k
289291

290-
# topk can return invalid candidates as well if answers_per_seq > num_valid_candidates
292+
# top-k can return invalid candidates as well if answers_per_seq > num_valid_candidates
291293
# We only keep probability > 0 candidates later on
292294
candidates = torch.topk(flat_probabilities, answers_per_seq)
293295
seq_length = logits.shape[-1]
@@ -343,6 +345,7 @@ def _add_answer_page_number(self, answer: ExtractedAnswer) -> ExtractedAnswer:
343345

344346
def _nest_answers(
345347
self,
348+
*,
346349
start: List[List[int]],
347350
end: List[List[int]],
348351
probabilities: "torch.Tensor",
@@ -526,7 +529,7 @@ def deduplicate_by_overlap(
526529
return deduplicated_answers
527530

528531
@component.output_types(answers=List[ExtractedAnswer])
529-
def run(
532+
def run( # pylint: disable=too-many-positional-arguments
530533
self,
531534
query: str,
532535
documents: List[Document],
@@ -594,9 +597,15 @@ def run(
594597
no_answer = no_answer if no_answer is not None else self.no_answer
595598
overlap_threshold = overlap_threshold or self.overlap_threshold
596599

597-
flattened_queries, flattened_documents, query_ids = self._flatten_documents(queries, nested_documents)
600+
flattened_queries, flattened_documents, query_ids = ExtractiveReader._flatten_documents(
601+
queries, nested_documents
602+
)
598603
input_ids, attention_mask, sequence_ids, encodings, query_ids, document_ids = self._preprocess(
599-
flattened_queries, flattened_documents, max_seq_length, query_ids, stride
604+
queries=flattened_queries,
605+
documents=flattened_documents,
606+
max_seq_length=max_seq_length,
607+
query_ids=query_ids,
608+
stride=stride,
600609
)
601610

602611
num_batches = math.ceil(input_ids.shape[0] / max_batch_size) if max_batch_size else 1
@@ -625,7 +634,12 @@ def run(
625634
end_logits = torch.cat(end_logits_list)
626635

627636
start, end, probabilities = self._postprocess(
628-
start_logits, end_logits, sequence_ids, attention_mask, answers_per_seq, encodings
637+
start=start_logits,
638+
end=end_logits,
639+
sequence_ids=sequence_ids,
640+
attention_mask=attention_mask,
641+
answers_per_seq=answers_per_seq,
642+
encodings=encodings,
629643
)
630644

631645
answers = self._nest_answers(

Diff for: haystack/components/routers/transformers_text_router.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ class TransformersTextRouter:
7272
```
7373
"""
7474

75-
def __init__(
75+
def __init__( # pylint: disable=too-many-positional-arguments
7676
self,
7777
model: str,
7878
labels: Optional[List[str]] = None,

Diff for: haystack/components/routers/zero_shot_text_router.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ class TransformersZeroShotTextRouter:
9595
```
9696
"""
9797

98-
def __init__(
98+
def __init__( # pylint: disable=too-many-positional-arguments
9999
self,
100100
labels: List[str],
101101
multi_label: bool = False,

Diff for: haystack/document_stores/in_memory/document_store.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ class InMemoryDocumentStore:
5858
Stores data in-memory. It's ephemeral and cannot be saved to disk.
5959
"""
6060

61-
def __init__(
61+
def __init__( # pylint: disable=too-many-positional-arguments
6262
self,
6363
bm25_tokenization_regex: str = r"(?u)\b\w\w+\b",
6464
bm25_algorithm: Literal["BM25Okapi", "BM25L", "BM25Plus"] = "BM25L",
@@ -541,7 +541,7 @@ def bm25_retrieval(
541541

542542
return return_documents
543543

544-
def embedding_retrieval(
544+
def embedding_retrieval( # pylint: disable=too-many-positional-arguments
545545
self,
546546
query_embedding: List[float],
547547
filters: Optional[Dict[str, Any]] = None,

Diff for: haystack/logging.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ def patch_make_records_to_use_kwarg_string_interpolation(original_make_records:
188188
"""A decorator to ensure string interpolation is used."""
189189

190190
@functools.wraps(original_make_records)
191-
def _wrapper(name, level, fn, lno, msg, args, exc_info, func=None, extra=None, sinfo=None) -> Any:
191+
def _wrapper(name, level, fn, lno, msg, args, exc_info, func=None, extra=None, sinfo=None) -> Any: # pylint: disable=too-many-positional-arguments
192192
safe_extra = extra or {}
193193
try:
194194
interpolated_msg = msg.format(**safe_extra)

Diff for: haystack/testing/factory.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ def to_dict(self) -> Dict[str, Any]:
124124
return cls
125125

126126

127-
def component_class(
127+
def component_class( # pylint: disable=too-many-positional-arguments
128128
name: str,
129129
input_types: Optional[Dict[str, Any]] = None,
130130
output_types: Optional[Dict[str, Any]] = None,

Diff for: haystack/utils/hf.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ def resolve_hf_device_map(device: Optional[ComponentDevice], model_kwargs: Optio
166166
return model_kwargs
167167

168168

169-
def resolve_hf_pipeline_kwargs(
169+
def resolve_hf_pipeline_kwargs( # pylint: disable=too-many-positional-arguments
170170
huggingface_pipeline_kwargs: Dict[str, Any],
171171
model: str,
172172
task: Optional[str],

Diff for: pyproject.toml

+1
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,7 @@ max-locals = 45 # Default is 15
242242
max-module-lines = 2468 # Default is 1000
243243
max-nested-blocks = 9 # Default is 5
244244
max-statements = 206 # Default is 50
245+
245246
[tool.pylint.'SIMILARITIES']
246247
min-similarity-lines = 6
247248

Diff for: test/components/readers/test_extractive.py

+12-3
Original file line numberDiff line numberDiff line change
@@ -321,7 +321,7 @@ def test_flatten_documents(mock_reader: ExtractiveReader):
321321

322322
def test_preprocess(mock_reader: ExtractiveReader):
323323
_, _, seq_ids, _, query_ids, doc_ids = mock_reader._preprocess(
324-
example_queries * 3, example_documents[0], 384, [1, 1, 1], 0
324+
queries=example_queries * 3, documents=example_documents[0], max_seq_length=384, query_ids=[1, 1, 1], stride=0
325325
)
326326
expected_seq_ids = torch.full((3, 384), -1, dtype=torch.int)
327327
expected_seq_ids[:, :16] = 0
@@ -333,7 +333,11 @@ def test_preprocess(mock_reader: ExtractiveReader):
333333

334334
def test_preprocess_splitting(mock_reader: ExtractiveReader):
335335
_, _, seq_ids, _, query_ids, doc_ids = mock_reader._preprocess(
336-
example_queries * 4, example_documents[0] + [Document(content="a" * 64)], 96, [1, 1, 1, 1], 0
336+
queries=example_queries * 4,
337+
documents=example_documents[0] + [Document(content="a" * 64)],
338+
max_seq_length=96,
339+
query_ids=[1, 1, 1, 1],
340+
stride=0,
337341
)
338342
assert seq_ids.shape[0] == 5
339343
assert query_ids == [1, 1, 1, 1, 1]
@@ -362,7 +366,12 @@ def test_postprocess(mock_reader: ExtractiveReader):
362366
encoding.token_to_chars = lambda i: (int(i), int(i) + 1)
363367

364368
start_candidates, end_candidates, probs = mock_reader._postprocess(
365-
start, end, sequence_ids, attention_mask, 3, [encoding, encoding]
369+
start=start,
370+
end=end,
371+
sequence_ids=sequence_ids,
372+
attention_mask=attention_mask,
373+
answers_per_seq=3,
374+
encodings=[encoding, encoding],
366375
)
367376

368377
assert len(start_candidates) == len(end_candidates) == len(probs) == 2

0 commit comments

Comments
 (0)