Skip to content

Commit 2fd1d78

Browse files
sjrl, anakin87, silvanocerza
committed
feat: Adding StringJoiner (#8357)
* Adding StringJoiner
* Release notes
* Remove typing
* Remove unused import
* Try to fix header
* Fix one test
* Add to docs, move test to behavioral pipeline test
* Undo changes
* Fix test
* Update haystack/components/joiners/string_joiner.py
  Co-authored-by: Stefano Fiorucci <[email protected]>
* Update haystack/components/joiners/string_joiner.py
  Co-authored-by: Stefano Fiorucci <[email protected]>
* Provide usage example
* Apply suggestions from code review
  Co-authored-by: Stefano Fiorucci <[email protected]>
---------
Co-authored-by: Stefano Fiorucci <[email protected]>
Co-authored-by: Silvano Cerza <[email protected]>
1 parent ab2eb8e commit 2fd1d78

File tree

8 files changed

+135
-4
lines changed

8 files changed

+135
-4
lines changed

Diff for: README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ pip install haystack-ai
2424

2525
Install from the `main` branch to try the newest features:
2626
```sh
27-
pip install git+https://github.com/deepset-ai/haystack.git@main
27+
pip install git+https://github.com/deepset-ai/haystack.git@main
2828
```
2929

3030
Haystack supports multiple installation methods including Docker images. For a comprehensive guide please refer

Diff for: docs/pydoc/config/joiners_api.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
loaders:
22
- type: haystack_pydoc_tools.loaders.CustomPythonLoader
33
search_path: [../../../haystack/components/joiners]
4-
modules: ["document_joiner", "branch", "answer_joiner"]
4+
modules: ["document_joiner", "branch", "answer_joiner", "string_joiner"]
55
ignore_when_discovered: ["__init__"]
66
processors:
77
- type: filter

Diff for: haystack/components/joiners/__init__.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,6 @@
55
from .answer_joiner import AnswerJoiner
66
from .branch import BranchJoiner
77
from .document_joiner import DocumentJoiner
8+
from .string_joiner import StringJoiner
89

9-
__all__ = ["DocumentJoiner", "BranchJoiner", "AnswerJoiner"]
10+
__all__ = ["DocumentJoiner", "BranchJoiner", "AnswerJoiner", "StringJoiner"]

Diff for: haystack/components/joiners/string_joiner.py

+59
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
# SPDX-FileCopyrightText: 2022-present deepset GmbH <[email protected]>
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
from typing import List
6+
7+
from haystack import component, logging
8+
from haystack.core.component.types import Variadic
9+
10+
logger = logging.getLogger(__name__)
11+
12+
13+
@component
14+
class StringJoiner:
15+
"""
16+
Component to join strings from different components to a list of strings.
17+
18+
### Usage example
19+
20+
```python
21+
from haystack.components.joiners import StringJoiner
22+
from haystack.components.builders import PromptBuilder
23+
from haystack.core.pipeline import Pipeline
24+
25+
from haystack.components.generators.chat import OpenAIChatGenerator
26+
from haystack.dataclasses import ChatMessage
27+
28+
string_1 = "What's Natural Language Processing?"
29+
string_2 = "What is life?"
30+
31+
pipeline = Pipeline()
32+
pipeline.add_component("prompt_builder_1", PromptBuilder("Builder 1: {{query}}"))
33+
pipeline.add_component("prompt_builder_2", PromptBuilder("Builder 2: {{query}}"))
34+
pipeline.add_component("string_joiner", StringJoiner())
35+
36+
pipeline.connect("prompt_builder_1.prompt", "string_joiner.strings")
37+
pipeline.connect("prompt_builder_2.prompt", "string_joiner.strings")
38+
39+
print(pipeline.run(data={"prompt_builder_1": {"query": string_1}, "prompt_builder_2": {"query": string_2}}))
40+
41+
>> {"string_joiner": {"strings": ["Builder 1: What's Natural Language Processing?", "Builder 2: What is life?"]}}
42+
```
43+
"""
44+
45+
@component.output_types(strings=List[str])
46+
def run(self, strings: Variadic[str]):
47+
"""
48+
Joins strings into a list of strings
49+
50+
:param strings:
51+
strings from different components
52+
53+
:returns:
54+
A dictionary with the following keys:
55+
- `strings`: Merged list of strings
56+
"""
57+
58+
out_strings = list(strings)
59+
return {"strings": out_strings}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
features:
3+
- |
4+
Added the `StringJoiner` component, which joins strings from different components into a list of strings.

Diff for: test/components/joiners/test_string_joiner.py

+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# SPDX-FileCopyrightText: 2022-present deepset GmbH <[email protected]>
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
from haystack.core.serialization import component_from_dict, component_to_dict
6+
from haystack.components.joiners.string_joiner import StringJoiner
7+
8+
9+
class TestStringJoiner:
10+
def test_init(self):
11+
joiner = StringJoiner()
12+
assert isinstance(joiner, StringJoiner)
13+
14+
def test_to_dict(self):
15+
joiner = StringJoiner()
16+
data = component_to_dict(joiner, name="string_joiner")
17+
assert data == {"type": "haystack.components.joiners.string_joiner.StringJoiner", "init_parameters": {}}
18+
19+
def test_from_dict(self):
20+
data = {"type": "haystack.components.joiners.string_joiner.StringJoiner", "init_parameters": {}}
21+
string_joiner = component_from_dict(StringJoiner, data=data, name="string_joiner")
22+
assert isinstance(string_joiner, StringJoiner)
23+
24+
def test_empty_list(self):
25+
joiner = StringJoiner()
26+
result = joiner.run([])
27+
assert result == {"strings": []}
28+
29+
def test_single_string(self):
30+
joiner = StringJoiner()
31+
result = joiner.run("a")
32+
assert result == {"strings": ["a"]}
33+
34+
def test_two_strings(self):
35+
joiner = StringJoiner()
36+
result = joiner.run(["a", "b"])
37+
assert result == {"strings": ["a", "b"]}

Diff for: test/core/pipeline/features/pipeline_run.feature

+1
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ Feature: Pipeline running
4343
| that is linear and a component in the middle receives optional input from other components and input from the user |
4444
| that has a loop in the middle |
4545
| that has variadic component that receives a conditional input |
46+
| that has a string variadic component |
4647

4748
Scenario Outline: Running a bad Pipeline
4849
Given a pipeline <kind>

Diff for: test/core/pipeline/features/test_run.py

+30-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter
1414
from haystack.components.retrievers.in_memory import InMemoryBM25Retriever
1515
from haystack.document_stores.in_memory import InMemoryDocumentStore
16-
from haystack.components.joiners import BranchJoiner, DocumentJoiner, AnswerJoiner
16+
from haystack.components.joiners import BranchJoiner, DocumentJoiner, AnswerJoiner, StringJoiner
1717
from haystack.testing.sample_components import (
1818
Accumulate,
1919
AddFixedValue,
@@ -2195,3 +2195,32 @@ def run(self, documents: List[Document]):
21952195
],
21962196
),
21972197
]
2198+
2199+
2200+
@given("a pipeline that has a string variadic component", target_fixture="pipeline_data")
2201+
def that_has_a_string_variadic_component():
2202+
string_1 = "What's Natural Language Processing?"
2203+
string_2 = "What's is life?"
2204+
2205+
pipeline = Pipeline()
2206+
pipeline.add_component("prompt_builder_1", PromptBuilder("Builder 1: {{query}}"))
2207+
pipeline.add_component("prompt_builder_2", PromptBuilder("Builder 2: {{query}}"))
2208+
pipeline.add_component("string_joiner", StringJoiner())
2209+
2210+
pipeline.connect("prompt_builder_1.prompt", "string_joiner.strings")
2211+
pipeline.connect("prompt_builder_2.prompt", "string_joiner.strings")
2212+
2213+
return (
2214+
pipeline,
2215+
[
2216+
PipelineRunData(
2217+
inputs={"prompt_builder_1": {"query": string_1}, "prompt_builder_2": {"query": string_2}},
2218+
expected_outputs={
2219+
"string_joiner": {
2220+
"strings": ["Builder 1: What's Natural Language Processing?", "Builder 2: What's is life?"]
2221+
}
2222+
},
2223+
expected_run_order=["prompt_builder_1", "prompt_builder_2", "string_joiner"],
2224+
)
2225+
],
2226+
)

0 commit comments

Comments
 (0)