Skip to content

Commit 34bd31e

Browse files
docs: fixing RecursiveSplitter pydoc markdown rendering
1 parent ed40d9f commit 34bd31e

File tree

1 file changed

+8
-8
lines changed

1 file changed

+8
-8
lines changed

Diff for: haystack/components/preprocessors/recursive_splitter.py

+8 -8
Original file line number | Diff line number | Diff line change
@@ -34,20 +34,20 @@ class RecursiveDocumentSplitter:
3434
from haystack import Document
3535
from haystack.components.preprocessors import RecursiveDocumentSplitter
3636
37-
chunker = RecursiveDocumentSplitter(split_length=260, split_overlap=0, separators=["\n\n", "\n", ".", " "])
38-
text = '''Artificial intelligence (AI) - Introduction
37+
chunker = RecursiveDocumentSplitter(split_length=260, split_overlap=0, separators=["\\n\\n", "\\n", ".", " "])
38+
text = ('''Artificial intelligence (AI) - Introduction
3939
4040
AI, in its broadest sense, is intelligence exhibited by machines, particularly computer systems.
41-
AI technology is widely used throughout industry, government, and science. Some high-profile applications include advanced web search engines; recommendation systems; interacting via human speech; autonomous vehicles; generative and creative tools; and superhuman play and analysis in strategy games.'''
41+
AI technology is widely used throughout industry, government, and science. Some high-profile applications include advanced web search engines; recommendation systems; interacting via human speech; autonomous vehicles; generative and creative tools; and superhuman play and analysis in strategy games.''')
4242
chunker.warm_up()
4343
doc = Document(content=text)
4444
doc_chunks = chunker.run([doc])
4545
print(doc_chunks["documents"])
4646
>[
47-
>Document(id=..., content: 'Artificial intelligence (AI) - Introduction\n\n', meta: {'original_id': '65167a9823dd883de577e828ca4fd529e6f7241f0ff616acfce454d808478951', 'split_id': 0, 'split_idx_start': 0, '_split_overlap': []})
48-
>Document(id=..., content: 'AI, in its broadest sense, is intelligence exhibited by machines, particularly computer systems.\n', meta: {'original_id': '65167a9823dd883de577e828ca4fd529e6f7241f0ff616acfce454d808478951', 'split_id': 1, 'split_idx_start': 45, '_split_overlap': []})
49-
>Document(id=..., content: 'AI technology is widely used throughout industry, government, and science.', meta: {'original_id': '65167a9823dd883de577e828ca4fd529e6f7241f0ff616acfce454d808478951', 'split_id': 2, 'split_idx_start': 142, '_split_overlap': []})
50-
>Document(id=..., content: ' Some high-profile applications include advanced web search engines; recommendation systems; interac...', meta: {'original_id': '65167a9823dd883de577e828ca4fd529e6f7241f0ff616acfce454d808478951', 'split_id': 3, 'split_idx_start': 216, '_split_overlap': []})
47+
>Document(id=..., content: 'Artificial intelligence (AI) - Introduction\\n\\n', meta: {'original_id': '...', 'split_id': 0, 'split_idx_start': 0, '_split_overlap': []})
48+
>Document(id=..., content: 'AI, in its broadest sense, is intelligence exhibited by machines, particularly computer systems.\\n', meta: {'original_id': '...', 'split_id': 1, 'split_idx_start': 45, '_split_overlap': []})
49+
>Document(id=..., content: 'AI technology is widely used throughout industry, government, and science.', meta: {'original_id': '...', 'split_id': 2, 'split_idx_start': 142, '_split_overlap': []})
50+
>Document(id=..., content: ' Some high-profile applications include advanced web search engines; recommendation systems; interac...', meta: {'original_id': '...', 'split_id': 3, 'split_idx_start': 216, '_split_overlap': []})
5151
>]
5252
```
5353
""" # noqa: E501
@@ -72,7 +72,7 @@ def __init__(
7272
separators will be treated as regular expressions unless the separator is "sentence", in that case the
7373
text will be split into sentences using a custom sentence tokenizer based on NLTK.
7474
See: haystack.components.preprocessors.sentence_tokenizer.SentenceSplitter.
75-
If no separators are provided, the default separators ["\n\n", "sentence", "\n", " "] are used.
75+
If no separators are provided, the default separators ["\\n\\n", "sentence", "\\n", " "] are used.
7676
:param sentence_splitter_params: Optional parameters to pass to the sentence tokenizer.
7777
See: haystack.components.preprocessors.sentence_tokenizer.SentenceSplitter for more information.
7878

0 commit comments

Comments
 (0)