Skip to content

Commit f857dfc

Browse files
authored
Fix: Handle Russian preposition "с" in date parsing and add tests (#1261)
Fix: Handle Russian preposition "с" in date parsing and add tests
1 parent 02bd2e5 commit f857dfc

File tree

3 files changed

+22
-0
lines changed

3 files changed

+22
-0
lines changed

dateparser/search/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ def search_dates(
5757
('on May 6th 2004', datetime.datetime(2004, 5, 6, 0, 0))]
5858
5959
"""
60+
text = _search_with_detection.preprocess_text(text, languages)
61+
6062
result = _search_with_detection.search_dates(
6163
text=text,
6264
languages=languages,

dateparser/search/search.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -295,3 +295,10 @@ def search_dates(
295295
language_shortname, text, settings=settings
296296
),
297297
}
298+
299+
def preprocess_text(self, text, languages):
    """Rewrite language-specific constructs in ``text`` before date search.

    When ``"ru"`` is among ``languages``, every standalone Russian
    preposition "с" ("from") that is followed by whitespace and a digit is
    replaced with the literal placeholder ``"[FROM] "``. The match is
    case-insensitive, so a sentence-initial "С" is rewritten as well.

    :param text: Text to preprocess.
    :param languages: Language codes to consider; may be ``None`` or empty.
    :return: The rewritten text, or ``text`` unchanged when ``"ru"`` is not
        among ``languages``.
    """
    if not languages or "ru" not in languages:
        return text

    # ``\b`` keeps words that merely end in "с" intact, and the lookahead
    # leaves the digit itself in place for the later date search.
    return re.sub(r"\bс\s+(?=\d)", "[FROM] ", text, flags=re.IGNORECASE)

tests/test_search.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1090,3 +1090,16 @@ def test_date_search_function_invalid_language_code(self, text, languages):
10901090
text=text, languages=languages, error_type=ValueError
10911091
)
10921092
self.check_error_message("Unknown language(s): 'unknown language code'")
1093+
1094+
def test_search_dates_with_prepositions(self):
    """Check that `search_dates` finds both ends of a Russian "с ... по ..." range.

    The input dates carry no year, so the base date is pinned through
    ``RELATIVE_BASE``; without it the expected year would depend on when
    the test is run.
    """
    result = search_dates(
        "Сервис будет недоступен с 12 января по 30 апреля.",
        add_detected_language=True,
        languages=["ru"],
        # Fix the reference date so the year-less dates resolve to 2025.
        settings={"RELATIVE_BASE": datetime.datetime(2025, 1, 1)},
    )
    expected = [
        ("12 января", datetime.datetime(2025, 1, 12, 0, 0), "ru"),
        ("30 апреля", datetime.datetime(2025, 4, 30, 0, 0), "ru"),
    ]
    assert result == expected

0 commit comments

Comments
 (0)