Skip to content

Commit 2cafcae

Browse files
bites 113
1 parent 86851f4 commit 2cafcae

File tree

3 files changed

+35
-0
lines changed

3 files changed

+35
-0
lines changed

.gitignore

+1
Original file line number | Diff line number | Diff line change
@@ -47,3 +47,4 @@
4747
/162/README.md
4848
/203/README.md
4949
/191/README.md
50+
/113/README.md

113/non_ascii.py

+16
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,16 @@
1+
import string
2+
3+
4+
def extract_non_ascii_words(text):
    """Return the words in *text* that contain at least one non-ASCII character.

    The input is coerced with ``str()`` and split on runs of whitespace, so
    tabs, newlines and repeated spaces separate words and never end up
    inside one.  A word is reported when any of its characters has a code
    point above 127; punctuation attached to a word is kept as-is.

    Args:
        text: The text to scan (any object; it is converted with ``str()``).

    Returns:
        A list of the matching words, in their original order.
    """
    return [
        word
        for word in str(text).split()
        # A code point above 127 is what "non-ASCII" means; testing against
        # the letters/punctuation/digits sets wrongly flagged ASCII
        # whitespace and control characters.
        if any(ord(ch) > 127 for ch in word)
    ]
14+
15+
16+
if __name__ == "__main__":
    # Demo output only when run as a script, so importing this module
    # (for example from the test suite) does not print anything.
    print(extract_non_ascii_words("An preost wes on leoden, Laȝamon was ihoten"))

113/test_non_ascii.py

+18
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,18 @@
1+
import pytest
2+
3+
from non_ascii import extract_non_ascii_words
4+
5+
6+
# (input phrase, words expected to be reported as non-ASCII) pairs.
_NON_ASCII_CASES = [
    ('An preost wes on leoden, Laȝamon was ihoten', ['Laȝamon']),
    ('He wes Leovenaðes sone -- liðe him be Drihten', ['Leovenaðes', 'liðe']),
    ('He wonede at Ernleȝe at æðelen are chirechen', ['Ernleȝe', 'æðelen']),
    ('Uppen Sevarne staþe, sel þar him þuhte', ['staþe,', 'þar', 'þuhte']),
    ('Onfest Radestone, þer he bock radde', ['þer']),
    ('Fichier non trouvé', ['trouvé']),
    ('Over \u0e55\u0e57 57 flavours', ['๕๗']),
    ('Sí ... habrá que saber algo de Unicode, ¿no?', ['Sí', 'habrá', '¿no?']),
    ('This string only contains ascii words', []),
]


@pytest.mark.parametrize("phrase, expected", _NON_ASCII_CASES)
def test_extract_non_ascii_words(phrase, expected):
    """Check that each phrase yields exactly its expected non-ASCII words."""
    found = extract_non_ascii_words(phrase)
    assert found == expected

0 commit comments

Comments
 (0)