Skip to content

Commit 1a72e6d

Browse files
authored
Merge pull request #43 from Code4GovTech/dev
Markdown changes & Testcase
2 parents 8bba1d1 + 69534e1 commit 1a72e6d

File tree

2 files changed

+192
-10
lines changed

2 files changed

+192
-10
lines changed

tests.py

+122
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,122 @@
1+
import unittest
2+
from v2_utils import remove_unmatched_tags
3+
from app import app
4+
import json,random
5+
6+
7+
class CustomTestResult(unittest.TextTestResult):
    """Text test result that additionally prints one line per passing test."""

    def addSuccess(self, test):
        """Record the success in the base class, then print ``<method> - passed``."""
        super(CustomTestResult, self).addSuccess(test)
        print(f"{test._testMethodName} - passed")
11+
12+
13+
class CustomTestRunner(unittest.TextTestRunner):
    """Text test runner that reports through ``CustomTestResult``."""

    # Result class used by this runner instead of the default TextTestResult.
    resultclass = CustomTestResult
15+
16+
17+
class TestRemoveUnmatchedTags(unittest.TestCase):
    """Static input/output cases for ``remove_unmatched_tags``.

    Each test feeds a fixed HTML fragment to the function and compares the
    returned string with a hard-coded expectation.

    NOTE(review): the ``remove_unmatched_tags`` revision shown in the same
    commit appears to wrap any tagged result that does not start with
    ``<ul>`` in ``<ul>...</ul>``; the expected strings below do not include
    that wrapper -- confirm which side is authoritative.
    """

    def test_remove_unmatched_tags_basic(self):
        """A stray closing tag inside a balanced element is dropped."""
        input_text = "<div>Test content</p></div>"
        expected_output = "<div>Test content</div>"
        self.assertEqual(remove_unmatched_tags(input_text), expected_output)

    def test_remove_unmatched_tags_unmatched_opening(self):
        """An element that is never closed gets its closing tag appended."""
        input_text = "<div>Test content"
        expected_output = "<div>Test content</div>"
        self.assertEqual(remove_unmatched_tags(input_text), expected_output)

    def test_remove_unmatched_tags_unmatched_closing(self):
        """Inner elements left open before an outer closing tag are closed.

        Despite the test name, the input has unmatched *opening* tags
        (``<span>`` and ``<p>``), not an unmatched closing tag.
        """
        input_text = "<div><span><p>Test content</div>"
        expected_output = "<div><span><p>Test content</p></span></div>"
        self.assertEqual(remove_unmatched_tags(input_text), expected_output)

    def test_remove_unmatched_tags_nested_tags(self):
        """A duplicated closing tag in nested markup is removed."""
        input_text = "<div><p>Test content</p></p></div>"
        expected_output = "<div><p>Test content</p></div>"
        self.assertEqual(remove_unmatched_tags(input_text), expected_output)

    def test_remove_unmatched_tags_unmatched_nested_opening(self):
        """A nested element that is never closed is closed before its parent."""
        input_text = "<div><p>Test content</div>"
        expected_output = "<div><p>Test content</p></div>"
        self.assertEqual(remove_unmatched_tags(input_text), expected_output)

    def test_remove_unmatched_tags_unmatched_nested_closing(self):
        """Same input and expectation as ``test_remove_unmatched_tags_basic``."""
        input_text = "<div>Test content</p></div>"
        expected_output = "<div>Test content</div>"
        self.assertEqual(remove_unmatched_tags(input_text), expected_output)

    def test_remove_unmatched_tags_multiple_unmatched_tags(self):
        """Balanced siblings are kept and a trailing open element is closed."""
        input_text = "<div>Test</div><p>Content</p><span>Here"
        expected_output = "<div>Test</div><p>Content</p><span>Here</span>"
        self.assertEqual(remove_unmatched_tags(input_text), expected_output)

    def test_remove_unmatched_tags_text_with_no_tags(self):
        """Text without any tags is returned unchanged."""
        input_text = "Plain text with no tags"
        expected_output = "Plain text with no tags"
        self.assertEqual(remove_unmatched_tags(input_text), expected_output)

    def test_remove_unmatched_tags_empty_string(self):
        """The empty string is returned unchanged."""
        input_text = ""
        expected_output = ""
        # Compare the value itself: a length-only comparison would also pass
        # for any other empty container (e.g. ``[]``).
        self.assertEqual(remove_unmatched_tags(input_text), expected_output)
65+
66+
67+
class TestIssuesEndpoints(unittest.TestCase):
    """Smoke tests for the issue endpoints, driven by the ``/issues`` payload.

    ``setUp`` requests ``/issues`` and keeps the ``issues`` list from the
    JSON response; the detail tests then build their URLs from a randomly
    chosen entry of that list. The tests read ``org_name`` and
    ``issues[0]['id']`` from each entry.
    """

    def setUp(self):
        """Create the test client and load the ``/issues`` data."""
        self.app = app.test_client()
        # NOTE(review): this sets ``testing`` on the client object, not on
        # ``app`` itself -- confirm which one was intended.
        self.app.testing = True
        self.issues_data = None  # To store issues data for use in subsequent tests

        # Fetch issues data during setup
        self._fetch_issues_data()

    def _fetch_issues_data(self):
        """Validate the ``/issues`` endpoint and store its ``issues`` list."""
        response = self.app.get('/issues')
        self.assertEqual(response.status_code, 200)

        data = json.loads(response.data)
        self.issues_data = data.get('issues', [])
        self.assertTrue(len(self.issues_data) > 0, "No issues found in response")

    def _pick_org_entry(self, with_issues=False):
        """Return a random entry of ``issues_data``, skipping the test if none fits.

        With ``with_issues=True`` only entries whose ``issues`` list is
        non-empty are considered, so callers can safely read ``issues[0]``.
        """
        if not self.issues_data:
            self.skipTest("Skipping detail test as /issues endpoint did not return data")

        candidates = self.issues_data
        if with_issues:
            candidates = [entry for entry in candidates if entry.get('issues')]
            if not candidates:
                self.skipTest("Skipping detail test as no organisation in /issues response has issues")

        # random.choice covers every entry and works for a single-entry list;
        # randrange(1, len - 1) raised ValueError for lists of length 1 or 2
        # and never selected the first or last entry.
        return random.choice(candidates)

    def test_get_issues_success(self):
        """The ``/issues`` data fetched in ``setUp`` is populated."""
        self.assertIsNotNone(self.issues_data, "Issues data is not populated")

    def test_get_issues_detail_success(self):
        """``/v2/issues/<org>/<issue id>`` answers 200 for a sampled issue."""
        # Use a random organisation from the /issues response, and its first
        # issue, to form the endpoint URL
        entry = self._pick_org_entry(with_issues=True)
        sample_issue = entry['issues'][0]
        issue_id = sample_issue['id']
        orgname = entry['org_name']

        endpoint = f'/v2/issues/{orgname}/{issue_id}'

        response = self.app.get(endpoint)
        self.assertEqual(response.status_code, 200)

    def test_get_repo_detail_success(self):
        """``/issues/<org>`` answers 200 for a sampled organisation."""
        # Use a random organisation from the /issues response to form the
        # endpoint URL
        entry = self._pick_org_entry()
        orgname = entry['org_name']
        endpoint = f'/issues/{orgname}'
        response = self.app.get(endpoint)
        self.assertEqual(response.status_code, 200)
118+
119+
120+
121+
if __name__ == '__main__':
    # Run the suite with the runner that prints a line for each passing test.
    runner = CustomTestRunner()
    unittest.main(testRunner=runner)

v2_utils.py

+70-10
Original file line numberDiff line numberDiff line change
@@ -27,43 +27,103 @@ def define_link_data(usernames):
2727
logging.info(f"{e}---define_link_data")
2828
return []
2929

30+
def preprocess_nested_tags(text):
    """Balance the HTML-like tags in *text*.

    The text is split into tag and non-tag segments and walked with a stack
    of currently open element names:

    * a closing tag whose element is open closes that element, first
      emitting closers for any elements still open inside it;
    * a closing tag with no matching open element is dropped;
    * elements still open at the end are closed, innermost first.

    Void elements (``<br>``, ``<img>``, ...), self-closing tags (``<x/>``)
    and segments that do not start with a tag name (comments, doctypes) are
    copied through without being tracked. Tag names are compared
    case-insensitively.

    Args:
        text: The HTML fragment to repair.

    Returns:
        The repaired string, or *text* unchanged if processing fails
        (for example when *text* is not a string).
    """
    # Elements that never take a closing tag in HTML; tracking them would
    # swallow the parent's closer and append a bogus ``</br>``-style tag.
    void_tags = {
        'area', 'base', 'br', 'col', 'embed', 'hr', 'img',
        'input', 'link', 'meta', 'source', 'track', 'wbr',
    }
    try:
        segments = re.split(r'(<[^>]+>)', text)
        open_names = []  # names of the elements currently open, outermost first
        corrected_segments = []

        for segment in segments:
            closing = re.match(r'</\s*([^\s>/]+)[^>]*>', segment)
            if closing:
                name = closing.group(1).lower()
                if name not in (opened.lower() for opened in open_names):
                    continue  # Ignore unmatched closing tag
                # Close whatever is still open inside the element being closed.
                while open_names[-1].lower() != name:
                    corrected_segments.append(f'</{open_names.pop()}>')
                open_names.pop()
                corrected_segments.append(segment)
                continue

            # The name stops at whitespace, '/' or '>', so "<div>" and
            # '<div class="a">' both yield "div".
            opening = re.match(r'<([A-Za-z][^\s>/]*)[^>]*>', segment)
            if (
                opening
                and not segment.endswith('/>')
                and opening.group(1).lower() not in void_tags
            ):
                open_names.append(opening.group(1))
            corrected_segments.append(segment)

        # Close everything left open, innermost element first.
        corrected_segments.extend(f'</{name}>' for name in reversed(open_names))
        return ''.join(corrected_segments)

    except Exception:
        # Best effort: report the failure and hand the input back untouched.
        logging.exception("error in preprocess_nested_tags function")
        return text
59+
60+
61+
3062
def remove_unmatched_tags(text):
    """Repair unbalanced tags in an HTML fragment and wrap it for the front end.

    Steps:

    1. ``preprocess_nested_tags`` balances nested tags.
    2. A closing tag at the very start of the string is removed.
    3. The text is scanned for balanced ``<tag>...</tag>`` spans and for
       opening tags that have no balanced partner; only these matches are
       kept, and closers are appended for elements that are still open.
    4. Runs of ``>`` or ``<`` are collapsed to a single character.
    5. The result is wrapped in ``<ul>...</ul>`` unless it already starts
       with ``<ul>``.

    Text in which the scan finds no tags at all is returned as produced by
    step 2, without the ``<ul>`` wrapper.

    NOTE(review): text that lies outside every match (for example before
    the first tag) is not copied into the result, and step 4 also rewrites
    legitimate ``>>`` / ``<<`` in content -- confirm both are intended.

    Args:
        text: The HTML fragment to clean.

    Returns:
        The cleaned string, or the text as it stood when an error occurred.
    """
    # Elements that never take a closing tag in HTML.
    void_tags = {
        'area', 'base', 'br', 'col', 'embed', 'hr', 'img',
        'input', 'link', 'meta', 'source', 'track', 'wbr',
    }
    try:
        # Preprocess text to handle unmatched nested tags
        text = preprocess_nested_tags(text)

        # Remove unmatched closing tags at the beginning of the string
        text = re.sub(r'^\s*</[^>]+>\s*', '', text)

        # Regex pattern to find matched or unmatched tags. The second
        # alternative runs up to the next opening tag or the end of the text.
        pattern = re.compile(
            r'(<([^>]+)>.*?</\2>)|(<[^/][^>]*>.*?)(?=<[^/][^>]*>|$)',
            re.DOTALL,
        )
        matches = pattern.findall(text)

        # Text without html tags is returned as is
        if not matches:
            return text

        pieces = []
        open_tags = []

        for balanced, _, dangling in matches:
            if balanced:  # Full matched <tag>...</tag> pairs
                pieces.append(balanced)
            elif dangling:  # Opening <tag> without a balanced partner
                tag = re.match(r'<([^/][^>]*)>', dangling)
                name_parts = tag.group(1).split() if tag else []
                if name_parts:
                    tag_name = name_parts[0].rstrip('/')
                    self_closing = tag.group(1).rstrip().endswith('/')
                    # The first alternative cannot pair '<div class="a">'
                    # with '</div>', so such an element lands here with its
                    # closer already inside the segment; closing it again
                    # would emit a duplicate '</div>'.
                    already_closed = re.search(
                        rf'</{re.escape(tag_name)}\s*>', dangling, re.IGNORECASE
                    )
                    if (
                        tag_name
                        and not self_closing
                        and not already_closed
                        and tag_name.lower() not in void_tags
                    ):
                        open_tags.append(tag_name)
                pieces.append(dangling)

        # Close any unmatched opening tags, innermost first
        pieces.extend(f'</{name}>' for name in reversed(open_tags))
        cleaned_text = ''.join(pieces)

        # Remove extra unmatched angle brackets
        cleaned_text = re.sub(r'>+', '>', cleaned_text)
        cleaned_text = re.sub(r'<+', '<', cleaned_text)

        # For front end renders add ul tags
        if not cleaned_text.strip().startswith("<ul>"):
            return f"<ul>{cleaned_text}</ul>"

        return cleaned_text

    except Exception:
        # Best effort: report the failure and return the text unchanged.
        logging.exception("error in remove_unmatched_tags function")
        return text
50-
51-
109+
110+
111+
52112

53113

54114
def week_data_formatter(html_content, type):
55115

56116
try:
57117
# Use regex to find week titles (e.g., Week 1, Week 2) and their corresponding task lists
58-
week_matches = re.findall(r'(Week \d+)', html_content)
59-
tasks_per_week = re.split(r'Week \d+', html_content)[1:] # Split the content by weeks and skip the first empty split
118+
week_matches = re.findall(r'Week\s*-?\s*\d+', html_content)
119+
tasks_per_week = re.split(r'Week\s*-?\s*\d+', html_content)[1:] # Split the content by weeks and skip the first empty split
60120

61121
weekly_updates = []
62122

63123
if type == "Learnings":
64124
# tasks_per_week = re.split(r'<h3>Week \d+</h3>', html_content)[1:]
65-
tasks_per_week = re.split(r'(<.*?>Week \d+<.*?>)', html_content)[1:]
66-
tasks_per_week = [tasks_per_week[i] for i in range(1, len(tasks_per_week), 2)]
125+
tasks_per_week = re.split(r'Week\s*-?\s*\d+', html_content)[1:]
126+
tasks_per_week = [tasks_per_week[i] for i in range(0, len(tasks_per_week))]
67127
for i, week in enumerate(week_matches):
68128
task_list_html = tasks_per_week[i] if i < len(tasks_per_week) else ""
69129
weekly_updates.append({

0 commit comments

Comments
 (0)