Skip to content

Commit 7403e18

Browse files
committed
fix: rewrite html to markdown converter for support of lists
1 parent 919e908 commit 7403e18

File tree

2 files changed

+66
-29
lines changed

2 files changed

+66
-29
lines changed

app/handler.py

+12-29
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
"""Lambda function file for sending scheduled message to a connected Telegram chat via Chat ID."""
22

33
import os
4-
import re
54
import requests
65
import telegram
7-
from markdownify import markdownify as md
6+
7+
# AWS Lambda loads handler in a special way so we need to import local modules from 'app'
8+
from app.utils import markdown
89

910
LEETCODE_DOMAIN = 'https://leetcode.com'
1011
LEETCODE_ALL_PROBLEM_URL = LEETCODE_DOMAIN + '/problemset/all/'
@@ -60,31 +61,13 @@ def get_question_of_today():
6061
try:
6162
return response.json()
6263
except ValueError:
63-
print("Failed to decode JSON, API response:")
64+
print('Failed to decode JSON, API response:')
6465
print(response.text)
6566
raise
6667
except BaseException as error:
67-
print(f"Unexpected {error=}, {type(error)=}")
68+
print(f'Unexpected {error=}, {type(error)=}')
6869
raise
6970

70-
def generate_telegram_markdown(content):
    """Convert HTML to Telegram Markdown syntax.

    Args:
        content: HTML markup as a string.

    Returns:
        The converted text with surrounding whitespace stripped.
    """
    # Standard markdown has no superscript/subscript, so prefix the tag contents
    # with a marker character before the tags are processed by the converter.
    formatted_content = content.replace('<sup>', '<sup>^').replace('<sub>', '<sub>_')

    # Convert allowed tags to markdown.
    # Note that supported markdown syntax is different in Telegram:
    # https://core.telegram.org/bots/api#formatting-options
    formatted_content = md(formatted_content, convert=['p', 'img', 'code', 'pre'])

    # Collapse runs of blank lines (and any whitespace right before them) into one blank line.
    # Raw strings keep the regex escapes from being read as (invalid) string escapes.
    formatted_content = re.sub(r'(\s+)?\n{2,}', '\n\n', formatted_content)

    # Replace markdown image syntax with a plain "image: <url>" line.
    # NOTE(review): both groups are greedy, so two images on the same line are
    # matched as a single image and only the last URL is kept.
    formatted_content = re.sub(r'\!\[(.+)?\]\((.+)\)', r'image: \2', formatted_content)

    return formatted_content.strip()
87-
8871
def send_message(event, context):
8972
"""Lambda function handler to send text message."""
9073

@@ -100,12 +83,11 @@ def send_message(event, context):
10083
question_difficulty = question_info['question']['difficulty']
10184
question_content = question_info['question']['content']
10285

103-
104-
message = f"*{question_date}*\n" \
105-
f"*{question_id}. {question_title}*\n\n" \
106-
f"*Topic:* {question_topic}\n" \
107-
f"*Difficulty:* {question_difficulty}\n\n" \
108-
f"*Problem:*\n{generate_telegram_markdown(question_content)}"
86+
message = f'*{question_date}*\n' \
87+
f'*{question_id}. {question_title}*\n\n' \
88+
f'*Topic:* {question_topic}\n' \
89+
f'*Difficulty:* {question_difficulty}\n\n' \
90+
f'*Problem:*\n{markdown.generate(question_content)}'
10991

11092
bot = telegram.Bot(token=TOKEN)
11193
bot.send_message(
@@ -114,7 +96,8 @@ def send_message(event, context):
11496
reply_markup=telegram.InlineKeyboardMarkup([
11597
[telegram.InlineKeyboardButton(text="View on Leetcode", url=question_url)]
11698
]),
117-
parse_mode='Markdown'
99+
parse_mode='Markdown',
100+
disable_web_page_preview=True
118101
)
119102
else:
120103
raise Exception('Invalid API response. No "data" node found in API response.')

app/utils/markdown.py

+54
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
"""Convert HTML to Telegram markdown syntax"""
2+
3+
import re
4+
from markdownify import MarkdownConverter, BACKSLASH
5+
6+
class TelegramMarkdownConverter(MarkdownConverter):
    """MarkdownConverter subclass whose tag handlers emit Telegram-flavoured markdown."""

    def convert_img(self, el, text, convert_as_inline):
        """Render an image as a labelled link whose text and target are its ``src``."""
        url = el.attrs.get('src', None) or ''
        return 'Image: [{0}]({0})'.format(url)

    def convert_br(self, el, text, convert_as_inline):
        """Render a line break; inline contexts get nothing at all."""
        if convert_as_inline:
            return ''

        uses_backslash = self.options['newline_style'].lower() == BACKSLASH
        return '\\\n' if uses_backslash else '\n'

    def convert_p(self, el, text, convert_as_inline):
        """Render a paragraph followed by a blank line; whitespace-only paragraphs vanish."""
        if convert_as_inline:
            return text
        if not text.strip():
            return ''
        return text + '\n\n'

    def convert_pre(self, el, text, convert_as_inline):
        """Render preformatted text as a fenced code block."""
        if not text:
            return ''

        # A leading newline is added unless the node right before this one is a <p>.
        previous = el.previous_sibling
        follows_paragraph = bool(previous) and previous.name == 'p'
        lead = '' if follows_paragraph else '\n'

        language = self.options['code_language']
        return f'{lead}```{language}\n{text.strip()}\n```\n\n'

    def convert_sub(self, el, text, convert_as_inline):
        """Mark subscript text with a leading underscore."""
        return '_' + text

    def convert_sup(self, el, text, convert_as_inline):
        """Mark superscript text with a leading caret."""
        return '^' + text
42+
43+
def generate(html, **options):
    """Convert HTML to Telegram markdown using preconfigured converter options.

    Args:
        html: HTML markup to convert.
        **options: Extra keyword options forwarded to ``TelegramMarkdownConverter``.
            ``convert`` and ``bullets`` have defaults here; a value supplied by
            the caller overrides the default.

    Returns:
        The converted text, stripped of surrounding whitespace, with every run
        of two or more newlines collapsed to exactly two.
    """
    settings = {
        'convert': ['br', 'p', 'img', 'code', 'pre', 'ul', 'ol', 'li', 'a', 'sup', 'sub'],
        'bullets': '•••',
    }
    # Merge instead of passing both `**options` and explicit keywords: repeating
    # a keyword in the call would raise TypeError.
    settings.update(options)

    result = TelegramMarkdownConverter(**settings).convert(html).strip()

    return re.sub('\n{2,}', '\n\n', result)

0 commit comments

Comments
 (0)