-
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmarkdown.py
72 lines (52 loc) · 2.2 KB
/
markdown.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
"""Convert HTML to Telegram MarkdownV2 syntax"""
import re
from markdownify import MarkdownConverter, BACKSLASH
class TelegramMarkdownConverter(MarkdownConverter):
    """MarkdownConverter subclass whose output fits Telegram's markdown format.

    Overrides the per-tag ``convert_*`` hooks for images, line breaks,
    paragraphs, preformatted blocks, sub/superscripts, links and ``<style>``.
    Each hook receives the element (``el``), its already-converted inner
    ``text`` and the ``convert_as_inline`` flag, and returns the markdown
    string for that element.
    """

    def convert_img(self, el, text, convert_as_inline):
        """Render an image as a labelled link to its ``src``.

        A missing or empty ``src`` yields an empty link target. The inline
        form is wrapped in parentheses; the block form is wrapped in newlines.
        """
        src = el.attrs.get('src', None) or ''
        if convert_as_inline:
            return f'(Image: [{src}]({src}))'
        return f'\nImage: [{src}]({src})\n'

    def convert_br(self, el, text, convert_as_inline):
        """Render a line break.

        Returns an empty string inline; otherwise a newline, preceded by a
        backslash when the ``newline_style`` option equals ``BACKSLASH``.
        """
        if convert_as_inline:
            return ""
        if self.options['newline_style'].lower() == BACKSLASH:
            return '\\\n'
        return '\n'

    def convert_p(self, el, text, convert_as_inline):
        """Render a paragraph: its text followed by a blank line.

        Inline paragraphs are returned unchanged; whitespace-only paragraphs
        produce no output.
        """
        if convert_as_inline:
            return text
        return f'{text}\n\n' if text.strip() else ''

    def convert_pre(self, el, text, convert_as_inline):
        """Render a preformatted block as a fenced code block.

        The fence is tagged with the ``code_language`` option. A leading
        newline is emitted unless the previous sibling is a ``<p>``.
        """
        if not text:
            return ''
        previous = el.previous_sibling
        after_paragraph = bool(previous and previous.name in ['p'])
        # Code is shown verbatim inside the fence, so drop the backslash in
        # front of escaped underscores.
        unescaped_text = text.strip().replace(r'\_', '_')
        lead = '' if after_paragraph else '\n'
        return f"{lead}```{self.options['code_language']}\n{unescaped_text}\n```\n\n"

    def convert_sub(self, el, text, convert_as_inline):
        """Prefix non-blank subscript text with ``_``."""
        return f'_{text}' if text.strip() else text

    def convert_sup(self, el, text, convert_as_inline):
        """Prefix non-blank superscript text with ``^``."""
        return f'^{text}' if text.strip() else text

    def convert_a(self, el, text, convert_as_inline):
        """Render a link, percent-encoding parentheses in its ``href``.

        Raw ``(`` / ``)`` in the URL would clash with the ``[text](url)``
        link syntax, so they are rewritten to ``%28`` / ``%29`` on the element
        before delegating to the parent implementation. Anchors without an
        ``href`` attribute are passed through untouched instead of raising
        ``KeyError``.
        """
        href = el.get('href')
        if href is not None:
            el['href'] = href.replace('(', '%28').replace(')', '%29')
        return super().convert_a(el, text, convert_as_inline)

    def convert_style(self, el, text, convert_as_inline):
        """Drop ``<style>`` elements entirely."""
        return ''
def generate(html, **options):
    """Convert *html* to markdown using ``TelegramMarkdownConverter``.

    ``options`` are forwarded to the converter. Two options are preset and
    used unless the caller supplies their own value: ``convert`` (the list
    of tag names below) and ``bullets`` (``'•••'``). Previously passing
    either of them raised ``TypeError`` for a duplicated keyword argument.

    Returns the converted text with surrounding whitespace stripped and
    every run of three or more newlines collapsed to exactly two.
    """
    # ``options`` is a fresh dict built from the keyword arguments, so
    # filling in defaults here cannot leak into the caller's data.
    options.setdefault(
        'convert',
        ['br', 'p', 'img', 'code', 'pre', 'ul', 'ol', 'li', 'a', 'sup', 'sub', 'style'],
    )
    options.setdefault('bullets', '•••')
    markdown = TelegramMarkdownConverter(**options).convert(html).strip()
    # Runs of exactly two newlines are already in the target form.
    return re.sub(r'\n{3,}', '\n\n', markdown)