diff --git a/wiki/patches.txt b/wiki/patches.txt index 2469b29f..e14940fd 100644 --- a/wiki/patches.txt +++ b/wiki/patches.txt @@ -7,4 +7,4 @@ wiki.wiki.doctype.wiki_feedback.patches.delete_wiki_feedback_item [post_model_sync] wiki.wiki.doctype.wiki_space.patches.wiki_sidebar_migration wiki.wiki.doctype.wiki_settings.patches.wiki_navbar_item_migration -wiki.wiki.doctype.wiki_page.patches.convert_to_markdown +wiki.wiki.doctype.wiki_page.patches.convert_wiki_content_to_markdown diff --git a/wiki/wiki/doctype/wiki_page/patches/convert_to_markdown.py b/wiki/wiki/doctype/wiki_page/patches/convert_to_markdown.py deleted file mode 100644 index 6701160a..00000000 --- a/wiki/wiki/doctype/wiki_page/patches/convert_to_markdown.py +++ /dev/null @@ -1,8 +0,0 @@ -import frappe - - -def execute(): - wiki_pages = frappe.db.get_all("Wiki Page", fields=["name", "content"]) - for page in wiki_pages: - markdown_content = frappe.utils.to_markdown(page["content"]) - frappe.db.set_value("Wiki Page", page["name"], "content", markdown_content) diff --git a/wiki/wiki/doctype/wiki_page/patches/convert_wiki_content_to_markdown.py b/wiki/wiki/doctype/wiki_page/patches/convert_wiki_content_to_markdown.py new file mode 100644 index 00000000..47dbfcff --- /dev/null +++ b/wiki/wiki/doctype/wiki_page/patches/convert_wiki_content_to_markdown.py @@ -0,0 +1,72 @@ +import re + +import frappe +import six +from bs4 import Comment, Doctype, NavigableString +from markdownify import MarkdownConverter + +html_heading_re = re.compile(r"h[1-6]") + + +class CustomMarkdownConverter(MarkdownConverter): + # override markdownify's process_tag function to escape certain html tags + def process_tag(self, node, convert_as_inline, children_only=False): + text = "" + + # markdown headings or cells can't include + # block elements (elements w/newlines) + isHeading = html_heading_re.match(node.name) is not None + isCell = node.name in ["td", "th"] + convert_children_as_inline = convert_as_inline + + if not children_only 
and (isHeading or isCell): + convert_children_as_inline = True + + # Remove whitespace-only textnodes in purely nested nodes + def is_nested_node(el): + return el and el.name in ["ol", "ul", "li", "table", "thead", "tbody", "tfoot", "tr", "td", "th"] + + if is_nested_node(node): + for el in node.children: + # Only extract (remove) whitespace-only text node if any of the + # conditions is true: + # - el is the first element in its parent + # - el is the last element in its parent + # - el is adjacent to a nested node + can_extract = ( + not el.previous_sibling + or not el.next_sibling + or is_nested_node(el.previous_sibling) + or is_nested_node(el.next_sibling) + ) + if isinstance(el, NavigableString) and six.text_type(el).strip() == "" and can_extract: + el.extract() + + # Convert the children first + for el in node.children: + if isinstance(el, Comment) or isinstance(el, Doctype): + continue + elif isinstance(el, NavigableString): + text += self.process_text(el) + else: + if el.name in ["video", "iframe", "audio", "embed", "object", "source", "picture", "math"]: + text += self.process_text(el) + text += self.process_tag(el, convert_children_as_inline) + + if not children_only: + convert_fn = getattr(self, f"convert_{node.name}", None) + if convert_fn and self.should_convert_tag(node.name): + text = convert_fn(node, text, convert_as_inline) + + return text + + +def custom_markdownify(html, **options): + return CustomMarkdownConverter(**options).convert(html) + + +def execute(): + wiki_pages = frappe.db.get_all("Wiki Page", fields=["name", "content"]) + for page in wiki_pages: + markdown_content = custom_markdownify(page["content"]) + frappe.db.set_value("Wiki Page", page["name"], "content", markdown_content) diff --git a/wiki/wiki/doctype/wiki_page/wiki_page.py b/wiki/wiki/doctype/wiki_page/wiki_page.py index 9307ad9a..c4dd4a3f 100644 --- a/wiki/wiki/doctype/wiki_page/wiki_page.py +++ b/wiki/wiki/doctype/wiki_page/wiki_page.py @@ -452,12 +452,6 @@ def 
convert_markdown(markdown): return html -@frappe.whitelist() -def convert_html(html): - markdown = frappe.utils.to_markdown(html) - return markdown - - @frappe.whitelist() def update( name, diff --git a/yarn.lock b/yarn.lock index c5ad5ef1..6d222216 100644 --- a/yarn.lock +++ b/yarn.lock @@ -49,7 +49,7 @@ dependencies: type-fest "^2.0.0" -"@tiptap/core@^2.0.0", "@tiptap/core@^2.0.2": +"@tiptap/core@^2.0.2": version "2.0.2" resolved "https://registry.npmjs.org/@tiptap/core/-/core-2.0.2.tgz" integrity sha512-DBry6tpX7mYaTJkEDjVA4WmF8Kgthr275L0uIIOVdwW5nG5PAnOvREKyVOoMQnN3vR7CjtaCK+c3y+MCQhMA/g== @@ -74,7 +74,7 @@ resolved "https://registry.npmjs.org/@tiptap/extension-code-block-lowlight/-/extension-code-block-lowlight-2.0.2.tgz" integrity sha512-7BbRCKJE2oxsZ5n7HIjS0r/y1S/bSxEJgAFF1Tj3KN2IG3x48w+sqYxRMYmCZdoTexmmBpNF64uYXngKXB9/Ig== -"@tiptap/extension-code-block@^2.0.0", "@tiptap/extension-code-block@^2.0.2": +"@tiptap/extension-code-block@^2.0.2": version "2.0.2" resolved "https://registry.npmjs.org/@tiptap/extension-code-block/-/extension-code-block-2.0.2.tgz" integrity sha512-GL8ogok1tl1FkXwk0P0ZWYh6oAmSA+R3oubtDZJG1fLlezKLcLYCN/Q2jgYDHDwEOnxMc4JIiT7EYwJ0pqmNaQ== @@ -201,7 +201,7 @@ resolved "https://registry.npmjs.org/@tiptap/extension-text/-/extension-text-2.0.2.tgz" integrity sha512-kAO+WurWOyHIV/x8qHMF3bSlWrdlPtjEYmf+w8wHKy3FzE55eF6SsGt4FymClNkJmyXdgflXBB3Wv/Z53myy8g== -"@tiptap/pm@^2.0.0", "@tiptap/pm@^2.0.2": +"@tiptap/pm@^2.0.2": version "2.0.2" resolved "https://registry.npmjs.org/@tiptap/pm/-/pm-2.0.2.tgz" integrity sha512-vXlI82bZ4XrmVD6m/pO27gqlm+tU57mpjy9WjkJpEUOifQZK8LihR3l5k55Z0RqalV4/E79iU1cp8mw0v13nhA== @@ -580,7 +580,7 @@ prosemirror-menu@^1.2.1: prosemirror-history "^1.0.0" prosemirror-state "^1.0.0" -prosemirror-model@^1, prosemirror-model@^1.0.0, prosemirror-model@^1.16.0, prosemirror-model@^1.18.1, prosemirror-model@^1.19.0, prosemirror-model@^1.8.1: +prosemirror-model@^1.0.0, prosemirror-model@^1.16.0, prosemirror-model@^1.18.1, 
prosemirror-model@^1.19.0, prosemirror-model@^1.8.1: version "1.19.0" resolved "https://registry.npmjs.org/prosemirror-model/-/prosemirror-model-1.19.0.tgz" integrity sha512-/CvFGJnwc41EJSfDkQLly1cAJJJmBpZwwUJtwZPTjY2RqZJfM8HVbCreOY/jti8wTRbVyjagcylyGoeJH/g/3w== @@ -603,7 +603,7 @@ prosemirror-schema-list@^1.2.2: prosemirror-state "^1.0.0" prosemirror-transform "^1.0.0" -prosemirror-state@^1, prosemirror-state@^1.0.0, prosemirror-state@^1.2.2, prosemirror-state@^1.3.1, prosemirror-state@^1.4.1: +prosemirror-state@^1.0.0, prosemirror-state@^1.2.2, prosemirror-state@^1.3.1, prosemirror-state@^1.4.1: version "1.4.2" resolved "https://registry.npmjs.org/prosemirror-state/-/prosemirror-state-1.4.2.tgz" integrity sha512-puuzLD2mz/oTdfgd8msFbe0A42j5eNudKAAPDB0+QJRw8cO1ygjLmhLrg9RvDpf87Dkd6D4t93qdef00KKNacQ== @@ -640,7 +640,7 @@ prosemirror-transform@^1.0.0, prosemirror-transform@^1.1.0, prosemirror-transfor dependencies: prosemirror-model "^1.0.0" -prosemirror-view@^1, prosemirror-view@^1.0.0, prosemirror-view@^1.1.0, prosemirror-view@^1.13.3, prosemirror-view@^1.27.0, prosemirror-view@^1.28.2: +prosemirror-view@^1.0.0, prosemirror-view@^1.1.0, prosemirror-view@^1.13.3, prosemirror-view@^1.27.0, prosemirror-view@^1.28.2: version "1.30.2" resolved "https://registry.npmjs.org/prosemirror-view/-/prosemirror-view-1.30.2.tgz" integrity sha512-nTNzZvalQf9kHeEyO407LiV6DoOs/pXsid88UqW9Vvybo4ozJW2PJhkfZUxCUF1hR/9vJLdhxX84wuw9P9HsXA== @@ -739,7 +739,7 @@ w3c-keyname@^2.2.0: resolved "https://registry.npmjs.org/w3c-keyname/-/w3c-keyname-2.2.6.tgz" integrity sha512-f+fciywl1SJEniZHD6H+kUO8gOnwIr7f4ijKA6+ZvJFjeGi1r4PDLl53Ayud9O/rk64RqgoQine0feoeOU0kXg== -which@^1.2.9, which@1.2.x: +which@1.2.x, which@^1.2.9: version "1.2.14" resolved "https://registry.npmjs.org/which/-/which-1.2.14.tgz" integrity sha512-16uPglFkRPzgiUXYMi1Jf8Z5EzN1iB4V0ZtMXcHZnwsBtQhhHeCqoWw7tsUY42hJGNDWtUsVLTjakIa5BgAxCw==