Skip to content

Commit 0f6feb0

Browse files
feat: Broken Links Report
1 parent 443afe4 commit 0f6feb0

File tree

7 files changed

+191
-2
lines changed

7 files changed

+191
-2
lines changed

wiki/wiki/doctype/wiki_space/wiki_space.json

+3-2
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@
7171
],
7272
"index_web_pages_for_search": 1,
7373
"links": [],
74-
"modified": "2024-04-05 21:21:29.535486",
74+
"modified": "2024-12-11 15:27:44.629602",
7575
"modified_by": "Administrator",
7676
"module": "Wiki",
7777
"name": "Wiki Space",
@@ -105,5 +105,6 @@
105105
],
106106
"sort_field": "modified",
107107
"sort_order": "DESC",
108-
"states": []
108+
"states": [],
109+
"title_field": "route"
109110
}

wiki/wiki/report/__init__.py

Whitespace-only changes.

wiki/wiki/report/wiki_broken_links/__init__.py

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
# Copyright (c) 2024, Frappe and Contributors
2+
# See license.txt
3+
4+
import frappe
5+
from frappe.tests.utils import FrappeTestCase
6+
7+
from wiki.wiki.report.wiki_broken_links.wiki_broken_links import execute, get_broken_links
8+
9+
# URL on a non-existent TLD; the report is expected to flag it as broken.
BROKEN_LINK = "https://frappewiki.notavalidtld"

# Markdown fixture containing exactly one broken link and one valid link.
TEST_MD_WITH_BROKEN_LINK = (
    "\n"
    "## Hello\n"
    "\n"
    f"This is a test for a [broken link]({BROKEN_LINK}).\n"
    "\n"
    "This is a [valid link](https://frappe.io).\n"
)
18+
19+
20+
class TestWikiBrokenLinkChecker(FrappeTestCase):
    """Tests for the "Wiki Broken Links" script report.

    NOTE(review): these tests issue real HTTP requests through
    ``get_broken_links`` (one of the fixture URLs is https://frappe.io), so
    they presumably need network access to pass -- confirm for CI.
    """

    def setUp(self):
        """Start from an empty Wiki Page table, then insert one page and one space."""
        frappe.db.delete("Wiki Page")

        page_fields = {
            "doctype": "Wiki Page",
            "content": TEST_MD_WITH_BROKEN_LINK,
            "title": "My Wiki Page",
            "route": "test-wiki-page-route",
        }
        self.test_wiki_page = frappe.get_doc(page_fields).insert()

        space_fields = {"doctype": "Wiki Space", "route": "test-ws-route"}
        self.test_wiki_space = frappe.get_doc(space_fields).insert()

    def test_returns_correct_broken_links(self):
        """Only the broken URL in the fixture markdown is reported."""
        found = get_broken_links(TEST_MD_WITH_BROKEN_LINK)
        self.assertEqual(len(found), 1)

    def test_wiki_broken_link_report(self):
        """Without filters the report yields one row, for the fixture's broken link."""
        _columns, rows = execute()
        self.assertEqual(len(rows), 1)
        self.assertEqual(rows[0]["broken_link"], BROKEN_LINK)

    def test_wiki_broken_list_report_with_filters(self):
        """Filtering by Wiki Space only reports pages listed in that space's sidebar."""
        space_filter = {"wiki_space": self.test_wiki_space.name}

        # The space has no sidebar entries yet, so nothing is reported.
        _columns, rows = execute(space_filter)
        self.assertEqual(len(rows), 0)

        sidebar_entry = {"wiki_page": self.test_wiki_page, "parent_label": "Test Parent Label"}
        self.test_wiki_space.append("wiki_sidebars", sidebar_entry)
        self.test_wiki_space.save()

        # Once the page is attached to the space, its broken link shows up.
        _columns, rows = execute({"wiki_space": self.test_wiki_space.name})
        self.assertEqual(len(rows), 1)
        self.assertEqual(rows[0]["wiki_page"], self.test_wiki_page.name)
        self.assertEqual(rows[0]["broken_link"], BROKEN_LINK)

    def tearDown(self):
        """Roll back the database changes made by the test."""
        frappe.db.rollback()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// Copyright (c) 2024, Frappe and contributors
2+
// For license information, please see license.txt
3+
4+
// Client-side definition of the "Wiki Broken Links" query report.
frappe.query_reports["Wiki Broken Links"] = {
    filters: [
        // Single Link filter that lets the user pick a Wiki Space.
        { fieldname: "wiki_space", label: __("Wiki Space"), fieldtype: "Link", options: "Wiki Space" },
    ],
};
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
{
2+
"add_total_row": 0,
3+
"columns": [],
4+
"creation": "2024-12-11 14:43:18.799835",
5+
"disabled": 0,
6+
"docstatus": 0,
7+
"doctype": "Report",
8+
"filters": [],
9+
"idx": 0,
10+
"is_standard": "Yes",
11+
"letterhead": null,
12+
"modified": "2024-12-11 14:48:34.139446",
13+
"modified_by": "Administrator",
14+
"module": "Wiki",
15+
"name": "Wiki Broken Links",
16+
"owner": "Administrator",
17+
"prepared_report": 0,
18+
"ref_doctype": "Wiki Page",
19+
"report_name": "Wiki Broken Links",
20+
"report_type": "Script Report",
21+
"roles": [
22+
{
23+
"role": "System Manager"
24+
},
25+
{
26+
"role": "Wiki Approver"
27+
}
28+
],
29+
"timeout": 0
30+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
# Copyright (c) 2024, Frappe and contributors
2+
# For license information, please see license.txt
3+
4+
import frappe
5+
import requests
6+
from bs4 import BeautifulSoup
7+
from frappe import _
8+
9+
10+
def execute(filters: dict | None = None):
    """Build the "Wiki Broken Links" report.

    Main entry point of the report; the framework calls it whenever the
    report is refreshed or a filter value changes.

    Args:
        filters: Current filter values keyed by fieldname, or ``None``.

    Returns:
        A ``(columns, data)`` tuple as produced by ``get_columns`` and
        ``get_data``.
    """
    return get_columns(), get_data(filters)
21+
22+
23+
def get_columns() -> list[dict]:
    """Describe the report's columns.

    Returns:
        Two column definitions, in display order: a Link column to the
        Wiki Page and a Data column holding the broken URL.
    """
    wiki_page_column = {
        "label": _("Wiki Page"),
        "fieldname": "wiki_page",
        "fieldtype": "Link",
        "options": "Wiki Page",
        "width": 200,
    }
    broken_link_column = {
        "label": _("Broken Link"),
        "fieldname": "broken_link",
        "fieldtype": "Data",
        "options": "URL",
        "width": 400,
    }
    return [wiki_page_column, broken_link_column]
44+
45+
46+
def get_data(filters: dict | None = None) -> list[dict]:
    """Collect one report row per broken link found in the selected wiki pages.

    Args:
        filters: Report filter values. When it carries a truthy
            ``"wiki_space"``, only pages listed as "Wiki Group Item" children
            of that Wiki Space are scanned. Otherwise (``None``, empty, or no
            ``"wiki_space"`` value) every Wiki Page is scanned.

    Returns:
        A list of dicts with the keys ``"broken_link"`` and ``"wiki_page"``,
        matching the fieldnames declared in ``get_columns``.
    """
    # Resolve the filter up front so that a non-empty filters dict without a
    # wiki_space falls back to "all pages" instead of leaving wiki_pages unset.
    wiki_space = (filters or {}).get("wiki_space")

    if wiki_space:
        wiki_pages = frappe.db.get_all(
            "Wiki Group Item",
            fields=["wiki_page as name", "wiki_page.content as content"],
            filters={"parent": wiki_space, "parenttype": "Wiki Space"},
        )
    else:
        wiki_pages = frappe.db.get_all("Wiki Page", fields=["name", "content"])

    return [
        {"broken_link": link, "wiki_page": page["name"]}
        for page in wiki_pages
        for link in get_broken_links(page.content)
    ]
69+
70+
71+
def get_broken_links(md_content: str):
72+
html = frappe.utils.md_to_html(md_content)
73+
soup = BeautifulSoup(html, "html.parser")
74+
75+
links = soup.find_all("a")
76+
images = soup.find_all("img")
77+
broken_links = []
78+
for el in links + images:
79+
url = el.attrs.get("href") or el.attrs.get("src")
80+
try:
81+
response = requests.head(url, verify=False, timeout=5)
82+
if response.status_code >= 400:
83+
broken_links.append(url)
84+
except Exception:
85+
broken_links.append(url)
86+
87+
return broken_links

0 commit comments

Comments
 (0)