Skip to content

Commit d9fd2ce

Browse files
authored
Rework use_directory_urls=True validation to better catch incorrect links (#80)
1 parent ac22c24 commit d9fd2ce

File tree

5 files changed

+73
-13
lines changed

5 files changed

+73
-13
lines changed

htmlproofer/plugin.py

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -194,18 +194,20 @@ def is_url_target_valid(url: str, src_path: str, files: Dict[str, File]) -> bool
194194
return True
195195

196196
url_target, _, optional_anchor = match.groups()
197-
_, extension = os.path.splitext(url_target)
198-
if extension == ".html":
199-
# URL is a link to another local Markdown file that may include an anchor.
200-
target_markdown = HtmlProoferPlugin.find_target_markdown(url_target, src_path, files)
201-
if target_markdown is None:
202-
# The corresponding Markdown page was not found.
203-
return False
204-
if optional_anchor and not HtmlProoferPlugin.contains_anchor(target_markdown, optional_anchor):
205-
# The corresponding Markdown header for this anchor was not found.
206-
return False
207-
elif HtmlProoferPlugin.find_source_file(url_target, src_path, files) is None:
197+
source_file = HtmlProoferPlugin.find_source_file(url_target, src_path, files)
198+
if source_file is None:
208199
return False
200+
201+
# If there's an anchor (fragment) on the link, we try to find it in the source_file
202+
if optional_anchor:
203+
_, extension = os.path.splitext(source_file.src_uri)
204+
# Currently only Markdown-based pages are supported, but conceptually others could be added below
205+
if extension == ".md":
206+
if source_file.page is None or source_file.page.markdown is None:
207+
return False
208+
if not HtmlProoferPlugin.contains_anchor(source_file.page.markdown, optional_anchor):
209+
return False
210+
209211
return True
210212

211213
@staticmethod

tests/integration/docs/nested/page1.md

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,14 @@ This plugin can detect invalid anchor links to another page, such as
1919
[Acknowledgement](../index.md#BAD_ANCHOR)
2020
or to a nested page
2121
[Invalid Anchor](./page2.md#BAD_ANCHOR).
22+
It should also detect links to pages in the same directory without './'
23+
[Invalid Anchor](page2.md#BAD_ANCHOR).
2224

2325
But allows valid anchors such as
2426
[Main Page](../index.md#mkdocs-htmlproofer-plugin),
25-
[Table of Contents](../index.md#table-of-contents), and
26-
[Emoji Anchor](./page2.md#title-with-emojis).
27+
[Table of Contents](../index.md#table-of-contents),
28+
[Emoji Anchor](./page2.md#title-with-emojis), and
29+
[Good Heading](page2.md#good-heading).
2730

2831
## Image Link absolute/relative
2932

tests/integration/docs/nested/page2.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
11
# Second Nested Test Page
22

33
## :smile_cat: Title with Emojis :material-star: :octicons-apps-16:
4+
5+
## Good Heading

tests/integration/mkdocs.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ plugins:
1414
'#acknowledge',
1515
'../index.html#BAD_ANCHOR',
1616
'page2.html#BAD_ANCHOR',
17+
'../../#BAD_ANCHOR', # if use_directory_urls=True
18+
'../page2/#BAD_ANCHOR', # if use_directory_urls=True
1719
'../../../tests',
1820
]
1921
skip_downloads: True

tests/unit/test_plugin.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,51 @@ def test_get_url_status__local_page(plugin):
284284
set(), files) == 0
285285

286286

287+
def test_get_url_status__local_page_with_directory_urls(plugin):
288+
index_page = Mock(spec=Page, markdown='# Heading\nContent')
289+
page1_page = Mock(spec=Page, markdown='# Page One\n## Sub Heading\nContent')
290+
special_char_page = Mock(spec=Page, markdown='# Heading éèà\n## Sub Heading éèà\nContent')
291+
mock_files = Files([
292+
Mock(spec=File, src_path='index.md', dest_path='index/index.html',
293+
dest_uri='index/index.html', url='index/', src_uri='index.md',
294+
page=index_page),
295+
Mock(spec=File, src_path='page1.md', dest_path='page1/index.html',
296+
dest_uri='page1/index.html', url='page1/', src_uri='page1.md',
297+
page=page1_page),
298+
Mock(spec=File, src_path='Dir éèà/éèà.md', dest_path='Dir éèà/éèà/index.html',
299+
dest_uri='Dir éèà/éèà/index.html',
300+
url='Dir%20%C3%A9%C3%A8%C3%A0/%C3%A9%C3%A8%C3%A0/',
301+
src_uri='Dir éèà/éèà.md', page=special_char_page),
302+
Mock(spec=File, src_path='Dir éèà/page1.md', dest_path='Dir éèà/page1/index.html',
303+
dest_uri='Dir éèà/page1/index.html',
304+
url='Dir%20%C3%A9%C3%A8%C3%A0/page1/',
305+
src_uri='Dir%20%C3%A9%C3%A8%C3%A0/page1.md',
306+
page=special_char_page),
307+
])
308+
files = {}
309+
files.update({os.path.normpath(file.url): file for file in mock_files})
310+
files.update({file.src_uri: file for file in mock_files})
311+
312+
assert plugin.get_url_status('../index/', 'page1.md', set(), files) == 0
313+
assert plugin.get_url_status('../index/#heading', 'page1.md', set(), files) == 0
314+
assert plugin.get_url_status('../index/#bad-heading', 'page1.md', set(), files) == 404
315+
316+
assert plugin.get_url_status('../page1/', 'page1.md', set(), files) == 0
317+
assert plugin.get_url_status('../page1/#sub-heading', 'page1.md', set(), files) == 0
318+
assert plugin.get_url_status('../page1/#heading', 'page1.md', set(), files) == 404
319+
320+
assert plugin.get_url_status('../page2/', 'page1.md', set(), files) == 404
321+
assert plugin.get_url_status('../page2/#heading', 'page1.md', set(), files) == 404
322+
323+
assert plugin.get_url_status(
324+
'../Dir%20%C3%A9%C3%A8%C3%A0/%C3%A9%C3%A8%C3%A0/#sub-heading-eea',
325+
'page1.md', set(), files) == 0
326+
assert plugin.get_url_status(
327+
'../%C3%A9%C3%A8%C3%A0/#sub-heading-eea',
328+
'Dir%20%C3%A9%C3%A8%C3%A0/page1.md',
329+
set(), files) == 0
330+
331+
287332
def test_get_url_status__non_markdown_page(plugin):
288333
index_page = Mock(spec=Page, markdown='# Heading\nContent')
289334
mock_files = Files([
@@ -293,6 +338,9 @@ def test_get_url_status__non_markdown_page(plugin):
293338
Mock(spec=File, src_path='drawing.svg', dest_path='drawing.svg',
294339
dest_uri='index.html', url='drawing.svg', src_uri='drawing.svg',
295340
page=None),
341+
Mock(spec=File, src_path='page.html', dest_path='page.html',
342+
dest_uri='page.html', url='page.html', src_uri='page.html',
343+
page=None),
296344
])
297345
files = {}
298346
files.update({os.path.normpath(file.url): file for file in mock_files})
@@ -302,6 +350,9 @@ def test_get_url_status__non_markdown_page(plugin):
302350
assert plugin.get_url_status('/drawing.svg', 'index.md', set(), files) == 0
303351
assert plugin.get_url_status('not-existing.svg', 'index.md', set(), files) == 404
304352

353+
assert plugin.get_url_status('page.html', 'index.md', set(), files) == 0
354+
assert plugin.get_url_status('page.html#heading', 'index.md', set(), files) == 0 # no validation for non-markdown pages
355+
305356

306357
def test_get_url_status__local_page_nested(plugin):
307358
index_page = Mock(spec=Page, markdown='# Heading\nContent')

0 commit comments

Comments
 (0)