From 8ac5a8600c4efdbd70393435a19696e52e5ce9b1 Mon Sep 17 00:00:00 2001
From: Ryan Pendleton <me@ryanp.me>
Date: Sun, 6 Oct 2019 05:41:09 -0600
Subject: [PATCH] ignore the fragment portion of URLs when crawling links

---
 main.go | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/main.go b/main.go
index 7935f47..e4234e3 100644
--- a/main.go
+++ b/main.go
@@ -269,8 +269,17 @@ L:
 				}
 			} else if t.Type == A && t.Href != "" {
 				ignore := false
-				if t.Href[0] == '#' {
+				hashIndex := strings.Index(t.Href, "#")
+				if hashIndex == 0 {
+					if *verbose {
+						log.Println("Link to", t.Href, "on page", u, "has an anchor to itself; skipping link")
+					}
 					ignore = true
+				} else if hashIndex > 0 {
+					if *verbose {
+						log.Println("Link to", t.Href, "on page", u, "has an anchor to another page; removing fragment from URL")
+					}
+					t.Href = string(t.Href[0:hashIndex])
 				}
 				if strings.Contains(t.Rel, "nofollow") {
 					if *noRobots {