From 945d43605560c442287e1c1458553fad65c48084 Mon Sep 17 00:00:00 2001 From: Julien Voisin Date: Fri, 13 Dec 2024 22:50:12 +0000 Subject: [PATCH] refactor(rewriter): replace regex with URL parsing for referrer override No need for brittle regex when matching plain strings or domain names. This should save some negligible amount of heap memory as well as tremendously speed up the matching. --- internal/reader/rewrite/rules.go | 70 +++++++++++++------------------- 1 file changed, 28 insertions(+), 42 deletions(-) diff --git a/internal/reader/rewrite/rules.go b/internal/reader/rewrite/rules.go index 89cba31daa2..4a77b1fff03 100644 --- a/internal/reader/rewrite/rules.go +++ b/internal/reader/rewrite/rules.go @@ -3,7 +3,10 @@ package rewrite // import "miniflux.app/v2/internal/reader/rewrite" -import "regexp" +import ( + "net/url" + "strings" +) // List of predefined rewrite rules (alphabetically sorted) // Available rules: "add_image_title", "add_youtube_video" @@ -39,49 +42,32 @@ var predefinedRules = map[string]string{ "youtube.com": "add_youtube_video", } -type RefererRule struct { - URLPattern *regexp.Regexp - Referer string -} +// GetRefererForURL returns the referer for the given URL if it exists, otherwise an empty string. 
// GetRefererForURL returns the Referer header value that should be sent when
// fetching u, or an empty string when no predefined override applies.
//
// Only the hostname of u is considered; scheme, port, path and query are
// ignored. The hostname is compared case-insensitively because url.Parse
// keeps the host exactly as it was written.
//
// An empty string is also returned when u cannot be parsed as a URL.
func GetRefererForURL(u string) string {
	parsedURL, err := url.Parse(u)
	if err != nil {
		return ""
	}

	// Normalize once so that every comparison below sees the same
	// lower-case hostname (e.g. "I.PXIMG.NET" -> "i.pximg.net").
	hostname := strings.ToLower(parsedURL.Hostname())

	// Hosts that must match exactly.
	switch hostname {
	case "i.pximg.net":
		return "https://www.pixiv.net"
	case "sp1.piokok.com":
		return "https://sp1.piokok.com"
	case "cdnfile.sspai.com":
		return "https://sspai.com"
	case "f.video.weibocdn.com":
		return "https://weibo.com"
	case "img.hellogithub.com":
		return "https://hellogithub.com"
	}

	// Any subdomain of these domains. The leading dot in the suffix means
	// the bare domain and look-alikes such as "evil-sinaimg.cn" do not match.
	switch {
	case strings.HasSuffix(hostname, ".sinaimg.cn"):
		return "https://weibo.com"
	case strings.HasSuffix(hostname, ".cdninstagram.com"):
		return "https://www.instagram.com"
	}

	return ""
}