@@ -5,6 +5,7 @@ package subscription // import "miniflux.app/v2/internal/reader/subscription"
5
5
6
6
import (
7
7
"bytes"
8
+ "encoding/xml"
8
9
"fmt"
9
10
"io"
10
11
"log/slog"
@@ -125,6 +126,14 @@ func (f *SubscriptionFinder) FindSubscriptions(websiteURL, rssBridgeURL string)
125
126
slog .Debug ("Subscriptions found with well-known URLs" , slog .String ("website_url" , websiteURL ), slog .Any ("subscriptions" , subscriptions ))
126
127
return subscriptions , nil
127
128
}
129
+ // Step 7) Check if the website has feeds in its sitemap.
130
+ slog .Debug ("Try to detect feeds from sitemap" , slog .String ("website_url" , websiteURL ))
131
+ if subscriptions , localizedError := f .FindSubscriptionsFromSitemap (websiteURL ); localizedError != nil {
132
+ return nil , localizedError
133
+ } else if len (subscriptions ) > 0 {
134
+ slog .Debug ("Subscriptions found with sitemap" , slog .String ("website_url" , websiteURL ), slog .Any ("subscriptions" , subscriptions ))
135
+ return subscriptions , nil
136
+ }
128
137
129
138
return nil , nil
130
139
}
@@ -190,14 +199,16 @@ func (f *SubscriptionFinder) FindSubscriptionsFromWebPage(websiteURL, contentTyp
190
199
191
200
func (f * SubscriptionFinder ) FindSubscriptionsFromWellKnownURLs (websiteURL string ) (Subscriptions , * locale.LocalizedErrorWrapper ) {
192
201
knownURLs := map [string ]string {
193
- "atom.xml" : parser .FormatAtom ,
194
- "feed.xml" : parser .FormatAtom ,
195
- "feed/" : parser .FormatAtom ,
196
- "rss.xml" : parser .FormatRSS ,
197
- "rss/" : parser .FormatRSS ,
198
- "index.rss" : parser .FormatRSS ,
199
- "index.xml" : parser .FormatRSS ,
200
- "feed.atom" : parser .FormatAtom ,
202
+ "atom.xml" : parser .FormatAtom ,
203
+ "feed.xml" : parser .FormatAtom ,
204
+ "feed" : parser .FormatAtom ,
205
+ "rss.xml" : parser .FormatRSS ,
206
+ "rss" : parser .FormatRSS ,
207
+ "index.rss" : parser .FormatRSS ,
208
+ "index.xml" : parser .FormatRSS ,
209
+ "feed.atom" : parser .FormatAtom ,
210
+ "atom" : parser .FormatAtom ,
211
+ "index.atom" : parser .FormatAtom ,
201
212
}
202
213
203
214
websiteURLRoot := urllib .RootURL (websiteURL )
@@ -316,3 +327,66 @@ func (f *SubscriptionFinder) FindSubscriptionsFromYouTubePlaylistPage(websiteURL
316
327
317
328
return nil , nil
318
329
}
330
+
331
+ func (f * SubscriptionFinder ) FindSubscriptionsFromSitemap (websiteURL string ) (Subscriptions , * locale.LocalizedErrorWrapper ) {
332
+ websiteURLRoot := urllib .RootURL (websiteURL )
333
+
334
+ responseHandler := fetcher .NewResponseHandler (f .requestBuilder .ExecuteRequest (websiteURLRoot + "/sitemap.xml" ))
335
+ defer responseHandler .Close ()
336
+
337
+ if localizedError := responseHandler .LocalizedError (); localizedError != nil {
338
+ slog .Warn ("Unable to find subscriptions" , slog .String ("website_url" , websiteURL ), slog .Any ("error" , localizedError .Error ()))
339
+ return nil , localizedError
340
+ }
341
+
342
+ responseBody , localizedError := responseHandler .ReadBody (config .Opts .HTTPClientMaxBodySize ())
343
+ if localizedError != nil {
344
+ slog .Warn ("Unable to find subscriptions" , slog .String ("website_url" , websiteURL ), slog .Any ("error" , localizedError .Error ()))
345
+ return nil , localizedError
346
+ }
347
+ return findSubscriptionsFromDownloadedSitemap (bytes .NewReader (responseBody ))
348
+ }
349
+
350
+ func findSubscriptionsFromDownloadedSitemap (body io.Reader ) (Subscriptions , * locale.LocalizedErrorWrapper ) {
351
+ var subscriptions Subscriptions
352
+ loc := struct {
353
+ Content string `xml:",chardata"`
354
+ }{}
355
+
356
+ decoder := xml .NewDecoder (body )
357
+ for {
358
+ t , _ := decoder .Token ()
359
+ if t == nil {
360
+ break
361
+ }
362
+ switch se := t .(type ) {
363
+ case xml.StartElement :
364
+ if se .Name .Local != "loc" {
365
+ continue
366
+ }
367
+
368
+ if err := decoder .DecodeElement (& loc , & se ); err != nil {
369
+ slog .Warn ("Unable to decode loc" , slog .Any ("error" , err ))
370
+ }
371
+ feedUrl := loc .Content
372
+ switch {
373
+ case strings .Contains (feedUrl , ".xml" ),
374
+ strings .Contains (feedUrl , "rss" ):
375
+ subscriptions = append (subscriptions , & Subscription {
376
+ Type : parser .FormatRSS ,
377
+ Title : feedUrl ,
378
+ URL : feedUrl ,
379
+ })
380
+ case strings .Contains (feedUrl , "feed" ),
381
+ strings .Contains (feedUrl , "atom" ):
382
+ subscriptions = append (subscriptions , & Subscription {
383
+ Type : parser .FormatAtom ,
384
+ Title : feedUrl ,
385
+ URL : feedUrl ,
386
+ })
387
+ }
388
+ }
389
+ }
390
+
391
+ return subscriptions , nil
392
+ }
0 commit comments