Skip to content

Commit 1e61c5a

Browse files
authored
Merge pull request #54 from skx/feeds
New RSS support, and improvements to http-get.
2 parents 4217f43 + a41142b commit 1e61c5a

File tree

4 files changed

+164
-2
lines changed

4 files changed

+164
-2
lines changed

Diff for: README.md

+18
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,18 @@ For examples please consult the output of `sysbox help expect`, but a simple exa
228228

229229

230230

231+
## feeds
232+
233+
The feeds sub-command retrieves the contents of the single remote URL specified, and outputs a list of all the RSS/Atom feeds referenced within that page.
234+
235+
Basic usage would be:
236+
237+
$ sysbox feeds https://blog.steve.fi/
238+
239+
If no protocol is specified, "https" is assumed (for example, an argument of "example.com" will be converted to https://example.com).
240+
241+
242+
231243
## find
232244

233245
The find sub-command allows finding files/directories that match a given number
@@ -269,6 +281,12 @@ A simple HTTP-server. Allows serving to localhost, or to the local LAN.
269281

270282
Very much "curl-lite", allows you to fetch the contents of a remote URL. SSL errors, etc, are handled, but only minimal options are supported.
271283

284+
Basic usage would be:
285+
286+
$ sysbox http-get https://example.com/
287+
288+
If no protocol is specified, "https" is assumed (for example, an argument of "example.com" will be converted to https://example.com).
289+
272290

273291

274292
## ips

Diff for: cmd_feeds.go

+134
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
package main
2+
3+
import (
4+
"errors"
5+
"fmt"
6+
"io"
7+
"net/http"
8+
"net/url"
9+
"strings"
10+
11+
"github.com/skx/subcommands"
12+
"golang.org/x/net/html"
13+
)
14+
15+
// feedsCommand is the receiver type for the "feeds" sub-command.
// It has no fields of its own; the only member is the embedded
// subcommands.NoFlags.
16+
type feedsCommand struct {
17+
18+
// We embed the NoFlags option, because we accept no command-line flags.
19+
subcommands.NoFlags
20+
}
21+
22+
// ErrNoFeeds is the sentinel error FindFeeds returns when the fetched page
// was read to the end without any RSS/Atom feed link being found.
23+
var ErrNoFeeds = errors.New("NO-FEED")
24+
25+
// Info returns two strings: the name of this subcommand ("feeds") and its
// help text, which consists of a one-line synopsis followed by a longer
// description and a usage example.
26+
func (t *feedsCommand) Info() (string, string) {
27+
return "feeds", `Extract RSS feeds from remote URLS.
28+
29+
Details:
30+
31+
This command fetches the contents of the specified URL, much like
32+
the 'http-get' command would, and extracts any specified RSS feed
33+
from the contents of that remote URL.
34+
35+
Examples:
36+
37+
$ sysbox feeds https://blog.steve.fi/`
38+
}
39+
40+
func (t *feedsCommand) FindFeeds(base string) ([]string, error) {
41+
42+
ret := []string{}
43+
44+
if !strings.HasPrefix(base, "http") {
45+
base = "https://" + base
46+
}
47+
48+
// Make the request
49+
response, err := http.Get(base)
50+
if err != nil {
51+
return ret, err
52+
}
53+
54+
// Get the body.
55+
defer response.Body.Close()
56+
57+
z := html.NewTokenizer(response.Body)
58+
59+
for {
60+
tt := z.Next()
61+
switch tt {
62+
case html.ErrorToken:
63+
err := z.Err()
64+
if err == io.EOF {
65+
if len(ret) > 0 {
66+
return ret, nil
67+
}
68+
return ret, ErrNoFeeds
69+
}
70+
return ret, fmt.Errorf("%s", z.Err())
71+
case html.StartTagToken, html.SelfClosingTagToken:
72+
t := z.Token()
73+
if t.Data == "link" {
74+
isRSS := false
75+
u := ""
76+
for _, attr := range t.Attr {
77+
if attr.Key == "type" && (attr.Val == "application/rss+xml" || attr.Val == "application/atom+xml") {
78+
isRSS = true
79+
}
80+
81+
if attr.Key == "href" {
82+
u = attr.Val
83+
}
84+
}
85+
if isRSS {
86+
if !strings.HasPrefix(u, "http") {
87+
u, _ = url.JoinPath(base, u)
88+
}
89+
ret = append(ret, u)
90+
}
91+
}
92+
}
93+
}
94+
95+
// Nothing found?
96+
if len(ret) == 0 {
97+
return ret, ErrNoFeeds
98+
}
99+
return ret, nil
100+
}
101+
102+
// Execute is invoked if the user specifies `feeds` as the subcommand.
103+
func (t *feedsCommand) Execute(args []string) int {
104+
105+
// Ensure we have only a single URL
106+
if len(args) != 1 {
107+
fmt.Printf("Usage: feeds URL\n")
108+
return 1
109+
}
110+
111+
// The URL
112+
url := args[0]
113+
114+
// We'll default to https if the protocol isn't specified.
115+
if !strings.HasPrefix(url, "http") {
116+
url = "https://" + url
117+
}
118+
119+
out, err := t.FindFeeds(url)
120+
if err != nil {
121+
if err == ErrNoFeeds {
122+
fmt.Printf("No Feeds found in %s\n", url)
123+
} else {
124+
fmt.Printf("Error processing %s: %s\n", url, err)
125+
return 1
126+
}
127+
} else {
128+
for _, x := range out {
129+
fmt.Printf("%s\n", x)
130+
}
131+
}
132+
133+
return 0
134+
}

Diff for: cmd_http_get.go

+11-2
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"io"
77
"net/http"
88
"sort"
9+
"strings"
910
)
1011

1112
// Structure for our options and state.
@@ -51,10 +52,18 @@ func (hg *httpGetCommand) Execute(args []string) int {
5152
return 1
5253
}
5354

55+
// The URL
56+
url := args[0]
57+
58+
// We'll default to https if the protocol isn't specified.
59+
if !strings.HasPrefix(url, "http") {
60+
url = "https://" + url
61+
}
62+
5463
// Make the request
55-
response, err := http.Get(args[0])
64+
response, err := http.Get(url)
5665
if err != nil {
57-
fmt.Printf("error: %s", err.Error())
66+
fmt.Printf("error fetching %s: %s", url, err.Error())
5867
return 1
5968
}
6069

Diff for: main.go

+1
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ func main() {
4141
subcommands.Register(&envTemplateCommand{})
4242
subcommands.Register(&execSTDINCommand{})
4343
subcommands.Register(&expectCommand{})
44+
subcommands.Register(&feedsCommand{})
4445
subcommands.Register(&findCommand{})
4546
subcommands.Register(&fingerdCommand{})
4647
subcommands.Register(&html2TextCommand{})

0 commit comments

Comments
 (0)