diff --git a/README.md b/README.md
index f3bbd40..a1094d0 100644
--- a/README.md
+++ b/README.md
@@ -228,6 +228,18 @@ For examples please consult the output of `sysbox help expect`, but a simple exa
 
 
 
+## feeds
+
+The feeds sub-command retrieves the contents of the (single) remote URL which is specified, and outputs a list of all the RSS/Atom feeds which have been referenced within that file.
+
+Basic usage would be:
+
+    $ sysbox feeds https://blog.steve.fi/
+
+If no protocol is specified "https" is assumed (for example, an argument of "example.com" will be converted to https://example.com).
+
+
+
 ## find
 
 The find sub-command allows finding files/directories that match a given number
@@ -269,6 +281,12 @@ A simple HTTP-server. Allows serving to localhost, or to the local LAN.
 
 Very much "curl-lite", allows you to fetch the contents of a remote URL. SSL errors, etc, are handled, but only minimal options are supported.
 
+Basic usage would be:
+
+    $ sysbox http-get https://example.com/
+
+If no protocol is specified "https" is assumed (for example, an argument of "example.com" will be converted to https://example.com).
+
 
 
 ## ips
diff --git a/cmd_feeds.go b/cmd_feeds.go
new file mode 100644
index 0000000..f13d715
--- /dev/null
+++ b/cmd_feeds.go
@@ -0,0 +1,174 @@
+package main
+
+import (
+	"errors"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"strings"
+	"time"
+
+	"github.com/skx/subcommands"
+	"golang.org/x/net/html"
+)
+
+// feedsTimeout bounds the whole HTTP exchange, so that an unresponsive
+// server cannot hang the command forever.
+const feedsTimeout = 30 * time.Second
+
+// Structure for our options and state.
+type feedsCommand struct {
+
+	// We embed the NoFlags option, because we accept no command-line flags.
+	subcommands.NoFlags
+}
+
+// ErrNoFeeds is used if no feeds are found in a remote URL
+var ErrNoFeeds = errors.New("NO-FEED")
+
+// Info returns the name of this subcommand, followed by its help-text.
+func (t *feedsCommand) Info() (string, string) {
+	return "feeds", `Extract RSS feeds from remote URLS.
+
+Details:
+
+This command fetches the contents of the specified URL, much like
+the 'http-get' command would, and extracts any specified RSS feed
+from the contents of that remote URL.
+
+Examples:
+
+  $ sysbox feeds https://blog.steve.fi/`
+}
+
+// defaultToHTTPS returns target unchanged if it already starts with an
+// explicit "http://" or "https://" scheme, otherwise "https://" is prepended.
+//
+// Only a complete scheme prefix counts, so a bare hostname which merely
+// begins with the letters "http" (e.g. "httpbin.org") still gets the default.
+func defaultToHTTPS(target string) string {
+	lower := strings.ToLower(target)
+	if strings.HasPrefix(lower, "http://") || strings.HasPrefix(lower, "https://") {
+		return target
+	}
+	return "https://" + target
+}
+
+// isFeedType reports whether the value of a <link type="..."> attribute
+// names an RSS or Atom feed.
+func isFeedType(mime string) bool {
+	switch strings.ToLower(strings.TrimSpace(mime)) {
+	case "application/rss+xml", "application/atom+xml":
+		return true
+	}
+	return false
+}
+
+// FindFeeds fetches the page at base and returns the URL of every RSS/Atom
+// feed which that page advertises via a <link> element.
+//
+// If base has no http:// or https:// scheme then https:// is assumed.
+// Relative links are resolved against the URL the page was finally served
+// from, i.e. after any redirects have been followed.
+//
+// ErrNoFeeds is returned when the page was read successfully but advertised
+// no feeds; any other error means the page could not be fetched or parsed.
+func (t *feedsCommand) FindFeeds(base string) ([]string, error) {
+
+	base = defaultToHTTPS(base)
+
+	// Make the request
+	client := &http.Client{Timeout: feedsTimeout}
+	response, err := client.Get(base)
+	if err != nil {
+		return nil, err
+	}
+	defer response.Body.Close()
+
+	// An error-page is not worth scanning for feeds.
+	if response.StatusCode < 200 || response.StatusCode > 299 {
+		return nil, fmt.Errorf("unexpected HTTP status %s", response.Status)
+	}
+
+	// Relative links must be resolved against the page we ended up at.
+	page := response.Request.URL
+
+	var feeds []string
+
+	z := html.NewTokenizer(response.Body)
+
+	for {
+		switch z.Next() {
+		case html.ErrorToken:
+			// io.EOF is how the tokenizer signals the end of the document.
+			if perr := z.Err(); !errors.Is(perr, io.EOF) {
+				return nil, fmt.Errorf("parsing HTML: %w", perr)
+			}
+			if len(feeds) == 0 {
+				return nil, ErrNoFeeds
+			}
+			return feeds, nil
+
+		case html.StartTagToken, html.SelfClosingTagToken:
+			tok := z.Token()
+			if tok.Data != "link" {
+				continue
+			}
+
+			isFeed := false
+			href := ""
+			for _, attr := range tok.Attr {
+				switch attr.Key {
+				case "type":
+					isFeed = isFeed || isFeedType(attr.Val)
+				case "href":
+					href = strings.TrimSpace(attr.Val)
+				}
+			}
+			if !isFeed || href == "" {
+				continue
+			}
+
+			// Skip a link we cannot parse, rather than emitting garbage.
+			ref, perr := url.Parse(href)
+			if perr != nil {
+				continue
+			}
+			feeds = append(feeds, page.ResolveReference(ref).String())
+		}
+	}
+}
+
+// Execute is invoked if the user specifies `feeds` as the subcommand.
+//
+// Each discovered feed is printed on its own line.  It returns 1 for a
+// usage or fetch error, otherwise 0 - a page without any feeds is reported
+// to the user, but is not treated as a failure.
+func (t *feedsCommand) Execute(args []string) int {
+
+	// Ensure we have only a single URL
+	if len(args) != 1 {
+		fmt.Printf("Usage: feeds URL\n")
+		return 1
+	}
+
+	// We'll default to https if the protocol isn't specified; doing that
+	// here means our messages show the URL which was really requested.
+	target := defaultToHTTPS(args[0])
+
+	out, err := t.FindFeeds(target)
+	switch {
+	case errors.Is(err, ErrNoFeeds):
+		fmt.Printf("No Feeds found in %s\n", target)
+	case err != nil:
+		fmt.Printf("Error processing %s: %s\n", target, err)
+		return 1
+	default:
+		for _, x := range out {
+			fmt.Printf("%s\n", x)
+		}
+	}
+
+	return 0
+}
diff --git a/cmd_http_get.go b/cmd_http_get.go
index 9f2b2bf..f8b3e45 100644
--- a/cmd_http_get.go
+++ b/cmd_http_get.go
@@ -6,6 +6,7 @@ import (
 	"io"
 	"net/http"
 	"sort"
+	"strings"
 )
 
 // Structure for our options and state.
@@ -51,10 +52,18 @@ func (hg *httpGetCommand) Execute(args []string) int {
 		return 1
 	}
 
+	// The URL
+	url := args[0]
+
+	// We'll default to https if the protocol isn't specified.
+	if l := strings.ToLower(url); !strings.HasPrefix(l, "http://") && !strings.HasPrefix(l, "https://") {
+		url = "https://" + url
+	}
+
 	// Make the request
-	response, err := http.Get(args[0])
+	response, err := http.Get(url)
 	if err != nil {
-		fmt.Printf("error: %s", err.Error())
+		fmt.Printf("error fetching %s: %s", url, err.Error())
 		return 1
 	}
 
diff --git a/main.go b/main.go
index a23b64a..473f77f 100644
--- a/main.go
+++ b/main.go
@@ -41,6 +41,7 @@ func main() {
 	subcommands.Register(&envTemplateCommand{})
 	subcommands.Register(&execSTDINCommand{})
 	subcommands.Register(&expectCommand{})
+	subcommands.Register(&feedsCommand{})
 	subcommands.Register(&findCommand{})
 	subcommands.Register(&fingerdCommand{})
 	subcommands.Register(&html2TextCommand{})