main.go
package main

import (
	"fmt"
	"log"

	"crawler/lib"
	"github.com/PuerkitoBio/goquery"
)

// crawl extracts HTTP(S) links from the given webpage and returns a status string.
func crawl(url string) string {
	// NewDocument fetches the URL and parses the response body.
	// (It is deprecated in newer goquery releases in favor of
	// NewDocumentFromReader, but works in the version used here.)
	doc, err := goquery.NewDocument(url)
	if err != nil {
		log.Fatal(err)
	}
	doc.Find(".box").Each(func(i int, s *goquery.Selection) {
		// For each matching item, get the link and its title.
		link, ok := s.Find("a").Attr("href")
		if ok {
			title := s.Find("span").Text()
			fmt.Printf("Review %d: %s - %s\n", i, link, title)
		}
	})
	/* Alternative: fetch the page manually and save it to disk.
	   (Requires the "io" and "net/http" imports.)
	resp, err := http.Get(url)
	if err != nil {
		fmt.Println("ERROR: Failed to crawl \"" + url + "\"")
		return "error"
	}
	defer resp.Body.Close() // close Body when the function returns
	log.Println("=====================================")
	bodyByte, _ := io.ReadAll(resp.Body)
	resStr := string(bodyByte)
	//log.Println(resStr)
	lib.WriteFile(resStr, url)
	*/
	return "done"
}
func main() {
	// 1. Extract the seed URL and download the HTML.
	seedUrl := "http://daily.zhihu.com/"
	//seedUrls := []string{"https://docs.docker.com/", "http://daily.zhihu.com/"}

	var strs string
	/*for _, url := range seedUrls {
		strs = crawl(url)
	}*/
	strs = crawl(seedUrl)
	log.Println(strs)

	// 2. Parse the HTML sample. Note that crawl currently returns only a
	// status string, so that is what gets passed to lib.Parse here.
	//s := `<p>Links:</p><ul><li><a href="foo">Foo</a><li><a href="/bar/baz">BarBaz</a></ul>`
	lib.Parse(strs)
}
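
The crawler/lib package is not included in this file. As a rough sketch of what it might look like, inferred only from the two call sites above (lib.Parse taking a string, lib.WriteFile taking content and a URL): the names, signatures, and behavior below are assumptions, not the actual package.

// lib/lib.go (hypothetical sketch, inferred from the call sites in main.go)
package lib

import (
	"fmt"
	"net/url"
	"os"
	"strings"
)

// WriteFile saves page content to a local file named after the URL's host.
func WriteFile(content, rawURL string) error {
	u, err := url.Parse(rawURL)
	if err != nil {
		return err
	}
	name := strings.ReplaceAll(u.Host, ".", "_") + ".html"
	return os.WriteFile(name, []byte(content), 0o644)
}

// Parse does a minimal scan of the input for href attributes and prints them.
func Parse(s string) {
	for _, chunk := range strings.Split(s, `href="`)[1:] {
		if end := strings.Index(chunk, `"`); end > 0 {
			fmt.Println(chunk[:end])
		}
	}
}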