-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmain.go
133 lines (107 loc) · 2.48 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
package ipscraper
import (
"fmt"
"io/ioutil"
"log"
"net/http"
"net/url"
"strings"
"sync"
"time"
"golang.org/x/net/context"
"golang.org/x/sync/semaphore"
)
const (
	// timeout bounds each proxy-validation HTTP request made by checkIP.
	timeout = 5 * time.Second
	// maxGoroutines caps the number of proxy checks running concurrently
	// (enforced by the weighted semaphore in Scraper).
	maxGoroutines = 32
)
// Provider is a source of proxy addresses.
type Provider interface {
	// Get returns the provider's current list of proxy URLs, or an
	// error if the source cannot be fetched or parsed.
	Get() ([]string, error)
}
// New returns a Scraper wired with the default set of proxy-list
// providers and a semaphore limiting concurrent checks to maxGoroutines.
func New() *Scraper {
	defaultProviders := []Provider{
		NewFreeProxyList(),
		NewOpenProxyList(),
		NewGeonodeList(),
		NewPlainProxyList("https://raw.githubusercontent.com/clarketm/proxy-list/master/proxy-list-raw.txt"),
		NewPlainProxyList("https://api.proxyscrape.com/v2/?request=getproxies&protocol=http&timeout=5000&country=all&ssl=all&anonymity=all&simplified=true"),
	}
	return &Scraper{
		providers: defaultProviders,
		sem:       semaphore.NewWeighted(maxGoroutines),
		wg:        &sync.WaitGroup{},
		cache:     &sync.Map{},
	}
}
// Scraper aggregates proxy lists from multiple providers and filters
// them down to the proxies that actually route traffic.
type Scraper struct {
	providers []Provider          // proxy-list sources queried by Get
	sem       *semaphore.Weighted // bounds concurrent proxy checks to maxGoroutines
	wg        *sync.WaitGroup     // tracks in-flight check goroutines
	cache     *sync.Map           // proxy URL -> bool; memoizes check results across duplicate entries
}
// Get collects proxies from every provider, validates each one
// concurrently (at most maxGoroutines at a time), and returns the
// subset that successfully routed a request. It fails fast if any
// provider cannot be fetched, and invalid or failing proxies are
// logged and skipped rather than reported as errors.
func (s *Scraper) Get() ([]string, error) {
	var allProxies []string
	for i := range s.providers {
		proxies, err := s.providers[i].Get()
		if err != nil {
			return nil, fmt.Errorf("provider get: %w", err)
		}
		allProxies = append(allProxies, proxies...)
	}
	// Buffered to the maximum possible result count so workers never
	// block on send; closed only after every worker has finished.
	res := make(chan string, len(allProxies))
	for i := range allProxies {
		// Acquire before spawning so the semaphore actually bounds
		// concurrency. Fix: the error was previously ignored (TODO in
		// the original); on failure, drain already-started workers and
		// surface the error instead of proceeding with a broken bound.
		if err := s.sem.Acquire(context.Background(), 1); err != nil {
			s.wg.Wait()
			close(res)
			return nil, fmt.Errorf("acquire semaphore: %w", err)
		}
		s.wg.Add(1)
		go func(ip string) {
			defer s.wg.Done()
			defer s.sem.Release(1)
			val, err := s.checkIPWithCache(ip)
			if err != nil {
				log.Printf("check IP with cache: %v\n", err)
				return
			}
			if val {
				res <- ip
			}
		}(allProxies[i])
	}
	s.wg.Wait()
	close(res)
	var result []string
	for ip := range res {
		result = append(result, ip)
	}
	return result, nil
}
// checkIPWithCache validates ip, memoizing the outcome in s.cache so a
// proxy that appears in several source lists is only probed once.
func (s *Scraper) checkIPWithCache(ip string) (bool, error) {
	if cached, hit := s.cache.Load(ip); hit {
		return cached.(bool), nil
	}
	valid, err := checkIP(ip)
	if err != nil {
		return valid, fmt.Errorf("check IP: %w", err)
	}
	s.cache.Store(ip, valid)
	return valid, nil
}
// checkIP reports whether the proxy at ip works: it fetches
// api.myip.com through the proxy and checks that the echoed address
// contains the proxy's host, meaning the request was actually routed.
func checkIP(ip string) (bool, error) {
	const myIp = "https://api.myip.com"
	proxyURL, err := url.Parse(ip)
	if err != nil {
		return false, err
	}
	client := &http.Client{
		Timeout: timeout,
		Transport: &http.Transport{
			Proxy: http.ProxyURL(proxyURL),
		},
	}
	resp, err := client.Get(myIp)
	if err != nil {
		return false, fmt.Errorf("http get: %w", err)
	}
	// Fix: the body was never closed, leaking connections and file
	// descriptors under the concurrent caller in Scraper.Get.
	defer resp.Body.Close()
	jsonResp, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return false, fmt.Errorf("read all: %w", err)
	}
	// Strip any :port suffix before comparing against the echoed IP.
	host := strings.Split(proxyURL.Host, ":")[0]
	return strings.Contains(string(jsonResp), host), nil
}