File tree 6 files changed +28
-20
lines changed
6 files changed +28
-20
lines changed Original file line number Diff line number Diff line change 1
- FROM python:3.6
1
+ FROM python:3.6-alpine
2
2
WORKDIR /app
3
3
COPY . .
4
- RUN pip install -r requirements.txt -i https://pypi.douban.com/simple
5
- # RUN pip install -r requirements.txt -i
4
+ # RUN pip install -r requirements.txt -i https://pypi.douban.com/simple
5
+ RUN apk add --no-cache libxml2-dev libxslt-dev gcc musl-dev && \
6
+ pip install -r requirements.txt && \
7
+ apk del gcc musl-dev libxml2-dev
6
8
VOLUME ["/app/proxypool/crawlers/private" ]
7
9
CMD ["supervisord" , "-c" , "supervisord.conf" ]
Original file line number Diff line number Diff line change 14
14
15
15
代理池原理解析可见「[如何搭建一个高效的代理池](https://cuiqingcai.com/7048.html)」,建议使用之前阅读。
16
16
17
- ## 运行示例
17
+ ## 使用准备
18
18
19
- API Server 可以见[部署样例](https://proxypool.scrape.center/),随机代理[取用地址](https://proxypool.scrape.center/random),代理源比较少,仅供演示。
19
+ 首先当然是克隆代码并进入 ProxyPool 文件夹:
20
20
21
- 本样例为 GitHub Actions + Kubernetes 自动部署 master 分支代码结果。
21
+ ```
22
+ git clone https://github.com/Python3WebSpider/ProxyPool.git
23
+ cd ProxyPool
24
+ ```
25
+
26
+ 然后选用下面 Docker 和常规方式任意一个执行即可。
22
27
23
28
## 使用要求
24
29
25
- 可以通过两种方式来运行代理池,一种方式是使用 Docker(推荐),另一种方式是常规方式运行。
30
+ 可以通过两种方式来运行代理池,一种方式是使用 Docker(推荐),另一种方式是常规方式运行,要求如下:
26
31
27
32
### Docker
28
33
@@ -31,6 +36,8 @@ API Server 可以见[部署样例](https://proxypool.scrape.center/),随机代
31
36
* Docker
32
37
* Docker-Compose
33
38
39
+ 安装方法自行搜索即可。
40
+
34
41
### 常规方式
35
42
36
43
常规方式要求有 Python 环境、Redis 环境,具体要求如下:
Original file line number Diff line number Diff line change 1
- version : ' 3 '
1
+ version : " 3 "
2
2
services :
3
3
redis4proxypool :
4
4
image : redis:alpine
5
5
container_name : redis4proxypool
6
- command : redis-server
7
6
ports :
8
- - " 6378 :6379"
7
+ - " 6374 :6379"
9
8
# restart: always
10
9
proxypool :
11
10
build : .
12
- image : ' germey/proxypool'
11
+ image : " germey/proxypool"
13
12
container_name : proxypool
14
13
ports :
15
14
- " 5555:5555"
16
15
restart : always
17
16
# volumes:
18
17
# - proxypool/crawlers/private:/app/proxypool/crawlers/private
19
18
environment :
20
- REDIS_HOST : redis4proxypool
19
+ REDIS_HOST : redis4proxypool
Original file line number Diff line number Diff line change @@ -17,7 +17,7 @@ def getChinaIP(ip='127.0.0.1'):
17
17
reader = geolite2 .reader ()
18
18
ip_info = reader .get (ip )
19
19
geolite2 .close ()
20
- # print(ip_info)
20
+ print (ip_info )
21
21
return True if ip_info ['country' ]['iso_code' ] == 'CN' else False
22
22
23
23
@@ -32,8 +32,8 @@ def run(self):
32
32
pure_ip_address = self .proxyip .split (':' )[0 ]
33
33
# 验证IP归属
34
34
if not getChinaIP (pure_ip_address ):
35
- pass
36
- # raise ValueError('不是有效IP')
35
+ # pass
36
+ raise ValueError ('不是有效IP' )
37
37
#
38
38
start = time .time ()
39
39
# 消除关闭证书验证的警告
@@ -88,8 +88,8 @@ def run(self):
88
88
# apiUrl = "http://127.0.0.1:5555/all"
89
89
apiUrl = "http://127.0.0.1:5555/random"
90
90
# 要抓取的目标网站地址
91
- # targetUrl = "http://bb.cf08tp.cn/Home/index.php?m=Index&a=vote&vid=335688&id=2676&tp="
92
- targetUrl = 'http://www.so.com '
91
+ targetUrl = "http://bb.cf08tp.cn/Home/index.php?m=Index&a=vote&vid=335688&id=2676&tp="
92
+ # targetUrl = 'http://bb.cf08tp.cn/Home/index.php?m=Index&a=vote&vid=335608&id=2676&tp= '
93
93
fetchSecond = 5
94
94
# 开始自动获取IP
95
95
GetIpThread (fetchSecond ).start ()
Original file line number Diff line number Diff line change @@ -85,8 +85,8 @@ def run(self):
85
85
break
86
86
87
87
def run_tester ():
88
- host = '111.246.42.52 '
89
- port = '8888 '
88
+ host = '96.113.165.182 '
89
+ port = '3128 '
90
90
tasks = [tester .test (Proxy (host = host , port = port ))]
91
91
tester .loop .run_until_complete (asyncio .wait (tasks ))
92
92
Original file line number Diff line number Diff line change @@ -8,6 +8,6 @@ loguru==0.5.3
8
8
pyquery == 1.4.3
9
9
supervisor == 4.2.1
10
10
redis == 3.5.3
11
- lxml == 4.6.2
11
+ lxml == 4.6.3
12
12
fake_headers == 1.0.2
13
13
maxminddb_geolite2 == 2018.703
You can’t perform that action at this time.
0 commit comments