6 files changed: 28 additions, 20 deletions
Dockerfile:

-FROM python:3.6
+FROM python:3.6-alpine
 WORKDIR /app
 COPY . .
-RUN pip install -r requirements.txt -i https://pypi.douban.com/simple
-# RUN pip install -r requirements.txt -i
+# RUN pip install -r requirements.txt -i https://pypi.douban.com/simple
+RUN apk add --no-cache libxml2-dev libxslt-dev gcc musl-dev && \
+    pip install -r requirements.txt && \
+    apk del gcc musl-dev libxml2-dev
 VOLUME ["/app/proxypool/crawlers/private"]
 CMD ["supervisord", "-c", "supervisord.conf"]
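Switching the base image to python:3.6-alpine means lxml is compiled from source during the build, which is what the added gcc/musl-dev packages and the libxml2/libxslt headers are for; the compiler packages are removed again afterwards to keep the image small. A minimal sanity check, assuming you run it inside the built image, could look like the sketch below (illustrative only, not part of the project):

```
# sanity_check.py - illustrative only: run inside the built image to confirm
# that lxml compiled correctly and can still load its shared libraries after
# the build-only packages were removed.
from lxml import etree

doc = etree.fromstring("<status>ok</status>")
print(doc.tag, "lxml", ".".join(str(part) for part in etree.LXML_VERSION))
```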
README:

 For an explanation of how the proxy pool works, see [How to Build an Efficient Proxy Pool](https://cuiqingcai.com/7048.html); reading it before use is recommended.

-## Demo
+## Preparation

-The API Server can be seen at the [deployment sample](https://proxypool.scrape.center/), and a random proxy can be fetched from this [endpoint](https://proxypool.scrape.center/random); it has relatively few proxy sources and is for demonstration only.
+First, clone the code and enter the ProxyPool folder:

-This sample is the master branch deployed automatically via GitHub Actions + Kubernetes.
+```
+git clone https://github.com/Python3WebSpider/ProxyPool.git
+cd ProxyPool
+```
+
+Then run it using either of the two methods below, Docker or the conventional way.

 ## Requirements

-The proxy pool can be run in two ways: with Docker (recommended) or in the conventional way.
+The proxy pool can be run in two ways: with Docker (recommended) or in the conventional way. The requirements are as follows:

 ### Docker

@@ -31,6 +36,8 @@
 * Docker
 * Docker-Compose

+Installation instructions for these can easily be found online.
+
 ### Conventional Way

 The conventional way requires a Python environment and a Redis environment; the specific requirements are as follows:
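Once the pool is running by either method, a client simply pulls a proxy from the HTTP API and routes requests through it. A minimal sketch, assuming the pool is listening locally on port 5555 (as in docker-compose.yml) and serving the /random endpoint used by the example script later in this diff; httpbin.org is an arbitrary test target:

```
import requests

PROXY_POOL_URL = 'http://127.0.0.1:5555/random'


def get_random_proxy():
    """Ask the pool for one proxy in host:port form."""
    return requests.get(PROXY_POOL_URL).text.strip()


def fetch(url):
    """Fetch a URL through a randomly chosen proxy from the pool."""
    proxy = get_random_proxy()
    proxies = {'http': 'http://' + proxy, 'https': 'http://' + proxy}
    return requests.get(url, proxies=proxies, timeout=10)


if __name__ == '__main__':
    print(fetch('https://httpbin.org/ip').text)
```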
docker-compose.yml:

-version: '3'
+version: "3"
 services:
   redis4proxypool:
     image: redis:alpine
     container_name: redis4proxypool
-    command: redis-server
     ports:
-      - "6378:6379"
+      - "6374:6379"
     # restart: always
   proxypool:
     build: .
-    image: 'germey/proxypool'
+    image: "germey/proxypool"
     container_name: proxypool
     ports:
       - "5555:5555"
     restart: always
     # volumes:
     #   - proxypool/crawlers/private:/app/proxypool/crawlers/private
     environment:
-      REDIS_HOST: redis4proxypool
+      REDIS_HOST: redis4proxypool
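With this change the Redis container is published on host port 6374 (container port 6379), while the proxypool container reaches it inside the Compose network via REDIS_HOST=redis4proxypool. A quick connectivity check from the host machine might look like this sketch, using the redis package already pinned in requirements.txt (the key name below is hypothetical, purely for illustration):

```
import redis

# Connect through the published host port; inside the Compose network the
# proxypool service instead uses host "redis4proxypool" on the default port 6379.
r = redis.StrictRedis(host='127.0.0.1', port=6374, decode_responses=True)

print(r.ping())                       # True if the Redis container is reachable
print(r.zcard('proxies:universal'))   # hypothetical key name, for illustration only
```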
Python example script:

@@ -17,7 +17,7 @@ def getChinaIP(ip='127.0.0.1'):
     reader = geolite2.reader()
     ip_info = reader.get(ip)
     geolite2.close()
-    # print(ip_info)
+    print(ip_info)
     return True if ip_info['country']['iso_code'] == 'CN' else False


@@ -32,8 +32,8 @@ def run(self):
         pure_ip_address = self.proxyip.split(':')[0]
         # verify where the IP is registered
         if not getChinaIP(pure_ip_address):
-            pass
-            # raise ValueError('Not a valid IP')
+            # pass
+            raise ValueError('Not a valid IP')
         #
         start = time.time()
         # suppress the warning about disabling certificate verification
@@ -88,8 +88,8 @@ def run(self):
     # apiUrl = "http://127.0.0.1:5555/all"
     apiUrl = "http://127.0.0.1:5555/random"
     # target website URL to crawl
-    # targetUrl = "http://bb.cf08tp.cn/Home/index.php?m=Index&a=vote&vid=335688&id=2676&tp="
-    targetUrl = 'http://www.so.com'
+    targetUrl = "http://bb.cf08tp.cn/Home/index.php?m=Index&a=vote&vid=335688&id=2676&tp="
+    # targetUrl = 'http://bb.cf08tp.cn/Home/index.php?m=Index&a=vote&vid=335608&id=2676&tp='
     fetchSecond = 5
     # start fetching IPs automatically
     GetIpThread(fetchSecond).start()
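getChinaIP looks the address up in the bundled GeoLite2 snapshot, and the change above now raises instead of silently passing for non-Chinese IPs. Note that reader.get() returns None for addresses the snapshot has no record for, which the return line above would turn into a TypeError; a more defensive version of the same lookup could look like this sketch (not the project's code):

```
from geolite2 import geolite2  # provided by maxminddb_geolite2 (see requirements.txt)


def is_china_ip(ip):
    """Same idea as getChinaIP above, but tolerant of addresses the free
    GeoLite2 snapshot has no record for (reader.get() returns None then)."""
    reader = geolite2.reader()
    try:
        info = reader.get(ip)
    finally:
        geolite2.close()
    if not info or 'country' not in info:
        return False
    return info['country'].get('iso_code') == 'CN'


print(is_china_ip('114.114.114.114'))  # a well-known public DNS server located in China
```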
Python tester script:

@@ -85,8 +85,8 @@ def run(self):
                break


def run_tester():
-    host = '111.246.42.52'
-    port = '8888'
+    host = '96.113.165.182'
+    port = '3128'
     tasks = [tester.test(Proxy(host=host, port=port))]
     tester.loop.run_until_complete(asyncio.wait(tasks))
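run_tester() above pushes one hardcoded host/port through the project's async Tester. When all you want is a quick look at whether a particular proxy answers at all, a plain synchronous check is often enough; the sketch below uses requests against an arbitrary test URL and is not a substitute for the Tester's own validation logic:

```
import requests


def check_proxy(host, port, test_url='https://httpbin.org/ip', timeout=10):
    """Rough one-off check of a single proxy, e.g. before hardcoding it
    into run_tester(); returns True if the request goes through."""
    address = 'http://{}:{}'.format(host, port)
    try:
        response = requests.get(test_url,
                                proxies={'http': address, 'https': address},
                                timeout=timeout)
        return response.status_code == 200
    except requests.RequestException:
        return False


print(check_proxy('96.113.165.182', '3128'))
```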
requirements.txt:

@@ -8,6 +8,6 @@ loguru==0.5.3
 pyquery==1.4.3
 supervisor==4.2.1
 redis==3.5.3
-lxml==4.6.2
+lxml==4.6.3
 fake_headers==1.0.2
 maxminddb_geolite2==2018.703