
Commit af1ec3c

Author: jy
Commit message: Merge branch 'master'
2 parents: 556b320 + 1de4f95

6 files changed: +28 -20 lines


Dockerfile (+5 -3)

@@ -1,7 +1,9 @@
-FROM python:3.6
+FROM python:3.6-alpine
 WORKDIR /app
 COPY . .
-RUN pip install -r requirements.txt -i https://pypi.douban.com/simple
-# RUN pip install -r requirements.txt -i
+# RUN pip install -r requirements.txt -i https://pypi.douban.com/simple
+RUN apk add --no-cache libxml2-dev libxslt-dev gcc musl-dev && \
+    pip install -r requirements.txt && \
+    apk del gcc musl-dev libxml2-dev
 VOLUME ["/app/proxypool/crawlers/private"]
 CMD ["supervisord", "-c", "supervisord.conf"]

README.md (+11 -4)

@@ -14,15 +14,20 @@
 
 For an explanation of how the proxy pool works, see "[如何搭建一个高效的代理池](https://cuiqingcai.com/7048.html)" (How to Build an Efficient Proxy Pool); reading it before use is recommended.
 
-## Demo
+## Preparation
 
-A sample API Server deployment can be found [here](https://proxypool.scrape.center/), with a [random-proxy endpoint](https://proxypool.scrape.center/random); it has only a few proxy sources and is for demonstration only.
+First, clone the code and enter the ProxyPool folder:
 
-This sample is the result of automatically deploying the master branch with GitHub Actions + Kubernetes.
+```
+git clone https://github.com/Python3WebSpider/ProxyPool.git
+cd ProxyPool
+```
+
+Then follow either of the two methods below, Docker or the conventional way.
 
 ## Requirements
 
-The proxy pool can be run in two ways: with Docker (recommended) or in the conventional way
+The proxy pool can be run in two ways: with Docker (recommended) or in the conventional way. The requirements are as follows:
 
 ### Docker
 
@@ -31,6 +36,8 @@
 * Docker
 * Docker-Compose
 
+Installation instructions for both can be found with a quick search.
+
 ### Conventional Way
 
 The conventional way requires a Python environment and a Redis environment; the specific requirements are as follows:
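
The new Preparation section only covers cloning the repository; once the pool is running by either method, the API serves a random proxy on port 5555, as the deleted demo paragraph and examples/usage2.py both indicate. A small smoke test, assuming the default port and that the requests library is installed (it is not part of the pinned requirements shown here):

```
import requests

# Quick check after `docker-compose up` or a conventional start: the /random
# endpoint should return a single proxy in host:port form.
proxy = requests.get("http://localhost:5555/random").text.strip()
print("got proxy:", proxy)
```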

docker-compose.yml (+4 -5)

@@ -1,20 +1,19 @@
-version: '3'
+version: "3"
 services:
   redis4proxypool:
     image: redis:alpine
     container_name: redis4proxypool
-    command: redis-server
     ports:
-      - "6378:6379"
+      - "6374:6379"
     # restart: always
   proxypool:
     build: .
-    image: 'germey/proxypool'
+    image: "germey/proxypool"
     container_name: proxypool
     ports:
       - "5555:5555"
     restart: always
     # volumes:
     #   - proxypool/crawlers/private:/app/proxypool/crawlers/private
     environment:
-      REDIS_HOST: redis4proxypool
+      REDIS_HOST: redis4proxypool
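
The compose file passes REDIS_HOST=redis4proxypool so the proxypool container reaches Redis by service name over the compose network; the changed host mapping (6378 to 6374) only affects connections from the host machine, not traffic between the two containers. A sketch of how such a variable is typically consumed, assuming redis-py (pinned as redis==3.5.3 in requirements.txt); the fallback values are illustrative, not the project's actual defaults:

```
import os

import redis

# Inside the container the service name "redis4proxypool" resolves via the
# compose network; from the host you would connect to localhost:6374 instead.
REDIS_HOST = os.environ.get("REDIS_HOST", "localhost")
REDIS_PORT = int(os.environ.get("REDIS_PORT", 6379))

client = redis.StrictRedis(host=REDIS_HOST, port=REDIS_PORT, decode_responses=True)
print(client.ping())  # True if Redis is reachable
```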

examples/usage2.py (+5 -5)

@@ -17,7 +17,7 @@ def getChinaIP(ip='127.0.0.1'):
     reader = geolite2.reader()
     ip_info = reader.get(ip)
     geolite2.close()
-    # print(ip_info)
+    print(ip_info)
     return True if ip_info['country']['iso_code'] == 'CN' else False
 
 
@@ -32,8 +32,8 @@ def run(self):
         pure_ip_address = self.proxyip.split(':')[0]
         # verify which country the IP belongs to
         if not getChinaIP(pure_ip_address):
-            pass
-            # raise ValueError('不是有效IP')
+            # pass
+            raise ValueError('不是有效IP')
         #
         start = time.time()
         # suppress the warning about disabled certificate verification
@@ -88,8 +88,8 @@ def run(self):
     # apiUrl = "http://127.0.0.1:5555/all"
     apiUrl = "http://127.0.0.1:5555/random"
     # target website URL to crawl
-    # targetUrl = "http://bb.cf08tp.cn/Home/index.php?m=Index&a=vote&vid=335688&id=2676&tp="
-    targetUrl = 'http://www.so.com'
+    targetUrl = "http://bb.cf08tp.cn/Home/index.php?m=Index&a=vote&vid=335688&id=2676&tp="
+    # targetUrl = 'http://bb.cf08tp.cn/Home/index.php?m=Index&a=vote&vid=335608&id=2676&tp='
     fetchSecond = 5
     # start fetching IPs automatically
     GetIpThread(fetchSecond).start()
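
This change re-enables the ValueError for proxies whose GeoLite2 lookup is not CN and switches targetUrl back to the voting page, so non-Chinese proxies now abort the worker instead of being silently accepted. A condensed sketch of that gate, assuming maxminddb_geolite2 from requirements.txt; is_china_ip mirrors the example's getChinaIP helper rather than reproducing it line for line:

```
from geolite2 import geolite2

def is_china_ip(ip):
    # Mirror of the example's getChinaIP(): look the address up in the bundled
    # GeoLite2 database and accept only proxies that geolocate to CN.
    reader = geolite2.reader()
    info = reader.get(ip)
    geolite2.close()
    return bool(info) and info.get("country", {}).get("iso_code") == "CN"

proxy = "1.2.3.4:8080"  # illustrative value; usage2.py pulls this from the pool API
if not is_china_ip(proxy.split(":")[0]):
    raise ValueError("proxy is not a CN address")  # same effect as the re-enabled raise
```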

proxypool/processors/tester.py (+2 -2)

@@ -85,8 +85,8 @@ def run(self):
             break
 
 
 def run_tester():
-    host = '111.246.42.52'
-    port = '8888'
+    host = '96.113.165.182'
+    port = '3128'
     tasks = [tester.test(Proxy(host=host, port=port))]
     tester.loop.run_until_complete(asyncio.wait(tasks))
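
run_tester() appears to be a manual debugging entry point: it hard-codes a single proxy (now 96.113.165.182:3128) and runs the Tester against it, so the new address is just whichever public proxy was being checked, not configuration. An equivalent one-off check without the project's event-loop plumbing, assuming the requests library and a stand-in test URL rather than the tester's configured target:

```
import requests

def check_proxy(host, port, test_url="https://www.baidu.com"):
    # Fetch a page through the proxy and report the status code, in the same
    # spirit as run_tester(); test_url is an assumption, not the project's
    # configured test target.
    address = "http://{}:{}".format(host, port)
    proxies = {"http": address, "https": address}
    try:
        return requests.get(test_url, proxies=proxies, timeout=10).status_code
    except requests.RequestException as exc:
        return "failed: {!r}".format(exc)

print(check_proxy("96.113.165.182", "3128"))
```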

requirements.txt (+1 -1)

@@ -8,6 +8,6 @@ loguru==0.5.3
 pyquery==1.4.3
 supervisor==4.2.1
 redis==3.5.3
-lxml==4.6.2
+lxml==4.6.3
 fake_headers==1.0.2
 maxminddb_geolite2==2018.703
