Skip to content

Commit 133b4e0

Browse files
committed
update
1 parent 74b8544 commit 133b4e0

File tree

8 files changed

+41
-23
lines changed

8 files changed

+41
-23
lines changed

importer.py

+4
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,4 @@
1+
from proxypool.importer import scan
2+
3+
if __name__ == '__main__':
4+
scan()

proxypool/crawler.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
ccimport json
1+
import json
22
import re
33
from .utils import get_page
44
from pyquery import PyQuery as pq
@@ -25,7 +25,7 @@ def get_proxies(self, callback):
2525
return proxies
2626

2727
def crawl_daxiang(self):
28-
url = 'http://vtp.daxiangdaili.com/ip/?tid=559363191592228&num=100&filter=on'
28+
url = 'http://vtp.daxiangdaili.com/ip/?tid=559363191592228&num=50&filter=on'
2929
html = get_page(url)
3030
if html:
3131
urls = html.split('\n')

proxypool/db.py

+14-6
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
from proxypool.setting import REDIS_HOST, REDIS_PORT, REDIS_PASSWORD, REDIS_KEY
44
from proxypool.setting import MAX_SCORE, MIN_SCORE, INITIAL_SCORE
55
from random import choice
6+
import re
67

78

89
class RedisClient(object):
@@ -22,6 +23,9 @@ def add(self, proxy, score=INITIAL_SCORE):
2223
:param score: 分数
2324
:return: 添加结果
2425
"""
26+
if not re.match('\d+\.\d+\.\d+\.\d+\:\d+', proxy):
27+
print('代理不符合规范', proxy, '丢弃')
28+
return
2529
if not self.db.zscore(REDIS_KEY, proxy):
2630
return self.db.zadd(REDIS_KEY, score, proxy)
2731

@@ -84,14 +88,18 @@ def all(self):
8488
:return: 全部代理列表
8589
"""
8690
return self.db.zrangebyscore(REDIS_KEY, MIN_SCORE, MAX_SCORE)
91+
92+
def batch(self, start, stop):
    """
    Fetch a slice of proxies from the sorted set, highest score first.

    The range is half-open: ranks ``start`` up to, but not including,
    ``stop`` are requested.

    :param start: rank of the first proxy to return
    :param stop: rank just past the last proxy to return
    :return: result of ``zrevrange`` for the inclusive ranks
             ``start`` .. ``stop - 1``; an empty list when ``stop`` is 0
    """
    # Redis rank ranges are inclusive and -1 means "last member", so a
    # stop of 0 would turn into -1 and return the whole set instead of
    # an empty slice.
    if stop == 0:
        return []
    return self.db.zrevrange(REDIS_KEY, start, stop - 1)
87100

88101

89102
if __name__ == '__main__':
90103
conn = RedisClient()
91-
result = conn.all()
104+
result = conn.batch(680, 688)
92105
print(result)
93-
random = conn.random()
94-
print('Random', random)
95-
top = conn.top()
96-
print('Top', top)
97-
conn.decrease('a')

proxypool/getter.py

+2-1
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
from proxypool.db import RedisClient
33
from proxypool.crawler import Crawler
44
from proxypool.setting import *
5-
5+
import sys
66

77
class Getter():
88
def __init__(self):
@@ -25,5 +25,6 @@ def run(self):
2525
callback = self.crawler.__CrawlFunc__[callback_label]
2626
# 获取代理
2727
proxies = self.crawler.get_proxies(callback)
28+
sys.stdout.flush()
2829
for proxy in proxies:
2930
self.redis.add(proxy)

proxypool/scheduler.py

+1
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
from proxypool.db import RedisClient
77
from proxypool.setting import *
88

9+
910
class Scheduler():
1011
def schedule_tester(self, cycle=TESTER_CYCLE):
1112
"""

proxypool/setting.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -37,4 +37,4 @@
3737
API_ENABLED = True
3838

3939
# 最大批测试量
40-
BATCH_TEST_SIZE = 100
40+
BATCH_TEST_SIZE = 10

proxypool/tester.py

+10-8
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,7 @@
11
import asyncio
22
import aiohttp
33
import time
4-
<<<<<<< HEAD
5-
=======
6-
7-
>>>>>>> dbb6bb89903b1cd158470ac472a5a930f7f0978b
4+
import sys
85
try:
96
from aiohttp import ClientError
107
except:
@@ -48,12 +45,17 @@ def run(self):
4845
"""
4946
print('测试器开始运行')
5047
try:
51-
proxies = self.redis.all()
52-
loop = asyncio.get_event_loop()
53-
for i in range(0, len(proxies), BATCH_TEST_SIZE):
54-
test_proxies = proxies[i:i + BATCH_TEST_SIZE]
48+
count = self.redis.count()
49+
print('当前剩余', count, '个代理')
50+
for i in range(0, count, BATCH_TEST_SIZE):
51+
start = i
52+
stop = min(i + BATCH_TEST_SIZE, count)
53+
print('正在测试第', start + 1, '-', stop, '个代理')
54+
test_proxies = self.redis.batch(start, stop)
55+
loop = asyncio.get_event_loop()
5556
tasks = [self.test_single_proxy(proxy) for proxy in test_proxies]
5657
loop.run_until_complete(asyncio.wait(tasks))
58+
sys.stdout.flush()
5759
time.sleep(5)
5860
except Exception as e:
5961
print('测试器发生错误', e.args)

run.py

+7-5
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,16 @@
11
from proxypool.scheduler import Scheduler
22
import sys
33
import io
4+
45
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
56

7+
68
def main():
    """
    Run the scheduler, starting a fresh one whenever it crashes.

    Returns once ``Scheduler().run()`` finishes without raising.
    """
    # Loop instead of calling main() again from the handler: each
    # recursive restart would add a stack frame and a long-lived process
    # would eventually die with RecursionError.
    while True:
        try:
            s = Scheduler()
            s.run()
        except Exception as e:
            # Catch Exception only, so KeyboardInterrupt / SystemExit can
            # still stop the process; a bare except would restart on Ctrl-C.
            print('Scheduler crashed, restarting', e.args)
            sys.stdout.flush()
        else:
            return
1214

1315

1416
if __name__ == '__main__':

0 commit comments

Comments
 (0)