-
Notifications
You must be signed in to change notification settings - Fork 2.1k
/
Copy pathgetter.py
48 lines (41 loc) · 1.22 KB
/
getter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
from loguru import logger
from proxypool.storages.redis import RedisClient
from proxypool.setting import PROXY_NUMBER_MAX
from proxypool.crawlers import __all__ as crawlers_cls
class Getter(object):
    """
    Collect proxies from every registered crawler and save them to storage.

    The crawler classes come from ``proxypool.crawlers.__all__``; one instance
    of each is created when the getter is constructed.
    """

    def __init__(self):
        """
        Create the storage client and instantiate every registered crawler.
        """
        self.redis = RedisClient()
        self.crawlers_cls = crawlers_cls
        self.crawlers = [crawler_cls() for crawler_cls in self.crawlers_cls]

    def is_full(self) -> bool:
        """
        Tell whether the pool already holds the maximum number of proxies.

        :return: True when the stored proxy count is at least PROXY_NUMBER_MAX
        """
        return self.redis.count() >= PROXY_NUMBER_MAX

    @logger.catch
    def run(self) -> None:
        """
        Run each crawler once and add every proxy it returns to storage.

        Nothing is crawled when the pool is already full; the capacity check
        is made once, before the first crawler runs. A crawler that raises is
        logged and skipped so the remaining crawlers still get their turn.
        Any other error (e.g. from the storage client) is still handled by
        the ``logger.catch`` decorator on this method.

        :return: None
        """
        if self.is_full():
            return
        for crawler in self.crawlers:
            logger.info(f'crawler {crawler} to get proxy')
            try:
                proxies = crawler.run()
            except Exception:
                # Isolate the failure: without this, the decorator would be
                # the only boundary and one broken crawler would end the run.
                logger.exception(f'crawler {crawler} failed, skipping it')
                continue
            # NOTE(review): assumes crawler.run() returns a sized collection
            # (len() is called on it below) or a falsy value — confirm
            # against the crawler base class.
            if proxies:
                for proxy in proxies:
                    self.redis.add(proxy)
                logger.info(f'crawled {len(proxies)} proxies from {crawler}')
            else:
                logger.debug(f'cannot crawl proxies from {crawler}')
if __name__ == '__main__':
    # Script entry point: build a getter and perform a single crawl pass.
    Getter().run()