|
5 | 5 | # @Url : 现在实现了极速HTTP的接口,官网地址:https://www.jisuhttp.com/?pl=mAKphQ&plan=ZY&kd=Yang
|
6 | 6 |
|
7 | 7 | import asyncio
|
| 8 | +import json |
8 | 9 | import os
|
9 | 10 | from abc import ABC, abstractmethod
|
10 | 11 | from typing import Dict, List, Optional
|
11 | 12 | from urllib.parse import urlencode
|
12 | 13 |
|
13 | 14 | import httpx
|
| 15 | +import redis |
14 | 16 | from pydantic import BaseModel, Field
|
15 | 17 |
|
| 18 | +import config |
16 | 19 | from tools import utils
|
17 | 20 |
|
18 | 21 |
|
@@ -41,71 +44,108 @@ async def get_proxies(self, num: int) -> List[Dict]:
|
41 | 44 | pass
|
42 | 45 |
|
43 | 46 |
|
class RedisDbIpCache:
    """Redis-backed cache of proxy IP records, stored one key per IP."""

    def __init__(self):
        # Connection settings are read from the project-level ``config`` module.
        self.redis_client = redis.Redis(host=config.REDIS_DB_HOST, password=config.REDIS_DB_PWD)

    def set_ip(self, ip_key: str, ip_value_info: str, ex: int):
        """
        Store one IP record with an expiry; Redis removes the key once it expires.

        :param ip_key: Redis key under which the record is stored
        :param ip_value_info: serialized IP record to store as the value
        :param ex: time-to-live in seconds
        :return: None
        """
        if ex is not None and ex <= 0:
            # Redis rejects a non-positive expire time on SET, and a record
            # that has already expired is not worth caching anyway.
            return
        self.redis_client.set(name=ip_key, value=ip_value_info, ex=ex)

    def load_all_ip(self, proxy_brand_name: str) -> List[IpInfoModel]:
        """
        Load every IP record of the given provider that is still present in Redis.

        Entries that cannot be read or parsed are logged and skipped so that
        one bad record does not hide the remaining valid ones.

        :param proxy_brand_name: provider name, used as the key prefix
        :return: list of parsed IP records (possibly empty)
        """
        all_ip_list: List[IpInfoModel] = []
        all_ip_keys = self.redis_client.keys(pattern=f"{proxy_brand_name}_*")
        for ip_key in all_ip_keys:
            try:
                ip_value = self.redis_client.get(ip_key)
                if not ip_value:
                    # The key may have expired between KEYS and GET.
                    continue
                all_ip_list.append(IpInfoModel(**json.loads(ip_value)))
            except Exception as e:
                # The key is deliberately not logged: callers in this file
                # embed the proxy user/password in it.
                utils.logger.error(f"[RedisDbIpCache.load_all_ip] get ip err from redis db, err: {e}")
        return all_ip_list
| 78 | + |
| 79 | + |
class JiSuHttpProxy(ProxyProvider):
    def __init__(self, key: str, crypto: str, time_validity_period: int):
        """
        JiSu HTTP proxy IP provider.
        Official site: https://www.jisuhttp.com/?pl=mAKphQ&plan=ZY&kd=Yang

        :param key: extraction key (obtained after registering on the official site)
        :param crypto: encryption signature (obtained after registering on the official site)
        :param time_validity_period: IP lifetime in minutes; 3, 5, 10, 15 and 30 are supported
        """
        self.proxy_brand_name = "JISUHTTP"
        self.api_path = "https://api.jisuhttp.com"
        self.params = {
            "key": key,
            "crypto": crypto,
            "time": time_validity_period,  # IP lifetime in minutes: 3, 5, 10, 15 or 30
            "type": "json",  # response format is JSON
            "port": "2",  # IP protocol: 1 = HTTP, 2 = HTTPS, 3 = SOCKS5
            "pw": "1",  # account/password auth: 1 = yes, 0 = no (whitelist auth); default 0
            "se": "1",  # include the IP expiry time in JSON responses: 1 = yes, 0 = no; default 0
        }
        self.ip_cache = RedisDbIpCache()

    async def get_proxies(self, num: int) -> List[IpInfoModel]:
        """
        Return proxy IPs, serving from the cache first and fetching only the shortfall.

        :param num: number of proxy IPs wanted
        :return: cached IPs followed by any newly fetched ones
        :raises IpGetError: when the provider API answers with a non-zero code
        """
        # Prefer IPs that are already cached.
        ip_cache_list = self.ip_cache.load_all_ip(proxy_brand_name=self.proxy_brand_name)
        if len(ip_cache_list) >= num:
            return ip_cache_list[:num]

        # Not enough cached IPs: fetch the missing ones from the provider and cache them.
        need_get_count = num - len(ip_cache_list)
        # Build per-request parameters instead of mutating the shared self.params.
        request_params = {**self.params, "num": need_get_count}
        ip_infos: List[IpInfoModel] = []
        async with httpx.AsyncClient() as client:
            url = self.api_path + "/fetchips" + '?' + urlencode(request_params)
            # NOTE(review): this URL carries the key and crypto values, so they end up in the log.
            utils.logger.info(f"[JiSuHttpProxy] get ip proxy url:{url}")
            response = await client.get(url, headers={
                "User-Agent": "MediaCrawler https://github.com/NanmiCoder/MediaCrawler"})
            res_dict: Dict = response.json()
            if res_dict.get("code") != 0:
                raise IpGetError(res_dict.get("msg", "unkown err"))

            # Tolerate a missing or null "data" field.
            data: List[Dict] = res_dict.get("data") or []
            current_ts = utils.get_unix_timestamp()
            for ip_item in data:
                ip_info_model = IpInfoModel(
                    ip=ip_item.get("ip"),
                    port=ip_item.get("port"),
                    user=ip_item.get("user"),
                    password=ip_item.get("pass"),
                    expired_time_ts=utils.get_unix_time_from_time_str(ip_item.get("expire"))
                )
                ip_infos.append(ip_info_model)

                # The key prefix must match the one load_all_ip() searches for.
                ip_key = (
                    f"{self.proxy_brand_name}_{ip_info_model.ip}_{ip_info_model.port}"
                    f"_{ip_info_model.user}_{ip_info_model.password}"
                )
                ip_value = ip_info_model.model_dump_json()
                ttl_seconds = ip_info_model.expired_time_ts - current_ts
                if ttl_seconds > 0:
                    # Redis rejects a non-positive expire time, so only cache
                    # IPs that still have lifetime left.
                    self.ip_cache.set_ip(ip_key, ip_value, ex=ttl_seconds)
        return ip_cache_list + ip_infos
99 | 140 |
|
100 | 141 |
|
# Module-level provider instance; credentials are taken from environment variables.
IpProxy = JiSuHttpProxy(
    key=os.getenv("jisu_key", ""),  # JiSu HTTP extraction key
    crypto=os.getenv("jisu_crypto", ""),  # JiSu HTTP encryption signature
    time_validity_period=30,  # 30 minutes (the longest supported lifetime)
)

if __name__ == '__main__':
    # Every extraction consumes IP quota, so run this sparingly.
    _ip_infos = asyncio.run(IpProxy.get_proxies(1))
    print(_ip_infos)
|
0 commit comments