use faster uniparser download adapter
ClericPy committed Apr 18, 2020
1 parent 97c88e7 commit ad52ecf
Showing 3 changed files with 15 additions and 12 deletions.
4 changes: 2 additions & 2 deletions requirements.txt
@@ -1,8 +1,8 @@
-uniparser>=1.3.7
+uniparser>=1.3.9
 fastapi
 uvicorn
 databases
-torequests>=4.9.11
+torequests>=5.0.0
 fire
 jinja2
 aiofiles
2 changes: 1 addition & 1 deletion watchdogs/__init__.py
@@ -3,6 +3,6 @@
 from .config import Config
 from .main import init_app
 
-__version__ = '1.6.6'
+__version__ = '1.6.7'
 __all__ = ['Config', 'init_app']
 logging.getLogger('watchdogs').addHandler(logging.NullHandler())
21 changes: 12 additions & 9 deletions watchdogs/settings.py
@@ -5,7 +5,9 @@
 from json import dumps, loads
 from logging.handlers import RotatingFileHandler
 
+import uniparser.fastapi_ui
 from uniparser.parsers import AsyncFrequency, UDFParser, Uniparser
+from uniparser.utils import TorequestsAiohttpAsyncAdapter
 
 from .background import background_loop, db_backup_handler
 from .callbacks import CallbackHandler
@@ -86,11 +88,12 @@ def setup_models():

 async def setup_uniparser():
     from uniparser.config import GlobalConfig
-    from torequests.utils import (
-        curlparse, escape, guess_interval, itertools_chain, json, parse_qs,
-        parse_qsl, ptime, quote, quote_plus, slice_by_size, slice_into_pieces,
-        split_n, timeago, ttime, unescape, unique, unquote, unquote_plus,
-        urljoin, urlparse, urlsplit, urlunparse)
+    from torequests.utils import (curlparse, escape, guess_interval,
+                                  itertools_chain, json, parse_qs, parse_qsl,
+                                  ptime, quote, quote_plus, slice_by_size,
+                                  slice_into_pieces, split_n, timeago, ttime,
+                                  unescape, unique, unquote, unquote_plus,
+                                  urljoin, urlparse, urlsplit, urlunparse)
     UDFParser._GLOBALS_ARGS.update({
         'curlparse': curlparse,
         'escape': escape,
@@ -120,6 +123,9 @@ async def setup_uniparser():
     Uniparser._DEFAULT_ASYNC_FREQUENCY = AsyncFrequency(
         *Config.DEFAULT_HOST_FREQUENCY)
     await load_host_freqs()
+    Config.uniparser = Uniparser(
+        request_adapter=TorequestsAiohttpAsyncAdapter())
+    uniparser.fastapi_ui.views.uni = Config.uniparser
 
 
 def setup_cdn_urls(use_default_cdn=False):
@@ -186,10 +192,7 @@ async def setup_md5_salt():
 async def setup_crawler():
     from uniparser import Crawler
 
-    crawler = Crawler(storage=Config.rule_db)
-    Config.logger.info(
-        f'Downloader middleware installed: {crawler.uniparser.ensure_adapter(False).__class__.__name__}'
-    )
+    crawler = Crawler(uniparser=Config.uniparser, storage=Config.rule_db)
     Config.crawler = crawler
     if Config.callback_handler is None:
         Config.callback_handler = CallbackHandler()
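The heart of the change is in watchdogs/settings.py: instead of letting Crawler build its own Uniparser with the default requests-based adapter (and merely logging which adapter got installed), setup_uniparser now constructs one shared Uniparser backed by the aiohttp-based TorequestsAiohttpAsyncAdapter, stores it on Config, points the bundled uniparser.fastapi_ui views at it, and setup_crawler reuses that same instance. Below is a minimal sketch of that wiring, using only the names visible in the diff; the bare Config class is a stand-in for watchdogs' real Config, and the Crawler/rule_db hookup is left as a comment because it needs the project's database-backed rule storage.

# Sketch of the shared-adapter wiring introduced in this commit (not the real watchdogs code).
import uniparser.fastapi_ui
from uniparser.parsers import Uniparser
from uniparser.utils import TorequestsAiohttpAsyncAdapter


class Config:
    """Stand-in for watchdogs.config.Config: holds the single shared Uniparser."""
    uniparser = None


def setup_uniparser_sketch():
    # One Uniparser with the aiohttp-based download adapter instead of the
    # default requests-based one.
    Config.uniparser = Uniparser(request_adapter=TorequestsAiohttpAsyncAdapter())
    # Point the bundled uniparser FastAPI UI at the same instance.
    uniparser.fastapi_ui.views.uni = Config.uniparser
    # In watchdogs, setup_crawler() later reuses it as well:
    #     Crawler(uniparser=Config.uniparser, storage=Config.rule_db)
    # so the rule-test UI and the crawler share one downloader.


if __name__ == '__main__':
    setup_uniparser_sketch()
    print(Config.uniparser is uniparser.fastapi_ui.views.uni)  # True: one shared instance

The requirements.txt bumps above (uniparser>=1.3.9, torequests>=5.0.0) presumably track the first versions that ship this adapter and accept it via the request_adapter argument.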
