From a0a10a25638bbf7d2119d11610ff7f6d0d1bd007 Mon Sep 17 00:00:00 2001
From: Linusp <linusp1024@gmail.com>
Date: Tue, 5 Dec 2023 09:32:41 +0800
Subject: [PATCH 1/5] use pyproject.toml instead of setup.py

---
 pyproject.toml | 35 +++++++++++++++++++++++++++++++++++
 setup.py       | 28 ----------------------------
 2 files changed, 35 insertions(+), 28 deletions(-)
 delete mode 100644 setup.py

diff --git a/pyproject.toml b/pyproject.toml
index 4198230..b4f3eb4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -25,3 +25,38 @@ ensure_newline_before_comments = true
 line_length = 100
 skip = [".ipython"]
 remove_redundant_aliases = true
+
+[project]
+name = "python-inoreader"
+version = "0.4.7"
+description = "Python wrapper of Inoreader API"
+authors = [
+    {name = "Linusp", email = "linusp1024@gmail.com"},
+]
+dependencies = [
+    "lxml",
+    "requests",
+    "PyYAML",
+    "click",
+    "requests-oauthlib",
+    "flask",
+    "tabulate",
+]
+requires-python = ">=3.6"
+readme = "README.md"
+license = {text = "MIT"}
+classifiers = [
+    "License :: OSI Approved :: MIT License",
+    "Programming Language :: Python",
+    "Programming Language :: Python :: 3",
+]
+
+[build-system]
+requires = ["setuptools", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project.scripts]
+inoreader = "inoreader.main:main"
+
+[project.urls]
+Homepage = "https://github.com/Linusp/python-inoreader"
diff --git a/setup.py b/setup.py
deleted file mode 100644
index baa9b4c..0000000
--- a/setup.py
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-
-from setuptools import find_packages, setup
-
-VERSION = '0.5.0'
-REQS = [
-    'lxml',
-    'requests',
-    'PyYAML',
-    'click',
-    'requests-oauthlib',
-    'flask',
-    'tabulate',
-]
-
-
-setup(
-    name='inoreader',
-    version=VERSION,
-    description='Python wrapper of Inoreader API',
-    license='MIT',
-    packages=find_packages(),
-    install_requires=REQS,
-    include_package_data=True,
-    zip_safe=False,
-    entry_points={'console_scripts': ['inoreader=inoreader.main:main']},
-)

From 81b5a1eaa74c7b04c957da3e02d289f8cd9d3bfd Mon Sep 17 00:00:00 2001
From: Linusp <linusp1024@gmail.com>
Date: Fri, 22 Mar 2024 18:06:21 +0800
Subject: [PATCH 2/5] replace black/isort/flake8 with ruff

---
 .pre-commit-config.yaml | 26 +++-----------
 Makefile                |  8 ++++-
 pyproject.toml          | 70 ++++++++++++++++++++++++-----------------
 setup.cfg               |  7 -----
 4 files changed, 54 insertions(+), 57 deletions(-)
 delete mode 100644 setup.cfg

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 081527c..e53cc9d 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -26,28 +26,12 @@ repos:
             ^.pytest_cache/
         )

-  - repo: https://github.com/PyCQA/isort
-    rev: 5.12.0
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.3.3
     hooks:
-      - id: isort
-
-  - repo: https://github.com/psf/black
-    rev: 22.6.0
-    hooks:
-      - id: black
-
-  - repo: https://github.com/PyCQA/flake8
-    rev: 6.1.0
-    hooks:
-      - id: flake8
-        additional_dependencies:
-          - setuptools
-          - flake8-bugbear
-          - flake8-comprehensions
-          - flake8-debugger
-          - flake8-logging-format
-          - flake8-pytest-style
-          - flake8-tidy-imports
+      - id: ruff
+        args: [ --fix ]
+      - id: ruff-format

   - repo: https://github.com/codespell-project/codespell
     rev: v2.1.0
diff --git a/Makefile b/Makefile
index 223f4e9..1d8c0d8 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,11 @@
 lint: clean
-	flake8 inoreader --format=pylint
+	- pip install ruff codespell -q
+	- ruff check inoreader/
+	- codespell
+
+format:
+	- pip install ruff -q
+	- ruff format inoreader/

 clean:
 	- find . 
-iname "*__pycache__" | xargs rm -rf

diff --git a/pyproject.toml b/pyproject.toml
index b4f3eb4..7cd24bc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,31 +1,3 @@
-[tool.black]
-line-length = 100
-skip-string-normalization = true
-extend-exclude = '''
-/(
-  # exclude migrations
-  | migrations
-)/
-'''
-
-[tool.coverage.report]
-exclude_lines = [
-    "pragma: no cover",
-    "def __repr__",
-    "if typing.TYPE_CHECKING:",
-    "if TYPE_CHECKING:",
-]
-
-[tool.isort]
-multi_line_output = 3
-include_trailing_comma = true
-force_grid_wrap = 0
-use_parentheses = true
-ensure_newline_before_comments = true
-line_length = 100
-skip = [".ipython"]
-remove_redundant_aliases = true
-
 [project]
 name = "python-inoreader"
 version = "0.4.7"
@@ -60,3 +32,45 @@ inoreader = "inoreader.main:main"

 [project.urls]
 Homepage = "https://github.com/Linusp/python-inoreader"
+
+[tool.codespell]
+skip = "*.lock,./.tox,./.venv,./.git,./venv,./*.json,./*.jsonl,./*.yaml"
+quiet-level = 3
+ignore-words-list = "inoreader"
+count = ""
+
+[tool.ruff]
+exclude = [
+    ".bzr",
+    ".direnv",
+    ".eggs",
+    ".git",
+    ".git-rewrite",
+    ".hg",
+    ".ipynb_checkpoints",
+    ".mypy_cache",
+    ".nox",
+    ".pants.d",
+    ".pyenv",
+    ".pytest_cache",
+    ".pytype",
+    ".ruff_cache",
+    ".svn",
+    ".tox",
+    ".venv",
+    ".vscode",
+    "__pypackages__",
+    "_build",
+    "buck-out",
+    "build",
+    "dist",
+    "node_modules",
+    "site-packages",
+    "venv",
+]
+line-length = 100
+indent-width = 4
+
+[tool.ruff.lint]
+select = ["E", "F"]
+ignore = ["E201", "E202"]
diff --git a/setup.cfg b/setup.cfg
deleted file mode 100644
index 017daae..0000000
--- a/setup.cfg
+++ /dev/null
@@ -1,7 +0,0 @@
-[flake8]
-max-line-length = 100
-ignore = E201,E202,E203
-
-[pep8]
-max-line-length = 100
-ignore = E201,E202,E203

From 381b3cecf6d466645b8d06395494d9f335304aa3 Mon Sep 17 00:00:00 2001
From: Linusp <linusp1024@gmail.com>
Date: Fri, 22 Mar 2024 18:06:56 +0800
Subject: [PATCH 3/5] format code

---
 inoreader/__init__.py     |   2 +-
 inoreader/article.py      |  24 +--
 inoreader/client.py       | 126 +++++++-------
 inoreader/config.py       |  26 +--
 inoreader/consts.py       |  10 +-
 inoreader/exception.py    |   4 +-
 inoreader/filter.py       |  10 +-
 inoreader/main.py         | 338 +++++++++++++++++++-------------------
 inoreader/sim.py          |  42 ++---
 inoreader/subscription.py |  16 +-
 inoreader/utils.py        |  34 ++--
 11 files changed, 316 insertions(+), 316 deletions(-)

diff --git a/inoreader/__init__.py b/inoreader/__init__.py
index 24c2da4..1c77bf1 100644
--- a/inoreader/__init__.py
+++ b/inoreader/__init__.py
@@ -1,4 +1,4 @@
 # coding: utf-8
 from .client import InoreaderClient

-__all__ = ['InoreaderClient']
+__all__ = ["InoreaderClient"]
diff --git a/inoreader/article.py b/inoreader/article.py
index d9f2a33..308165d 100644
--- a/inoreader/article.py
+++ b/inoreader/article.py
@@ -23,7 +23,7 @@ def __init__(
         self.categories = categories
         self.link = link
         self.published = published
-        self.content = content.strip() if content else ''
+        self.content = content.strip() if content else ""
         self.text = extract_text(self.content)
         self.author = author
         self.feed_id = feed_id
@@ -33,22 +33,22 @@
     @classmethod
     def from_json(cls, data):
         article_data = {
-            'id': data['id'],
-            'title': data['title'],
-            'categories': data['categories'],
-            'published': data['published'],
-            'content': data.get('summary', {}).get('content'),
-            'author': data.get('author'),
+            "id": data["id"],
+            "title": data["title"],
+            "categories": data["categories"],
+            "published": data["published"],
+            "content": data.get("summary", {}).get("content"),
+            "author": data.get("author"),
         }
-        links = [item['href'] for item in data['canonical']]
-        article_data['link'] = links[0] if links else ''
+        links = 
[item['href'] for item in data['canonical']] - article_data['link'] = links[0] if links else '' + links = [item["href"] for item in data["canonical"]] + article_data["link"] = links[0] if links else "" # feed info article_data.update( { - 'feed_id': data['origin']['streamId'], - 'feed_title': normalize_whitespace(data['origin']['title']), - 'feed_link': data['origin']['htmlUrl'], + "feed_id": data["origin"]["streamId"], + "feed_title": normalize_whitespace(data["origin"]["title"]), + "feed_link": data["origin"]["htmlUrl"], } ) diff --git a/inoreader/client.py b/inoreader/client.py index 12911d6..bd2feff 100644 --- a/inoreader/client.py +++ b/inoreader/client.py @@ -25,20 +25,20 @@ class InoreaderClient(object): # paths - TOKEN_PATH = '/oauth2/token' - USER_INFO_PATH = 'user-info' - TAG_LIST_PATH = 'tag/list' - SUBSCRIPTION_LIST_PATH = 'subscription/list' - STREAM_CONTENTS_PATH = 'stream/contents/' - EDIT_TAG_PATH = 'edit-tag' - EDIT_SUBSCRIPTION_PATH = 'subscription/edit' + TOKEN_PATH = "/oauth2/token" + USER_INFO_PATH = "user-info" + TAG_LIST_PATH = "tag/list" + SUBSCRIPTION_LIST_PATH = "subscription/list" + STREAM_CONTENTS_PATH = "stream/contents/" + EDIT_TAG_PATH = "edit-tag" + EDIT_SUBSCRIPTION_PATH = "subscription/edit" # tags - GENERAL_TAG_TEMPLATE = 'user/-/label/{}' - READ_TAG = 'user/-/state/com.google/read' - STARRED_TAG = 'user/-/state/com.google/starred' - LIKED_TAG = 'user/-/state/com.google/like' - BROADCAST_TAG = 'user/-/state/com.google/broadcast' + GENERAL_TAG_TEMPLATE = "user/-/label/{}" + READ_TAG = "user/-/state/com.google/read" + STARRED_TAG = "user/-/state/com.google/starred" + LIKED_TAG = "user/-/state/com.google/like" + BROADCAST_TAG = "user/-/state/com.google/broadcast" def __init__( self, app_id, app_key, access_token, refresh_token, expires_at, config_manager=None @@ -51,9 +51,9 @@ def __init__( self.session = requests.Session() self.session.headers.update( { - 'AppId': self.app_id, - 'AppKey': self.app_key, - 'Authorization': 'Bearer {}'.format(self.access_token), + "AppId": self.app_id, + "AppKey": self.app_key, + "Authorization": "Bearer {}".format(self.access_token), } ) self.config_manager = config_manager @@ -76,16 +76,16 @@ def parse_response(response, json_data=True): def refresh_access_token(self): url = urljoin(BASE_URL, self.TOKEN_PATH) payload = { - 'client_id': self.app_id, - 'client_secret': self.app_key, - 'grant_type': 'refresh_token', - 'refresh_token': self.refresh_token, + "client_id": self.app_id, + "client_secret": self.app_key, + "grant_type": "refresh_token", + "refresh_token": self.refresh_token, } response = self.parse_response(requests.post(url, json=payload, proxies=self.proxies)) - self.access_token = response['access_token'] - self.refresh_token = response['refresh_token'] - self.expires_at = datetime.now().timestamp() + response['expires_in'] - self.session.headers['Authorization'] = 'Bearer {}'.format(self.access_token) + self.access_token = response["access_token"] + self.refresh_token = response["refresh_token"] + self.expires_at = datetime.now().timestamp() + response["expires_in"] + self.session.headers["Authorization"] = "Bearer {}".format(self.access_token) if self.config_manager: self.config_manager.access_token = self.access_token @@ -103,36 +103,36 @@ def get_folders(self): self.check_token() url = urljoin(BASE_URL, self.TAG_LIST_PATH) - params = {'types': 1, 'counts': 1} + params = {"types": 1, "counts": 1} response = self.parse_response(self.session.post(url, params=params, proxies=self.proxies)) folders = [] - for item 
in response['tags']: - if item.get('type') != 'folder': + for item in response["tags"]: + if item.get("type") != "folder": continue - folder_name = item['id'].split('/')[-1] - folders.append({'name': folder_name, 'unread_count': item['unread_count']}) + folder_name = item["id"].split("/")[-1] + folders.append({"name": folder_name, "unread_count": item["unread_count"]}) - folders.sort(key=itemgetter('name')) + folders.sort(key=itemgetter("name")) return folders def get_tags(self): self.check_token() url = urljoin(BASE_URL, self.TAG_LIST_PATH) - params = {'types': 1, 'counts': 1} + params = {"types": 1, "counts": 1} response = self.parse_response(self.session.post(url, params=params, proxies=self.proxies)) tags = [] - for item in response['tags']: - if item.get('type') != 'tag': + for item in response["tags"]: + if item.get("type") != "tag": continue - folder_name = item['id'].split('/')[-1] - tags.append({'name': folder_name, 'unread_count': item['unread_count']}) + folder_name = item["id"].split("/")[-1] + tags.append({"name": folder_name, "unread_count": item["unread_count"]}) - tags.sort(key=itemgetter('name')) + tags.sort(key=itemgetter("name")) return tags def get_subscription_list(self): @@ -140,10 +140,10 @@ def get_subscription_list(self): url = urljoin(BASE_URL, self.SUBSCRIPTION_LIST_PATH) response = self.parse_response(self.session.get(url, proxies=self.proxies)) - for item in response['subscriptions']: + for item in response["subscriptions"]: yield Subscription.from_json(item) - def get_stream_contents(self, stream_id, c='', limit=None): + def get_stream_contents(self, stream_id, c="", limit=None): fetched_count = 0 stop = False while not stop: @@ -161,16 +161,16 @@ def get_stream_contents(self, stream_id, c='', limit=None): if c is None: break - def __get_stream_contents(self, stream_id, continuation=''): + def __get_stream_contents(self, stream_id, continuation=""): self.check_token() url = urljoin(BASE_URL, self.STREAM_CONTENTS_PATH + quote_plus(stream_id)) - params = {'n': 50, 'r': '', 'c': continuation, 'output': 'json'} # default 20, max 1000 + params = {"n": 50, "r": "", "c": continuation, "output": "json"} # default 20, max 1000 response = self.parse_response(self.session.post(url, params=params, proxies=self.proxies)) - if 'continuation' in response: - return response['items'], response['continuation'] + if "continuation" in response: + return response["items"], response["continuation"] else: - return response['items'], None + return response["items"], None def fetch_articles(self, folder=None, tags=None, unread=True, starred=False, limit=None): self.check_token() @@ -179,20 +179,20 @@ def fetch_articles(self, folder=None, tags=None, unread=True, starred=False, lim if folder: url = urljoin(url, quote_plus(self.GENERAL_TAG_TEMPLATE.format(folder))) - params = {'c': str(uuid4())} + params = {"c": str(uuid4())} if unread: - params['xt'] = self.READ_TAG + params["xt"] = self.READ_TAG if starred: - params['it'] = self.STARRED_TAG + params["it"] = self.STARRED_TAG fetched_count = 0 response = self.parse_response(self.session.post(url, params=params, proxies=self.proxies)) - for data in response['items']: + for data in response["items"]: categories = { - category.split('/')[-1] - for category in data.get('categories', []) - if category.find('label') > 0 + category.split("/")[-1] + for category in data.get("categories", []) + if category.find("label") > 0 } if tags and not categories.issuperset(set(tags)): continue @@ -202,17 +202,17 @@ def fetch_articles(self, folder=None, 
tags=None, unread=True, starred=False, lim if limit and fetched_count >= limit: break - continuation = response.get('continuation') + continuation = response.get("continuation") while continuation and (not limit or fetched_count < limit): - params['c'] = continuation + params["c"] = continuation response = self.parse_response( self.session.post(url, params=params, proxies=self.proxies) ) - for data in response['items']: + for data in response["items"]: categories = { - category.split('/')[-1] - for category in data.get('categories', []) - if category.find('label') > 0 + category.split("/")[-1] + for category in data.get("categories", []) + if category.find("label") > 0 } if tags and not categories.issuperset(set(tags)): continue @@ -221,7 +221,7 @@ def fetch_articles(self, folder=None, tags=None, unread=True, starred=False, lim if limit and fetched_count >= limit: break - continuation = response.get('continuation') + continuation = response.get("continuation") def fetch_unread(self, folder=None, tags=None, limit=None): for article in self.fetch_articles(folder=folder, tags=tags, unread=True): @@ -237,7 +237,7 @@ def add_general_label(self, articles, label): url = urljoin(BASE_URL, self.EDIT_TAG_PATH) for start in range(0, len(articles), 10): end = min(start + 10, len(articles)) - params = {'a': label, 'i': [articles[idx].id for idx in range(start, end)]} + params = {"a": label, "i": [articles[idx].id for idx in range(start, end)]} self.parse_response( self.session.post(url, params=params, proxies=self.proxies), json_data=False ) @@ -248,7 +248,7 @@ def remove_general_label(self, articles, label): url = urljoin(BASE_URL, self.EDIT_TAG_PATH) for start in range(0, len(articles), 10): end = min(start + 10, len(articles)) - params = {'r': label, 'i': [articles[idx].id for idx in range(start, end)]} + params = {"r": label, "i": [articles[idx].id for idx in range(start, end)]} self.parse_response( self.session.post(url, params=params, proxies=self.proxies), json_data=False ) @@ -285,16 +285,16 @@ def edit_subscription(self, stream_id, action, title=None, add_folder=None, remo url = urljoin(BASE_URL, self.EDIT_SUBSCRIPTION_PATH) # https://us.inoreader.com/developers/edit-subscription # The documentation looks a bit outdated, `follow`/`unfollow` don't work - action = {'follow': 'subscribe', 'unfollow': 'unsubscribe'}.get(action) or action - params = {'ac': action, 's': stream_id} + action = {"follow": "subscribe", "unfollow": "unsubscribe"}.get(action) or action + params = {"ac": action, "s": stream_id} if title: - params['t'] = title + params["t"] = title if add_folder: - params['a'] = add_folder + params["a"] = add_folder if remove_folder: - params['r'] = remove_folder + params["r"] = remove_folder r = self.session.post(url, params=params, proxies=self.proxies) response = self.parse_response( diff --git a/inoreader/config.py b/inoreader/config.py index e54db05..62d05f3 100644 --- a/inoreader/config.py +++ b/inoreader/config.py @@ -20,55 +20,55 @@ def load(self): self.data[section_name] = dict(config_parser[section_name]) def save(self): - with codecs.open(self.config_file, mode='w', encoding='utf-8') as f: + with codecs.open(self.config_file, mode="w", encoding="utf-8") as f: config_parser = ConfigParser() config_parser.update(self.data) config_parser.write(f) @property def app_id(self): - return self.data.get('auth', {}).get('appid') + return self.data.get("auth", {}).get("appid") @app_id.setter def app_id(self, value): - self.data.setdefault('auth', {})['appid'] = value + 
self.data.setdefault("auth", {})["appid"] = value

     @property
     def app_key(self):
-        return self.data.get('auth', {}).get('appkey')
+        return self.data.get("auth", {}).get("appkey")

     @app_key.setter
     def app_key(self, value):
-        self.data.setdefault('auth', {})['appkey'] = value
+        self.data.setdefault("auth", {})["appkey"] = value

     @property
     def access_token(self):
-        return self.data.get('auth', {}).get('access_token')
+        return self.data.get("auth", {}).get("access_token")

     @access_token.setter
     def access_token(self, value):
-        self.data.setdefault('auth', {})['access_token'] = value
+        self.data.setdefault("auth", {})["access_token"] = value

     @property
     def refresh_token(self):
-        return self.data.get('auth', {}).get('refresh_token')
+        return self.data.get("auth", {}).get("refresh_token")

     @refresh_token.setter
     def refresh_token(self, value):
-        self.data.setdefault('auth', {})['refresh_token'] = value
+        self.data.setdefault("auth", {})["refresh_token"] = value

     @property
     def expires_at(self):
-        return self.data.get('auth', {}).get('expires_at')
+        return self.data.get("auth", {}).get("expires_at")

     @expires_at.setter
     def expires_at(self, value):
-        self.data.setdefault('auth', {})['expires_at'] = value
+        self.data.setdefault("auth", {})["expires_at"] = value

     @property
     def proxies(self):
-        return self.data.get('proxies', {})
+        return self.data.get("proxies", {})

     @proxies.setter
     def proxies(self, value):
-        self.data['proxies'] = value
+        self.data["proxies"] = value
diff --git a/inoreader/consts.py b/inoreader/consts.py
index 107f482..b410509 100644
--- a/inoreader/consts.py
+++ b/inoreader/consts.py
@@ -1,10 +1,10 @@
 # coding: utf-8
 import os

-BASE_URL = 'https://www.inoreader.com/reader/api/0/'
-LOGIN_URL = 'https://www.inoreader.com/accounts/ClientLogin'
+BASE_URL = "https://www.inoreader.com/reader/api/0/"
+LOGIN_URL = "https://www.inoreader.com/accounts/ClientLogin"

-DEFAULT_APPID = 'your_app_id'
-DEFAULT_APPKEY = 'your_app_key'
+DEFAULT_APPID = "your_app_id"
+DEFAULT_APPKEY = "your_app_key"

-CONFIG_FILE = os.path.join(os.environ.get('HOME'), '.inoreader')
+CONFIG_FILE = os.path.join(os.environ.get("HOME"), ".inoreader")
diff --git a/inoreader/exception.py b/inoreader/exception.py
index 0e39eba..313cb60 100644
--- a/inoreader/exception.py
+++ b/inoreader/exception.py
@@ -1,8 +1,8 @@
 class NotLoginError(ValueError):
     def __repr__(self):
-        return '<NotLoginError>'
+        return "<NotLoginError>"


 class APIError(ValueError):
     def __repr__(self):
-        return '<APIError>'
+        return "<APIError>"
diff --git a/inoreader/filter.py b/inoreader/filter.py
index f2ed176..26e370a 100644
--- a/inoreader/filter.py
+++ b/inoreader/filter.py
@@ -14,7 +14,7 @@ def wrap(cls):
     return wrap


-@register_filter('include_any')
+@register_filter("include_any")
 class IncludeAnyFilter(object):
     def __init__(self, rules):
         self.rules = [re.compile(regexp, re.IGNORECASE) for regexp in rules]
@@ -27,7 +27,7 @@ def validate(self, text):
         return False


-@register_filter('include_all')
+@register_filter("include_all")
 class IncludeAllFilter(object):
     def __init__(self, rules):
         self.rules = [re.compile(regexp, re.IGNORECASE) for regexp in rules]
@@ -40,7 +40,7 @@ def validate(self, text):
         return True


-@register_filter('exclude')
+@register_filter("exclude")
 class ExcludeFilter(object):
     def __init__(self, rules):
         self.rules = [re.compile(regexp, re.IGNORECASE) for regexp in rules]
@@ -54,10 +54,10 @@ def validate(self, text):


 def get_filter(config):
-    filter_type = config['type']
+    filter_type = config["type"]
     if filter_type not in _FILTERS:
         raise ValueError("unsupported filter type: {}".format(filter_type))

     
filter_cls = _FILTERS[filter_type] - params = {k: v for k, v in config.items() if k != 'type'} + params = {k: v for k, v in config.items() if k != "type"} return filter_cls(**params) diff --git a/inoreader/main.py b/inoreader/main.py index 3bc07d1..9075edb 100644 --- a/inoreader/main.py +++ b/inoreader/main.py @@ -30,34 +30,34 @@ from inoreader.sim import InvIndex, sim_of from inoreader.utils import download_image -APPID_ENV_NAME = 'INOREADER_APP_ID' -APPKEY_ENV_NAME = 'INOREADER_APP_KEY' -TOKEN_ENV_NAME = 'INOREADER_AUTH_TOKEN' +APPID_ENV_NAME = "INOREADER_APP_ID" +APPKEY_ENV_NAME = "INOREADER_APP_KEY" +TOKEN_ENV_NAME = "INOREADER_AUTH_TOKEN" ENV_NAMES = [APPID_ENV_NAME, APPKEY_ENV_NAME, TOKEN_ENV_NAME] -CONFIG_FILE = os.path.join(os.environ.get('HOME'), '.inoreader') +CONFIG_FILE = os.path.join(os.environ.get("HOME"), ".inoreader") LOGGER = logging.getLogger(__name__) dictConfig( { - 'version': 1, - 'formatters': { - 'simple': { - 'format': '%(asctime)s - %(message)s', + "version": 1, + "formatters": { + "simple": { + "format": "%(asctime)s - %(message)s", } }, - 'handlers': { - 'default': { - 'level': 'DEBUG', - 'class': 'logging.StreamHandler', - 'formatter': 'simple', + "handlers": { + "default": { + "level": "DEBUG", + "class": "logging.StreamHandler", + "formatter": "simple", "stream": "ext://sys.stdout", }, }, - 'loggers': { - '__main__': {'handlers': ['default'], 'level': 'DEBUG', 'propagate': False}, - 'inoreader': {'handlers': ['default'], 'level': 'DEBUG', 'propagate': True}, + "loggers": { + "__main__": {"handlers": ["default"], "level": "DEBUG", "propagate": False}, + "inoreader": {"handlers": ["default"], "level": "DEBUG", "propagate": True}, }, } ) @@ -86,7 +86,7 @@ def wrapper(*args, **kwargs): try: return func(*args, **kwargs) except NotLoginError: - print('Error: Please login first!') + print("Error: Please login first!") sys.exit(1) except APIError as exception: print("Error:", str(exception)) @@ -95,7 +95,7 @@ def wrapper(*args, **kwargs): return wrapper -@click.group(context_settings={'help_option_names': ['-h', '--help']}) +@click.group(context_settings={"help_option_names": ["-h", "--help"]}) def main(): pass @@ -108,10 +108,10 @@ def login(): # disable flask output app.logger.disabled = True - logger = logging.getLogger('werkzeug') + logger = logging.getLogger("werkzeug") logger.setLevel(logging.ERROR) logger.disabled = True - sys.modules['flask.cli'].show_server_banner = lambda *x: None + sys.modules["flask.cli"].show_server_banner = lambda *x: None # use queue to pass data between threads queue = Queue() @@ -122,34 +122,34 @@ def login(): state = str(uuid4()) oauth = OAuth2Session( app_id, - redirect_uri='http://localhost:8080/oauth/redirect', - scope='read write', + redirect_uri="http://localhost:8080/oauth/redirect", + scope="read write", state=state, ) - @app.route('/oauth/redirect') + @app.route("/oauth/redirect") def redirect(): token = oauth.fetch_token( - 'https://www.inoreader.com/oauth2/token', + "https://www.inoreader.com/oauth2/token", authorization_response=request.url, client_secret=app_key, proxies=config.proxies, ) queue.put(token) queue.task_done() - return 'Done.' + return "Done." 
func = partial(app.run, port=8080, debug=False) threading.Thread(target=func, daemon=True).start() - os.environ['OAUTHLIB_INSECURE_TRANSPORT'] = '1' - authorization_url, ret_state = oauth.authorization_url('https://www.inoreader.com/oauth2/auth') + os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1" + authorization_url, ret_state = oauth.authorization_url("https://www.inoreader.com/oauth2/auth") if state != ret_state: LOGGER.error("Server return bad state") sys.exit(1) token = None - print('Open the link to authorize access:', authorization_url) + print("Open the link to authorize access:", authorization_url) while True: token = queue.get() if token: @@ -159,9 +159,9 @@ def redirect(): if token: config.app_id = app_id config.app_key = app_key - config.access_token = token['access_token'] - config.refresh_token = token['refresh_token'] - config.expires_at = token['expires_at'] + config.access_token = token["access_token"] + config.refresh_token = token["refresh_token"] + config.expires_at = token["expires_at"] config.save() LOGGER.info("Login successfully, tokens are saved in config file %s", config.config_file) else: @@ -178,9 +178,9 @@ def list_folders(): output_info = [["Folder", "Unread Count"]] for item in res: - output_info.append([item['name'], item['unread_count']]) + output_info.append([item["name"], item["unread_count"]]) - print(tabulate(output_info, headers='firstrow', tablefmt="github")) + print(tabulate(output_info, headers="firstrow", tablefmt="github")) @main.command("list-tags") @@ -192,60 +192,60 @@ def list_tags(): output_info = [["Tag", "Unread Count"]] for item in res: - output_info.append([item['name'], item['unread_count']]) + output_info.append([item["name"], item["unread_count"]]) - print(tabulate(output_info, headers='firstrow', tablefmt="github")) + print(tabulate(output_info, headers="firstrow", tablefmt="github")) @main.command("fetch-unread") -@click.option("-f", "--folder", required=True, help='Folder which articles belong to') +@click.option("-f", "--folder", required=True, help="Folder which articles belong to") @click.option("-t", "--tags", help="Tag(s) for filtering, separate with comma") @click.option("-o", "--outfile", required=True, help="Filename to save articles") @click.option( "--out-format", - type=click.Choice(['json', 'csv', 'plain', 'markdown', 'org-mode']), - default='json', - help='Format of output file, default: json', + type=click.Choice(["json", "csv", "plain", "markdown", "org-mode"]), + default="json", + help="Format of output file, default: json", ) @catch_error def fetch_unread(folder, tags, outfile, out_format): """Fetch unread articles""" client = get_client() - tag_list = [] if not tags else tags.split(',') - fout = codecs.open(outfile, mode='w', encoding='utf-8') - writer = csv.writer(fout, delimiter=',') if out_format == 'csv' else None + tag_list = [] if not tags else tags.split(",") + fout = codecs.open(outfile, mode="w", encoding="utf-8") + writer = csv.writer(fout, delimiter=",") if out_format == "csv" else None for idx, article in enumerate(client.fetch_unread(folder=folder, tags=tag_list)): if idx > 0 and (idx % 10) == 0: LOGGER.info("fetched %d articles", idx) title = article.title text = article.text link = article.link - if out_format == 'json': + if out_format == "json": print( - json.dumps({'title': title, 'content': text, 'url': link}, ensure_ascii=False), + json.dumps({"title": title, "content": text, "url": link}, ensure_ascii=False), file=fout, ) - elif out_format == 'csv': + elif out_format == "csv": 
writer.writerow([link, title, text]) - elif out_format == 'plain': - print('TITLE: {}'.format(title), file=fout) + elif out_format == "plain": + print("TITLE: {}".format(title), file=fout) print("LINK: {}".format(link), file=fout) print("CONTENT: {}".format(text), file=fout) print(file=fout) - elif out_format == 'markdown': + elif out_format == "markdown": if link: - print('# [{}]({})\n'.format(title, link), file=fout) + print("# [{}]({})\n".format(title, link), file=fout) else: - print('# {}\n'.format(title), file=fout) - print(text + '\n', file=fout) - elif out_format == 'org-mode': + print("# {}\n".format(title), file=fout) + print(text + "\n", file=fout) + elif out_format == "org-mode": if link: - title = title.replace('[', '_').replace(']', '_') - print('* [[{}][{}]]\n'.format(link, title), file=fout) + title = title.replace("[", "_").replace("]", "_") + print("* [[{}][{}]]\n".format(link, title), file=fout) else: - print('* {}\n'.format(title), file=fout) - print(text + '\n', file=fout) + print("* {}\n".format(title), file=fout) + print(text + "\n", file=fout) LOGGER.info("fetched %d articles and saved them in %s", idx + 1, outfile) @@ -253,36 +253,36 @@ def fetch_unread(folder, tags, outfile, out_format): def apply_action(articles, client, action, tags): - if action == 'tag': - for tag in tags.split(','): + if action == "tag": + for tag in tags.split(","): client.add_tag(articles, tag) for article in articles: LOGGER.info("Add tags [%s] on article: %s", tags, article.title) - elif action == 'mark_as_read': + elif action == "mark_as_read": client.mark_as_read(articles) for article in articles: LOGGER.info("Mark article as read: %s", article.title) - elif action == 'like': + elif action == "like": client.mark_as_liked(articles) for article in articles: LOGGER.info("Mark article as liked: %s", article.title) - elif action == 'broadcast': + elif action == "broadcast": client.broadcast(articles) for article in articles: LOGGER.info("Broadcast article: %s", article.title) - elif action == 'star': + elif action == "star": client.mark_as_starred(articles) for article in articles: LOGGER.info("Starred article: %s", article.title) - elif action == 'unstar': + elif action == "unstar": client.remove_starred(articles) for article in articles: LOGGER.info("Unstarred article: %s", article.title) @main.command("filter") -@click.option("-r", "--rules-file", required=True, help='YAML file with your rules') +@click.option("-r", "--rules-file", required=True, help="YAML file with your rules") @catch_error def filter_articles(rules_file): """Select articles and do something""" @@ -291,65 +291,65 @@ def filter_articles(rules_file): for rule in yaml.load(open(rules_file), Loader=yaml.Loader): fields = [ field - for field in rule.get('fields', ['title', 'content']) - if field in ('title', 'content') + for field in rule.get("fields", ["title", "content"]) + if field in ("title", "content") ] - cur_filter = get_filter(rule['filter']) + cur_filter = get_filter(rule["filter"]) actions = [] # only 'mark_as_read', 'like', 'star', 'broadcast', 'tag' is supported now - for action in rule.get('actions', [{'type': 'mark_as_read'}]): - if action['type'] not in ( - 'mark_as_read', - 'like', - 'star', - 'broadcast', - 'tag', - 'unstar', + for action in rule.get("actions", [{"type": "mark_as_read"}]): + if action["type"] not in ( + "mark_as_read", + "like", + "star", + "broadcast", + "tag", + "unstar", ): continue actions.append(action) articles = [] - if 'folders' in rule: - for folder in rule['folders']: + if 
"folders" in rule: + for folder in rule["folders"]: articles.extend(client.fetch_unread(folder=folder)) else: - for articles_info in rule.get('articles', []): + for articles_info in rule.get("articles", []): articles.extend(client.fetch_articles(**articles_info)) # FIXME: deduplicate count = 0 for article in articles: matched = False - if 'title' in fields and cur_filter.validate(article.title): + if "title" in fields and cur_filter.validate(article.title): matched = True - if 'content' in fields and cur_filter.validate(article.text): + if "content" in fields and cur_filter.validate(article.text): matched = True if matched: for action in actions: - matched_articles[action['type']].append((article, action)) + matched_articles[action["type"]].append((article, action)) count += 1 LOGGER.info( "matched %d articles with filter named '%s'", count, - rule['name'], + rule["name"], ) for action_name in matched_articles: articles, actions = zip(*matched_articles[action_name]) - if action_name != 'tag': + if action_name != "tag": apply_action(articles, client, action_name, None) else: for article, action in zip(articles, actions): - apply_action([article], client, 'tag', action['tags']) + apply_action([article], client, "tag", action["tags"]) @main.command("get-subscriptions") @click.option("-o", "--outfile", help="Filename to save results") -@click.option("-f", "--folder", help='Folder which subscriptions belong to') +@click.option("-f", "--folder", help="Folder which subscriptions belong to") @click.option( "--out-format", type=click.Choice(["json", "csv"]), @@ -362,27 +362,27 @@ def get_subscriptions(outfile, folder, out_format): client = get_client() results = [] for sub in client.get_subscription_list(): - sub_categories = {category['label'] for category in sub.categories} + sub_categories = {category["label"] for category in sub.categories} if folder and folder not in sub_categories: continue results.append( { - 'id': sub.id, - 'title': sub.title, - 'url': sub.url, - 'folders': ';'.join(sub_categories), + "id": sub.id, + "title": sub.title, + "url": sub.url, + "folders": ";".join(sub_categories), } ) - fout = open(outfile, 'w') if outfile else sys.stdout - if out_format == 'csv': - headers = ['id', 'title', 'url', 'folders'] + fout = open(outfile, "w") if outfile else sys.stdout + if out_format == "csv": + headers = ["id", "title", "url", "folders"] writer = csv.DictWriter(fout, headers, quoting=csv.QUOTE_ALL, delimiter="\t") writer.writeheader() for item in results: writer.writerow(item) - elif out_format == 'json': + elif out_format == "json": json.dump(results, fout, ensure_ascii=False, indent=4) if outfile: @@ -390,11 +390,11 @@ def get_subscriptions(outfile, folder, out_format): @main.command("fetch-articles") -@click.option("-i", "--stream-id", required=True, help='Stream ID which you want to fetch') +@click.option("-i", "--stream-id", required=True, help="Stream ID which you want to fetch") @click.option("-o", "--outfile", required=True, help="Filename to save results") @click.option( "--out-format", - type=click.Choice(["json", "csv", 'plain', 'markdown', 'org-mode']), + type=click.Choice(["json", "csv", "plain", "markdown", "org-mode"]), default="json", help="Format of output, default: json", ) @@ -403,10 +403,10 @@ def fetch_articles(outfile, stream_id, out_format): """Fetch articles by stream id""" client = get_client() - fout = codecs.open(outfile, mode='w', encoding='utf-8') + fout = codecs.open(outfile, mode="w", encoding="utf-8") writer = None - if out_format == 'csv': - writer = 
csv.DictWriter(fout, ['title', 'content'], delimiter=',', quoting=csv.QUOTE_ALL) + if out_format == "csv": + writer = csv.DictWriter(fout, ["title", "content"], delimiter=",", quoting=csv.QUOTE_ALL) writer.writeheader() for idx, article in enumerate(client.get_stream_contents(stream_id)): @@ -415,20 +415,20 @@ def fetch_articles(outfile, stream_id, out_format): title = article.title text = article.text - if out_format == 'json': - print(json.dumps({'title': title, 'content': text}, ensure_ascii=False), file=fout) - elif out_format == 'csv': - writer.writerow({'title': title, 'content': text}) - elif out_format == 'plain': - print('TITLE: {}'.format(title), file=fout) + if out_format == "json": + print(json.dumps({"title": title, "content": text}, ensure_ascii=False), file=fout) + elif out_format == "csv": + writer.writerow({"title": title, "content": text}) + elif out_format == "plain": + print("TITLE: {}".format(title), file=fout) print("CONTENT: {}".format(text), file=fout) print(file=fout) - elif out_format == 'markdown': - print('# {}\n'.format(title), file=fout) - print(text + '\n', file=fout) - elif out_format == 'org-mode': - print('* {}\n'.format(title), file=fout) - print(text + '\n', file=fout) + elif out_format == "markdown": + print("# {}\n".format(title), file=fout) + print(text + "\n", file=fout) + elif out_format == "org-mode": + print("* {}\n".format(title), file=fout) + print(text + "\n", file=fout) LOGGER.info("fetched %d articles and saved them in %s", idx + 1, outfile) @@ -452,7 +452,7 @@ def dedupe(folder, thresh): for docid, doc, _ in related: if docid == article.id: continue - sims[doc] = sim_of(doc, article.title, method='cosine', term='char', ngram_range=(2, 3)) + sims[doc] = sim_of(doc, article.title, method="cosine", term="char", ngram_range=(2, 3)) if sims and max(sims.values()) >= thresh: top_doc, top_score = sims.most_common()[0] @@ -463,11 +463,11 @@ def dedupe(folder, thresh): index.add_doc(article) LOGGER.info("fetched %d articles and found %d duplicate", idx + 1, len(matched_articles)) - apply_action(matched_articles, client, 'mark_as_read', None) + apply_action(matched_articles, client, "mark_as_read", None) @main.command("fetch-starred") -@click.option("-f", "--folder", help='Folder which articles belong to') +@click.option("-f", "--folder", help="Folder which articles belong to") @click.option("-t", "--tags", help="Tag(s) for filtering, separate with comma") @click.option( "-o", "--outfile", help="Filename to save articles, required when output format is `csv`" @@ -479,31 +479,31 @@ def dedupe(folder, thresh): @click.option("--save-image", is_flag=True) @click.option( "--out-format", - type=click.Choice(['json', 'csv', 'markdown', 'org-mode']), - default='json', - help='Format of output file, default: json', + type=click.Choice(["json", "csv", "markdown", "org-mode"]), + default="json", + help="Format of output file, default: json", ) @catch_error def fetch_starred(folder, tags, outfile, outdir, limit, save_image, out_format): """Fetch starred articles""" client = get_client() - if out_format == 'csv' and not outfile: + if out_format == "csv" and not outfile: click.secho("`outfile` is required!", fg="red") return -1 - elif out_format != 'csv' and not outdir: + elif out_format != "csv" and not outdir: click.secho("`outdir` is required!", fg="red") return -1 - if out_format == 'csv': - fout = codecs.open(outfile, mode='w', encoding='utf-8') + if out_format == "csv": + fout = codecs.open(outfile, mode="w", encoding="utf-8") writer = ( - csv.writer(fout, 
delimiter=',', quoting=csv.QUOTE_ALL) if out_format == 'csv' else None
+            csv.writer(fout, delimiter=",", quoting=csv.QUOTE_ALL) if out_format == "csv" else None
         )
     elif not os.path.exists(outdir):
         os.makedirs(outdir)

-    tag_list = [] if not tags else tags.split(',')
+    tag_list = [] if not tags else tags.split(",")
     url_to_image = {}
     fetched_count = 0
     for article in client.fetch_starred(folder=folder, tags=tag_list, limit=limit):
         if limit and fetched_count >= limit:
             break

         fetched_count += 1
         title = article.title
         text = article.text
         link = article.link
-        if out_format == 'csv':
+        if out_format == "csv":
             writer.writerow([link, title, text])
             continue

-        filename = re.sub(r'\s+', '_', title)
-        filename = re.sub(r'[\[\]\(\)()：:，,/|]', '_', filename)
-        filename = re.sub(r'[“”\'"]', '', filename)
-        filename = re.sub(r'-+', '-', filename)
+        filename = re.sub(r"\s+", "_", title)
+        filename = re.sub(r"[\[\]\(\)()：:，,/|]", "_", filename)
+        filename = re.sub(r'[“”\'"]', "", filename)
+        filename = re.sub(r"-+", "-", filename)
         filename = filename[:50]
-        if out_format == 'json':
-            filename += '.json'
-        elif out_format == 'markdown':
-            filename += '.md'
-        elif out_format == 'org-mode':
-            filename += '.org'
+        if out_format == "json":
+            filename += ".json"
+        elif out_format == "markdown":
+            filename += ".md"
+        elif out_format == "org-mode":
+            filename += ".org"

         if save_image:
-            image_contents = re.findall(r'!\[(?:[^\[\]]+)\]\((?:[^\(\)]+)\)', text)
+            image_contents = re.findall(r"!\[(?:[^\[\]]+)\]\((?:[^\(\)]+)\)", text)
             for image_content in image_contents:
-                match = re.match(r'!\[(?P<alt>[^\[\]]+)\]\((?P<url>[^\(\)]+)\)', image_content)
-                image_alt, image_url = itemgetter('alt', 'url')(match.groupdict())
+                match = re.match(r"!\[(?P<alt>[^\[\]]+)\]\((?P<url>[^\(\)]+)\)", image_content)
+                image_alt, image_url = itemgetter("alt", "url")(match.groupdict())
                 if image_url in url_to_image:
                     text = text.replace(
-                        image_content, '![{}]({})'.format(image_alt, url_to_image[image_url])
+                        image_content, "![{}]({})".format(image_alt, url_to_image[image_url])
                     )
                     continue

-                image_filename = ''
-                if not re.findall(r'[\?\!\/=\&]', image_alt):
-                    image_filename = re.sub(r'\.[a-z]+$', '', image_alt)
+                image_filename = ""
+                if not re.findall(r"[\?\!\/=\&]", image_alt):
+                    image_filename = re.sub(r"\.[a-z]+$", "", image_alt)
                 else:
-                    image_filename = str(uuid4()).replace('-', '')
+                    image_filename = str(uuid4()).replace("-", "")

                 return_image_file = download_image(
                     image_url, outdir, image_filename, proxies=client.proxies
                 )
                 if return_image_file:
                     LOGGER.info('Download image as "%s" from "%s"', return_image_file, image_url)
                     text = text.replace(
-                        image_content, '![{}]({})'.format(image_alt, return_image_file)
+                        image_content, "![{}]({})".format(image_alt, return_image_file)
                     )
                     url_to_image[image_url] = return_image_file

-        with open(os.path.join(outdir, filename), 'w') as fout:
-            if out_format == 'json':
+        with open(os.path.join(outdir, filename), "w") as fout:
+            if out_format == "json":
                 json.dump(
-                    {'title': title, 'content': text, 'url': link},
+                    {"title": title, "content": text, "url": link},
                     fout,
                     ensure_ascii=False,
                     indent=4,
                 )
-            elif out_format == 'markdown':
-                print(title + '\n=====\n\nLINK: ' + link + '\n\n', file=fout)
-                text = re.sub(r'!\[([^\[\]]+)\]\(([^\(\)]+)\)', r'\n![\1](\2)\n', text)
-                print(text + '\n', file=fout)
-            elif out_format == 'org-mode':
-                print('#+TITLE: ' + title + '\n\nLINK: ' + link + '\n\n', file=fout)
-                text = re.sub(r'!\[([^\[\]]+)\]\(([^\(\)]+)\)', r'\n[[file:\2][\1]]\n', text)
-                text = re.sub(r'\[([^\[\]]+)\]\(([^\(\)]+)\)', r'[[\2][\1]]', text)
-                print(text + '\n', file=fout)
+            elif out_format == "markdown":
+                print(title + "\n=====\n\nLINK: " + link + "\n\n", file=fout)
+                text = re.sub(r"!\[([^\[\]]+)\]\(([^\(\)]+)\)", r"\n![\1](\2)\n", text)
+                print(text + "\n", file=fout)
+            elif out_format == "org-mode":
+                print("#+TITLE: " + title + "\n\nLINK: " + link + "\n\n", 
file=fout) - text = re.sub(r'!\[([^\[\]]+)\]\(([^\(\)]+)\)', r'\n[[file:\2][\1]]\n', text) - text = re.sub(r'\[([^\[\]]+)\]\(([^\(\)]+)\)', r'[[\2][\1]]', text) - print(text + '\n', file=fout) + elif out_format == "markdown": + print(title + "\n=====\n\nLINK: " + link + "\n\n", file=fout) + text = re.sub(r"!\[([^\[\]]+)\]\(([^\(\)]+)\)", r"\n![\1](\2)\n", text) + print(text + "\n", file=fout) + elif out_format == "org-mode": + print("#+TITLE: " + title + "\n\nLINK: " + link + "\n\n", file=fout) + text = re.sub(r"!\[([^\[\]]+)\]\(([^\(\)]+)\)", r"\n[[file:\2][\1]]\n", text) + text = re.sub(r"\[([^\[\]]+)\]\(([^\(\)]+)\)", r"[[\2][\1]]", text) + print(text + "\n", file=fout) LOGGER.info('saved article "%s" in directory "%s"', title, outdir) - if out_format == 'csv': + if out_format == "csv": fout.close() LOGGER.info("fetched %d articles and saved them in %s", fetched_count, outfile) else: @@ -589,32 +589,32 @@ def fetch_starred(folder, tags, outfile, outdir, limit, save_image, out_format): "-a", "--action", required=True, - type=click.Choice(['follow', 'unfollow', 'rename', 'add-folder', 'remove-folder']), + type=click.Choice(["follow", "unfollow", "rename", "add-folder", "remove-folder"]), help="", ) -@click.option("-i", "--stream-id", required=True, help='Stream ID which you want to fetch') -@click.option("-n", "--name", help='The name of subscription, for action follow/rename(required)') -@click.option("-f", "--folder", help='Folder which subscription belong to') +@click.option("-i", "--stream-id", required=True, help="Stream ID which you want to fetch") +@click.option("-n", "--name", help="The name of subscription, for action follow/rename(required)") +@click.option("-f", "--folder", help="Folder which subscription belong to") @catch_error def edit_subscriptions(action, stream_id, name, folder): """Get your subscriptions""" edit_action = action - if action in ('rename', 'add-folder', 'remove-folder'): - edit_action = 'edit' - if action == 'rename' and not name: + if action in ("rename", "add-folder", "remove-folder"): + edit_action = "edit" + if action == "rename" and not name: click.secho("`name` is required for action `rename`!", fg="red") return -1 - elif action in ('add-folder', 'remove_starred') and not folder: + elif action in ("add-folder", "remove_starred") and not folder: click.secho(f"`folder` is required for action `{action}`", fg="red") return -1 client = get_client() - stream_id = 'feed/' + stream_id if not stream_id.startswith('feed/') else stream_id - if folder and not folder.startswith('user/-/label/'): + stream_id = "feed/" + stream_id if not stream_id.startswith("feed/") else stream_id + if folder and not folder.startswith("user/-/label/"): folder = client.GENERAL_TAG_TEMPLATE.format(folder) - add_folder = folder if action in ('follow', 'add-folder') else None - remove_folder = folder if action == 'remove-folder' else None + add_folder = folder if action in ("follow", "add-folder") else None + remove_folder = folder if action == "remove-folder" else None try: response = client.edit_subscription( stream_id, edit_action, title=name, add_folder=add_folder, remove_folder=remove_folder @@ -625,5 +625,5 @@ def edit_subscriptions(action, stream_id, name, folder): return -1 -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/inoreader/sim.py b/inoreader/sim.py index 10c7430..33beb22 100644 --- a/inoreader/sim.py +++ b/inoreader/sim.py @@ -6,21 +6,21 @@ PUNCTS_PAT = re.compile( r'(?:[#\$&@.,;:!?,。!?、:;  \u3300\'`"~_\+\-\*\/\\|\\^=<>\[\]\(\)\{\}()“”‘’\s]|' 
- r'[\u2000-\u206f]|' - r'[\u3000-\u303f]|' - r'[\uff30-\uff4f]|' - r'[\uff00-\uff0f\uff1a-\uff20\uff3b-\uff40\uff5b-\uff65])+' + r"[\u2000-\u206f]|" + r"[\u3000-\u303f]|" + r"[\uff30-\uff4f]|" + r"[\uff00-\uff0f\uff1a-\uff20\uff3b-\uff40\uff5b-\uff65])+" ) def make_terms(text, term, ngram_range=None, lower=True, ignore_punct=True, gram_as_tuple=False): if lower: text = text.lower() - if term == 'word': + if term == "word": # term_seq = [word.strip() for word in jieba.cut(text) if word.strip()] term_seq = [word.strip() for word in text.split() if word.strip()] - elif term == 'char': - term_seq = list(re.sub(r'\s', '', text)) + elif term == "char": + term_seq = list(re.sub(r"\s", "", text)) else: raise ValueError(f"unsupported term type: {term}") @@ -35,7 +35,7 @@ def make_terms(text, term, ngram_range=None, lower=True, ignore_punct=True, gram if gram_as_tuple: gram = tuple(term_seq[idx : idx + gram_level]) else: - gram = ''.join(term_seq[idx : idx + gram_level]) + gram = "".join(term_seq[idx : idx + gram_level]) if gram not in cur_grams: if ignore_punct and any(PUNCTS_PAT.match(item) for item in gram): pass @@ -46,15 +46,15 @@ def make_terms(text, term, ngram_range=None, lower=True, ignore_punct=True, gram def lcs_sim( - s1, s2, term='char', ngram_range=None, ngram_weights=None, lower=True, ignore_punct=True + s1, s2, term="char", ngram_range=None, ngram_weights=None, lower=True, ignore_punct=True ): - s1_terms = make_terms(s1, 'char', None, lower, ignore_punct) - s2_terms = make_terms(s2, 'char', None, lower, ignore_punct) + s1_terms = make_terms(s1, "char", None, lower, ignore_punct) + s2_terms = make_terms(s2, "char", None, lower, ignore_punct) return SequenceMatcher(a=s1_terms, b=s2_terms).ratio() def jaccard_sim( - s1, s2, term='word', ngram_range=None, ngram_weights=None, lower=True, ignore_punct=True + s1, s2, term="word", ngram_range=None, ngram_weights=None, lower=True, ignore_punct=True ): if not ngram_range or ngram_range[1] == ngram_range[0] + 1: first_term_set = set(make_terms(s1, term, ngram_range, lower, ignore_punct)) @@ -82,7 +82,7 @@ def jaccard_sim( def cosine_sim( - s1, s2, term='word', ngram_range=None, ngram_weights=None, lower=True, ignore_punct=True + s1, s2, term="word", ngram_range=None, ngram_weights=None, lower=True, ignore_punct=True ): if not ngram_range or ngram_range[1] == ngram_range[0] + 1: first_term_freq = Counter(make_terms(s1, term, ngram_range, lower, ignore_punct)) @@ -124,11 +124,11 @@ def cosine_sim( return sum([score * weight for score, weight in zip(scores, weights)]) -def sim_of(s1, s2, method='cosine', term='word', ngram_range=None, lower=True, ignore_punct=True): +def sim_of(s1, s2, method="cosine", term="word", ngram_range=None, lower=True, ignore_punct=True): method_func = { - 'lcs': lcs_sim, - 'jaccard': jaccard_sim, - 'cosine': cosine_sim, + "lcs": lcs_sim, + "jaccard": jaccard_sim, + "cosine": cosine_sim, }.get(method) if not method_func: raise ValueError("unsupported method: {}".format(method)) @@ -149,7 +149,7 @@ def add_doc(self, doc): return False self._id2doc[doc.id] = doc.title - terms = set(make_terms(doc.title, 'char', (3, 4))) + terms = set(make_terms(doc.title, "char", (3, 4))) for term in terms: self._index[term].add(doc.id) @@ -157,7 +157,7 @@ def add_doc(self, doc): def retrieve(self, query, k=10): related = Counter() - terms = set(make_terms(query, 'char', (3, 4))) + terms = set(make_terms(query, "char", (3, 4))) for term in terms: for qid in self._index.get(term, []): related[qid] += 1 @@ -165,7 +165,7 @@ def retrieve(self, 
query, k=10): return [(idx, self._id2doc[idx], score) for idx, score in related.most_common(k)] def save(self, fname): - pickle.dump((self._id2doc, self._index), open(fname, 'wb')) + pickle.dump((self._id2doc, self._index), open(fname, "wb")) def load(self, fname): - self._id2doc, self._index = pickle.load(open(fname, 'rb')) + self._id2doc, self._index = pickle.load(open(fname, "rb")) diff --git a/inoreader/subscription.py b/inoreader/subscription.py index 06b16ce..9656465 100644 --- a/inoreader/subscription.py +++ b/inoreader/subscription.py @@ -16,13 +16,13 @@ def __init__(self, id, title, categories, sortid, firstitemmsec, url, htmlUrl, i @classmethod def from_json(cls, data): subscription_info = { - 'id': data['id'], - 'title': data['title'], - 'categories': list(data['categories']), - 'sortid': data['sortid'], - 'firstitemmsec': data['firstitemmsec'], - 'url': data['url'], - 'htmlUrl': data['htmlUrl'], - 'iconUrl': data['iconUrl'], + "id": data["id"], + "title": data["title"], + "categories": list(data["categories"]), + "sortid": data["sortid"], + "firstitemmsec": data["firstitemmsec"], + "url": data["url"], + "htmlUrl": data["htmlUrl"], + "iconUrl": data["iconUrl"], } return cls(**subscription_info) diff --git a/inoreader/utils.py b/inoreader/utils.py index b653861..b5809d3 100644 --- a/inoreader/utils.py +++ b/inoreader/utils.py @@ -10,8 +10,8 @@ def normalize_whitespace(text): - text = re.sub(r'[\n\r\t]', ' ', text) - text = re.sub(r' +', ' ', text) + text = re.sub(r"[\n\r\t]", " ", text) + text = re.sub(r" +", " ", text) return text.strip() @@ -21,21 +21,21 @@ def extract_text(html_content): return html_content content = html.fromstring(html_content) - for img in content.iter('img'): - img_src = img.get('src') - img_alt = img.get('alt') or img_src + for img in content.iter("img"): + img_src = img.get("src") + img_alt = img.get("alt") or img_src if not img_src: continue - img.text = '![%s](%s)' % (img_alt, img_src) + img.text = "![%s](%s)" % (img_alt, img_src) - for link in content.iter('a'): - url = link.get('href') + for link in content.iter("a"): + url = link.get("href") text = link.text or url if not url: continue - link.text = '[%s](%s)' % (text, url) + link.text = "[%s](%s)" % (text, url) try: return content.text_content().replace("\xa0", "").strip() except Exception: @@ -47,20 +47,20 @@ def download_image(url, path, filename, proxies=None): if response.status_code not in (200, 201): return None - content_type = response.headers.get('Content-Type', '') - if not content_type or not content_type.startswith('image/'): + content_type = response.headers.get("Content-Type", "") + if not content_type or not content_type.startswith("image/"): return None - content_length = int(response.headers.get('Content-Length') or '0') + content_length = int(response.headers.get("Content-Length") or "0") if content_length <= 0: return None - suffix = content_type.replace('image/', '') - if suffix == 'svg+xml': - suffix = 'svg' + suffix = content_type.replace("image/", "") + if suffix == "svg+xml": + suffix = "svg" - image_filename = filename + '.' + suffix - with open(os.path.join(path, image_filename), 'wb') as f: + image_filename = filename + "." 
+ suffix
+    with open(os.path.join(path, image_filename), "wb") as f:
         response.raw.decode_content = True
         shutil.copyfileobj(response.raw, f)

From 27be9adc23ee8dfd1b0ecd4dbd714a32c25fa7ec Mon Sep 17 00:00:00 2001
From: Linusp <linusp1024@gmail.com>
Date: Fri, 22 Mar 2024 17:56:17 +0800
Subject: [PATCH 4/5] update Makefile

---
 Makefile | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/Makefile b/Makefile
index 1d8c0d8..98b5a4b 100644
--- a/Makefile
+++ b/Makefile
@@ -18,6 +18,12 @@ clean:
 venv:
 	- virtualenv --python=$(shell which python3) --prompt '' venv

-deps:
-	- pip install -U pip setuptools
-	- pip install -r requirements.txt
+lock-requirements:
+	- pip install pip-tools -q
+	- pip-compile -o requirements.txt
+
+deps: lock-requirements
+	- pip-sync
+
+build: lint test
+	- python -m build

From d334842479a70ba0c28d6d8c92d7c1e87f8c5bba Mon Sep 17 00:00:00 2001
From: Linusp <linusp1024@gmail.com>
Date: Fri, 22 Mar 2024 18:03:24 +0800
Subject: [PATCH 5/5] add automated releases on tags

---
 .github/workflows/publish.yaml | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 .github/workflows/publish.yaml

diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml
new file mode 100644
index 0000000..6ac9f3f
--- /dev/null
+++ b/.github/workflows/publish.yaml
@@ -0,0 +1,27 @@
+name: Build distribution
+
+on: [push, pull_request]
+
+jobs:
+  test:
+    runs-on: "ubuntu-latest"
+
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v3
+        with:
+          python-version: 3.8
+
+      - name: Install build dependencies
+        run: python -m pip install build wheel
+
+      - name: Build distributions
+        shell: bash -l {0}
+        run: python -m build
+
+      - name: Publish package to PyPI
+        if: github.repository == 'Linusp/python-inoreader' && github.event_name == 'push' && startsWith(github.ref, 'refs/tags')
+        uses: pypa/gh-action-pypi-publish@master
+        with:
+          user: __token__
+          password: ${{ secrets.pypi_password }}
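
After this series lands, the distribution is described entirely by the [project] table in
pyproject.toml (PATCH 1/5) and is published by the workflow above whenever a tag is pushed
(PATCH 5/5). A quick way to confirm that an installed package matches the declared metadata is
the standard-library importlib.metadata module. The sketch below is illustrative only and not
part of the patches: it assumes the package has been installed from this tree (for example with
`pip install .`) and a Python 3.10+ interpreter, where entry_points() accepts a group= argument.

    # Minimal sanity check of the [project] metadata, assuming `pip install .`
    # has been run and the interpreter is Python 3.10 or newer.
    from importlib.metadata import entry_points, metadata, version

    meta = metadata("python-inoreader")
    print(meta["Name"], version("python-inoreader"))  # expected: python-inoreader 0.4.7

    # [project.scripts] registers the `inoreader` console command
    scripts = [ep for ep in entry_points(group="console_scripts") if ep.name == "inoreader"]
    assert scripts and scripts[0].value == "inoreader.main:main"

A tag push such as `git tag v0.4.7 && git push origin v0.4.7` then satisfies the
startsWith(github.ref, 'refs/tags') condition in PATCH 5/5 and triggers the PyPI upload.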