Commit 882d355

Merge pull request #34 from radix-ai/ls-betterlinting

More linters, stricter linting

2 parents 42feb9f + b037131

File tree: 6 files changed, +143 -107 lines

.circleci/config.yml (+1 -1)

@@ -28,7 +28,7 @@ jobs:
           name: Run linters
           command: |
             source activate graphchain-circleci-env
-            flake8 graphchain --max-complexity=10
+            flake8 graphchain --max-complexity=10 --ignore=W504
             pydocstyle graphchain --convention=numpy
             mypy graphchain --ignore-missing-imports --strict
       - run:
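
pycodestyle's W503 ("line break before binary operator") and W504 ("line break after binary operator") are complementary checks that no multi-line expression can satisfy at once, which is why flake8 ignores both by default. Passing --ignore=W504 replaces that default ignore list entirely, so every other default-ignored check, including W503, switches back on: stricter linting, standardized on breaking lines after operators. A minimal sketch of the two styles, with illustrative values (the expression is adapted from estimate_load_time in graphchain/core.py below):

read_latency, size, read_throughput = 0.1, 2e6, 50e6  # illustrative values

# Accepted under --ignore=W504: the binary operator ends the line.
load_time = (read_latency +
             size / read_throughput)

# Flagged as W503 under this configuration: the operator starts the line.
load_time = (read_latency
             + size / read_throughput)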

environment.circleci.yml (+11 -8)

@@ -4,16 +4,19 @@ channels:
   - conda-forge
 dependencies:
   - cloudpickle=0.6
-  - dask=0.19
-  - flake8=3.5
-  - flake8-comprehensions=1.4
+  - dask=1.0
   - fs-s3fs=0.1
-  - joblib=0.12
+  - joblib=0.13
   - mypy<0.700
-  - pydocstyle=2.1
-  - pytest=3.8
+  - pydocstyle=3.0
+  - pytest=4.0
   - pytest-cov=2.6
-  - pytest-xdist=1.23
+  - pytest-xdist=1.25
   - pip:
+    - flake8~=3.6.0
+    - flake8-comprehensions~=1.4.1
+    - flake8-bandit~=2.0.0
     - flake8-bugbear~=18.8.0
-    - lz4~=2.1.0
+    - flake8-mutable~=1.2.0
+    - flake8-rst-docstrings~=0.0.8
+    - lz4~=2.1.6
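
The flake8 plugins move under pip:, presumably because conda-forge did not carry them all at these versions, and their pins switch from conda's exact = to pip's ~= compatible-release operator (PEP 440): flake8~=3.6.0 accepts any 3.6.x patch release but not 3.7.0. A quick check with the packaging library that pip itself builds on:

from packaging.specifiers import SpecifierSet

compatible = SpecifierSet("~=3.6.0")  # equivalent to: >=3.6.0, ==3.6.*
print("3.6.5" in compatible)  # True:  patch upgrades stay in range
print("3.7.0" in compatible)  # False: minor bumps fall outside it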

environment.local.yml (+5 -2)

@@ -6,8 +6,6 @@ dependencies:
   - autopep8
   - cloudpickle
   - dask
-  - flake8
-  - flake8-comprehensions
   - fs-s3fs
   - isort
   - joblib
@@ -22,5 +20,10 @@ dependencies:
   - sphinx_rtd_theme
   - twine
   - pip:
+    - flake8
+    - flake8-comprehensions
+    - flake8-bandit
     - flake8-bugbear
+    - flake8-mutable
+    - flake8-rst-docstrings
     - lz4

graphchain/core.py (+51 -47)
@@ -4,8 +4,8 @@
 import logging
 import pickle
 import time
-from typing import (Any, Callable, Container, Hashable, Iterable, Optional,
-                    Union)
+from typing import (Any, Callable, Container, Dict, Hashable, Iterable,
+                    Optional, Union)
 
 import cloudpickle
 import dask
@@ -23,11 +23,11 @@ class CachedComputation:
 
     def __init__(
             self,
-            dsk: dict,
+            dsk: Dict[Hashable, Any],
             key: Hashable,
             computation: Any,
             location: Union[str, fs.base.FS],
-            write_to_cache: Union[bool, str]='auto') -> None:
+            write_to_cache: Union[bool, str] = 'auto') -> None:
         """Cache a dask graph computation.
 
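
Two recurring fixes in core.py show up in this hunk: the graph's annotation tightens from a bare dict to Dict[Hashable, Any], and annotated parameter defaults gain spaces around =, the PEP 8 spelling that newer pycodestyle releases enforce as E252. As a reference point, here is a minimal graph that satisfies the new annotation; the names are illustrative, not taken from the codebase:

from typing import Any, Dict, Hashable


def add(x: int, y: int) -> int:
    """Add two numbers."""
    return x + y


# A dask graph maps hashable keys to literal values or task tuples,
# which is exactly what Dict[Hashable, Any] expresses.
dsk: Dict[Hashable, Any] = {
    'x': 1,
    'y': 2,
    'sum': (add, 'x', 'y'),
}
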
Parameters
@@ -51,9 +51,9 @@ def __init__(
5151
5252
Returns
5353
-------
54-
CachedComputation
55-
A wrapper for the computation object to replace the original
56-
computation with in the dask graph.
54+
CachedComputation
55+
A wrapper for the computation object to replace the original
56+
computation with in the dask graph.
5757
"""
5858
self.dsk = dsk
5959
self.key = key
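
This hunk, and the docstring hunks in optimize and get further down, replace blocks whose wording is almost unchanged, which suggests the real fix is indentation: when numpydoc section content is indented deeper than its ------- underline, reStructuredText parses it as a block quote, and the newly added flake8-rst-docstrings plugin reports that as an error. A sketch of the corrected shape:

def example() -> int:
    """Return a constant.

    Returns
    -------
    int
        The section content sits flush with the underline, and the
        description is indented four spaces under the type, forming an
        RST definition list rather than a stray block quote.
    """
    return 42
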
@@ -62,7 +62,7 @@ def __init__(
         self.write_to_cache = write_to_cache
 
     @property  # type: ignore
-    @functools.lru_cache()  # type: ignore
+    @functools.lru_cache()
     def cache_fs(self) -> fs.base.FS:
         """Open a PyFilesystem FS to the cache directory."""
         # create=True does not yet work for S3FS [1]. This should probably be
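
Stacking functools.lru_cache under property memoizes the property per instance: the cache is keyed on self, so the filesystem handle is opened once per CachedComputation. The # type: ignore stays on the property line because mypy does not accept an lru_cache wrapper where it expects a plain getter, while the ignore on the lru_cache line itself is evidently no longer needed with the mypy version pinned above. A minimal sketch of the pattern (names are illustrative):

import functools


class Connection:
    """Sketch of the cached-property pattern used by cache_fs."""

    @property  # type: ignore
    @functools.lru_cache()
    def resource(self) -> str:
        """Open the resource once per instance."""
        print('opening resource')  # runs only on the first access
        return 'handle'


conn = Connection()
assert conn.resource == conn.resource  # prints 'opening resource' once
# Trade-off: lru_cache holds a strong reference to self, so cached
# instances are never garbage collected while the cache lives.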
@@ -133,7 +133,7 @@ def estimate_load_time(self, result: Any) -> float:
             500e6 if isinstance(self.cache_fs, fs.osfs.OSFS) else 50e6))
         return read_latency + size / read_throughput
 
-    @functools.lru_cache()  # type: ignore
+    @functools.lru_cache()
     def read_time(self, timing_type: str) -> float:
         """Read the time to load, compute, or store from file."""
         time_filename = f'{self.hash}.time.{timing_type}'
@@ -154,7 +154,7 @@ def write_log(self, log_type: str) -> None:
         with self.cache_fs.open(log_filename, 'w') as fid:  # type: ignore
             fid.write(self.hash)
 
-    def time_to_result(self, memoize: bool=True) -> float:
+    def time_to_result(self, memoize: bool = True) -> float:
         """Estimate the time to load or compute this computation."""
         if hasattr(self, '_time_to_result'):
             return self._time_to_result  # type: ignore
@@ -286,10 +286,11 @@ def __call__(self, *args: Any, **kwargs: Any) -> Any:
 
 
 def optimize(
-        dsk: dict,
-        keys: Optional[Union[Hashable, Iterable[Hashable]]]=None,
-        skip_keys: Optional[Container[Hashable]]=None,
-        location: Union[str, fs.base.FS]="./__graphchain_cache__") -> dict:
+        dsk: Dict[Hashable, Any],
+        keys: Optional[Union[Hashable, Iterable[Hashable]]] = None,
+        skip_keys: Optional[Container[Hashable]] = None,
+        location: Union[str, fs.base.FS] = "./__graphchain_cache__") \
+        -> Dict[Hashable, Any]:
     """Optimize a dask graph with cached computations.
 
     According to the dask graph specification [1]_, a dask graph is a
@@ -318,23 +319,23 @@ def optimize(
 
     Parameters
     ----------
-        dsk
-            The dask graph to optimize with caching computations.
-        keys
-            Not used. Is present for compatibility with dask optimizers [2]_.
-        skip_keys
-            A container of keys not to cache.
-        location
-            A PyFilesystem FS URL to store the cached computations in. Can be a
-            local directory such as ``'./__graphchain_cache__'`` or a remote
-            directory such as ``'s3://bucket/__graphchain_cache__'``. You can
-            also pass a PyFilesystem itself instead.
+    dsk
+        The dask graph to optimize with caching computations.
+    keys
+        Not used. Is present for compatibility with dask optimizers [2]_.
+    skip_keys
+        A container of keys not to cache.
+    location
+        A PyFilesystem FS URL to store the cached computations in. Can be a
+        local directory such as ``'./__graphchain_cache__'`` or a remote
+        directory such as ``'s3://bucket/__graphchain_cache__'``. You can
+        also pass a PyFilesystem itself instead.
 
     Returns
     -------
-        dict
-            A copy of the dask graph where the computations have been replaced
-            by ``CachedComputation``'s.
+    dict
+        A copy of the dask graph where the computations have been replaced by
+        ``CachedComputation``'s.
 
     References
     ----------
@@ -361,11 +362,14 @@ def optimize(
 
 
 def get(
-        dsk: dict,
+        dsk: Dict[Hashable, Any],
         keys: Union[Hashable, Iterable[Hashable]],
-        skip_keys: Optional[Container[Hashable]]=None,
-        location: Union[str, fs.base.FS]="./__graphchain_cache__",
-        scheduler: Optional[Callable]=None) -> Any:
+        skip_keys: Optional[Container[Hashable]] = None,
+        location: Union[str, fs.base.FS] = "./__graphchain_cache__",
+        scheduler: Optional[Callable[
+            [Dict[Hashable, Any], Union[Hashable, Iterable[Hashable]]],
+            Any
+        ]] = None) -> Any:
     """Get one or more keys from a dask graph with caching.
 
     Optimizes a dask graph with ``graphchain.optimize`` and then computes the
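
The widened scheduler annotation spells out what was implicit before: a scheduler is any callable taking a graph and keys and returning the computed values, which is the signature of dask's get functions. A hypothetical end-to-end sketch, reusing the small dsk graph sketched earlier:

import dask

import graphchain

# The first run computes 'sum' and stores it in ./__graphchain_cache__;
# later runs load the cached result instead of recomputing it.
result = graphchain.get(dsk, 'sum', scheduler=dask.get)
print(result)  # 3
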
@@ -377,24 +381,24 @@ def get(
 
     Parameters
     ----------
-        dsk
-            The dask graph to query.
-        keys
-            The keys to compute.
-        skip_keys
-            A container of keys not to cache.
-        location
-            A PyFilesystem FS URL to store the cached computations in. Can be a
-            local directory such as ``'./__graphchain_cache__'`` or a remote
-            directory such as ``'s3://bucket/__graphchain_cache__'``. You can
-            also pass a PyFilesystem itself instead.
-        scheduler
-            The dask scheduler to use to retrieve the keys from the graph.
+    dsk
+        The dask graph to query.
+    keys
+        The keys to compute.
+    skip_keys
+        A container of keys not to cache.
+    location
+        A PyFilesystem FS URL to store the cached computations in. Can be a
+        local directory such as ``'./__graphchain_cache__'`` or a remote
+        directory such as ``'s3://bucket/__graphchain_cache__'``. You can also
+        pass a PyFilesystem itself instead.
+    scheduler
+        The dask scheduler to use to retrieve the keys from the graph.
 
     Returns
     -------
-        Any
-            The computed values corresponding to the given keys.
+    Any
+        The computed values corresponding to the given keys.
     """
     cached_dsk = optimize(dsk, keys, skip_keys=skip_keys, location=location)
     scheduler = \
