Skip to content

Commit 2abf718

Browse files
authored
Merge pull request #30 from fsspec/next
getting rid of some old mess
2 parents 412c542 + 100b14f commit 2abf718

11 files changed

+97
-690
lines changed

.github/workflows/default_gateways.yml

-34
This file was deleted.

.github/workflows/local_gateway.yml

+1-6
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,7 @@ jobs:
1010
max-parallel: 4
1111
matrix:
1212
python-version: ["3.8", "3.9", "3.10"]
13-
ipfs-version: ["0.12.0"]
14-
include:
15-
- python-version: "3.10"
16-
ipfs-version: "0.9.1"
17-
env:
18-
IPFSSPEC_GATEWAYS: "http://127.0.0.1:8080" # use only localhost as gateway
13+
ipfs-version: ["0.27.0"] # this is the latest IPFS version supporting /api/v0, see issue #28
1914
steps:
2015
- uses: actions/checkout@v1
2116
- name: Set up Python ${{ matrix.python-version }}

README.md

+6-2
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,10 @@ with fsspec.open("ipfs://QmZ4tDuvesekSs4qM5ZBKpXiZGun7S2CYtEZRB3DYXkjGx", "r") a
2020
print(f.read())
2121
```
2222

23-
The current implementation uses a HTTP gateway to access the data. It tries to use a local one (which is expected to be found at `http://127.0.0.1:8080`) and falls back to `ipfs.io` if the local gateway is not available.
23+
The current implementation uses an HTTP gateway to access the data. It uses [IPIP-280](https://github.com/ipfs/specs/pull/280) to determine which gateway to use. If you have a current installation of an IPFS node (e.g. kubo, IPFS Desktop, etc.), you should be fine. In case you want to use a different gateway, you can use any of the methods specified in IPIP-280, e.g.:
2424

25-
You can modify the list of gateways using the space separated environment variable `IPFSSPEC_GATEWAYS`.
25+
* create the file `~/.ipfs/gateway` with the gateway address as first line
26+
* define the environment variable `IPFS_GATEWAY` to the gateway address
27+
* create the file `/etc/ipfs/gateway` with the gateway address as first line
28+
29+
No matter which option you use, the gateway has to be specified as an HTTP(S) url, e.g.: `http://127.0.0.1:8080`.

ipfsspec/__init__.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
1-
from .core import IPFSFileSystem
21
from .async_ipfs import AsyncIPFSFileSystem
32

43
from ._version import get_versions
54
__version__ = get_versions()['version']
65
del get_versions
76

8-
__all__ = ["__version__", "IPFSFileSystem", "AsyncIPFSFileSystem"]
7+
__all__ = ["__version__", "AsyncIPFSFileSystem"]

ipfsspec/async_ipfs.py

+88-118
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,16 @@
11
import io
2-
import time
2+
import os
3+
import platform
34
import weakref
5+
from functools import lru_cache
6+
from pathlib import Path
7+
import warnings
48

5-
import asyncio
69
import aiohttp
710

811
from fsspec.asyn import AsyncFileSystem, sync, sync_wrapper
912
from fsspec.exceptions import FSTimeoutError
1013

11-
from .utils import get_default_gateways
12-
1314
import logging
1415

1516
logger = logging.getLogger("ipfsspec")
@@ -138,129 +139,98 @@ def __str__(self):
138139
return f"GW({self.url})"
139140

140141

141-
class GatewayState:
142-
def __init__(self):
143-
self.reachable = True
144-
self.next_request_time = 0
145-
self.backoff_time = 0
146-
self.start_backoff = 1e-5
147-
self.max_backoff = 5
148-
149-
def schedule_next(self):
150-
self.next_request_time = time.monotonic() + self.backoff_time
151-
152-
def backoff(self):
153-
if self.backoff_time < self.start_backoff:
154-
self.backoff_time = self.start_backoff
155-
else:
156-
self.backoff_time *= 2
157-
self.reachable = True
158-
self.schedule_next()
159-
160-
def speedup(self, not_below=0):
161-
did_speed_up = False
162-
if self.backoff_time > not_below:
163-
self.backoff_time *= 0.9
164-
did_speed_up = True
165-
self.reachable = True
166-
self.schedule_next()
167-
return did_speed_up
168-
169-
def broken(self):
170-
self.backoff_time = self.max_backoff
171-
self.reachable = False
172-
self.schedule_next()
173-
174-
def trying_to_reach(self):
175-
self.next_request_time = time.monotonic() + 1
176-
177-
178-
class MultiGateway(AsyncIPFSGatewayBase):
179-
def __init__(self, gws, max_backoff_rounds=50):
180-
self.gws = [(GatewayState(), gw) for gw in gws]
181-
self.max_backoff_rounds = max_backoff_rounds
182-
183-
@property
184-
def _gws_in_priority_order(self):
185-
now = time.monotonic()
186-
return sorted(self.gws, key=lambda x: max(now, x[0].next_request_time))
187-
188-
async def _gw_op(self, op):
189-
for _ in range(self.max_backoff_rounds):
190-
for state, gw in self._gws_in_priority_order:
191-
not_before = state.next_request_time
192-
if not state.reachable:
193-
state.trying_to_reach()
194-
else:
195-
state.schedule_next()
196-
now = time.monotonic()
197-
if not_before > now:
198-
await asyncio.sleep(not_before - now)
199-
logger.debug("tring %s", gw)
200-
try:
201-
res = await op(gw)
202-
if state.speedup(time.monotonic() - now):
203-
logger.debug("%s speedup", gw)
204-
return res
205-
except FileNotFoundError: # early exit if object doesn't exist
206-
raise
207-
except (RequestsTooQuick, aiohttp.ClientResponseError, asyncio.TimeoutError) as e:
208-
state.backoff()
209-
logger.debug("%s backoff %s", gw, e)
210-
break
211-
except IOError as e:
212-
exception = e
213-
state.broken()
214-
logger.debug("%s broken", gw)
215-
continue
216-
else:
217-
raise exception
218-
raise RequestsTooQuick()
219-
220-
async def api_get(self, endpoint, session, **kwargs):
221-
return await self._gw_op(lambda gw: gw.api_get(endpoint, session, **kwargs))
222-
223-
async def api_post(self, endpoint, session, **kwargs):
224-
return await self._gw_op(lambda gw: gw.api_post(endpoint, session, **kwargs))
225-
226-
async def cid_head(self, path, session, headers=None, **kwargs):
227-
return await self._gw_op(lambda gw: gw.cid_head(path, session, headers=headers, **kwargs))
228-
229-
async def cid_get(self, path, session, headers=None, **kwargs):
230-
return await self._gw_op(lambda gw: gw.cid_get(path, session, headers=headers, **kwargs))
231-
232-
async def cat(self, path, session):
233-
return await self._gw_op(lambda gw: gw.cat(path, session))
234-
235-
async def ls(self, path, session):
236-
return await self._gw_op(lambda gw: gw.ls(path, session))
237-
238-
def state_report(self):
239-
return "\n".join(f"{s.next_request_time}, {gw}" for s, gw in self.gws)
240-
241-
def __str__(self):
242-
return "Multi-GW(" + ", ".join(str(gw) for _, gw in self.gws) + ")"
243-
244-
245142
async def get_client(**kwargs):
    """
    Create the aiohttp client session used for gateway requests.

    A default timeout (1 second socket connect, 5 seconds socket read) is
    applied; any keyword argument given by the caller, including
    ``timeout``, takes precedence over this default.

    :param kwargs: forwarded to ``aiohttp.ClientSession``
    :return: a new ``aiohttp.ClientSession``
    """
    session_options = {
        "timeout": aiohttp.ClientTimeout(sock_connect=1, sock_read=5),
    }
    # caller-supplied options win over the defaults above
    session_options.update(kwargs)
    return aiohttp.ClientSession(**session_options)
249146

250147

251-
DEFAULT_GATEWAY = None
148+
def gateway_from_file(gateway_path):
    """
    Build a gateway from the address stored in the first line of a file.

    :param gateway_path: path of the candidate gateway configuration file
    :return: an ``AsyncIPFSGateway`` for the address found in the first
        line of the file, or ``None`` if the file does not exist or its
        first line is blank
    """
    # Open directly instead of checking ``exists()`` first: this avoids the
    # race where the file disappears between the check and the open.
    try:
        with open(gateway_path, encoding="utf-8") as gw_file:
            ipfs_gateway = gw_file.readline().strip()
    except (FileNotFoundError, NotADirectoryError):
        return None

    # A blank first line is not a usable gateway address; report "nothing
    # configured here" so the caller can try the next location.
    if not ipfs_gateway:
        return None

    logger.debug("using IPFS gateway from %s: %s", gateway_path, ipfs_gateway)
    return AsyncIPFSGateway(ipfs_gateway)
252155

253156

157+
def _gateway_file_candidates():
    """Yield the well-known gateway file locations in lookup order, skipping any whose environment variable is unset."""
    ipfs_path = os.environ.get("IPFS_PATH", "")
    if ipfs_path:
        yield Path(ipfs_path) / "gateway"

    home = os.environ.get("HOME", "")
    if home:
        yield Path(home) / ".ipfs" / "gateway"

    config_home = os.environ.get("XDG_CONFIG_HOME", "")
    if config_home:
        yield Path(config_home) / "ipfs" / "gateway"

    yield Path("/etc") / "ipfs" / "gateway"

    # platform specific fallback locations
    system = platform.system()
    if system == "Windows":
        for variable in ("LOCALAPPDATA", "APPDATA", "PROGRAMDATA"):
            base = os.environ.get(variable, "")
            if base:
                yield Path(base) / "ipfs" / "gateway"
    elif system == "Darwin":
        if home:
            yield Path(home) / "Library" / "Application Support" / "ipfs" / "gateway"
        yield Path("/Library") / "Application Support" / "ipfs" / "gateway"
    elif system == "Linux":
        if home:
            yield Path(home) / ".config" / "ipfs" / "gateway"
        # /etc/ipfs/gateway has already been yielded above


@lru_cache
def get_gateway():
    """
    Get IPFS gateway according to IPIP-280

    see: https://github.com/ipfs/specs/pull/280

    Lookup order: the ``IPFS_GATEWAY`` environment variable, the deprecated
    ``IPFSSPEC_GATEWAYS`` environment variable (first entry only), then a
    series of well-known ``gateway`` files. The result is cached by
    ``lru_cache``, so the lookup runs once per process unless
    ``get_gateway.cache_clear()`` is called.

    :return: the configured ``AsyncIPFSGateway``
    :raises RuntimeError: if no gateway is configured anywhere
    """

    # IPFS_GATEWAY environment variable should override everything
    ipfs_gateway = os.environ.get("IPFS_GATEWAY", "").strip()
    if ipfs_gateway:
        logger.debug("using IPFS gateway from IPFS_GATEWAY environment variable: %s", ipfs_gateway)
        return AsyncIPFSGateway(ipfs_gateway)

    # internal configuration: accept IPFSSPEC_GATEWAYS for backwards compatibility
    # (split first, so a whitespace-only value counts as "not set" instead of
    # failing on the index lookup)
    ipfsspec_gateways = os.environ.get("IPFSSPEC_GATEWAYS", "").split()
    if ipfsspec_gateways:
        ipfs_gateway = ipfsspec_gateways[0]
        logger.debug("using IPFS gateway from IPFSSPEC_GATEWAYS environment variable: %s", ipfs_gateway)
        warnings.warn("The IPFSSPEC_GATEWAYS environment variable is deprecated, please configure your IPFS Gateway according to IPIP-280, e.g. by using the IPFS_GATEWAY environment variable or using the ~/.ipfs/gateway file.", DeprecationWarning)
        return AsyncIPFSGateway(ipfs_gateway)

    # check various well-known files for possible gateway configurations;
    # the candidates are generated lazily, so the search stops at the first hit
    for candidate in _gateway_file_candidates():
        gateway = gateway_from_file(candidate)
        if gateway:
            return gateway

    # if we reach this point, no gateway is configured
    raise RuntimeError("IPFS Gateway could not be found!\n"
                       "In order to access IPFS, you must configure an "
                       "IPFS Gateway using a IPIP-280 configuration method. "
                       "Possible options are: \n"
                       " * set the environment variable IPFS_GATEWAY\n"
                       " * write a gateway in the first line of the file ~/.ipfs/gateway\n"
                       "\n"
                       "It's always best to run your own IPFS gateway, e.g. by using "
                       "IPFS Desktop (https://docs.ipfs.tech/install/ipfs-desktop/) or "
                       "the command line version Kubo (https://docs.ipfs.tech/install/command-line/). "
                       "If you can't run your own gateway, you may also try using the "
                       "public IPFS gateway at https://ipfs.io or https://dweb.link . "
                       "However, this is not recommended for productive use and you may experience "
                       "severe performance issues.")
264234

265235

266236
class AsyncIPFSFileSystem(AsyncFileSystem):

0 commit comments

Comments
 (0)