Skip to content

Commit e9a4c7a

Browse files
committed
Add include/exclude filter support for pull-through caching
wip
1 parent cfe0017 commit e9a4c7a

File tree

2 files changed

+102
-20
lines changed

2 files changed

+102
-20
lines changed

pulp_python/app/pypi/views.py

+25-19
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import json
12
import logging
23
import requests
34

@@ -23,7 +24,7 @@
2324
from packaging.utils import canonicalize_name
2425
from urllib.parse import urljoin, urlparse, urlunsplit
2526
from pathlib import PurePath
26-
from pypi_simple import parse_links_stream_response
27+
from pypi_simple import ACCEPT_JSON_PREFERRED, ProjectPage
2728

2829
from pulpcore.plugin.viewsets import OperationPostponedResponse
2930
from pulpcore.plugin.tasking import dispatch
@@ -45,6 +46,7 @@
4546
python_content_to_json,
4647
PYPI_LAST_SERIAL,
4748
PYPI_SERIAL_CONSTANT,
49+
get_remote_package_filter,
4850
)
4951

5052
from pulp_python.app import tasks
@@ -232,27 +234,31 @@ def list(self, request, path):
232234

233235
def pull_through_package_simple(self, package, path, remote):
    """
    Build the simple-API detail page for ``package`` from the remote's index.

    The remote's include/exclude filter is consulted first; projects and
    releases it rejects are never exposed to the client.

    Args:
        package: Name of the project being requested.
        path: Distribution path, used as the prefix of each rewritten link.
        remote: The remote used to fetch the upstream simple page.

    Returns:
        A response wrapping the rendered simple detail page.

    Raises:
        Http404: If the project is filtered out or the upstream page
            cannot be fetched.
    """
    def parse_package(release_package):
        # Drop query/fragment from the upstream URL and route the download
        # through this distribution's content URL via ``?redirect=``.
        parsed = urlparse(release_package.url)
        stripped_url = urlunsplit(chain(parsed[:3], ("", "")))
        redirect_path = f'{path}/{release_package.filename}?redirect={stripped_url}'
        d_url = urljoin(self.base_content_url, redirect_path)
        return (
            release_package.filename,
            d_url,
            release_package.digests.get("sha256", ""),
        )

    rfilter = get_remote_package_filter(remote)
    if not rfilter.filter_project(package):
        # Http404 is an exception: it must be raised, not returned.
        raise Http404(f"{package} does not exist.")

    url = remote.get_remote_artifact_url(f'simple/{package}/')
    # Rebind instead of appending in place: tolerates ``headers`` being unset
    # and avoids mutating a list object that may be shared elsewhere.
    remote.headers = [*(remote.headers or []), {"Accept": ACCEPT_JSON_PREFERRED}]
    downloader = remote.get_downloader(url=url, max_retries=1)
    try:
        d = downloader.fetch()
    except Exception as exc:
        # NOTE(review): every fetch failure is reported as "not found";
        # narrow this once the downloader's exception types are confirmed.
        raise Http404(f"Could not find {package}.") from exc

    if d.headers["content-type"] == "application/vnd.pypi.simple.v1+json":
        with open(d.path, "rb") as fp:
            data = json.load(fp)
        # ``base_url`` belongs to ``from_json_data``, not to ``json.load``.
        page = ProjectPage.from_json_data(data, base_url=remote.url)
    else:
        with open(d.path, "rb") as fp:
            html = fp.read()
        page = ProjectPage.from_html(package, html, base_url=remote.url)

    packages = [
        parse_package(p)
        for p in page.packages
        if rfilter.filter_release(package, p.version)
    ]
    return Response(write_simple_detail(package, packages))
256262

257263
@extend_schema(operation_id="pypi_simple_package_read", summary="Get package simple page")
258264
def retrieve(self, request, path, package):

pulp_python/app/utils.py

+77-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66
from django.conf import settings
77
from jinja2 import Template
88
from packaging.utils import canonicalize_name
9-
from packaging.version import parse
9+
from packaging.requirements import Requirement
10+
from packaging.version import parse, InvalidVersion
1011

1112

1213
PYPI_LAST_SERIAL = "X-PYPI-LAST-SERIAL"
@@ -315,3 +316,78 @@ def write_simple_detail(project_name, project_packages, streamed=False):
315316
detail = Template(simple_detail_template)
316317
context = {"project_name": project_name, "project_packages": project_packages}
317318
return detail.stream(**context) if streamed else detail.render(**context)
319+
320+
321+
class PackageIncludeFilter:
    """Helper that filters packages based on a remote's includes/excludes."""

    def __init__(self, remote):
        """Parse ``remote.includes`` and ``remote.excludes`` once up front."""
        self.remote = remote
        self._filter_includes = self._parse_packages(self.remote.includes)
        self._filter_excludes = self._parse_packages(self.remote.excludes)

    def _parse_packages(self, packages):
        """
        Group requirement strings by kind and canonical project name.

        Returns a mapping with up to two keys: ``"range"`` for requirements
        that carry a version specifier and ``"full"`` for bare project names.
        Each maps canonical project name -> list of ``Requirement`` objects.
        """
        config = defaultdict(lambda: defaultdict(list))
        # ``or ()`` tolerates an unset (None) includes/excludes field.
        for value in packages or ():
            requirement = Requirement(value)
            requirement.name = canonicalize_name(requirement.name)
            if requirement.specifier:
                # Let pre-release versions match the specifier as well.
                requirement.specifier.prereleases = True
                config["range"][requirement.name].append(requirement)
            else:
                config["full"][requirement.name].append(requirement)
        return config

    def filter_project(self, project_name):
        """Return True if ``project_name`` is allowed through the remote's filters."""
        project_name = canonicalize_name(project_name)
        include_full = self._filter_includes.get("full", {})
        include_range = self._filter_includes.get("range", {})
        # Union of every project named in the include list. ``set(*keys)``
        # would raise for two or more names and, for exactly one name, build
        # a set of that name's characters.
        include = include_range.keys() | include_full.keys()
        if include and project_name not in include:
            return False

        # Only a bare exclude removes the whole project; a ranged exclude
        # removes individual releases (handled in ``filter_release``).
        exclude_full = self._filter_excludes.get("full", {})
        if project_name in exclude_full:
            return False

        return True

    def filter_release(self, project_name, version):
        """Return True if the release is allowed through the remote's filters."""
        project_name = canonicalize_name(project_name)
        if not self.filter_project(project_name):
            return False

        try:
            parsed_version = parse(version)
        except (InvalidVersion, TypeError):
            # TypeError covers a missing (None) version, which callers may
            # pass when the version could not be determined from a filename.
            return False

        include_range = self._filter_includes.get("range", {})
        if project_name in include_range:
            # At least one ranged include must accept this version.
            if not any(
                parsed_version in req.specifier for req in include_range[project_name]
            ):
                return False

        exclude_range = self._filter_excludes.get("range", {})
        if project_name in exclude_range:
            # Any ranged exclude matching this version rejects it.
            if any(
                parsed_version in req.specifier for req in exclude_range[project_name]
            ):
                return False

        return True
382+
383+
384+
_remote_filters = {}


def get_remote_package_filter(remote):
    """
    Return the include/exclude filter for ``remote``, reusing a cached one.

    Filters are cached per ``remote.pulp_id`` together with the
    ``pulp_last_updated`` value seen when they were built; a cached filter
    is reused only while that value still matches.
    """
    cached = _remote_filters.get(remote.pulp_id)
    if cached and cached[0] == remote.pulp_last_updated:
        return cached[1]

    # Cache miss or stale entry: rebuild and overwrite the cache slot.
    fresh_filter = PackageIncludeFilter(remote)
    _remote_filters[remote.pulp_id] = (remote.pulp_last_updated, fresh_filter)
    return fresh_filter

0 commit comments

Comments
 (0)