Skip to content

Commit 1530d2f

Browse files
authored
Merge pull request #747 from gerrod3/pull-through-filters
Add include/exclude filter support for pull-through caching
2 parents 64450cf + fc82100 commit 1530d2f

File tree

5 files changed

+161
-25
lines changed

5 files changed

+161
-25
lines changed

CHANGES/706.feature

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Pull-through caching now respects the include/exclude filters on the upstream remote.

docs/user/guides/publish.md

+3
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,9 @@ from the remote source and have Pulp store that package as orphaned content.
8888
pulp python distribution update --name foo --remote bar
8989
```
9090

91+
!!! note
92+
Pull-through caching will respect the includes/excludes filters on the supplied remote.
93+
9194
!!! warning
9295
Support for pull-through caching is provided as a tech preview in Pulp 3.
9396
Functionality may not work or may be incomplete. Also, backwards compatibility when upgrading

pulp_python/app/pypi/views.py

+35-22
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1+
import json
12
import logging
2-
import requests
33

4+
from aiohttp.client_exceptions import ClientError
45
from rest_framework.viewsets import ViewSet
56
from rest_framework.response import Response
67
from django.core.exceptions import ObjectDoesNotExist
@@ -15,19 +16,21 @@
1516
Http404,
1617
HttpResponseForbidden,
1718
HttpResponseBadRequest,
18-
StreamingHttpResponse
19+
StreamingHttpResponse,
20+
HttpResponse,
1921
)
2022
from drf_spectacular.utils import extend_schema
2123
from dynaconf import settings
2224
from itertools import chain
2325
from packaging.utils import canonicalize_name
2426
from urllib.parse import urljoin, urlparse, urlunsplit
2527
from pathlib import PurePath
26-
from pypi_simple import parse_links_stream_response
28+
from pypi_simple import ACCEPT_JSON_PREFERRED, ProjectPage
2729

2830
from pulpcore.plugin.viewsets import OperationPostponedResponse
2931
from pulpcore.plugin.tasking import dispatch
3032
from pulpcore.plugin.util import get_domain
33+
from pulpcore.plugin.exceptions import TimeoutException
3134
from pulp_python.app.models import (
3235
PythonDistribution,
3336
PythonPackageContent,
@@ -37,14 +40,15 @@
3740
SummarySerializer,
3841
PackageMetadataSerializer,
3942
PackageUploadSerializer,
40-
PackageUploadTaskSerializer
43+
PackageUploadTaskSerializer,
4144
)
4245
from pulp_python.app.utils import (
4346
write_simple_index,
4447
write_simple_detail,
4548
python_content_to_json,
4649
PYPI_LAST_SERIAL,
4750
PYPI_SERIAL_CONSTANT,
51+
get_remote_package_filter,
4852
)
4953

5054
from pulp_python.app import tasks
@@ -233,27 +237,36 @@ def list(self, request, path):
233237

234238
def pull_through_package_simple(self, package, path, remote):
    """
    Build the package's simple page from the page served by the remote.

    The remote's include/exclude filters are applied to the project as a whole and to each
    release on the page. Every remaining file link is rewritten so that it points at this
    distribution's content URL and carries the upstream location in ``?redirect=``.

    Args:
        package: Name of the requested project.
        path: Path segment placed in front of each filename in the rewritten links.
        remote: Remote the upstream simple page is fetched from.

    Returns:
        An HttpResponse with the rendered simple page, a 502 response when the upstream
        request raises ClientError, or a 504 response when it raises TimeoutException.

    Raises:
        Http404: If the remote's filters do not allow the project.
    """
    def parse_package(release_package):
        """Return (filename, rewritten download URL, sha256 digest or "")."""
        parsed = urlparse(release_package.url)
        # Keep scheme, netloc and path only; params, query and fragment are dropped.
        stripped_url = urlunsplit(chain(parsed[:3], ("", "")))
        redirect_path = f'{path}/{release_package.filename}?redirect={stripped_url}'
        d_url = urljoin(self.base_content_url, redirect_path)
        return release_package.filename, d_url, release_package.digests.get("sha256", "")

    rfilter = get_remote_package_filter(remote)
    if not rfilter.filter_project(package):
        raise Http404(f"{package} does not exist.")

    url = remote.get_remote_artifact_url(f'simple/{package}/')
    remote.headers = remote.headers or []
    remote.headers.append({"Accept": ACCEPT_JSON_PREFERRED})
    downloader = remote.get_downloader(url=url, max_retries=1)
    try:
        d = downloader.fetch()
    except ClientError:
        return HttpResponse(f"Failed to fetch {package} from {remote.url}.", status=502)
    except TimeoutException:
        return HttpResponse(f"{remote.url} timed out while fetching {package}.", status=504)

    # Compare the bare media type only: the header may be missing or may carry parameters
    # (e.g. "; charset=utf-8"), and neither case should raise or be misread.
    content_type = (d.headers.get("content-type") or "").split(";", 1)[0].strip().lower()
    # The context manager closes the downloaded file once the page has been parsed.
    with open(d.path, "rb") as page_file:
        if content_type == "application/vnd.pypi.simple.v1+json":
            page = ProjectPage.from_json_data(json.load(page_file), base_url=remote.url)
        else:
            page = ProjectPage.from_html(package, page_file.read(), base_url=remote.url)
    packages = [
        parse_package(p) for p in page.packages if rfilter.filter_release(package, p.version)
    ]
    return HttpResponse(write_simple_detail(package, packages))
257270

258271
@extend_schema(operation_id="pypi_simple_package_read", summary="Get package simple page")
259272
def retrieve(self, request, path, package):

pulp_python/app/utils.py

+79-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77
from django.conf import settings
88
from jinja2 import Template
99
from packaging.utils import canonicalize_name
10-
from packaging.version import parse
10+
from packaging.requirements import Requirement
11+
from packaging.version import parse, InvalidVersion
1112

1213

1314
PYPI_LAST_SERIAL = "X-PYPI-LAST-SERIAL"
@@ -356,3 +357,80 @@ def write_simple_detail(project_name, project_packages, streamed=False):
356357
detail = Template(simple_detail_template)
357358
context = {"project_name": project_name, "project_packages": project_packages}
358359
return detail.stream(**context) if streamed else detail.render(**context)
360+
361+
362+
class PackageIncludeFilter:
    """
    Decide whether projects and releases pass a remote's include/exclude filters.

    Every entry of the remote's ``includes`` and ``excludes`` is parsed as a requirement
    string. Entries are grouped by canonical project name into those without a version
    specifier ("full") and those with one ("range").
    """

    def __init__(self, remote):
        """Parse the ``includes`` and ``excludes`` lists of ``remote.cast()``."""
        self.remote = remote.cast()
        self._filter_includes = self._parse_packages(self.remote.includes)
        self._filter_excludes = self._parse_packages(self.remote.excludes)

    def _parse_packages(self, packages):
        """
        Group requirement strings by kind and canonical project name.

        Args:
            packages: Iterable of requirement strings; ``None`` is treated as empty.

        Returns:
            A nested mapping ``{"full" | "range": {project_name: [Requirement, ...]}}``.
        """
        config = defaultdict(lambda: defaultdict(list))
        for value in packages or ():
            requirement = Requirement(value)
            requirement.name = canonicalize_name(requirement.name)
            if requirement.specifier:
                # Allow pre-release versions to match the specifier as well.
                requirement.specifier.prereleases = True
                config["range"][requirement.name].append(requirement)
            else:
                config["full"][requirement.name].append(requirement)
        return config

    def filter_project(self, project_name):
        """
        Return whether ``project_name`` would be allowed through the remote's filters.

        A project is rejected when an include list exists and does not name it, or when it
        is excluded without a version specifier. Excludes that carry a specifier only affect
        individual releases, so they do not reject the project here.
        """
        project_name = canonicalize_name(project_name)
        include_full = self._filter_includes.get("full", {})
        include_range = self._filter_includes.get("range", {})
        include = set(include_range.keys()).union(include_full.keys())
        if include and project_name not in include:
            return False

        exclude_full = self._filter_excludes.get("full", {})
        if project_name in exclude_full:
            return False

        return True

    def filter_release(self, project_name, version):
        """
        Return whether a release would be allowed through the remote's filters.

        Args:
            project_name: Name of the project the release belongs to.
            version: Version string of the release, or ``None`` when it is unknown.

        Returns:
            False when the version is missing or invalid, when the project itself is
            filtered out, when include ranges exist for the project and none contains the
            version, or when any exclude range contains it. True otherwise.
        """
        # A release without a version cannot be matched against any specifier; reject it
        # like an invalid version instead of letting parse() raise a TypeError.
        if version is None:
            return False

        project_name = canonicalize_name(project_name)
        if not self.filter_project(project_name):
            return False

        try:
            version = parse(version)
        except InvalidVersion:
            return False

        include_range = self._filter_includes.get("range", {})
        if project_name in include_range and not any(
            version in req.specifier for req in include_range[project_name]
        ):
            return False

        exclude_range = self._filter_excludes.get("range", {})
        # .get() with a default avoids creating an empty entry in the defaultdict.
        if any(version in req.specifier for req in exclude_range.get(project_name, ())):
            return False

        return True
423+
424+
425+
# Module-level cache: remote pulp_id -> (pulp_last_updated when built, filter instance).
_remote_filters = {}


def get_remote_package_filter(remote):
    """
    Return the PackageIncludeFilter for ``remote``, reusing a cached one when possible.

    A cached filter is reused only while the remote's ``pulp_last_updated`` equals the value
    recorded when that filter was built. Otherwise a new filter is built and stored in its
    place.
    """
    cached_entry = _remote_filters.get(remote.pulp_id)
    if cached_entry:
        cached_stamp, cached_filter = cached_entry
        if cached_stamp == remote.pulp_last_updated:
            return cached_filter

    fresh_filter = PackageIncludeFilter(remote)
    _remote_filters[remote.pulp_id] = (remote.pulp_last_updated, fresh_filter)
    return fresh_filter

pulp_python/tests/functional/api/test_full_mirror.py

+43-2
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,20 @@
55
from pulp_python.tests.functional.constants import (
66
PYPI_URL,
77
PYTHON_XS_FIXTURE_CHECKSUMS,
8+
PYTHON_SM_PROJECT_SPECIFIER,
9+
PYTHON_SM_FIXTURE_RELEASES,
810
)
911

1012
from pypi_simple import ProjectPage
13+
from packaging.version import parse
1114
from urllib.parse import urljoin, urlsplit
1215

1316

1417
def test_pull_through_install(
1518
python_bindings, python_remote_factory, python_distribution_factory, delete_orphans_pre
1619
):
1720
"""Tests that a pull-through distro can be installed from."""
18-
remote = python_remote_factory(url=PYPI_URL)
21+
remote = python_remote_factory(url=PYPI_URL, includes=[])
1922
distro = python_distribution_factory(remote=remote.pulp_href)
2023
PACKAGE = "pulpcore-releases"
2124

@@ -40,7 +43,7 @@ def test_pull_through_install(
4043
@pytest.mark.parallel
4144
def test_pull_through_simple(python_remote_factory, python_distribution_factory, pulp_content_url):
4245
"""Tests that the simple page is properly modified when requesting a pull-through."""
43-
remote = python_remote_factory(url=PYPI_URL)
46+
remote = python_remote_factory(url=PYPI_URL, includes=["shelf-reader"])
4447
distro = python_distribution_factory(remote=remote.pulp_href)
4548

4649
url = f"{distro.base_url}simple/shelf-reader/"
@@ -54,6 +57,44 @@ def test_pull_through_simple(python_remote_factory, python_distribution_factory,
5457
assert PYTHON_XS_FIXTURE_CHECKSUMS[package.filename] == package.digests["sha256"]
5558

5659

60+
@pytest.mark.parallel
def test_pull_through_filter(python_remote_factory, python_distribution_factory):
    """Check that pull-through honours the includes/excludes filters of the remote."""
    # Plain include list: only the listed project is reachable.
    include_remote = python_remote_factory(url=PYPI_URL, includes=["shelf-reader"])
    include_distro = python_distribution_factory(remote=include_remote.pulp_href)

    response = requests.get(f"{include_distro.base_url}simple/pulpcore/")
    assert response.status_code == 404
    assert response.json() == {"detail": "pulpcore does not exist."}

    response = requests.get(f"{include_distro.base_url}simple/shelf-reader/")
    assert response.status_code == 200

    # Test complex include specifiers
    specifier_remote = python_remote_factory(includes=PYTHON_SM_PROJECT_SPECIFIER)
    specifier_distro = python_distribution_factory(remote=specifier_remote.pulp_href)
    for package, releases in PYTHON_SM_FIXTURE_RELEASES.items():
        page_url = f"{specifier_distro.base_url}simple/{package}/"
        page_response = requests.get(page_url)
        project_page = ProjectPage.from_response(page_response, package)
        # Pre-releases are left out of the comparison against the expected filenames.
        final_filenames = set()
        for listed in project_page.packages:
            if not parse(listed.version).is_prerelease:
                final_filenames.add(listed.filename)
        assert final_filenames == set(releases)

    # Test exclude logic
    exclude_remote = python_remote_factory(includes=[], excludes=["django"])
    exclude_distro = python_distribution_factory(remote=exclude_remote.pulp_href)

    response = requests.get(f"{exclude_distro.base_url}simple/django/")
    assert response.status_code == 404
    assert response.json() == {"detail": "django does not exist."}

    # A project that is not excluded is looked up on the remote; here the fetch is
    # expected to fail and be reported as 502.
    response = requests.get(f"{exclude_distro.base_url}simple/pulpcore/")
    assert response.status_code == 502
    assert response.text == f"Failed to fetch pulpcore from {exclude_remote.url}."

    response = requests.get(f"{exclude_distro.base_url}simple/shelf-reader/")
    assert response.status_code == 200
96+
97+
5798
@pytest.mark.parallel
5899
def test_pull_through_with_repo(
59100
python_repo_with_sync, python_remote_factory, python_distribution_factory

0 commit comments

Comments
 (0)