|
| 1 | +import json |
1 | 2 | import logging
|
2 |
| -import requests |
3 | 3 |
|
| 4 | +from aiohttp.client_exceptions import ClientError |
4 | 5 | from rest_framework.viewsets import ViewSet
|
5 | 6 | from rest_framework.response import Response
|
6 | 7 | from django.core.exceptions import ObjectDoesNotExist
|
|
15 | 16 | Http404,
|
16 | 17 | HttpResponseForbidden,
|
17 | 18 | HttpResponseBadRequest,
|
18 |
| - StreamingHttpResponse |
| 19 | + StreamingHttpResponse, |
| 20 | + HttpResponse, |
19 | 21 | )
|
20 | 22 | from drf_spectacular.utils import extend_schema
|
21 | 23 | from dynaconf import settings
|
22 | 24 | from itertools import chain
|
23 | 25 | from packaging.utils import canonicalize_name
|
24 | 26 | from urllib.parse import urljoin, urlparse, urlunsplit
|
25 | 27 | from pathlib import PurePath
|
26 |
| -from pypi_simple import parse_links_stream_response |
| 28 | +from pypi_simple import ACCEPT_JSON_PREFERRED, ProjectPage |
27 | 29 |
|
28 | 30 | from pulpcore.plugin.viewsets import OperationPostponedResponse
|
29 | 31 | from pulpcore.plugin.tasking import dispatch
|
30 | 32 | from pulpcore.plugin.util import get_domain
|
| 33 | +from pulpcore.plugin.exceptions import TimeoutException |
31 | 34 | from pulp_python.app.models import (
|
32 | 35 | PythonDistribution,
|
33 | 36 | PythonPackageContent,
|
|
37 | 40 | SummarySerializer,
|
38 | 41 | PackageMetadataSerializer,
|
39 | 42 | PackageUploadSerializer,
|
40 |
| - PackageUploadTaskSerializer |
| 43 | + PackageUploadTaskSerializer, |
41 | 44 | )
|
42 | 45 | from pulp_python.app.utils import (
|
43 | 46 | write_simple_index,
|
44 | 47 | write_simple_detail,
|
45 | 48 | python_content_to_json,
|
46 | 49 | PYPI_LAST_SERIAL,
|
47 | 50 | PYPI_SERIAL_CONSTANT,
|
| 51 | + get_remote_package_filter, |
48 | 52 | )
|
49 | 53 |
|
50 | 54 | from pulp_python.app import tasks
|
@@ -233,27 +237,36 @@ def list(self, request, path):
|
233 | 237 |
|
def pull_through_package_simple(self, package, path, remote):
    """Gets the package's simple page from remote.

    The project page is downloaded from the remote's ``simple/<package>/``
    URL, parsed (JSON or HTML, depending on the response content type),
    filtered through the remote's package filter, and re-rendered with
    every file link rewritten to point at this server's content URL.

    Args:
        package: Project name to look up on the remote.
        path: Path prefix placed in front of each filename in the
            rewritten links.
        remote: Remote object providing the upstream URL, headers and
            downloader.

    Returns:
        An ``HttpResponse`` with the rendered simple detail page, or a
        502/504 ``HttpResponse`` when the upstream fetch fails or times out.

    Raises:
        Http404: If the remote's package filter rejects ``package``.
    """

    def parse_package(release_package):
        """Return ``(filename, rewritten_url, sha256)`` for one release file."""
        parsed = urlparse(release_package.url)
        # Keep scheme, netloc and path only; query and fragment are dropped.
        stripped_url = urlunsplit(chain(parsed[:3], ("", "")))
        redirect_path = f'{path}/{release_package.filename}?redirect={stripped_url}'
        d_url = urljoin(self.base_content_url, redirect_path)
        return release_package.filename, d_url, release_package.digests.get("sha256", "")

    rfilter = get_remote_package_filter(remote)
    if not rfilter.filter_project(package):
        raise Http404(f"{package} does not exist.")

    url = remote.get_remote_artifact_url(f'simple/{package}/')
    # The Accept header is attached to the in-memory remote before the
    # downloader is requested from it.
    remote.headers = remote.headers or []
    remote.headers.append({"Accept": ACCEPT_JSON_PREFERRED})
    downloader = remote.get_downloader(url=url, max_retries=1)
    try:
        d = downloader.fetch()
    except ClientError:
        return HttpResponse(f"Failed to fetch {package} from {remote.url}.", status=502)
    except TimeoutException:
        return HttpResponse(f"{remote.url} timed out while fetching {package}.", status=504)

    # Compare only the media type: tolerate a missing header, optional
    # parameters such as "; charset=utf-8", and differences in letter case.
    content_type = (d.headers.get("content-type") or "").split(";", 1)[0].strip().lower()
    # NOTE(review): base_url is remote.url rather than the fetched page URL;
    # confirm this is the intended base for resolving relative links.
    with open(d.path, "rb") as fp:  # ensure the downloaded file is closed
        if content_type == "application/vnd.pypi.simple.v1+json":
            page = ProjectPage.from_json_data(json.load(fp), base_url=remote.url)
        else:
            page = ProjectPage.from_html(package, fp.read(), base_url=remote.url)

    packages = [
        parse_package(p) for p in page.packages if rfilter.filter_release(package, p.version)
    ]
    return HttpResponse(write_simple_detail(package, packages))
257 | 270 |
|
258 | 271 | @extend_schema(operation_id="pypi_simple_package_read", summary="Get package simple page")
|
259 | 272 | def retrieve(self, request, path, package):
|
|
0 commit comments