|
| 1 | +import json |
1 | 2 | import logging |
2 | | -import requests |
3 | 3 |
|
4 | 4 | from rest_framework.viewsets import ViewSet |
5 | 5 | from rest_framework.response import Response |
|
15 | 15 | Http404, |
16 | 16 | HttpResponseForbidden, |
17 | 17 | HttpResponseBadRequest, |
18 | | - StreamingHttpResponse |
| 18 | + StreamingHttpResponse, |
| 19 | + HttpResponse, |
19 | 20 | ) |
20 | 21 | from drf_spectacular.utils import extend_schema |
21 | 22 | from dynaconf import settings |
22 | 23 | from itertools import chain |
23 | 24 | from packaging.utils import canonicalize_name |
24 | 25 | from urllib.parse import urljoin, urlparse, urlunsplit |
25 | 26 | from pathlib import PurePath |
26 | | -from pypi_simple import parse_links_stream_response |
| 27 | +from pypi_simple import ACCEPT_JSON_PREFERRED, ProjectPage |
27 | 28 |
|
28 | 29 | from pulpcore.plugin.viewsets import OperationPostponedResponse |
29 | 30 | from pulpcore.plugin.tasking import dispatch |
|
37 | 38 | SummarySerializer, |
38 | 39 | PackageMetadataSerializer, |
39 | 40 | PackageUploadSerializer, |
40 | | - PackageUploadTaskSerializer |
| 41 | + PackageUploadTaskSerializer, |
41 | 42 | ) |
42 | 43 | from pulp_python.app.utils import ( |
43 | 44 | write_simple_index, |
44 | 45 | write_simple_detail, |
45 | 46 | python_content_to_json, |
46 | 47 | PYPI_LAST_SERIAL, |
47 | 48 | PYPI_SERIAL_CONSTANT, |
| 49 | + get_remote_package_filter, |
48 | 50 | ) |
49 | 51 |
|
50 | 52 | from pulp_python.app import tasks |
@@ -233,27 +235,34 @@ def list(self, request, path): |
233 | 235 |
|
def pull_through_package_simple(self, package, path, remote):
    """Gets the package's simple page from remote.

    The remote's simple page for ``package`` is downloaded (asking for the
    JSON representation first), parsed with ``pypi_simple.ProjectPage`` and
    re-rendered with ``write_simple_detail`` so that every file link points
    back at this distribution with a ``redirect`` query parameter.

    Args:
        package: Name of the project whose simple page is requested.
        path: Base path of the distribution; used as prefix of each file link.
        remote: Remote to fetch the upstream simple page from.

    Returns:
        HttpResponse: The rendered simple detail page.

    Raises:
        Http404: If the remote's package filter rejects ``package`` or the
            upstream page could not be downloaded.
    """
    def parse_package(dis_package):
        """Return ``(filename, redirect url, sha256)`` for one upstream file."""
        parsed = urlparse(dis_package.url)
        # Keep only scheme, netloc and path; query and fragment are blanked.
        stripped_url = urlunsplit(chain(parsed[:3], ("", "")))
        redirect_path = f'{path}/{dis_package.filename}?redirect={stripped_url}'
        d_url = urljoin(self.base_content_url, redirect_path)
        return dis_package.filename, d_url, dis_package.digests.get("sha256", "")

    rfilter = get_remote_package_filter(remote)
    if not rfilter.filter_project(package):
        raise Http404(f"{package} does not exist.")

    url = remote.get_remote_artifact_url(f'simple/{package}/')
    # Build a new list instead of appending in place, so the header list
    # already attached to the remote is never mutated by a request.
    remote.headers = [*(remote.headers or []), {"Accept": ACCEPT_JSON_PREFERRED}]
    downloader = remote.get_downloader(url=url, max_retries=1)
    try:
        d = downloader.fetch()
    except Exception as exc:
        raise Http404(f"Could not find {package}.") from exc

    # Compare only the media type: tolerate a missing header, differing case
    # and trailing parameters such as "; charset=utf-8".
    content_type = d.headers.get("content-type", "").partition(";")[0].strip().lower()
    # The context managers close the downloaded file once it is parsed.
    if content_type == "application/vnd.pypi.simple.v1+json":
        with open(d.path, "rb") as fp:
            page = ProjectPage.from_json_data(json.load(fp), base_url=remote.url)
    else:
        with open(d.path, "rb") as fp:
            page = ProjectPage.from_html(package, fp.read(), base_url=remote.url)

    packages = [
        parse_package(p) for p in page.packages if rfilter.filter_release(package, p.version)
    ]
    return HttpResponse(write_simple_detail(package, packages))
257 | 266 |
|
258 | 267 | @extend_schema(operation_id="pypi_simple_package_read", summary="Get package simple page") |
259 | 268 | def retrieve(self, request, path, package): |
|
0 commit comments