diff --git a/src/library/pkgcache/DESCRIPTION b/src/library/pkgcache/DESCRIPTION index 8d303c81d..f517770ae 100644 --- a/src/library/pkgcache/DESCRIPTION +++ b/src/library/pkgcache/DESCRIPTION @@ -15,9 +15,9 @@ BugReports: https://github.com/r-lib/pkgcache/issues Depends: R (>= 3.4) Imports: callr (>= 2.0.4.9000), cli (>= 3.2.0), curl (>= 3.2), filelock, jsonlite, processx (>= 3.3.0.9001), R6, tools, utils -Suggests: covr, debugme, desc, fs, mockery, pillar, pingr, rprojroot, - sessioninfo, spelling, testthat (>= 3.2.0), webfakes (>= - 1.1.5), withr, zip +Suggests: covr, debugme, desc, fs, keyring, mockery, pillar, pingr, + rprojroot, sessioninfo, spelling, testthat (>= 3.2.0), webfakes + (>= 1.1.5), withr, zip Config/Needs/website: tidyverse/tidytemplate Config/testthat/edition: 3 Encoding: UTF-8 @@ -25,7 +25,7 @@ Language: en-US Roxygen: list(markdown = TRUE, r6 = FALSE) RoxygenNote: 7.3.2 NeedsCompilation: yes -Packaged: 2024-11-07 16:35:48 UTC; gaborcsardi +Packaged: 2025-02-21 11:42:35 UTC; gaborcsardi Author: Gábor Csárdi [aut, cre], Posit Software, PBC [cph, fnd] Maintainer: Gábor Csárdi diff --git a/src/library/pkgcache/NEWS.md b/src/library/pkgcache/NEWS.md index 466b272f6..e3fcbe0a5 100644 --- a/src/library/pkgcache/NEWS.md +++ b/src/library/pkgcache/NEWS.md @@ -1,5 +1,8 @@ # pkgcache (development version) +* `parse_packages()` now parses files ending with an extra newline + correctly (#122). + # pkgcache 2.2.3 * The metadata cache now does not use source URLs for packages in `Archive` diff --git a/src/library/pkgcache/R/archive.R b/src/library/pkgcache/R/archive.R index 78863c437..090e46bda 100644 --- a/src/library/pkgcache/R/archive.R +++ b/src/library/pkgcache/R/archive.R @@ -372,7 +372,10 @@ cac__update_replica <- function(self, private) { # download_if_newer(). If the response is 304, then we'll ignore the file. file.create(tmp) - download_if_newer(url, tmp, etag_file, error_on_status = FALSE)$ + key <- random_key() + async_constant()$ + then(function() start_auth_cache(key))$ + then(function() download_if_newer(url, tmp, etag_file, error_on_status = FALSE))$ then(function(dl) { if (dl$response$status_code >= 300 && dl$response$status_code != 304) { stop("Failed to update package archive metadata") @@ -385,7 +388,10 @@ cac__update_replica <- function(self, private) { } dl })$ - finally(function() unlink(tmp)) + finally(function() { + unlink(tmp) + clear_auth_cache(key) + }) } cac__convert_archive_file <- function(self, private, raw, out) { diff --git a/src/library/pkgcache/R/async-http.R b/src/library/pkgcache/R/async-http.R index 38ddb6ae0..15318f823 100644 --- a/src/library/pkgcache/R/async-http.R +++ b/src/library/pkgcache/R/async-http.R @@ -35,6 +35,10 @@ update_async_timeouts <- function(options) { ) } +add_auth_header <- function(url, headers) { + c(headers, repo_auth_headers(url)$headers) +} + #' Download a file, asynchronously #' #' This is the asynchronous version of [utils::download.file()]. @@ -100,7 +104,7 @@ update_async_timeouts <- function(options) { download_file <- function(url, destfile, etag_file = NULL, tmp_destfile = paste0(destfile, ".tmp"), error_on_status = TRUE, - options = list(), ...) { + options = list(), headers = character(), ...) { "!DEBUG downloading `url`" assert_that( is_string(url), @@ -116,7 +120,9 @@ download_file <- function(url, destfile, etag_file = NULL, tmp_destfile <- normalizePath(tmp_destfile, mustWork = FALSE) mkdirp(dirname(tmp_destfile)) - http_get(url, file = tmp_destfile, options = options, ...)$ + headers <- add_auth_header(url, headers) + + http_get(url, file = tmp_destfile, options = options, headers = headers, ...)$ then(http_stop_for_status)$ then(function(resp) { "!DEBUG downloaded `url`" @@ -229,6 +235,7 @@ download_if_newer <- function(url, destfile, etag_file = NULL, options <- update_async_timeouts(options) etag_old <- get_etag_header_from_file(destfile, etag_file) headers <- c(headers, etag_old) + headers <- add_auth_header(url, headers) destfile <- normalizePath(destfile, mustWork = FALSE) tmp_destfile <- normalizePath(tmp_destfile, mustWork = FALSE) @@ -342,7 +349,9 @@ download_one_of <- function(urls, destfile, etag_file = NULL, options <- update_async_timeouts(options) tmps <- paste0(destfile, ".tmp.", seq_along(urls)) dls <- mapply( - download_if_newer, url = urls, tmp_destfile = tmps, + download_if_newer, + url = urls, + tmp_destfile = tmps, MoreArgs = list(destfile = destfile, etag_file = etag_file, headers = headers, options = options, ...), SIMPLIFY = FALSE) @@ -356,7 +365,7 @@ download_one_of <- function(urls, destfile, etag_file = NULL, } download_files <- function(data, error_on_status = TRUE, - options = list(), ...) { + options = list(), headers = NULL, ...) { if (any(dup <- duplicated(data$path))) { stop("Duplicate target paths in download_files: ", @@ -371,6 +380,7 @@ download_files <- function(data, error_on_status = TRUE, row <- data[idx, ] dx <- download_if_newer( row$url, row$path, row$etag, + headers = c(headers, row$headers[[1L]]), on_progress = prog_cb, error_on_status = error_on_status, options = options, ... @@ -380,6 +390,7 @@ download_files <- function(data, error_on_status = TRUE, dx <- dx$catch(error = function(err) { download_if_newer( row$fallback_url, row$path, row$etag, + headers = c(headers, row$headers[[1L]]), error_on_status = error_on_status, options = options, ... ) diff --git a/src/library/pkgcache/R/auth.R b/src/library/pkgcache/R/auth.R new file mode 100644 index 000000000..56f956859 --- /dev/null +++ b/src/library/pkgcache/R/auth.R @@ -0,0 +1,122 @@ +# Returns a set of HTTP headers for the given URL if (1) it belongs to a +# package repository; and (2) has credentials stored in the keyring. +repo_auth_headers <- function( + url, allow_prompt = interactive(), use_cache = TRUE, set_cache = TRUE) { + + # shortcut to speed up the common case of no credentials + if (!grepl("@", url)) { + return(NULL) + } + + creds <- extract_basic_auth_credentials(url) + if (length(creds$password) > 0 && nchar(creds$password) != 0) { + # The URL already contains a password. This is pretty poor practice, maybe + # we should issue a warning pointing users to the keyring package instead. + return(NULL) + } + if (length(creds$username) == 0 || nchar(creds$username) == 0) { + # No username to key the lookup in the keyring with. + return(NULL) + } + + # Try URLs in this order: + # - repo URL with username + # - repo URL w/o username + # - host URL with username + # - host URL w/o username + # We try each with and without a keyring username + urls <- unique(unlist( + creds[c("repouserurl", "repourl", "hostuserurl", "hosturl")] + )) + + if (use_cache) { + for (u in urls) { + if (u %in% names(pkgenv$credentials)) { + return(pkgenv$credentials[[u]]) + } + } + } + + if (!requireNamespace("keyring", quietly = TRUE)) { + return(NULL) + } + + # In non-interactive contexts, force the use of the environment variable + # backend so that we never hang but can still support CI setups. + kb <- if (allow_prompt) { + keyring::default_backend() + } else { + keyring::backend_env$new() + } + + for (u in urls) { + auth_domain <- u + pwd <- try_catch_null(kb$get(u, creds$username)) %||% + try_catch_null(kb$get(u)) + if (!is.null(pwd)) break + } + + res <- if (!is.null(pwd)) { + auth <- paste(creds$username, pwd, sep = ":") + list( + headers = c("Authorization" = paste("Basic", base64_encode(auth))), + auth_domain = auth_domain + ) + } + + if (set_cache) { + pkgenv$credentials[[auth_domain]] <- res + } + + res +} + +clear_auth_cache <- function(key = NULL) { + if (is.null(key) || + identical(pkgenv$credentials[[".exit_handler"]], key)) { + rm( + list = ls(pkgenv$credentials, all.names = TRUE), + envir = pkgenv$credentials + ) + } +} + +start_auth_cache <- function(key) { + if (! ".exit_handler" %in% names(pkgenv$credentials)) { + assign(".exit_handler", key, envir = pkgenv$credentials) + } +} + +base64_encode <- function(x) { + if (!is.raw(x)) { + x <- charToRaw(x) + } + processx::base64_encode(x) +} + +extract_basic_auth_credentials <- function(url) { + psd <- parse_url(url) + if (is.na(psd$host)) { + throw(new_error(cli::format_error( + "Unrecognized URL format: {.code {url}}." + ))) + } + # ideally we would work with the repo URL, and not the final download URL + # until then, we strip the download URL to get the repo URL + userat <- if (nchar(psd$username)) paste0(psd$username, "@") else "" + repo <- c( + paste0(psd$protocol, "://", psd$host, psd$path), + paste0(psd$protocol, "://", userat, psd$host, psd$path) + ) + repo <- sub("(/(src|bin)/)(.*)$", "", repo) + # Lop off any /__linux__/ subdirectories, too. + repo <- sub("^(.*)/__linux__/[^/]+(/.*)$", "\\1\\2", repo, perl = TRUE) + list( + hosturl = paste0(psd$protocol, "://", psd$host), + hostuserurl = paste0(psd$protocol, "://", userat, psd$host), + repourl = repo[1], + repouserurl = repo[2], + username = psd$username, + password = psd$password + ) +} diff --git a/src/library/pkgcache/R/cran-app.R b/src/library/pkgcache/R/cran-app.R index 370e7299e..573338048 100644 --- a/src/library/pkgcache/R/cran-app.R +++ b/src/library/pkgcache/R/cran-app.R @@ -250,6 +250,7 @@ make_dummy_repo_platform <- function(repo, packages = NULL, options = list()) { cran_app <- function(packages = NULL, log = interactive(), + basic_auth = NULL, options = list()) { app <- webfakes::new_app() @@ -257,6 +258,25 @@ cran_app <- function(packages = NULL, # Log requests by default if (log) app$use("logger" = webfakes::mw_log()) + if (!is.null(basic_auth)) { + app$use("basic auth" = function(req, res) { + exp <- paste( + "Basic", + base64_encode( + paste0(basic_auth[["username"]], ":", basic_auth[["password"]]) + ) + ) + hdr <- req$get_header("Authorization") %||% "" + if (exp != hdr) { + res$ + set_header("WWW-Authenticate", "Basic realm=\"CRAN with auth\"")$ + send_status(401L) + } else { + "next" + } + }) + } + # Parse all kinds of bodies app$use("json body parser" = webfakes::mw_json()) app$use("text body parser" = webfakes::mw_text(type = c("text/plain", "application/json"))) diff --git a/src/library/pkgcache/R/metadata-cache.R b/src/library/pkgcache/R/metadata-cache.R index 0ffe672f9..2d3a789f3 100644 --- a/src/library/pkgcache/R/metadata-cache.R +++ b/src/library/pkgcache/R/metadata-cache.R @@ -737,22 +737,31 @@ cmc__update_replica_pkgs <- function(self, private) { meta <- !is.na(pkgs$meta_url) bin <- !is.na(pkgs$bin_url) - dls <- data.frame( - stringsAsFactors = FALSE, + dls <- data_frame( url = c(pkgs$url, pkgs$meta_url[meta], pkgs$bin_url[bin], bsq_url), fallback_url = c(pkgs$fallback_url, rep(NA_character_, sum(meta) + sum(bin)), NA_character_), path = c(pkgs$path, pkgs$meta_path[meta], pkgs$bin_path[bin], bsq_path), etag = c(pkgs$etag, pkgs$meta_etag[meta], pkgs$bin_etag[bin], bsq_etag), + headers = c( + lapply(pkgs$url, function(x) repo_auth_headers(x)$headers), + vector("list", length = sum(meta)), + lapply(pkgs$bin_url[bin], function(x) repo_auth_headers(x)$headers), + vector("list", length = 1) + ), timeout = c(rep(c(200, 100), c(nrow(pkgs), sum(meta) + sum(bin))), 5), mayfail = TRUE ) - download_files(dls)$ + key <- random_key() + async_constant()$ + then(function() start_auth_cache(key))$ + then(function() download_files(dls))$ then(function(result) { missing_pkgs_note(pkgs, result) load_bioc_sysreqs() result - }) + })$ + finally(function() clear_auth_cache(key)) } # E.g. "R 4.1 macos packages are missing from CRAN and Bioconductor" diff --git a/src/library/pkgcache/R/onload.R b/src/library/pkgcache/R/onload.R index 4f728722e..50b79f641 100644 --- a/src/library/pkgcache/R/onload.R +++ b/src/library/pkgcache/R/onload.R @@ -170,6 +170,8 @@ pkgenv$ppm_r_versions_cached <- c("3.6", "4.0", "4.1", "4.2", "4.3") pkgenv$package_versions <- new.env(parent = emptyenv()) +pkgenv$credentials <- new.env(parent = emptyenv()) + onload_pkgcache <- function(libname, pkgname) { if (Sys.getenv("PKGCACHE_NO_PILLAR") == "") { requireNamespace("pillar", quietly = TRUE) diff --git a/src/library/pkgcache/R/parse-url.R b/src/library/pkgcache/R/parse-url.R index 0a6b7ee3b..5b52b3513 100644 --- a/src/library/pkgcache/R/parse-url.R +++ b/src/library/pkgcache/R/parse-url.R @@ -3,7 +3,7 @@ parse_url <- function(url) { re_url <- paste0( "^(?[a-zA-Z0-9]+)://", "(?:(?[^@/:]+)(?::(?[^@/]+))?@)?", - "(?[^/]+)", + "(?[^/]*)", "(?.*)$" # don't worry about query params here... ) @@ -48,4 +48,3 @@ re_match <- function(text, pattern, perl = TRUE, ...) { names(res) <- c(attr(match, "capture.names"), ".text", ".match") res } - diff --git a/src/library/pkgcache/R/ppm.R b/src/library/pkgcache/R/ppm.R index 11cfa2f33..77fd38c45 100644 --- a/src/library/pkgcache/R/ppm.R +++ b/src/library/pkgcache/R/ppm.R @@ -202,8 +202,14 @@ async_get_ppm_status <- function(forget = FALSE, distribution = NULL, }) } - def$ - finally(function() unlink(tmp2))$ + key <- random_key() + async_constant()$ + then(function() start_auth_cache(key))$ + then(function() def)$ + finally(function() { + clear_auth_cache(key) + unlink(tmp2) + })$ then(function() { list( distros = pkgenv$ppm_distros, diff --git a/src/library/pkgcache/R/utils.R b/src/library/pkgcache/R/utils.R index 20ef2438a..ec49a5e53 100644 --- a/src/library/pkgcache/R/utils.R +++ b/src/library/pkgcache/R/utils.R @@ -226,3 +226,7 @@ is_rcmd_check <- function() { Sys.getenv("_R_CHECK_PACKAGE_NAME_", "") != "" } } + +random_key <- function() { + basename(tempfile()) +} diff --git a/src/library/pkgcache/src/lib.c b/src/library/pkgcache/src/lib.c index 469a4da70..8d5cfd476 100644 --- a/src/library/pkgcache/src/lib.c +++ b/src/library/pkgcache/src/lib.c @@ -361,7 +361,7 @@ SEXP pkgcache_parse_packages_raw(SEXP raw) { while (*p != '\0') { switch (state) { - /* -- at the begining of a package --------------------------------- */ + /* -- at the beginning of a package -------------------------------- */ case S_BG: if (*p == '\r') { p++; @@ -479,6 +479,9 @@ SEXP pkgcache_parse_packages_raw(SEXP raw) { p = (char*) RAW(raw); p[len - 1] = tail; if (state == S_VL && tail != '\n') vlsize++; + /* if the tail is a \n, we don't need that. We also drop \r, which + is possibly not correct, but in practice better */ + if (state == S_NL && (tail == '\n' || tail == '\r')) vlsize--; if (state == S_KW) { R_THROW_ERROR("PACKAGES file ended while parsing a key");