Skip to content

Commit

Permalink
Update embedded pkgcache to get basic auth
Browse files Browse the repository at this point in the history
  • Loading branch information
gaborcsardi committed Feb 21, 2025
1 parent 0366bd4 commit 9007caf
Show file tree
Hide file tree
Showing 12 changed files with 204 additions and 19 deletions.
8 changes: 4 additions & 4 deletions src/library/pkgcache/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,17 @@ BugReports: https://github.com/r-lib/pkgcache/issues
Depends: R (>= 3.4)
Imports: callr (>= 2.0.4.9000), cli (>= 3.2.0), curl (>= 3.2),
filelock, jsonlite, processx (>= 3.3.0.9001), R6, tools, utils
Suggests: covr, debugme, desc, fs, mockery, pillar, pingr, rprojroot,
sessioninfo, spelling, testthat (>= 3.2.0), webfakes (>=
1.1.5), withr, zip
Suggests: covr, debugme, desc, fs, keyring, mockery, pillar, pingr,
rprojroot, sessioninfo, spelling, testthat (>= 3.2.0), webfakes
(>= 1.1.5), withr, zip
Config/Needs/website: tidyverse/tidytemplate
Config/testthat/edition: 3
Encoding: UTF-8
Language: en-US
Roxygen: list(markdown = TRUE, r6 = FALSE)
RoxygenNote: 7.3.2
NeedsCompilation: yes
Packaged: 2024-11-07 16:35:48 UTC; gaborcsardi
Packaged: 2025-02-21 11:42:35 UTC; gaborcsardi
Author: Gábor Csárdi [aut, cre],
Posit Software, PBC [cph, fnd]
Maintainer: Gábor Csárdi <[email protected]>
3 changes: 3 additions & 0 deletions src/library/pkgcache/NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# pkgcache (development version)

* `parse_packages()` now parses files ending with an extra newline
correctly (#122).

# pkgcache 2.2.3

* The metadata cache now does not use source URLs for packages in `Archive`
Expand Down
10 changes: 8 additions & 2 deletions src/library/pkgcache/R/archive.R
Original file line number Diff line number Diff line change
Expand Up @@ -372,7 +372,10 @@ cac__update_replica <- function(self, private) {
# download_if_newer(). If the response is 304, then we'll ignore the file.
file.create(tmp)

download_if_newer(url, tmp, etag_file, error_on_status = FALSE)$
key <- random_key()
async_constant()$
then(function() start_auth_cache(key))$
then(function() download_if_newer(url, tmp, etag_file, error_on_status = FALSE))$
then(function(dl) {
if (dl$response$status_code >= 300 && dl$response$status_code != 304) {
stop("Failed to update package archive metadata")
Expand All @@ -385,7 +388,10 @@ cac__update_replica <- function(self, private) {
}
dl
})$
finally(function() unlink(tmp))
finally(function() {
unlink(tmp)
clear_auth_cache(key)
})
}

cac__convert_archive_file <- function(self, private, raw, out) {
Expand Down
19 changes: 15 additions & 4 deletions src/library/pkgcache/R/async-http.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ update_async_timeouts <- function(options) {
)
}

add_auth_header <- function(url, headers) {
c(headers, repo_auth_headers(url)$headers)
}

#' Download a file, asynchronously
#'
#' This is the asynchronous version of [utils::download.file()].
Expand Down Expand Up @@ -100,7 +104,7 @@ update_async_timeouts <- function(options) {
download_file <- function(url, destfile, etag_file = NULL,
tmp_destfile = paste0(destfile, ".tmp"),
error_on_status = TRUE,
options = list(), ...) {
options = list(), headers = character(), ...) {
"!DEBUG downloading `url`"
assert_that(
is_string(url),
Expand All @@ -116,7 +120,9 @@ download_file <- function(url, destfile, etag_file = NULL,
tmp_destfile <- normalizePath(tmp_destfile, mustWork = FALSE)
mkdirp(dirname(tmp_destfile))

http_get(url, file = tmp_destfile, options = options, ...)$
headers <- add_auth_header(url, headers)

http_get(url, file = tmp_destfile, options = options, headers = headers, ...)$
then(http_stop_for_status)$
then(function(resp) {
"!DEBUG downloaded `url`"
Expand Down Expand Up @@ -229,6 +235,7 @@ download_if_newer <- function(url, destfile, etag_file = NULL,
options <- update_async_timeouts(options)
etag_old <- get_etag_header_from_file(destfile, etag_file)
headers <- c(headers, etag_old)
headers <- add_auth_header(url, headers)

destfile <- normalizePath(destfile, mustWork = FALSE)
tmp_destfile <- normalizePath(tmp_destfile, mustWork = FALSE)
Expand Down Expand Up @@ -342,7 +349,9 @@ download_one_of <- function(urls, destfile, etag_file = NULL,
options <- update_async_timeouts(options)
tmps <- paste0(destfile, ".tmp.", seq_along(urls))
dls <- mapply(
download_if_newer, url = urls, tmp_destfile = tmps,
download_if_newer,
url = urls,
tmp_destfile = tmps,
MoreArgs = list(destfile = destfile, etag_file = etag_file,
headers = headers, options = options, ...),
SIMPLIFY = FALSE)
Expand All @@ -356,7 +365,7 @@ download_one_of <- function(urls, destfile, etag_file = NULL,
}

download_files <- function(data, error_on_status = TRUE,
options = list(), ...) {
options = list(), headers = NULL, ...) {

if (any(dup <- duplicated(data$path))) {
stop("Duplicate target paths in download_files: ",
Expand All @@ -371,6 +380,7 @@ download_files <- function(data, error_on_status = TRUE,
row <- data[idx, ]
dx <- download_if_newer(
row$url, row$path, row$etag,
headers = c(headers, row$headers[[1L]]),
on_progress = prog_cb,
error_on_status = error_on_status,
options = options, ...
Expand All @@ -380,6 +390,7 @@ download_files <- function(data, error_on_status = TRUE,
dx <- dx$catch(error = function(err) {
download_if_newer(
row$fallback_url, row$path, row$etag,
headers = c(headers, row$headers[[1L]]),
error_on_status = error_on_status,
options = options, ...
)
Expand Down
122 changes: 122 additions & 0 deletions src/library/pkgcache/R/auth.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
# Returns a set of HTTP headers for the given URL if (1) it belongs to a
# package repository; and (2) has credentials stored in the keyring.
repo_auth_headers <- function(
url, allow_prompt = interactive(), use_cache = TRUE, set_cache = TRUE) {

# shortcut to speed up the common case of no credentials
if (!grepl("@", url)) {
return(NULL)
}

creds <- extract_basic_auth_credentials(url)
if (length(creds$password) > 0 && nchar(creds$password) != 0) {
# The URL already contains a password. This is pretty poor practice, maybe
# we should issue a warning pointing users to the keyring package instead.
return(NULL)
}
if (length(creds$username) == 0 || nchar(creds$username) == 0) {
# No username to key the lookup in the keyring with.
return(NULL)
}

# Try URLs in this order:
# - repo URL with username
# - repo URL w/o username
# - host URL with username
# - host URL w/o username
# We try each with and without a keyring username
urls <- unique(unlist(
creds[c("repouserurl", "repourl", "hostuserurl", "hosturl")]
))

if (use_cache) {
for (u in urls) {
if (u %in% names(pkgenv$credentials)) {
return(pkgenv$credentials[[u]])
}
}
}

if (!requireNamespace("keyring", quietly = TRUE)) {
return(NULL)
}

# In non-interactive contexts, force the use of the environment variable
# backend so that we never hang but can still support CI setups.
kb <- if (allow_prompt) {
keyring::default_backend()
} else {
keyring::backend_env$new()
}

for (u in urls) {
auth_domain <- u
pwd <- try_catch_null(kb$get(u, creds$username)) %||%
try_catch_null(kb$get(u))
if (!is.null(pwd)) break
}

res <- if (!is.null(pwd)) {
auth <- paste(creds$username, pwd, sep = ":")
list(
headers = c("Authorization" = paste("Basic", base64_encode(auth))),
auth_domain = auth_domain
)
}

if (set_cache) {
pkgenv$credentials[[auth_domain]] <- res
}

res
}

clear_auth_cache <- function(key = NULL) {
if (is.null(key) ||
identical(pkgenv$credentials[[".exit_handler"]], key)) {
rm(
list = ls(pkgenv$credentials, all.names = TRUE),
envir = pkgenv$credentials
)
}
}

start_auth_cache <- function(key) {
if (! ".exit_handler" %in% names(pkgenv$credentials)) {
assign(".exit_handler", key, envir = pkgenv$credentials)
}
}

base64_encode <- function(x) {
if (!is.raw(x)) {
x <- charToRaw(x)
}
processx::base64_encode(x)
}

extract_basic_auth_credentials <- function(url) {
psd <- parse_url(url)
if (is.na(psd$host)) {
throw(new_error(cli::format_error(
"Unrecognized URL format: {.code {url}}."
)))
}
# ideally we would work with the repo URL, and not the final download URL
# until then, we strip the download URL to get the repo URL
userat <- if (nchar(psd$username)) paste0(psd$username, "@") else ""
repo <- c(
paste0(psd$protocol, "://", psd$host, psd$path),
paste0(psd$protocol, "://", userat, psd$host, psd$path)
)
repo <- sub("(/(src|bin)/)(.*)$", "", repo)
# Lop off any /__linux__/ subdirectories, too.
repo <- sub("^(.*)/__linux__/[^/]+(/.*)$", "\\1\\2", repo, perl = TRUE)
list(
hosturl = paste0(psd$protocol, "://", psd$host),
hostuserurl = paste0(psd$protocol, "://", userat, psd$host),
repourl = repo[1],
repouserurl = repo[2],
username = psd$username,
password = psd$password
)
}
20 changes: 20 additions & 0 deletions src/library/pkgcache/R/cran-app.R
Original file line number Diff line number Diff line change
Expand Up @@ -250,13 +250,33 @@ make_dummy_repo_platform <- function(repo, packages = NULL, options = list()) {

cran_app <- function(packages = NULL,
log = interactive(),
basic_auth = NULL,
options = list()) {

app <- webfakes::new_app()

# Log requests by default
if (log) app$use("logger" = webfakes::mw_log())

if (!is.null(basic_auth)) {
app$use("basic auth" = function(req, res) {
exp <- paste(
"Basic",
base64_encode(
paste0(basic_auth[["username"]], ":", basic_auth[["password"]])
)
)
hdr <- req$get_header("Authorization") %||% ""
if (exp != hdr) {
res$
set_header("WWW-Authenticate", "Basic realm=\"CRAN with auth\"")$
send_status(401L)
} else {
"next"
}
})
}

# Parse all kinds of bodies
app$use("json body parser" = webfakes::mw_json())
app$use("text body parser" = webfakes::mw_text(type = c("text/plain", "application/json")))
Expand Down
17 changes: 13 additions & 4 deletions src/library/pkgcache/R/metadata-cache.R
Original file line number Diff line number Diff line change
Expand Up @@ -737,22 +737,31 @@ cmc__update_replica_pkgs <- function(self, private) {

meta <- !is.na(pkgs$meta_url)
bin <- !is.na(pkgs$bin_url)
dls <- data.frame(
stringsAsFactors = FALSE,
dls <- data_frame(
url = c(pkgs$url, pkgs$meta_url[meta], pkgs$bin_url[bin], bsq_url),
fallback_url = c(pkgs$fallback_url, rep(NA_character_, sum(meta) + sum(bin)), NA_character_),
path = c(pkgs$path, pkgs$meta_path[meta], pkgs$bin_path[bin], bsq_path),
etag = c(pkgs$etag, pkgs$meta_etag[meta], pkgs$bin_etag[bin], bsq_etag),
headers = c(
lapply(pkgs$url, function(x) repo_auth_headers(x)$headers),
vector("list", length = sum(meta)),
lapply(pkgs$bin_url[bin], function(x) repo_auth_headers(x)$headers),
vector("list", length = 1)
),
timeout = c(rep(c(200, 100), c(nrow(pkgs), sum(meta) + sum(bin))), 5),
mayfail = TRUE
)

download_files(dls)$
key <- random_key()
async_constant()$
then(function() start_auth_cache(key))$
then(function() download_files(dls))$
then(function(result) {
missing_pkgs_note(pkgs, result)
load_bioc_sysreqs()
result
})
})$
finally(function() clear_auth_cache(key))
}

# E.g. "R 4.1 macos packages are missing from CRAN and Bioconductor"
Expand Down
2 changes: 2 additions & 0 deletions src/library/pkgcache/R/onload.R
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,8 @@ pkgenv$ppm_r_versions_cached <- c("3.6", "4.0", "4.1", "4.2", "4.3")

pkgenv$package_versions <- new.env(parent = emptyenv())

pkgenv$credentials <- new.env(parent = emptyenv())

onload_pkgcache <- function(libname, pkgname) {
if (Sys.getenv("PKGCACHE_NO_PILLAR") == "") {
requireNamespace("pillar", quietly = TRUE)
Expand Down
3 changes: 1 addition & 2 deletions src/library/pkgcache/R/parse-url.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ parse_url <- function(url) {
re_url <- paste0(
"^(?<protocol>[a-zA-Z0-9]+)://",
"(?:(?<username>[^@/:]+)(?::(?<password>[^@/]+))?@)?",
"(?<host>[^/]+)",
"(?<host>[^/]*)",
"(?<path>.*)$" # don't worry about query params here...
)

Expand Down Expand Up @@ -48,4 +48,3 @@ re_match <- function(text, pattern, perl = TRUE, ...) {
names(res) <- c(attr(match, "capture.names"), ".text", ".match")
res
}

10 changes: 8 additions & 2 deletions src/library/pkgcache/R/ppm.R
Original file line number Diff line number Diff line change
Expand Up @@ -202,8 +202,14 @@ async_get_ppm_status <- function(forget = FALSE, distribution = NULL,
})
}

def$
finally(function() unlink(tmp2))$
key <- random_key()
async_constant()$
then(function() start_auth_cache(key))$
then(function() def)$
finally(function() {
clear_auth_cache(key)
unlink(tmp2)
})$
then(function() {
list(
distros = pkgenv$ppm_distros,
Expand Down
4 changes: 4 additions & 0 deletions src/library/pkgcache/R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -226,3 +226,7 @@ is_rcmd_check <- function() {
Sys.getenv("_R_CHECK_PACKAGE_NAME_", "") != ""
}
}

random_key <- function() {
basename(tempfile())
}
5 changes: 4 additions & 1 deletion src/library/pkgcache/src/lib.c
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,7 @@ SEXP pkgcache_parse_packages_raw(SEXP raw) {
while (*p != '\0') {
switch (state) {

/* -- at the begining of a package --------------------------------- */
/* -- at the beginning of a package -------------------------------- */
case S_BG:
if (*p == '\r') {
p++;
Expand Down Expand Up @@ -479,6 +479,9 @@ SEXP pkgcache_parse_packages_raw(SEXP raw) {
p = (char*) RAW(raw);
p[len - 1] = tail;
if (state == S_VL && tail != '\n') vlsize++;
/* if the tail is a \n, we don't need that. We also drop \r, which
is possibly not correct, but in practice better */
if (state == S_NL && (tail == '\n' || tail == '\r')) vlsize--;

if (state == S_KW) {
R_THROW_ERROR("PACKAGES file ended while parsing a key");
Expand Down

0 comments on commit 9007caf

Please sign in to comment.