Skip to content

(WIP) rustdoc redirect caching in the CDN #1262

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,17 @@ pub struct Config {
// For unit-tests the number has to be higher.
pub(crate) random_crate_search_view_size: u32,

// Time to cache crate-level redirects in rustdoc, in seconds.
// This is for redirects where the destination
// can change after the release of a new version
// of a crate.
pub(crate) cache_rustdoc_redirects_crate: u32,

// Time to cache release-level redirects in rustdoc, in seconds.
// Here the destination can only change after
// rebuilds or yanks, so very infrequently.
pub(crate) cache_rustdoc_redirects_release: u32,

// Build params
pub(crate) build_attempts: u16,
pub(crate) rustwide_workspace: PathBuf,
Expand Down Expand Up @@ -94,6 +105,11 @@ impl Config {
registry_gc_interval: env("DOCSRS_REGISTRY_GC_INTERVAL", 60 * 60)?,

random_crate_search_view_size: env("DOCSRS_RANDOM_CRATE_SEARCH_VIEW_SIZE", 500)?,
cache_rustdoc_redirects_crate: env("DOCSRS_CACHE_RUSTDOC_REDIRECTS_CRATE", 15 * 60)?, // 15 minutes
cache_rustdoc_redirects_release: env(
"DOCSRS_CACHE_RUSTDOC_REDIRECTS_RELEASE",
7 * 24 * 60 * 60, // 7 days
)?,

rustwide_workspace: env("CRATESFYI_RUSTWIDE_WORKSPACE", PathBuf::from(".workspace"))?,
inside_docker: env("DOCS_RS_DOCKER", false)?,
Expand Down
36 changes: 30 additions & 6 deletions src/test/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use once_cell::unsync::OnceCell;
use postgres::Client as Connection;
use reqwest::{
blocking::{Client, RequestBuilder},
Method,
redirect, Method, StatusCode, Url,
};
use std::{panic, sync::Arc};

Expand Down Expand Up @@ -52,21 +52,39 @@ pub(crate) fn assert_redirect(
expected_target: &str,
web: &TestFrontend,
) -> Result<(), Error> {
// Reqwest follows redirects automatically
let response = web.get(path).send()?;

let status = response.status();
assert_eq!(status, StatusCode::FOUND);

println!("path: {:?}", path);
println!("expected target: {:?}", expected_target);

let redirect_target = Url::parse(
response
.headers()
.get("Location")
.expect("Response doesn't have Location header")
.to_str()
.unwrap(),
)
.expect("could not parse redirect location");

println!("\nredirect target: {:?}", redirect_target);

let mut tmp;
let redirect_target = if expected_target.starts_with("https://") {
response.url().as_str()
redirect_target.as_str()
} else {
tmp = String::from(response.url().path());
if let Some(query) = response.url().query() {
tmp = String::from(redirect_target.path());
if let Some(query) = redirect_target.query() {
tmp.push('?');
tmp.push_str(query);
}
&tmp
};
println!("converted redirect target: {:?}", redirect_target);

// Either we followed a redirect to the wrong place, or there was no redirect
if redirect_target != expected_target {
// wrong place
Expand All @@ -83,6 +101,9 @@ pub(crate) fn assert_redirect(
);
}
}

let target_response = web.get(redirect_target).send()?;
let status = target_response.status();
assert!(
status.is_success(),
"failed to GET {}: {}",
Expand Down Expand Up @@ -333,7 +354,10 @@ impl TestFrontend {
Self {
server: Server::start(Some("127.0.0.1:0"), false, context)
.expect("failed to start the web server"),
client: Client::new(),
client: Client::builder()
.redirect(redirect::Policy::none())
.build()
.unwrap(),
}
}

Expand Down
27 changes: 25 additions & 2 deletions src/web/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ use extensions::InjectExtensions;
use failure::Error;
use iron::{
self,
headers::{Expires, HttpDate},
headers::{CacheControl, CacheDirective},
modifiers::Redirect,
status,
status::Status,
Expand Down Expand Up @@ -491,7 +491,30 @@ fn duration_to_str(init: DateTime<Utc>) -> String {
/// `Request`.
fn redirect(url: Url) -> Response {
let mut resp = Response::with((status::Found, Redirect(url)));
resp.headers.set(Expires(HttpDate(time::now())));
resp.headers
.set(CacheControl(vec![CacheDirective::MaxAge(0)]));

resp
}

/// Creates a redirect-response which is cached on the CDN level for
/// the given amount of seconds. Browser-Local caching is deactivated.
/// The used s-maxage is respected by CloudFront (which we can invalidate
/// if we need to), and perhaps by other proxies in between.
/// When the cache-seconds are reasonably small, this should be fine.
///
/// CloudFront doesn't have `surrogate-control` headers which would be only
/// used by CloudFront and dropped (Fastly has them).
///
/// The `Expires` header from `redirect` is still kept to be safe,
/// CloudFront ignores it when it gets `Cache-Control: max-age` or
/// `s-maxage`.
fn cached_redirect(url: Url, cache_seconds: u32) -> Response {
let mut resp = Response::with((status::Found, Redirect(url)));
resp.headers.set(CacheControl(vec![
CacheDirective::MaxAge(0),
CacheDirective::SMaxAge(cache_seconds),
]));

resp
}
Expand Down
60 changes: 42 additions & 18 deletions src/web/rustdoc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ use crate::{
use iron::url::percent_encoding::percent_decode;
use iron::{
headers::{CacheControl, CacheDirective, Expires, HttpDate},
modifiers::Redirect,
status, Handler, IronResult, Request, Response, Url,
};
use lol_html::errors::RewritingError;
Expand All @@ -37,8 +36,12 @@ impl RustLangRedirector {
}

impl iron::Handler for RustLangRedirector {
fn handle(&self, _req: &mut Request) -> IronResult<Response> {
Ok(Response::with((status::Found, Redirect(self.url.clone()))))
fn handle(&self, req: &mut Request) -> IronResult<Response> {
let config = extension!(req, Config);
Ok(super::cached_redirect(
self.url.clone(),
config.cache_rustdoc_redirects_crate,
))
}
}

Expand All @@ -51,6 +54,7 @@ pub fn rustdoc_redirector_handler(req: &mut Request) -> IronResult<Response> {
vers: &str,
target: Option<&str>,
target_name: &str,
cache_seconds: u32,
) -> IronResult<Response> {
let mut url_str = if let Some(target) = target {
format!(
Expand All @@ -69,24 +73,25 @@ pub fn rustdoc_redirector_handler(req: &mut Request) -> IronResult<Response> {
url_str.push_str(query);
}
let url = ctry!(req, Url::parse(&url_str));
let mut resp = Response::with((status::Found, Redirect(url)));
resp.headers.set(Expires(HttpDate(time::now())));

Ok(resp)
Ok(super::cached_redirect(url, cache_seconds))
}

fn redirect_to_crate(req: &Request, name: &str, vers: &str) -> IronResult<Response> {
fn redirect_to_crate(
req: &Request,
name: &str,
vers: &str,
cache_seconds: u32,
) -> IronResult<Response> {
let url = ctry!(
req,
Url::parse(&format!("{}/crate/{}/{}", redirect_base(req), name, vers)),
);

let mut resp = Response::with((status::Found, Redirect(url)));
resp.headers.set(Expires(HttpDate(time::now())));

Ok(resp)
Ok(super::cached_redirect(url, cache_seconds))
}

let config = extension!(req, Config);
let metrics = extension!(req, Metrics).clone();
let mut rendering_time = RenderingTimesRecorder::new(&metrics.rustdoc_redirect_rendering_times);

Expand Down Expand Up @@ -154,6 +159,15 @@ pub fn rustdoc_redirector_handler(req: &mut Request) -> IronResult<Response> {
// use that instead
crate_name = new_name;
}

// Redirects with an exact SemVer match can be cached for longer.
// This is also true for corrected names.
let cache_redirect_for = if let MatchSemver::Exact(_) = v.version {
config.cache_rustdoc_redirects_release
} else {
config.cache_rustdoc_redirects_crate
};

let (version, id) = v.version.into_parts();

// get target name and whether it has docs
Expand All @@ -179,10 +193,17 @@ pub fn rustdoc_redirector_handler(req: &mut Request) -> IronResult<Response> {

if has_docs {
rendering_time.step("redirect to doc");
redirect_to_doc(req, &crate_name, &version, target, &target_name)
redirect_to_doc(
req,
&crate_name,
&version,
target,
&target_name,
cache_redirect_for,
)
} else {
rendering_time.step("redirect to crate");
redirect_to_crate(req, &crate_name, &version)
redirect_to_crate(req, &crate_name, &version, cache_redirect_for)
}
}

Expand Down Expand Up @@ -282,7 +303,10 @@ pub fn rustdoc_html_server_handler(req: &mut Request) -> IronResult<Response> {
);
let url = ctry!(req, Url::parse(&redirect_path));

Ok(super::redirect(url))
Ok(super::cached_redirect(
url,
config.cache_rustdoc_redirects_release,
))
};

rendering_time.step("match version");
Expand Down Expand Up @@ -551,10 +575,10 @@ pub fn target_redirect_handler(req: &mut Request) -> IronResult<Response> {
);

let url = ctry!(req, Url::parse(&url));
let mut resp = Response::with((status::Found, Redirect(url)));
resp.headers.set(Expires(HttpDate(time::now())));

Ok(resp)
Ok(super::cached_redirect(
url,
config.cache_rustdoc_redirects_release,
))
}

pub fn badge_handler(req: &mut Request) -> IronResult<Response> {
Expand Down