Skip to content

Commit 9089455

Browse files
committed
compressed storage for rustdoc- and source-files.
1 parent b70039c commit 9089455

24 files changed

+1031
-218
lines changed

.github/workflows/ci.yml

+2-1
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,8 @@ jobs:
109109
for f in ./test-binaries/*; do
110110
echo "running $f"
111111
chmod +x $f # GH action artifacts don't handle permissions
112-
$f --ignored || exit 1
112+
# run build-tests. Limited to one thread since we don't support parallel builds.
113+
$f --ignored --test-threads=1 || exit 1
113114
done
114115
115116
- name: Clean up the database

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,5 @@ target
1010
.vagrant
1111
.rustwide
1212
.rustwide-docker
13+
.archive_cache
1314
.workspace

Cargo.lock

+66-4
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+4
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,9 @@ font-awesome-as-a-crate = { path = "crates/font-awesome-as-a-crate" }
5858
dashmap = "3.11.10"
5959
string_cache = "0.8.0"
6060
postgres-types = { version = "0.2", features = ["derive"] }
61+
zip = "0.5.11"
62+
bzip2 = "0.4.2"
63+
serde_cbor = "0.11.1"
6164
getrandom = "0.2.1"
6265

6366
# Async
@@ -104,6 +107,7 @@ criterion = "0.3"
104107
kuchiki = "0.8"
105108
rand = "0.8"
106109
mockito = "0.29"
110+
test-case = "1.2.0"
107111

108112
[build-dependencies]
109113
time = "0.1"

benches/compression.rs

+22-6
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
22
use docs_rs::storage::{compress, decompress, CompressionAlgorithm};
33

4-
const ALGORITHM: CompressionAlgorithm = CompressionAlgorithm::Zstd;
5-
64
pub fn regex_capture_matches(c: &mut Criterion) {
75
// this isn't a great benchmark because it only tests on one file
86
// ideally we would build a whole crate and compress each file, taking the average
@@ -11,11 +9,29 @@ pub fn regex_capture_matches(c: &mut Criterion) {
119

1210
c.benchmark_group("regex html")
1311
.throughput(Throughput::Bytes(html_slice.len() as u64))
14-
.bench_function("compress", |b| {
15-
b.iter(|| compress(black_box(html_slice), ALGORITHM));
12+
.bench_function("compress zstd", |b| {
13+
b.iter(|| compress(black_box(html_slice), CompressionAlgorithm::Zstd));
14+
})
15+
.bench_function("decompress zstd", |b| {
16+
b.iter(|| {
17+
decompress(
18+
black_box(html_slice),
19+
CompressionAlgorithm::Zstd,
20+
5 * 1024 * 1024,
21+
)
22+
});
23+
})
24+
.bench_function("compress bzip2", |b| {
25+
b.iter(|| compress(black_box(html_slice), CompressionAlgorithm::Bzip2));
1626
})
17-
.bench_function("decompress", |b| {
18-
b.iter(|| decompress(black_box(html_slice), ALGORITHM, 5 * 1024 * 1024));
27+
.bench_function("decompress bzip2", |b| {
28+
b.iter(|| {
29+
decompress(
30+
black_box(html_slice),
31+
CompressionAlgorithm::Bzip2,
32+
5 * 1024 * 1024,
33+
)
34+
});
1935
});
2036
}
2137

docker-compose.yml

+4-3
Original file line numberDiff line numberDiff line change
@@ -53,15 +53,16 @@ services:
5353
entrypoint: >
5454
/bin/sh -c "
5555
mkdir -p /data/rust-docs-rs;
56-
minio server /data;
56+
minio server /data --console-address ":9001";
5757
"
5858
ports:
5959
- "9000:9000"
60+
- "9001:9001"
6061
volumes:
6162
- minio-data:/data
6263
environment:
63-
MINIO_ACCESS_KEY: cratesfyi
64-
MINIO_SECRET_KEY: secret_key
64+
MINIO_ROOT_USER: cratesfyi
65+
MINIO_ROOT_PASSWORD: secret_key
6566
healthcheck:
6667
test:
6768
[

src/bin/cratesfyi.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -574,7 +574,7 @@ impl Context for BinContext {
574574
fn storage(self) -> Storage = Storage::new(
575575
self.pool()?,
576576
self.metrics()?,
577-
&*self.config()?,
577+
self.config()?,
578578
)?;
579579
fn config(self) -> Config = Config::from_env()?;
580580
fn metrics(self) -> Metrics = Metrics::new()?;

src/config.rs

+9
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,10 @@ pub struct Config {
5151
// For unit-tests the number has to be higher.
5252
pub(crate) random_crate_search_view_size: u32,
5353

54+
// where do we want to store the locally cached index files
55+
// for the remote archives?
56+
pub(crate) local_archive_cache_path: PathBuf,
57+
5458
// Content Security Policy
5559
pub(crate) csp_report_only: bool,
5660

@@ -127,6 +131,11 @@ impl Config {
127131

128132
csp_report_only: env("DOCSRS_CSP_REPORT_ONLY", false)?,
129133

134+
local_archive_cache_path: env(
135+
"DOCSRS_ARCHIVE_INDEX_CACHE_PATH",
136+
PathBuf::from(".archive_cache"),
137+
)?,
138+
130139
rustwide_workspace: env("DOCSRS_RUSTWIDE_WORKSPACE", PathBuf::from(".workspace"))?,
131140
inside_docker: env("DOCSRS_DOCKER", false)?,
132141
docker_image: maybe_env("DOCSRS_LOCAL_DOCKER_IMAGE")?

src/db/add_package.rs

+6-3
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ pub(crate) fn add_package_into_database(
3838
has_examples: bool,
3939
compression_algorithms: std::collections::HashSet<CompressionAlgorithm>,
4040
repository_id: Option<i32>,
41+
archive_storage: bool,
4142
) -> Result<i32> {
4243
debug!("Adding package into database");
4344
let crate_id = initialize_package_in_database(conn, metadata_pkg)?;
@@ -56,12 +57,12 @@ pub(crate) fn add_package_into_database(
5657
keywords, have_examples, downloads, files,
5758
doc_targets, is_library, doc_rustc_version,
5859
documentation_url, default_target, features,
59-
repository_id
60+
repository_id, archive_storage
6061
)
6162
VALUES (
6263
$1, $2, $3, $4, $5, $6, $7, $8, $9,
6364
$10, $11, $12, $13, $14, $15, $16, $17, $18,
64-
$19, $20, $21, $22, $23, $24, $25, $26
65+
$19, $20, $21, $22, $23, $24, $25, $26, $27
6566
)
6667
ON CONFLICT (crate_id, version) DO UPDATE
6768
SET release_time = $3,
@@ -87,7 +88,8 @@ pub(crate) fn add_package_into_database(
8788
documentation_url = $23,
8889
default_target = $24,
8990
features = $25,
90-
repository_id = $26
91+
repository_id = $26,
92+
archive_storage = $27
9193
RETURNING id",
9294
&[
9395
&crate_id,
@@ -116,6 +118,7 @@ pub(crate) fn add_package_into_database(
116118
&default_target,
117119
&features,
118120
&repository_id,
121+
&archive_storage,
119122
],
120123
)?;
121124

src/db/file.rs

+13-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
//! However, postgres is still available for testing and backwards compatibility.
99
1010
use crate::error::Result;
11-
use crate::storage::{CompressionAlgorithms, Storage};
11+
use crate::storage::{CompressionAlgorithm, CompressionAlgorithms, Storage};
1212

1313
use serde_json::Value;
1414
use std::path::{Path, PathBuf};
@@ -34,6 +34,18 @@ pub fn add_path_into_database<P: AsRef<Path>>(
3434
))
3535
}
3636

37+
pub fn add_path_into_remote_archive<P: AsRef<Path>>(
38+
storage: &Storage,
39+
archive_path: &str,
40+
path: P,
41+
) -> Result<(Value, CompressionAlgorithm)> {
42+
let (file_list, algorithm) = storage.store_all_in_archive(archive_path, path.as_ref())?;
43+
Ok((
44+
file_list_to_json(file_list.into_iter().collect()),
45+
algorithm,
46+
))
47+
}
48+
3749
fn file_list_to_json(file_list: Vec<(PathBuf, String)>) -> Value {
3850
Value::Array(
3951
file_list

src/db/migrate.rs

+5
Original file line numberDiff line numberDiff line change
@@ -749,6 +749,11 @@ pub fn migrate(version: Option<Version>, conn: &mut Client) -> crate::error::Res
749749
"ALTER TABLE builds RENAME COLUMN cratesfyi_version TO docsrs_version",
750750
"ALTER TABLE builds RENAME COLUMN docsrs_version TO cratesfyi_version",
751751
),
752+
migration!(
753+
context, 30, "add archive-storage marker for releases",
754+
"ALTER TABLE releases ADD COLUMN archive_storage BOOL NOT NULL DEFAULT FALSE;",
755+
"ALTER TABLE releases DROP COLUMN archive_storage;",
756+
),
752757
];
753758

754759
for migration in migrations {

src/db/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ pub(crate) use self::add_package::{
55
add_build_into_database, add_doc_coverage, add_package_into_database,
66
};
77
pub use self::delete::{delete_crate, delete_version};
8-
pub use self::file::add_path_into_database;
8+
pub use self::file::{add_path_into_database, add_path_into_remote_archive};
99
pub use self::migrate::migrate;
1010
pub use self::pool::{Pool, PoolClient, PoolError};
1111

0 commit comments

Comments
 (0)