Skip to content

Commit

Permalink
parse/stats: Follow package moves
Browse files Browse the repository at this point in the history
Keeps build time stats across renames. See #60.

This commit is an MVP, lacking tests and optimizations. Even then, parsing 80 update files takes
2-6ms on my system, so the cost of this double-parsing single-threaded implementation is already
low.

We get the list of update files from the mtimedb, which is nice because we already parse it and can
yet again avoid locating the installed repos, but it also lists (on my system) some non-existent
files and files from an inactive repo.
  • Loading branch information
vincentdephily committed Jan 15, 2025
1 parent a958d94 commit c5074e2
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 13 deletions.
2 changes: 1 addition & 1 deletion docs/COMPARISON.md
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ estimate the resulting speedup factor.
| Global ETA format | total time | total time | total time, end date |
| Estimation accuracy | ok | better | best, configurable |
| Recognize binary emerges | no | no | yes |
| Follow package renames | yes | no | no |
| Follow package moves | no | no | yes |
| Query gentoo.linuxhowtos.org for unknown packages | yes | no | no |

## Speed
Expand Down
23 changes: 13 additions & 10 deletions src/commands.rs
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ impl ArgKind {
/// Then we compute the stats per ebuild, and print that.
pub fn cmd_stats(gc: Conf, sc: ConfStats) -> Result<bool, Error> {
let hist = get_hist(&gc.logfile, gc.from, gc.to, sc.show, &sc.search, sc.exact)?;
let moves = PkgMoves::new();
let h = [sc.group.name(), "Logged emerges", "Install/Update", "Unmerge/Clean", "Sync"];
let mut tblc = Table::new(&gc).margin(1, " ").header(h);
let h = [sc.group.name(), "Repo", "Syncs", "Total time", "Predict time"];
Expand Down Expand Up @@ -237,7 +238,7 @@ pub fn cmd_stats(gc: Conf, sc: ConfStats) -> Result<bool, Error> {
*run_args.entry(ArgKind::new(&args)).or_insert(0) += 1;
},
Hist::MergeStart { ts, key, .. } => {
merge_start.insert(key, (ts, false));
merge_start.insert(moves.get(key), (ts, false));
},
Hist::MergeStep { kind, key, .. } => {
if matches!(kind, MergeStep::MergeBinary) {
Expand All @@ -247,9 +248,9 @@ pub fn cmd_stats(gc: Conf, sc: ConfStats) -> Result<bool, Error> {
}
},
Hist::MergeStop { ts, ref key, .. } => {
if let Some((start_ts, bin)) = merge_start.remove(key) {
if let Some((start_ts, bin)) = merge_start.remove(moves.get_ref(key)) {
let (tc, tb, _) =
pkg_time.entry(p.take_ebuild())
pkg_time.entry(moves.get(p.take_ebuild()))
.or_insert((Times::new(), Times::new(), Times::new()));
if bin {
tb.insert(ts - start_ts);
Expand All @@ -259,12 +260,12 @@ pub fn cmd_stats(gc: Conf, sc: ConfStats) -> Result<bool, Error> {
}
},
Hist::UnmergeStart { ts, key, .. } => {
unmerge_start.insert(key, ts);
unmerge_start.insert(moves.get(key), ts);
},
Hist::UnmergeStop { ts, ref key, .. } => {
if let Some(start_ts) = unmerge_start.remove(key) {
if let Some(start_ts) = unmerge_start.remove(moves.get_ref(key)) {
let (_, _, times) =
pkg_time.entry(p.take_ebuild())
pkg_time.entry(moves.get(p.take_ebuild()))
.or_insert((Times::new(), Times::new(), Times::new()));
times.insert(ts - start_ts);
}
Expand Down Expand Up @@ -450,23 +451,25 @@ pub fn cmd_predict(gc: Conf, mut sc: ConfPred) -> Result<bool, Error> {

// Parse emerge log.
let hist = get_hist(&gc.logfile, gc.from, gc.to, Show::m(), &vec![], false)?;
let moves = PkgMoves::new();
let mut started: BTreeMap<String, (i64, bool)> = BTreeMap::new();
let mut times: HashMap<(String, bool), Times> = HashMap::new();
for p in hist {
match p {
Hist::MergeStart { ts, key, .. } => {
started.insert(key, (ts, false));
started.insert(moves.get(key), (ts, false));
},
Hist::MergeStep { kind, key, .. } => {
if matches!(kind, MergeStep::MergeBinary) {
if let Some((_, bin)) = started.get_mut(&key) {
if let Some((_, bin)) = started.get_mut(moves.get_ref(&key)) {
*bin = true;
}
}
},
Hist::MergeStop { ts, ref key, .. } => {
if let Some((start_ts, bin)) = started.remove(key.as_str()) {
let timevec = times.entry((p.take_ebuild(), bin)).or_insert(Times::new());
if let Some((start_ts, bin)) = started.remove(moves.get_ref(key)) {
let timevec =
times.entry((moves.get(p.take_ebuild()), bin)).or_insert(Times::new());
timevec.insert(ts - start_ts);
}
},
Expand Down
2 changes: 1 addition & 1 deletion src/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ mod history;
mod proces;

pub use ansi::{Ansi, AnsiStr, Theme};
pub use current::{get_buildlog, get_emerge, get_pretend, get_resume, Pkg};
pub use current::{get_buildlog, get_emerge, get_pretend, get_resume, Pkg, PkgMoves};
pub use history::{get_hist, Hist, MergeStep};
#[cfg(test)]
pub use proces::tests::procs;
Expand Down
78 changes: 77 additions & 1 deletion src/parse/current.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ use log::*;
use regex::Regex;
use serde::Deserialize;
use serde_json::from_reader;
use std::{fs::File,
use std::{collections::HashMap,
fs::File,
io::{BufRead, BufReader, Read},
path::PathBuf};

Expand Down Expand Up @@ -81,6 +82,7 @@ struct Resume {
struct Mtimedb {
resume: Option<Resume>,
resume_backup: Option<Resume>,
updates: Option<HashMap<String, i64>>,
}

/// Parse resume list from portage mtimedb
Expand Down Expand Up @@ -111,6 +113,80 @@ fn get_resume_priv(kind: ResumeKind, file: &str) -> Option<Vec<Pkg>> {
.collect())
}

/// Map of package moves (`category/oldname` -> `category/newname`) loaded from portage's
/// update files, used to keep emerge time stats across package renames.
pub struct PkgMoves(HashMap<String, String>);

impl Default for PkgMoves {
    /// Same as [`PkgMoves::new`]
    fn default() -> Self {
        Self::new()
    }
}

impl PkgMoves {
    /// Parse package moves using file list from portage's mtimedb
    ///
    /// Falls back to an empty map (so lookups become identity) if the mtimedb is unreadable.
    pub fn new() -> Self {
        let r = Self::load("/var/cache/edb/mtimedb").unwrap_or_default();
        trace!("Package moves: {r:?}");
        Self(r)
    }

    /// Resolve `key` to its current name, returning `key` unchanged if it was never moved
    pub fn get(&self, key: String) -> String {
        self.0.get(&key).cloned().unwrap_or(key)
    }

    /// Borrowing variant of [`PkgMoves::get`]
    // Takes/returns `&str` rather than `&String` (clippy::ptr_arg); callers passing a
    // `&String` still work via deref coercion.
    pub fn get_ref<'a>(&'a self, key: &'a str) -> &'a str {
        self.0.get(key).map(String::as_str).unwrap_or(key)
    }

    /// Load moves from the update files listed in portage's mtimedb
    ///
    /// Returns `None` (after logging a warning) if the mtimedb can't be opened or parsed.
    fn load(file: &str) -> Option<HashMap<String, String>> {
        let now = std::time::Instant::now();
        let reader = File::open(file).map_err(|e| warn!("Cannot open {file:?}: {e}")).ok()?;
        let db: Mtimedb =
            from_reader(reader).map_err(|e| warn!("Cannot parse {file:?}: {e}")).ok()?;
        let mut moves = HashMap::new();
        if let Some(updates) = db.updates {
            // Sort the files in reverse chronological order (compare year, then quarter).
            // File names are expected to end in `<quarter>Q-<year>`, e.g. `.../1Q-2025`.
            let mut files: Vec<_> = updates.keys().collect();
            files.sort_unstable_by(|a, b| match (a.rsplit_once('/'), b.rsplit_once('/')) {
                (Some((_, a)), Some((_, b))) if a.len() == 7 && b.len() == 7 => {
                    match a[3..].cmp(&b[3..]) {
                        std::cmp::Ordering::Equal => a[..3].cmp(&b[..3]),
                        o => o,
                    }.reverse()
                },
                _ => {
                    // Name both files: we can't tell which one is malformed
                    warn!("Unexpected update file name {a} or {b}");
                    a.cmp(b)
                },
            });
            for f in files {
                Self::parse(&mut moves, f);
            }
            debug!("Loaded {} package moves from {} files in {:?}",
                   moves.len(),
                   updates.len(),
                   now.elapsed());
        }
        Some(moves)
    }

    /// Parse one update file, merging its `move` entries into `moves`
    ///
    /// Returns `None` (after logging a warning) if the file can't be opened.
    fn parse(moves: &mut HashMap<String, String>, file: &str) -> Option<()> {
        trace!("Parsing {file}");
        let f = File::open(file).map_err(|e| warn!("Cannot open {file:?}: {e}")).ok()?;
        // Only `move ` lines matter here; `slotmove` entries are ignored.
        for line in
            BufReader::new(f).lines().map_while(Result::ok).filter(|l| l.starts_with("move "))
        {
            if let Some((from, to)) = line[5..].split_once(' ') {
                // Portage rewrites each repo's update files so that entries point directly to the final
                // name, but there can still be cross-repo chains, which we untangle here. Assumes the
                // first name seen is the latest one (files are processed newest-first).
                if let Some(to_final) = moves.get(to) {
                    if from != to_final {
                        trace!("Pointing {from} to {to_final} instead of {to}");
                        moves.insert(from.to_owned(), to_final.clone());
                    }
                } else {
                    moves.insert(from.to_owned(), to.to_owned());
                }
            }
        }
        Some(())
    }
}


/// Retrieve summary info from the build log
pub fn get_buildlog(pkg: &Pkg, portdirs: &Vec<PathBuf>) -> Option<String> {
Expand Down

0 comments on commit c5074e2

Please sign in to comment.