-
Notifications
You must be signed in to change notification settings - Fork 153
feat(era_manager): add era support + pre-fetching #1375
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
4f581fd
2ce7d59
a67027d
66fa50f
350994e
26d145d
5a99d28
e478b71
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,32 +1,65 @@ | ||
use std::io; | ||
use std::{collections::HashMap, io}; | ||
|
||
use anyhow::{anyhow, ensure, Error}; | ||
use rand::{seq::SliceRandom, thread_rng}; | ||
use scraper::{Html, Selector}; | ||
use surf::Client; | ||
|
||
const ERA_DIR_URL: &str = "https://mainnet.era.nimbus.team/"; | ||
const ERA1_DIR_URL: &str = "https://era1.ethportal.net/"; | ||
const ERA1_FILE_COUNT: usize = 1897; | ||
pub const ERA1_FILE_COUNT: usize = 1897; | ||
|
||
/// Fetches era1 files hosted on era1.ethportal.net and shuffles them | ||
pub async fn get_shuffled_era1_files(http_client: &Client) -> anyhow::Result<Vec<String>> { | ||
/// Returns the `io::ErrorKind` of the first `io::Error` found in the cause chain of `error`. | ||
/// | ||
/// Walks `error.chain()` in order; returns `None` when no cause downcasts to `io::Error`. | ||
pub fn underlying_io_error_kind(error: &Error) -> Option<io::ErrorKind> { | ||
for cause in error.chain() { | ||
if let Some(io_error) = cause.downcast_ref::<io::Error>() { | ||
return Some(io_error.kind()); | ||
} | ||
} | ||
None | ||
} | ||
|
||
/// Downloads the index page at `url` and collects the links whose `href` contains `mainnet-`. | ||
/// | ||
/// Each link is keyed by the `u64` parsed from the second `-`-separated segment of its `href` | ||
/// (the epoch index); the stored value is `url` concatenated with that `href`. | ||
/// | ||
/// # Errors | ||
/// Returns an error if fetching the page as a string fails. | ||
/// | ||
/// # Panics | ||
/// Panics (via `expect`) if a matched element has no `href`, if the `href` has no second | ||
/// `-`-separated segment, or if that segment does not parse as a `u64`. | ||
pub async fn download_era_links( | ||
http_client: &Client, | ||
url: &str, | ||
) -> anyhow::Result<HashMap<u64, String>> { | ||
let index_html = http_client | ||
.get(ERA1_DIR_URL) | ||
.get(url) | ||
.recv_string() | ||
.await | ||
.map_err(|e| anyhow!("{e}"))?; | ||
let index_html = Html::parse_document(&index_html); | ||
let selector = Selector::parse("a[href*='mainnet-']").expect("to be able to parse selector"); | ||
let mut era1_files: Vec<String> = index_html | ||
let era_files: HashMap<u64, String> = index_html | ||
.select(&selector) | ||
.map(|element| { | ||
let href = element | ||
.value() | ||
.attr("href") | ||
.expect("to be able to get href"); | ||
format!("{ERA1_DIR_URL}{href}") | ||
let epoch_index = href | ||
.split('-') | ||
.nth(1) | ||
.expect("to be able to get epoch") | ||
.parse::<u64>() | ||
.expect("to be able to parse epoch"); | ||
(epoch_index, format!("{url}{href}")) | ||
}) | ||
.collect(); | ||
Ok(era_files) | ||
} | ||
|
||
/// Fetches the era file links listed at `ERA_DIR_URL`, keyed by epoch index. | ||
/// | ||
/// # Errors | ||
/// Fails if `download_era_links` fails, if no links are found, or if the keys are not exactly | ||
/// `0..len` (that is, they do not start at zero or are not consecutive). | ||
pub async fn get_era_files(http_client: &Client) -> anyhow::Result<HashMap<u64, String>> { | ||
let era_files = download_era_links(http_client, ERA_DIR_URL).await?; | ||
ensure!(!era_files.is_empty(), "No era files found at {ERA_DIR_URL}"); | ||
ensure!( | ||
(0..era_files.len()).all(|epoch| era_files.contains_key(&(epoch as u64))), | ||
"Epoch indices are not starting from zero or not consecutive", | ||
); | ||
Ok(era_files) | ||
} | ||
|
||
pub async fn get_era1_files(http_client: &Client) -> anyhow::Result<HashMap<u64, String>> { | ||
let era1_files = download_era_links(http_client, ERA1_DIR_URL).await?; | ||
ensure!( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: similar to era files, we can also check that all keys are in the range of |
||
era1_files.len() == ERA1_FILE_COUNT, | ||
format!( | ||
|
@@ -35,15 +68,36 @@ pub async fn get_shuffled_era1_files(http_client: &Client) -> anyhow::Result<Vec | |
era1_files.len() | ||
) | ||
); | ||
ensure!( | ||
(0..ERA1_FILE_COUNT).all(|epoch| era1_files.contains_key(&(epoch as u64))), | ||
"Epoch indices are not starting from zero or not consecutive", | ||
); | ||
Ok(era1_files) | ||
} | ||
|
||
/// Fetches era1 files hosted on era1.ethportal.net and shuffles them. | ||
/// | ||
/// Drops the epoch keys of the map returned by `get_era1_files` and shuffles the remaining | ||
/// links with `thread_rng()`. | ||
/// | ||
/// # Errors | ||
/// Propagates any error returned by `get_era1_files`. | ||
pub async fn get_shuffled_era1_files(http_client: &Client) -> anyhow::Result<Vec<String>> { | ||
let era1_files = get_era1_files(http_client).await?; | ||
let mut era1_files: Vec<String> = era1_files.into_values().collect(); | ||
era1_files.shuffle(&mut thread_rng()); | ||
Ok(era1_files) | ||
} | ||
|
||
pub fn underlying_io_error_kind(error: &Error) -> Option<io::ErrorKind> { | ||
for cause in error.chain() { | ||
if let Some(io_error) = cause.downcast_ref::<io::Error>() { | ||
return Some(io_error.kind()); | ||
} | ||
#[cfg(test)] | ||
mod tests { | ||
use super::*; | ||
|
||
#[tokio::test] | ||
async fn test_get_shuffled_era1_files() { | ||
let http_client = Client::new(); | ||
let era1_files = get_shuffled_era1_files(&http_client).await.unwrap(); | ||
assert_eq!(era1_files.len(), ERA1_FILE_COUNT); | ||
} | ||
|
||
#[tokio::test] | ||
async fn test_get_era_file_download_links() { | ||
let http_client = Client::new(); | ||
let era_files = get_era_files(&http_client).await.unwrap(); | ||
assert!(!era_files.is_empty()); | ||
} | ||
None | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -174,6 +174,17 @@ impl SignedBeaconBlock { | |
SignedBeaconBlock::Deneb(block) => block.message.slot, | ||
} | ||
} | ||
|
||
/// Returns the execution block number, read from `message.body.execution_payload.block_number` | ||
/// of whichever fork variant (Bellatrix, Capella or Deneb) this block is. | ||
pub fn execution_block_number(&self) -> u64 { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: it feels a bit weird to be this specific and return the execution block number directly from There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This would make us do a match on match executionpayload { I think the current implementation is the cleanest, but if this is a major concern we can discuss it more |
||
match self { | ||
SignedBeaconBlock::Bellatrix(block) => { | ||
block.message.body.execution_payload.block_number | ||
} | ||
SignedBeaconBlock::Capella(block) => block.message.body.execution_payload.block_number, | ||
SignedBeaconBlock::Deneb(block) => block.message.body.execution_payload.block_number, | ||
} | ||
} | ||
} | ||
|
||
#[cfg(test)] | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: what do you think about checking that the keys start from 0 and are consecutive?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am not sure what you mean. Do you want me to loop from 0 to the max era count and check that every key exists?
Since we are using a hashmap, the values are stored at positions determined by the hash of the keys, so they wouldn't be in sorted order if we were to iterate over the buckets in order.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think looping and checking is simple enough. Something like:
You can also calculate the min and max and check that they are as desired (this still requires looping, so there is no performance saving).