From f9e3dc19b8bd6949ea9f70aabba6f5def283ce7f Mon Sep 17 00:00:00 2001
From: Oleh Martsokha
Date: Thu, 7 Mar 2024 16:55:29 +0100
Subject: [PATCH] fix(all): revert feature changes

---
 .github/dependabot.yaml    |  10 ++-
 countio/Cargo.toml         |   6 +-
 countio/README.md          |   4 +-
 countio/counter/mod.rs     |   8 +-
 exclusion/README.md        |   4 +-
 exclusion/build/builder.rs | 144 ---------------------------------
 exclusion/build/group.rs   |   2 +-
 exclusion/build/mod.rs     | 161 ++++++++++++++++++++++++++++++++-----
 exclusion/build/split.rs   |  15 ++++
 exclusion/lib.rs           |   8 +-
 exclusion/parse/rule.rs    |   2 +-
 exclusion/paths/mod.rs     |   4 +-
 inclusion/parse/entry.rs   |   4 +-
 13 files changed, 185 insertions(+), 187 deletions(-)
 delete mode 100644 exclusion/build/builder.rs
 create mode 100644 exclusion/build/split.rs

diff --git a/.github/dependabot.yaml b/.github/dependabot.yaml
index 66d1017..b1e5606 100644
--- a/.github/dependabot.yaml
+++ b/.github/dependabot.yaml
@@ -1,8 +1,10 @@
 version: 2
 updates:
-  - package-ecosystem: cargo
-    directory: /
+  - package-ecosystem: "cargo"
+    directory: "/"
     schedule:
-      day: monday
-      interval: weekly
+      interval: "weekly"
+      timezone: "Europe/Warsaw"
+      day: "friday"
+      time: "18:00"
 
diff --git a/countio/Cargo.toml b/countio/Cargo.toml
index fa46440..741db32 100644
--- a/countio/Cargo.toml
+++ b/countio/Cargo.toml
@@ -28,11 +28,11 @@ path = "./lib.rs"
 
 [features]
 default = ["std"]
-full = ["std", "async-tokio", "async-futures"]
+full = ["std", "tokio", "futures"]
 
 std = []
-async-tokio = ["dep:tokio"]
-async-futures = ["dep:futures-io"]
+tokio = ["dep:tokio"]
+futures = ["dep:futures-io"]
 
 [dependencies]
 tokio = { workspace = true, optional = true }
diff --git a/countio/README.md b/countio/README.md
index 8fd4335..7a75754 100644
--- a/countio/README.md
+++ b/countio/README.md
@@ -24,8 +24,8 @@ and `tokio` crates.
 ### Features
 
 - `std` to enable `std::io::{Read, Write, Seek}`. **Enabled by default**.
-- `async-futures` to enable `futures_io::{AsyncRead, AsyncWrite, AsyncSeek}`.
-- `async-tokio` to enable `tokio::io::{AsyncRead, AsyncWrite, AsyncSeek}`.
+- `futures` to enable `futures_io::{AsyncRead, AsyncWrite, AsyncSeek}`.
+- `tokio` to enable `tokio::io::{AsyncRead, AsyncWrite, AsyncSeek}`.
 
 ### Examples
 
diff --git a/countio/counter/mod.rs b/countio/counter/mod.rs
index cb286ad..b3cfc0b 100644
--- a/countio/counter/mod.rs
+++ b/countio/counter/mod.rs
@@ -2,12 +2,12 @@
 #[cfg_attr(docsrs, doc(cfg(feature = "std")))]
 mod stdlib;
 
-#[cfg(feature = "async-futures")]
-#[cfg_attr(docsrs, doc(cfg(feature = "async-futures")))]
+#[cfg(feature = "futures")]
+#[cfg_attr(docsrs, doc(cfg(feature = "futures")))]
 mod futures;
 
-#[cfg(feature = "async-tokio")]
-#[cfg_attr(docsrs, doc(cfg(feature = "async-tokio")))]
+#[cfg(feature = "tokio")]
+#[cfg_attr(docsrs, doc(cfg(feature = "tokio")))]
 mod tokio;
 
 /// The `Counter` struct adds byte counting to any reader or writer.
diff --git a/exclusion/README.md b/exclusion/README.md
index 7df3b36..290bdf6 100644
--- a/exclusion/README.md
+++ b/exclusion/README.md
@@ -26,8 +26,8 @@ programming language with the support of `crawl-delay`, `sitemap` and universal
 - `parser` to enable `robotxt::{Robots}`. **Enabled by default**.
 - `builder` to enable `robotxt::{RobotsBuilder, GroupBuilder}`. **Enabled by
   default**.
-- `optimal` to enable optimize overlapping and global rules, potentially
-  improving matching speed at the cost of longer parsing times.
+- `optimal` to optimize overlapping and global rules, potentially improving
+  matching speed at the cost of longer parsing times.
 - `serde` to enable `serde::{Deserialize, Serialize}` implementation, allowing
   the caching of related rules.
 
diff --git a/exclusion/build/builder.rs b/exclusion/build/builder.rs
deleted file mode 100644
index b3b3210..0000000
--- a/exclusion/build/builder.rs
+++ /dev/null
@@ -1,144 +0,0 @@
-use std::collections::HashSet;
-use std::fmt::{Display, Formatter};
-
-use url::Url;
-
-use crate::build::format_comment;
-use crate::GroupBuilder;
-
-/// The set of formatted `user-agent` groups that can be written
-/// in the `robots.txt` compliant format.
-#[derive(Debug, Default, Clone)]
-pub struct RobotsBuilder {
-    groups: Vec<GroupBuilder>,
-    sitemaps: HashSet<Url>,
-
-    header: Option<String>,
-    footer: Option<String>,
-}
-
-impl RobotsBuilder {
-    /// Creates a new [`RobotsBuilder`] with default settings.
-    pub fn new() -> Self {
-        Self::default()
-    }
-
-    /// Adds a global header, usually used for permissions or legal notices.
-    ///
-    /// ```
-    /// use robotxt::RobotsBuilder;
-    ///
-    /// let txt = RobotsBuilder::default()
-    ///     .header("Note: Stop right there!")
-    ///     .group(["*"], |u| u.disallow("/"))
-    ///     .group(["foobot"], |u| u.allow("/"));
-    /// ```
-    pub fn header(mut self, header: &str) -> Self {
-        self.header = Some(header.to_string());
-        self
-    }
-
-    /// Adds a new `user-agent` group from the provided list of user-agents.
-    ///
-    /// ```
-    /// use robotxt::RobotsBuilder;
-    ///
-    /// let txt = RobotsBuilder::default()
-    ///     .group(["*"], |u| u.disallow("/"))
-    ///     .group(["foobot"], |u| u.allow("/"));
-    /// ```
-    pub fn group<'a>(
-        mut self,
-        group: impl IntoIterator<Item = &'a str>,
-        factory: impl FnOnce(GroupBuilder) -> GroupBuilder,
-    ) -> Self {
-        let section = GroupBuilder::from_iter(group);
-        self.groups.push(factory(section));
-        self
-    }
-
-    /// Adds the `Sitemap` directive from the URL address.
-    ///
-    /// ```
-    /// use url::Url;
-    /// use robotxt::RobotsBuilder;
-    ///
-    /// let txt = RobotsBuilder::default()
-    ///     .sitemap("https://example.com/sitemap_1.xml".try_into().unwrap())
-    ///     .sitemap("https://example.com/sitemap_1.xml".try_into().unwrap());
-    /// ```
-    pub fn sitemap(mut self, sitemap: Url) -> Self {
-        self.sitemaps.insert(sitemap);
-        self
-    }
-
-    /// Adds a global footer, usually used for notices.
-    ///
-    /// ```
-    /// use robotxt::RobotsBuilder;
-    ///
-    /// let txt = RobotsBuilder::default()
-    ///     .group(["*"], |u| u.disallow("/"))
-    ///     .group(["foobot"], |u| u.allow("/"))
-    ///     .footer("Note: Have a nice day!");
-    /// ```
-    pub fn footer(mut self, footer: &str) -> Self {
-        self.footer = Some(footer.to_string());
-        self
-    }
-
-    /// Parses the constructed output.
-    /// See [`Robots::from_bytes`].
-    ///
-    /// [`Robots`]: crate::Robots
-    #[cfg(feature = "parser")]
-    #[cfg_attr(docsrs, doc(cfg(feature = "parser")))]
-    pub fn parse(&self, user_agent: &str) -> crate::Robots {
-        let txt = self.to_string();
-        crate::Robots::from_bytes(txt.as_bytes(), user_agent)
-    }
-}
-
-impl Display for RobotsBuilder {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        let header = self.header.as_ref().map(|h| format_comment(h));
-        let footer = self.footer.as_ref().map(|f| format_comment(f));
-
-        let groups = self.groups.iter().map(|u| u.to_string());
-        let groups = groups.collect::<Vec<_>>().join("\n\n");
-
-        let result = [header, Some(groups), footer];
-        let result = result.iter().filter_map(|u| u.clone());
-        let result = result.collect::<Vec<_>>().join("\n\n");
-        write!(f, "{}", result.as_str())
-    }
-}
-
-#[cfg(test)]
-mod builder {
-    use crate::{Result, RobotsBuilder};
-
-    #[test]
-    fn readme() -> Result<()> {
-        let txt = RobotsBuilder::default()
-            .header("Robots.txt: Start")
-            .group(["foobot"], |u| {
-                u.crawl_delay(5)
-                    .header("Rules for Foobot: Start")
-                    .allow("/example/yeah.txt")
-                    .disallow("/example/nope.txt")
-                    .footer("Rules for Foobot: End")
-            })
-            .group(["barbot", "nombot"], |u| {
-                u.crawl_delay(2)
-                    .disallow("/example/yeah.txt")
-                    .disallow("/example/nope.txt")
-            })
-            .sitemap("https://example.com/sitemap_1.xml".try_into()?)
-            .sitemap("https://example.com/sitemap_2.xml".try_into()?)
-            .footer("Robots.txt: End");
-
-        println!("{}", txt.to_string());
-        Ok(())
-    }
-}
diff --git a/exclusion/build/group.rs b/exclusion/build/group.rs
index 4ed9991..57f1e2d 100644
--- a/exclusion/build/group.rs
+++ b/exclusion/build/group.rs
@@ -2,7 +2,7 @@ use std::collections::HashSet;
 use std::fmt::{Display, Formatter};
 
 use crate::build::format_comment;
-use crate::normalize_path;
+use crate::paths::normalize_path;
 
 /// The single formatted `user-agent` group.
 ///
diff --git a/exclusion/build/mod.rs b/exclusion/build/mod.rs
index 5bb456a..e2a3046 100644
--- a/exclusion/build/mod.rs
+++ b/exclusion/build/mod.rs
@@ -1,21 +1,146 @@
-mod builder;
+use std::collections::HashSet;
+use std::fmt;
+
+use url::Url;
+
+pub use crate::build::group::GroupBuilder;
+use crate::build::split::format_comment;
+
 mod group;
+mod split;
+
+/// The set of formatted `user-agent` groups that can be written
+/// in the `robots.txt` compliant format.
+#[derive(Debug, Default, Clone)]
+pub struct RobotsBuilder {
+    groups: Vec<GroupBuilder>,
+    sitemaps: HashSet<Url>,
+    header: Option<String>,
+    footer: Option<String>,
+}
+
+impl RobotsBuilder {
+    /// Creates a new [`RobotsBuilder`] with default settings.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Adds a global header, usually used for permissions or legal notices.
+    ///
+    /// ```
+    /// use robotxt::RobotsBuilder;
+    ///
+    /// let txt = RobotsBuilder::default()
+    ///     .header("Note: Stop right there!")
+    ///     .group(["*"], |u| u.disallow("/"))
+    ///     .group(["foobot"], |u| u.allow("/"));
+    /// ```
+    pub fn header(mut self, header: &str) -> Self {
+        self.header = Some(header.to_string());
+        self
+    }
+
+    /// Adds a new `user-agent` group from the provided list of user-agents.
+    ///
+    /// ```
+    /// use robotxt::RobotsBuilder;
+    ///
+    /// let txt = RobotsBuilder::default()
+    ///     .group(["*"], |u| u.disallow("/"))
+    ///     .group(["foobot"], |u| u.allow("/"));
+    /// ```
+    pub fn group<'a>(
+        mut self,
+        group: impl IntoIterator<Item = &'a str>,
+        factory: impl FnOnce(GroupBuilder) -> GroupBuilder,
+    ) -> Self {
+        let section = GroupBuilder::from_iter(group);
+        self.groups.push(factory(section));
+        self
+    }
+
+    /// Adds the `Sitemap` directive from the URL address.
+    ///
+    /// ```
+    /// use url::Url;
+    /// use robotxt::RobotsBuilder;
+    ///
+    /// let txt = RobotsBuilder::default()
+    ///     .sitemap("https://example.com/sitemap_1.xml".try_into().unwrap())
+    ///     .sitemap("https://example.com/sitemap_1.xml".try_into().unwrap());
+    /// ```
+    pub fn sitemap(mut self, sitemap: Url) -> Self {
+        self.sitemaps.insert(sitemap);
+        self
+    }
+
+    /// Adds a global footer, usually used for notices.
+    ///
+    /// ```
+    /// use robotxt::RobotsBuilder;
+    ///
+    /// let txt = RobotsBuilder::default()
+    ///     .group(["*"], |u| u.disallow("/"))
+    ///     .group(["foobot"], |u| u.allow("/"))
+    ///     .footer("Note: Have a nice day!");
+    /// ```
+    pub fn footer(mut self, footer: &str) -> Self {
+        self.footer = Some(footer.to_string());
+        self
+    }
+
+    /// Parses the constructed output.
+    /// See [`Robots::from_bytes`].
+    ///
+    /// [`Robots::from_bytes`]: crate::Robots::from_bytes
+    #[cfg(feature = "parser")]
+    #[cfg_attr(docsrs, doc(cfg(feature = "parser")))]
+    pub fn parse(&self, user_agent: &str) -> crate::Robots {
+        let txt = self.to_string();
+        crate::Robots::from_bytes(txt.as_bytes(), user_agent)
+    }
+}
+
+impl fmt::Display for RobotsBuilder {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let header = self.header.as_ref().map(|h| format_comment(h));
+        let footer = self.footer.as_ref().map(|f| format_comment(f));
+
+        let groups = self.groups.iter().map(|u| u.to_string());
+        let groups = groups.collect::<Vec<_>>().join("\n\n");
+
+        let result = [header, Some(groups), footer];
+        let result = result.iter().filter_map(|u| u.clone());
+        let result = result.collect::<Vec<_>>().join("\n\n");
+        write!(f, "{}", result.as_str())
+    }
+}
+
+#[cfg(test)]
+mod builder {
+    use crate::{Result, RobotsBuilder};
+
+    #[test]
+    fn readme() -> Result<()> {
+        let txt = RobotsBuilder::default()
+            .header("Robots.txt: Start")
+            .group(["foobot"], |u| {
+                u.crawl_delay(5)
+                    .header("Rules for Foobot: Start")
+                    .allow("/example/yeah.txt")
+                    .disallow("/example/nope.txt")
+                    .footer("Rules for Foobot: End")
+            })
+            .group(["barbot", "nombot"], |u| {
+                u.crawl_delay(2)
+                    .disallow("/example/yeah.txt")
+                    .disallow("/example/nope.txt")
+            })
+            .sitemap("https://example.com/sitemap_1.xml".try_into()?)
+            .sitemap("https://example.com/sitemap_2.xml".try_into()?)
+            .footer("Robots.txt: End");
 
-pub use builder::*;
-pub use group::*;
-
-/// Splits multiline comments into lines and prefixes them with `#`.
-pub(crate) fn format_comment(txt: &str) -> String {
-    txt.lines()
-        .map(|txt| txt.trim())
-        .filter(|txt| !txt.is_empty())
-        .map(|txt| {
-            if txt.starts_with('#') {
-                txt.to_owned()
-            } else {
-                format!("# {txt}")
-            }
-        })
-        .collect::<Vec<_>>()
-        .join("\n")
+        println!("{}", txt.to_string());
+        Ok(())
+    }
 }
diff --git a/exclusion/build/split.rs b/exclusion/build/split.rs
new file mode 100644
index 0000000..0bc57ee
--- /dev/null
+++ b/exclusion/build/split.rs
@@ -0,0 +1,15 @@
+/// Splits multiline comments into lines and prefixes them with `#`.
+pub fn format_comment(txt: &str) -> String {
+    txt.lines()
+        .map(|txt| txt.trim())
+        .filter(|txt| !txt.is_empty())
+        .map(|txt| {
+            if txt.starts_with('#') {
+                txt.to_owned()
+            } else {
+                format!("# {txt}")
+            }
+        })
+        .collect::<Vec<_>>()
+        .join("\n")
+}
diff --git a/exclusion/lib.rs b/exclusion/lib.rs
index d323094..f73fd97 100644
--- a/exclusion/lib.rs
+++ b/exclusion/lib.rs
@@ -6,10 +6,10 @@
 pub use url;
 
 #[cfg(feature = "builder")]
-pub use build::*;
+pub use build::{GroupBuilder, RobotsBuilder};
 #[cfg(feature = "parser")]
-pub use parse::*;
-pub use paths::*;
+pub use parse::{AccessResult, Robots, ALL_UAS};
+pub use paths::{create_url, BYTE_LIMIT};
 
 /// Unrecoverable failure during `robots.txt` building or parsing.
 ///
@@ -29,7 +29,7 @@ pub enum Error {
 
     /// Unable to create the expected path to the `robots.txt` file:
     /// unexpected parsing error.
-    #[error("parsing error: {0}")]
+    #[error("url parsing error: {0}")]
     Url(#[from] url::ParseError),
 }
 
diff --git a/exclusion/parse/rule.rs b/exclusion/parse/rule.rs
index 51ca215..176ecb1 100644
--- a/exclusion/parse/rule.rs
+++ b/exclusion/parse/rule.rs
@@ -3,7 +3,7 @@ use std::sync::OnceLock;
 
 use regex::{escape, Regex, RegexBuilder};
 
-use crate::normalize_path;
+use crate::paths::normalize_path;
 
 /// An error type indicating that a `Wildcard` could not be parsed correctly.
 #[derive(Debug, thiserror::Error)]
diff --git a/exclusion/paths/mod.rs b/exclusion/paths/mod.rs
index f9b7ed1..1dbe15b 100644
--- a/exclusion/paths/mod.rs
+++ b/exclusion/paths/mod.rs
@@ -1,5 +1,5 @@
-pub use create::*;
-pub(crate) use normal::*;
+pub use create::create_url;
+pub(crate) use normal::normalize_path;
 
 mod create;
 mod normal;
diff --git a/inclusion/parse/entry.rs b/inclusion/parse/entry.rs
index 287ec9f..35ebdf3 100644
--- a/inclusion/parse/entry.rs
+++ b/inclusion/parse/entry.rs
@@ -227,7 +227,7 @@ mod test {
         parser.close()?;
 
         let exp = Url::parse("https://www.example.com/file1.html");
-        assert_eq!(record.location(), &exp.unwrap());
+        assert_eq!(record.location, exp.unwrap());
 
         Ok(())
     }
@@ -243,7 +243,7 @@ mod test {
         parser.close().await?;
 
         let exp = Url::parse("https://www.example.com/file1.html");
-        assert_eq!(record.location(), &exp.unwrap());
+        assert_eq!(record.location, exp.unwrap());
 
         Ok(())
     }
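After the rename, downstream crates enable `features = ["tokio"]` or `["futures"]` on `countio` instead of the old `async-*` names; nothing else in the public API changes. Below is a minimal usage sketch under the new feature name, not part of this patch: it assumes countio's documented `Counter::new`, `reader_bytes`, and `into_inner` usage from the crate's README examples, plus `tokio` with its `full` feature for the runtime and IO traits.

```rust
// Hypothetical downstream consumer after the rename; assumes in Cargo.toml:
//   countio = { version = "*", features = ["tokio"] }   # was "async-tokio"
//   tokio   = { version = "*", features = ["full"] }
use countio::Counter;
use tokio::io::{AsyncBufReadExt, BufReader};

#[tokio::main]
async fn main() -> std::io::Result<()> {
    // Wrap any AsyncRead source in the byte-counting adapter.
    let reader = "Hello World!".as_bytes();
    let reader = Counter::new(reader);
    let mut reader = BufReader::new(reader);

    // Read one line through the counting wrapper.
    let mut line = String::new();
    let len = reader.read_line(&mut line).await?;

    // The counter reports how many bytes passed through the inner reader.
    let counter = reader.into_inner();
    assert_eq!(len, counter.reader_bytes());
    Ok(())
}
```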