From d5640b6255c5399c852287abdf221277986801f3 Mon Sep 17 00:00:00 2001
From: Oleh Martsokha
Date: Wed, 26 Jun 2024 21:38:02 +0200
Subject: [PATCH] feat(all): minor fixes

---
 Cargo.toml              |  2 +-
 README.md               |  3 +-
 countio/Cargo.toml      | 10 +++----
 robotxt/Cargo.toml      |  8 ++---
 robotxt/README.md       | 65 +++++++++++++++++++----------------
 robotxt/lib.rs          |  7 ++++-
 robotxt/parse/access.rs | 16 +++++++++-
 robotxt/paths/create.rs | 26 +++++++++++++----
 sitemapo/Cargo.toml     |  6 ++--
 sitemapo/README.md      |  4 +--
 sitemapo/parse/entry.rs |  2 +-
 sitemapo/parse/index.rs |  2 +-
 12 files changed, 89 insertions(+), 62 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index ecb42d5..12eebc3 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -14,7 +14,7 @@ authors = ["Oleh Martsokha "]
 license = "MIT"
 
 [workspace.dependencies]
-tokio = { version = "1.38", default-features = false }
+tokio = { version = "1", default-features = false }
 futures-io = { version = "0.3", default-features = false }
 futures-util = { version = "0.3", default-features = false }
 futures-test = { version = "0.3", default-features = false }
diff --git a/README.md b/README.md
index f490897..f0aa6ac 100644
--- a/README.md
+++ b/README.md
@@ -17,5 +17,4 @@
   protocol with the support of `crawl-delay`, `sitemap` and universal `*` match
   extensions.
 - [sitemapo](./sitemapo/): The implementation of the Sitemap (or URL inclusion)
-  protocol with the support of txt, xml formats and video, image, and news
-  extensions.
+  protocol with the support of txt and xml formats.
diff --git a/countio/Cargo.toml b/countio/Cargo.toml
index fb9f3d8..b487537 100644
--- a/countio/Cargo.toml
+++ b/countio/Cargo.toml
@@ -35,10 +35,10 @@ tokio = ["dep:tokio"]
 futures = ["dep:futures-io"]
 
 [dependencies]
-tokio = { workspace = true, optional = true }
-futures-io = { workspace = true, optional = true, features = ["std"] }
+tokio = { version = "1", default-features = false, optional = true }
+futures-io = { version = "0.3", default-features = false, optional = true, features = ["std"] }
 
 [dev-dependencies]
-tokio = { workspace = true, features = ["rt", "macros", "io-util"] }
-futures-util = { workspace = true }
-futures-test = { workspace = true, features = ["std"] }
+tokio = { version = "1", features = ["rt", "macros", "io-util"] }
+futures-util = { version = "0.3", default-features = false }
+futures-test = { version = "0.3", default-features = false, features = ["std"] }
diff --git a/robotxt/Cargo.toml b/robotxt/Cargo.toml
index 8338574..8d2abfe 100644
--- a/robotxt/Cargo.toml
+++ b/robotxt/Cargo.toml
@@ -45,14 +45,14 @@ optimal = []
 serde = ["dep:serde", "url/serde", "serde/derive", "serde/rc"]
 
 [dependencies]
-url = { workspace = true }
-thiserror = { workspace = true }
+url = { version = "2.5" }
+thiserror = { version = "1.0" }
 percent-encoding = { version = "2.3" }
 
 nom = { version = "7.1", optional = true }
 bstr = { version = "1.9", optional = true }
 regex = { version = "1.10", optional = true }
-serde = { workspace = true, optional = true }
+serde = { version = "1.0", optional = true }
 
 [dev-dependencies]
-serde_json = { workspace = true }
+serde_json = { version = "1.0" }
diff --git a/robotxt/README.md b/robotxt/README.md
index f5ff32b..88c5722 100644
--- a/robotxt/README.md
+++ b/robotxt/README.md
@@ -37,19 +37,17 @@ programming language with the support of `crawl-delay`, `sitemap` and universal
 ```rust
 use robotxt::Robots;
 
-fn main() {
-    let txt = r#"
-        User-Agent: foobot
-        Disallow: *
-        Allow: /example/
-        Disallow: /example/nope.txt
-    "#;
-
-    let r = Robots::from_bytes(txt.as_bytes(), "foobot");
-    assert!(r.is_relative_allowed("/example/yeah.txt"));
-    assert!(!r.is_relative_allowed("/example/nope.txt"));
-    assert!(!r.is_relative_allowed("/invalid/path.txt"));
-}
+let txt = r#"
+    User-Agent: foobot
+    Disallow: *
+    Allow: /example/
+    Disallow: /example/nope.txt
+"#;
+
+let r = Robots::from_bytes(txt.as_bytes(), "foobot");
+assert!(r.is_relative_allowed("/example/yeah.txt"));
+assert!(!r.is_relative_allowed("/example/nope.txt"));
+assert!(!r.is_relative_allowed("/invalid/path.txt"));
 ```
 
 - build the new `robots.txt` file in a declarative manner:
@@ -57,28 +55,25 @@ fn main() {
 ```rust
 use robotxt::{RobotsBuilder, Result};
 
-fn main() -> Result<()> {
-    let txt = RobotsBuilder::default()
-        .header("Robots.txt: Start")
-        .group(["foobot"], |u| {
-            u.crawl_delay(5)
-                .header("Rules for Foobot: Start")
-                .allow("/example/yeah.txt")
-                .disallow("/example/nope.txt")
-                .footer("Rules for Foobot: End")
-        })
-        .group(["barbot", "nombot"], |u| {
-            u.crawl_delay(2)
-                .disallow("/example/yeah.txt")
-                .disallow("/example/nope.txt")
-        })
-        .sitemap("https://example.com/sitemap_1.xml".try_into()?)
-        .sitemap("https://example.com/sitemap_1.xml".try_into()?)
-        .footer("Robots.txt: End");
-
-    println!("{}", txt.to_string());
-    Ok(())
-}
+let txt = RobotsBuilder::default()
+    .header("Robots.txt: Start")
+    .group(["foobot"], |u| {
+        u.crawl_delay(5)
+            .header("Rules for Foobot: Start")
+            .allow("/example/yeah.txt")
+            .disallow("/example/nope.txt")
+            .footer("Rules for Foobot: End")
+    })
+    .group(["barbot", "nombot"], |u| {
+        u.crawl_delay(2)
+            .disallow("/example/yeah.txt")
+            .disallow("/example/nope.txt")
+    })
+    .sitemap("https://example.com/sitemap_1.xml".try_into()?)
+    .sitemap("https://example.com/sitemap_1.xml".try_into()?)
+    .footer("Robots.txt: End");
+
+println!("{}", txt.to_string());
 ```
 
 ### Links
diff --git a/robotxt/lib.rs b/robotxt/lib.rs
index b03ba45..cf58df9 100644
--- a/robotxt/lib.rs
+++ b/robotxt/lib.rs
@@ -23,9 +23,14 @@ pub enum Error {
     #[error("cannot be a base url")]
     CannotBeBase,
 
+    /// Unable to create the expected path to the `robots.txt` file:
+    /// does not have a host.
+    #[error("does not have a host")]
+    NoHost,
+
     /// Unable to create the expected path to the `robots.txt` file:
     /// unexpected address scheme, expected `http` or `https`.
-    #[error("addr scheme: `{scheme}`, expected `http` or `https`")]
+    #[error("scheme: `{scheme}`, expected `http` or `https`")]
     WrongScheme { scheme: String },
 
     /// Unable to create the expected path to the `robots.txt` file:
diff --git a/robotxt/parse/access.rs b/robotxt/parse/access.rs
index acd64f8..0fe8683 100644
--- a/robotxt/parse/access.rs
+++ b/robotxt/parse/access.rs
@@ -1,4 +1,5 @@
 use std::fmt;
+use std::ops::Deref;
 
 /// The result of the `robots.txt` retrieval attempt.
 ///
@@ -6,13 +7,14 @@ use std::fmt;
 /// Also see 2.3.1. Access Results in the specification.
 ///
 /// [`Robots::from_access`]: crate::Robots::from_access
-#[derive(Debug)]
+#[derive(Debug, Copy, Clone)]
 pub enum AccessResult<'a> {
     /// 2.3.1.1. Successful Access
     ///
     /// If the crawler successfully downloads the robots.txt file, the
     /// crawler MUST follow the parseable rules.
     Successful(&'a [u8]),
+
     /// 2.3.1.2. Redirects
     ///
     /// It's possible that a server responds to a robots.txt fetch request
@@ -27,6 +29,7 @@ pub enum AccessResult<'a> {
     /// If there are more than five consecutive redirects, crawlers MAY
     /// assume that the robots.txt file is unavailable.
     Redirect,
+
     /// 2.3.1.3. "Unavailable" Status
     ///
     /// "Unavailable" means the crawler tries to fetch the robots.txt file
@@ -38,6 +41,7 @@ pub enum AccessResult<'a> {
     /// unavailable to the crawler, then the crawler MAY access any resources
     /// on the server.
     Unavailable,
+
     /// 2.3.1.4. "Unreachable" Status
     ///
     /// If the robots.txt file is unreachable due to server or network
@@ -65,7 +69,17 @@ impl AccessResult<'_> {
     }
 }
 
+impl Deref for AccessResult<'_> {
+    type Target = str;
+
+    #[inline]
+    fn deref(&self) -> &Self::Target {
+        self.as_str()
+    }
+}
+
 impl fmt::Display for AccessResult<'_> {
+    #[inline]
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         write!(f, "{}", self.as_str())
     }
diff --git a/robotxt/paths/create.rs b/robotxt/paths/create.rs
index e7be5e9..600a686 100644
--- a/robotxt/paths/create.rs
+++ b/robotxt/paths/create.rs
@@ -1,7 +1,16 @@
+use url::Url;
+
 use crate::{Error, Result};
 
 /// Returns the expected path to the `robots.txt` file
-/// as the [`url::Url`].
+/// as the `url::`[`Url`].
+///
+/// # Errors
+///
+/// Returns the error if the provided [`Url`] cannot be a base,
+/// does not have a host or the schema is not `http` or `https`.
+///
+/// # Examples
 ///
 /// ```rust
 /// use url::Url;
@@ -12,13 +21,17 @@ use crate::{Error, Result};
 /// let path = "https://user:pass@example.com/foo/sample.txt";
 /// let path = Url::parse(path).unwrap();
 /// let robots = create_url(&path).unwrap().to_string();
 /// assert_eq!(robots, "https://example.com/robots.txt")
 /// ```
-pub fn create_url(path: &url::Url) -> Result<url::Url> {
+pub fn create_url(path: &Url) -> Result<Url> {
     let mut path = path.clone();
 
     if path.cannot_be_a_base() {
         return Err(Error::CannotBeBase);
     }
 
+    if path.host().is_none() {
+        return Err(Error::NoHost);
+    }
+
     if path.scheme() != "http" && path.scheme() != "https" {
         return Err(Error::WrongScheme {
             scheme: path.scheme().to_string(),
@@ -26,11 +39,12 @@ pub fn create_url(path: &url::Url) -> Result<url::Url> {
     }
 
     if !path.username().is_empty() {
-        path.set_username("").unwrap();
+        path.set_username("").expect("should pass base/host tests");
     }
 
     if path.password().is_some() {
-        path.set_password(None).unwrap();
+        path.set_password(None)
+            .expect("should pass base/host tests");
     }
 
     path.join("/robots.txt").map_err(Into::into)
@@ -38,12 +52,12 @@
 
 #[cfg(test)]
 mod test {
-    use super::*;
+    use crate::{create_url, url::Url, Result};
 
     #[test]
     fn from_url() -> Result<()> {
         let path = "https://user:pass@example.com/foo/sample.txt";
-        let path = url::Url::parse(path).unwrap();
+        let path = Url::parse(path).unwrap();
 
         let robots = create_url(&path)?.to_string();
         assert_eq!(robots, "https://example.com/robots.txt");
diff --git a/sitemapo/Cargo.toml b/sitemapo/Cargo.toml
index 702af36..44683c0 100644
--- a/sitemapo/Cargo.toml
+++ b/sitemapo/Cargo.toml
@@ -15,8 +15,8 @@ documentation = "https://docs.rs/sitemapo"
 categories = ["parser-implementations", "web-programming"]
 keywords = ["sitemaps", "sitemap", "inclusion", "crawler", "scraper"]
 description = """
-The implementation of the Sitemap.xml (or URL inclusion) protocol with
-the support of txt & xml formats, and video, image, news extensions.
+The implementation of the Sitemap.xml (or URL inclusion) protocol
+with the support of txt & xml formats.
""" [package.metadata.docs.rs] @@ -44,7 +44,7 @@ url = { workspace = true } thiserror = { workspace = true } countio = { version = "0.2" } -quick-xml = { version = "0.31" } +quick-xml = { version = "0.32" } bytes = { version = "1.6", features = [] } time = { workspace = true, features = ["parsing", "formatting"] } diff --git a/sitemapo/README.md b/sitemapo/README.md index eaa4db6..04d61a5 100644 --- a/sitemapo/README.md +++ b/sitemapo/README.md @@ -17,8 +17,8 @@ [coverage-url]: https://app.codecov.io/gh/spire-rs/kit The implementation of the Sitemap (or URL inclusion) protocol in the Rust -programming language with the support of `txt` & `xml` formats, and `video`, -`image`, `news` extensions (according to the Google's spec). +programming language with the support of `txt` & `xml` formats (according to the +Google's spec). ### Features diff --git a/sitemapo/parse/entry.rs b/sitemapo/parse/entry.rs index 150a695..249a3e6 100644 --- a/sitemapo/parse/entry.rs +++ b/sitemapo/parse/entry.rs @@ -185,7 +185,7 @@ mod async_parser { loop { self.inner.try_if_readable()?; let event = self.inner.reader.read_event_into_async(&mut buf).await?; - match self.write_event(event)? { + match self.write_event(event) { Output::Some(record) => return Ok(Some(record)), Output::None => {} Output::End => return Ok(None), diff --git a/sitemapo/parse/index.rs b/sitemapo/parse/index.rs index c81b8e2..77bf4d4 100644 --- a/sitemapo/parse/index.rs +++ b/sitemapo/parse/index.rs @@ -156,7 +156,7 @@ mod tokio { loop { self.inner.try_if_readable()?; let event = self.inner.reader.read_event_into_async(&mut buf).await?; - match self.write_event(event)? { + match self.write_event(event) { Output::Some(record) => return Ok(Some(record)), Output::None => {} Output::End => return Ok(None),