Skip to content

Commit

Permalink
fix(all): revert feature changes
Browse files Browse the repository at this point in the history
  • Loading branch information
martsokha committed Mar 7, 2024
1 parent a3b3bc1 commit f9e3dc1
Show file tree
Hide file tree
Showing 13 changed files with 185 additions and 187 deletions.
10 changes: 6 additions & 4 deletions .github/dependabot.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
version: 2
updates:

- package-ecosystem: cargo
directory: /
- package-ecosystem: "cargo"
directory: "/"
schedule:
day: monday
interval: weekly
interval: "weekly"
timezone: "Europe/Warsaw"
day: "friday"
time: "18:00"
6 changes: 3 additions & 3 deletions countio/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@ path = "./lib.rs"

[features]
default = ["std"]
full = ["std", "async-tokio", "async-futures"]
full = ["std", "tokio", "futures"]

std = []
async-tokio = ["dep:tokio"]
async-futures = ["dep:futures-io"]
tokio = ["dep:tokio"]
futures = ["dep:futures-io"]

[dependencies]
tokio = { workspace = true, optional = true }
Expand Down
4 changes: 2 additions & 2 deletions countio/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ and `tokio` crates.
### Features

- `std` to enable `std::io::{Read, Write, Seek}`. **Enabled by default**.
- `async-futures` to enable `futures_io::{AsyncRead, AsyncWrite, AsyncSeek}`.
- `async-tokio` to enable `tokio::io::{AsyncRead, AsyncWrite, AsyncSeek}`.
- `futures` to enable `futures_io::{AsyncRead, AsyncWrite, AsyncSeek}`.
- `tokio` to enable `tokio::io::{AsyncRead, AsyncWrite, AsyncSeek}`.

### Examples

Expand Down
8 changes: 4 additions & 4 deletions countio/counter/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@
#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
mod stdlib;

#[cfg(feature = "async-futures")]
#[cfg_attr(docsrs, doc(cfg(feature = "async-futures")))]
#[cfg(feature = "futures")]
#[cfg_attr(docsrs, doc(cfg(feature = "futures")))]
mod futures;

#[cfg(feature = "async-tokio")]
#[cfg_attr(docsrs, doc(cfg(feature = "async-tokio")))]
#[cfg(feature = "tokio")]
#[cfg_attr(docsrs, doc(cfg(feature = "tokio")))]
mod tokio;

/// The `Counter<D>` struct adds byte counting to any reader or writer.
Expand Down
4 changes: 2 additions & 2 deletions exclusion/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ programming language with the support of `crawl-delay`, `sitemap` and universal
- `parser` to enable `robotxt::{Robots}`. **Enabled by default**.
- `builder` to enable `robotxt::{RobotsBuilder, GroupBuilder}`. **Enabled by
default**.
- `optimal` to enable optimize overlapping and global rules, potentially
improving matching speed at the cost of longer parsing times.
- `optimal` to optimize overlapping and global rules, potentially improving
matching speed at the cost of longer parsing times.
- `serde` to enable `serde::{Deserialize, Serialize}` implementation, allowing
the caching of related rules.

Expand Down
144 changes: 0 additions & 144 deletions exclusion/build/builder.rs

This file was deleted.

2 changes: 1 addition & 1 deletion exclusion/build/group.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::collections::HashSet;
use std::fmt::{Display, Formatter};

use crate::build::format_comment;
use crate::normalize_path;
use crate::paths::normalize_path;

/// The single formatted `user-agent` group.
///
Expand Down
161 changes: 143 additions & 18 deletions exclusion/build/mod.rs
Original file line number Diff line number Diff line change
@@ -1,21 +1,146 @@
mod builder;
use std::collections::HashSet;
use std::fmt;

use url::Url;

pub use crate::build::group::GroupBuilder;
use crate::build::split::format_comment;

mod group;
mod split;

/// The set of formatted `user-agent` groups that can be written
/// in the `robots.txt` compliant format.
#[derive(Debug, Default, Clone)]
pub struct RobotsBuilder {
    // Accumulated `user-agent` rule groups, stored in the order they were added.
    groups: Vec<GroupBuilder>,
    // `Sitemap` URLs; `HashSet` de-duplicates repeated insertions of the same URL.
    sitemaps: HashSet<Url>,
    // Optional global comment block placed before the groups.
    header: Option<String>,
    // Optional global comment block placed after the groups.
    footer: Option<String>,
}

impl RobotsBuilder {
    /// Creates a new [`RobotsBuilder`] with default settings.
    pub fn new() -> Self {
        Self::default()
    }

    /// Adds a global header, usually used for permissions or legal notices.
    ///
    /// ```
    /// use robotxt::RobotsBuilder;
    ///
    /// let txt = RobotsBuilder::default()
    ///     .header("Note: Stop right there!")
    ///     .group(["*"], |u| u.disallow("/"))
    ///     .group(["foobot"], |u| u.allow("/"));
    /// ```
    pub fn header(mut self, header: &str) -> Self {
        self.header = Some(header.to_owned());
        self
    }

    /// Adds a new `user-agent` group built from the provided list of
    /// user-agents; `factory` receives the fresh group and returns it
    /// with its rules attached.
    ///
    /// ```
    /// use robotxt::RobotsBuilder;
    ///
    /// let txt = RobotsBuilder::default()
    ///     .group(["*"], |u| u.disallow("/"))
    ///     .group(["foobot"], |u| u.allow("/"));
    /// ```
    pub fn group<'a>(
        mut self,
        group: impl IntoIterator<Item = &'a str>,
        factory: impl FnOnce(GroupBuilder) -> GroupBuilder,
    ) -> Self {
        let built = factory(GroupBuilder::from_iter(group));
        self.groups.push(built);
        self
    }

    /// Adds the `Sitemap` directive from the URL address.
    /// Duplicate URLs are stored only once.
    ///
    /// ```
    /// use url::Url;
    /// use robotxt::RobotsBuilder;
    ///
    /// let txt = RobotsBuilder::default()
    ///     .sitemap("https://example.com/sitemap_1.xml".try_into().unwrap())
    ///     .sitemap("https://example.com/sitemap_1.xml".try_into().unwrap());
    /// ```
    pub fn sitemap(mut self, sitemap: Url) -> Self {
        self.sitemaps.insert(sitemap);
        self
    }

    /// Adds a global footer, usually used for notices.
    ///
    /// ```
    /// use robotxt::RobotsBuilder;
    ///
    /// let txt = RobotsBuilder::default()
    ///     .group(["*"], |u| u.disallow("/"))
    ///     .group(["foobot"], |u| u.allow("/"))
    ///     .footer("Note: Have a nice day!");
    /// ```
    pub fn footer(mut self, footer: &str) -> Self {
        self.footer = Some(footer.to_owned());
        self
    }

    /// Parses the constructed output.
    /// See [`Robots::from_bytes`].
    ///
    /// [`Robots::from_bytes`]: crate::Robots::from_bytes
    #[cfg(feature = "parser")]
    #[cfg_attr(docsrs, doc(cfg(feature = "parser")))]
    pub fn parse(&self, user_agent: &str) -> crate::Robots {
        crate::Robots::from_bytes(self.to_string().as_bytes(), user_agent)
    }
}

impl fmt::Display for RobotsBuilder {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let header = self.header.as_ref().map(|h| format_comment(h));
let footer = self.footer.as_ref().map(|f| format_comment(f));

let groups = self.groups.iter().map(|u| u.to_string());
let groups = groups.collect::<Vec<_>>().join("\n\n");

let result = [header, Some(groups), footer];
let result = result.iter().filter_map(|u| u.clone());
let result = result.collect::<Vec<_>>().join("\n\n");
write!(f, "{}", result.as_str())
}
}

// NOTE(review): this region appears garbled by the diff/page extraction —
// the module-level `pub use` re-exports and the `format_comment` helper
// (which belong at the top level of `exclusion/build/mod.rs`) are
// interleaved inside the body of the `readme()` test, so this block does
// not compile as shown. Reconstruct from the original file before relying
// on it; bytes below are preserved verbatim.
#[cfg(test)]
mod builder {
use crate::{Result, RobotsBuilder};

#[test]
fn readme() -> Result<()> {
let txt = RobotsBuilder::default()
.header("Robots.txt: Start")
.group(["foobot"], |u| {
u.crawl_delay(5)
.header("Rules for Foobot: Start")
.allow("/example/yeah.txt")
.disallow("/example/nope.txt")
.footer("Rules for Foobot: End")
})
.group(["barbot", "nombot"], |u| {
u.crawl_delay(2)
.disallow("/example/yeah.txt")
.disallow("/example/nope.txt")
})
.sitemap("https://example.com/sitemap_1.xml".try_into()?)
.sitemap("https://example.com/sitemap_2.xml".try_into()?)
.footer("Robots.txt: End");

// NOTE(review): the following items are foreign to this test body
// (scrape interleaving) — see the note at the top of this block.
pub use builder::*;
pub use group::*;

/// Splits multiline comments into lines and prefixes them with `#`.
pub(crate) fn format_comment(txt: &str) -> String {
txt.lines()
.map(|txt| txt.trim())
.filter(|txt| !txt.is_empty())
.map(|txt| {
if txt.starts_with('#') {
txt.to_owned()
} else {
format!("# {txt}")
}
})
.collect::<Vec<_>>()
.join("\n")
println!("{}", txt.to_string());
Ok(())
}
}
Loading

0 comments on commit f9e3dc1

Please sign in to comment.