Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Using tree-sitter-md and visitor pattern #28

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,17 @@ debug = true
opt-level = 3
debug = false

[build-dependencies]
cc="*"

[dependencies]
bon = "2.3.0"
cached = "0.53.1"
clap = { version = "4.5.16", features = ["derive"] }
derive_more = { version = "1.0.0", features = ["full"] }
env_logger = "0.11.5"
fuzzy-matcher = "0.3.7"
getset = "0.1.3"
hashbrown = "0.15.0"
indicatif = "0.17.8"
itertools = "0.13.0"
lazy_static = "1.5.0"
Expand All @@ -29,6 +33,8 @@ serde = { version = "1.0.208", features = ["derive"] }
serde_yaml = "0.9.34"
thiserror = "1.0.63"
toml = "0.8.19"
tree-sitter = "0.23.2"
tree-sitter-md = { version = "0.3.2", features = ["parser"] }
walkdir = "2.5.0"

[dev-dependencies]
Expand Down Expand Up @@ -72,3 +78,4 @@ too_many_lines = "allow"
missing_errors_doc = "allow"
similar_names = "allow"
missing_panics_doc = "allow"
module_name_repetitions = "allow"
3 changes: 3 additions & 0 deletions Justfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Runs tests and suppresses the progress bar
test:
RUNNING_TESTS=true cargo test
54 changes: 31 additions & 23 deletions src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ use crate::{
content::wikilink::Alias,
name::{Filename, FilenameLowercase},
},
sed::{ReplacePair, ReplacePairError},
rules::ErrorCode,
sed::{ReplacePair, ReplacePairCompilationError},
};
use bon::Builder;
use clap::Parser;
Expand All @@ -17,13 +18,15 @@ use toml;

/// Errors derived from config file reads
#[derive(thiserror::Error, Debug)]
pub enum Error {
pub enum NewConfigError {
#[error("The config file at {path} does not exist")]
FileDoesNotExistError { path: PathBuf },
#[error("Failed to read the config file")]
FileDoesNotReadError(#[from] io::Error),
#[error("The config file does not have expected values")]
FileDoesNotParseError(#[from] toml::de::Error),
#[error("ReplacePair compilation error")]
ReplacePairCompilationError(#[from] ReplacePairCompilationError),
}

/// Config which contains both the cli and the config file
Expand All @@ -39,9 +42,6 @@ pub struct Config {
/// See [`self::cli::Config::boundary_pattern`]
#[builder(default=r"___".to_owned())]
pub boundary_pattern: String,
/// See [`self::cli::Config::wikilink_pattern`]
#[builder(default=r"\[\[([A-Za-z0-9_/\- ]+)]]|(?:(\s|^))#([A-Za-z0-9_]+)".to_owned())]
pub wikilink_pattern: String,
/// See [`self::cli::Config::filename_spacing_pattern`]
#[builder(default=r"-|_|\s".to_owned())]
pub filename_spacing_pattern: String,
Expand All @@ -50,13 +50,13 @@ pub struct Config {
pub filename_match_threshold: i64,
/// See [`self::cli::Config::exclude`]
#[builder(default=vec![])]
pub exclude: Vec<String>,
pub exclude: Vec<ErrorCode>,
/// See [`self::file::Config::filename_to_alias`]
#[builder(default=Ok(ReplacePair::new(r"___", r"/").expect("Constant")))]
pub filename_to_alias: Result<ReplacePair<Filename, Alias>, ReplacePairError>,
#[builder(default=ReplacePair::new(r"___", r"/").expect("Constant"))]
pub filename_to_alias: ReplacePair<Filename, Alias>,
/// See [`self::file::Config::alias_to_filename`]
#[builder(default=Ok(ReplacePair::new(r"/", r"___").expect("Constant")))]
pub alias_to_filename: Result<ReplacePair<Alias, FilenameLowercase>, ReplacePairError>,
#[builder(default=ReplacePair::new(r"/", r"___").expect("Constant"))]
pub alias_to_filename: ReplacePair<Alias, FilenameLowercase>,
}

/// Things which implement the partial config trait
Expand All @@ -67,33 +67,41 @@ pub trait Partial {
fn directories(&self) -> Option<Vec<PathBuf>>;
fn ngram_size(&self) -> Option<usize>;
fn boundary_pattern(&self) -> Option<String>;
fn wikilink_pattern(&self) -> Option<String>;
fn filename_spacing_pattern(&self) -> Option<String>;
fn filename_match_threshold(&self) -> Option<i64>;
fn exclude(&self) -> Option<Vec<String>>;
fn filename_to_alias(&self) -> Option<Result<ReplacePair<Filename, Alias>, ReplacePairError>>;
fn exclude(&self) -> Option<Vec<ErrorCode>>;
fn filename_to_alias(
&self,
) -> Option<Result<ReplacePair<Filename, Alias>, ReplacePairCompilationError>>;
fn alias_to_filename(
&self,
) -> Option<Result<ReplacePair<Alias, FilenameLowercase>, ReplacePairError>>;
) -> Option<Result<ReplacePair<Alias, FilenameLowercase>, ReplacePairCompilationError>>;
}

/// Now we implement a combine function for partial configs which
/// iterates over the partials and if they have a Some field they use that field in the final
/// config.
///
/// Note: This makes last elements in the input slice first priority
fn combine_partials(partials: &[&dyn Partial]) -> Config {
Config::builder()
fn combine_partials(partials: &[&dyn Partial]) -> Result<Config, ReplacePairCompilationError> {
Ok(Config::builder()
.maybe_directories(partials.iter().find_map(|p| p.directories()))
.maybe_ngram_size(partials.iter().find_map(|p| p.ngram_size()))
.maybe_boundary_pattern(partials.iter().find_map(|p| p.boundary_pattern()))
.maybe_wikilink_pattern(partials.iter().find_map(|p| p.wikilink_pattern()))
.maybe_filename_spacing_pattern(partials.iter().find_map(|p| p.filename_spacing_pattern()))
.maybe_filename_match_threshold(partials.iter().find_map(|p| p.filename_match_threshold()))
.maybe_exclude(partials.iter().find_map(|p| p.exclude()))
.maybe_filename_to_alias(partials.iter().find_map(|p| p.filename_to_alias()))
.maybe_alias_to_filename(partials.iter().find_map(|p| p.alias_to_filename()))
.build()
.maybe_filename_to_alias(match partials.iter().find_map(|p| p.filename_to_alias()) {
Some(Ok(pair)) => Some(pair),
Some(Err(e)) => return Err(e),
None => None,
})
.maybe_alias_to_filename(match partials.iter().find_map(|p| p.alias_to_filename()) {
Some(Ok(pair)) => Some(pair),
Some(Err(e)) => return Err(e),
None => None,
})
.build())
}

impl Config {
Expand All @@ -105,7 +113,7 @@ impl Config {
/// - [`NewConfigError::FileDoesNotParseError`] - Config file does not parse from toml into the
/// expected format
///
pub fn new() -> Result<Self, Error> {
pub fn new() -> Result<Self, NewConfigError> {
let cli = cli::Config::parse();

// If the config file doesn't exist, and it's not the default, error out
Expand All @@ -115,12 +123,12 @@ impl Config {
Err(report) => Err(report)?,
}
} else {
Err(Error::FileDoesNotExistError {
Err(NewConfigError::FileDoesNotExistError {
path: cli.config_path.clone(),
})?
};

// CLI has priority over file by being last
Ok(combine_partials(&[&file, &cli]))
combine_partials(&[&file, &cli]).map_err(derive_more::Into::into)
}
}
21 changes: 8 additions & 13 deletions src/config/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ use crate::{
content::wikilink::Alias,
name::{Filename, FilenameLowercase},
},
sed::{ReplacePair, ReplacePairError},
rules::ErrorCode,
sed::{ReplacePair, ReplacePairCompilationError},
};

use super::Partial;
Expand All @@ -33,11 +34,6 @@ pub(super) struct Config {
#[clap(short = 'b', long = "bound")]
pub boundary_pattern: Option<String>,

/// Regex pattern for wikilinks
/// Capture group 0 is skipped to enable lookbehind
#[clap(short = 'w', long = "wikilink")]
pub wikilink_pattern: Option<String>,

/// Regex pattern to split filenames on, like ___ or /
#[clap(short = 's', long = "space")]
pub filename_spacing_pattern: Option<String>,
Expand Down Expand Up @@ -67,29 +63,28 @@ impl Partial for Config {
fn boundary_pattern(&self) -> Option<String> {
self.boundary_pattern.clone()
}
fn wikilink_pattern(&self) -> Option<String> {
self.wikilink_pattern.clone()
}
fn filename_spacing_pattern(&self) -> Option<String> {
self.filename_spacing_pattern.clone()
}
fn filename_match_threshold(&self) -> Option<i64> {
self.filename_match_threshold
}
fn exclude(&self) -> Option<Vec<String>> {
fn exclude(&self) -> Option<Vec<ErrorCode>> {
let out = self.exclude.clone();
if out.is_empty() {
None
} else {
Some(out)
Some(out.into_iter().map(ErrorCode::new).collect())
}
}
fn filename_to_alias(&self) -> Option<Result<ReplacePair<Filename, Alias>, ReplacePairError>> {
fn filename_to_alias(
&self,
) -> Option<Result<ReplacePair<Filename, Alias>, ReplacePairCompilationError>> {
None
}
fn alias_to_filename(
&self,
) -> Option<Result<ReplacePair<Alias, FilenameLowercase>, ReplacePairError>> {
) -> Option<Result<ReplacePair<Alias, FilenameLowercase>, ReplacePairCompilationError>> {
None
}
}
50 changes: 25 additions & 25 deletions src/config/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@ use crate::{
content::wikilink::Alias,
name::{Filename, FilenameLowercase},
},
sed::{ReplacePair, ReplacePairError},
rules::ErrorCode,
sed::{ReplacePair, ReplacePairCompilationError},
};

use super::{Error, Partial};
use super::{NewConfigError, Partial};

#[derive(Serialize, Deserialize, Debug, Default)]
pub(super) struct Config {
Expand Down Expand Up @@ -54,9 +55,10 @@ pub(super) struct Config {
}

impl Config {
pub fn new(path: &Path) -> Result<Self, Error> {
let contents = std::fs::read_to_string(path).map_err(Error::FileDoesNotReadError)?;
toml::from_str(&contents).map_err(Error::FileDoesNotParseError)
pub fn new(path: &Path) -> Result<Self, NewConfigError> {
let contents =
std::fs::read_to_string(path).map_err(NewConfigError::FileDoesNotReadError)?;
toml::from_str(&contents).map_err(NewConfigError::FileDoesNotParseError)
}
}

Expand All @@ -78,10 +80,6 @@ impl Partial for Config {
self.boundary_pattern.clone()
}

fn wikilink_pattern(&self) -> Option<String> {
self.wikilink_pattern.clone()
}

fn filename_spacing_pattern(&self) -> Option<String> {
self.filename_spacing_pattern.clone()
}
Expand All @@ -90,42 +88,44 @@ impl Partial for Config {
self.filename_match_threshold
}

fn exclude(&self) -> Option<Vec<String>> {
fn exclude(&self) -> Option<Vec<ErrorCode>> {
let out = self.exclude.clone();
if out.is_empty() {
None
} else {
Some(out)
Some(out.into_iter().map(ErrorCode::new).collect())
}
}

fn alias_to_filename(
&self,
) -> Option<Result<ReplacePair<Alias, FilenameLowercase>, ReplacePairError>> {
) -> Option<Result<ReplacePair<Alias, FilenameLowercase>, ReplacePairCompilationError>> {
let (to, from) = self.alias_to_filename.clone();
match (to.is_empty(), from.is_empty()) {
(true, true) => None,
(false, false) => Some(ReplacePair::new(&to, &from)),
(true, false) => Some(Err(ReplacePairError::ToError(regex::Error::Syntax(
"To is empty".to_string(),
)))),
(false, true) => Some(Err(ReplacePairError::FromError(regex::Error::Syntax(
"From is empty".to_string(),
)))),
(true, false) => Some(Err(ReplacePairCompilationError::ToError(
regex::Error::Syntax("To is empty".to_string()),
))),
(false, true) => Some(Err(ReplacePairCompilationError::FromError(
regex::Error::Syntax("From is empty".to_string()),
))),
}
}

fn filename_to_alias(&self) -> Option<Result<ReplacePair<Filename, Alias>, ReplacePairError>> {
fn filename_to_alias(
&self,
) -> Option<Result<ReplacePair<Filename, Alias>, ReplacePairCompilationError>> {
let (to, from) = self.alias_to_filename.clone();
match (to.is_empty(), from.is_empty()) {
(true, true) => None,
(false, false) => Some(ReplacePair::new(&to, &from)),
(true, false) => Some(Err(ReplacePairError::ToError(regex::Error::Syntax(
"To is empty".to_string(),
)))),
(false, true) => Some(Err(ReplacePairError::FromError(regex::Error::Syntax(
"From is empty".to_string(),
)))),
(true, false) => Some(Err(ReplacePairCompilationError::ToError(
regex::Error::Syntax("To is empty".to_string()),
))),
(false, true) => Some(Err(ReplacePairCompilationError::FromError(
regex::Error::Syntax("From is empty".to_string()),
))),
}
}
}
18 changes: 1 addition & 17 deletions src/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,11 @@ use thiserror::Error;

use std;

use crate::sed::RegexError;

pub mod content;
pub mod name;

/// Walk the directories and get just the files
pub fn get_files(dirs: Vec<PathBuf>) -> Vec<PathBuf> {
pub fn get_files(dirs: &Vec<PathBuf>) -> Vec<PathBuf> {
let mut out = Vec::new();
for path in dirs {
let walk = WalkDir::new(path);
Expand All @@ -24,17 +22,3 @@ pub fn get_files(dirs: Vec<PathBuf>) -> Vec<PathBuf> {
}
out
}

/// A bunch of bad things can happen while you're reading files,
/// This covers most of them.
#[derive(Debug, Error)]
pub enum Error {
#[error("Error reading the file.")]
IoError(#[from] std::io::Error),
#[error("Error parsing the yaml based on expected template.")]
SerdeError(#[from] serde_yaml::Error),
#[error("Found duplicate property {0} in file contents")]
DuplicateProperty(String),
#[error("Regex error: {0}")]
RegexError(#[from] RegexError),
}
Loading
Loading