From f41cea45816e745e169eb9ccfce88dc0a170b386 Mon Sep 17 00:00:00 2001 From: Jaap Frolich Date: Fri, 6 Sep 2024 15:16:12 +0200 Subject: [PATCH 1/6] embed lang - kickoff --- README.md | 85 ++++++++++++++++++++++++++++------------ src/build/build_types.rs | 10 +++++ 2 files changed, 69 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index 67ce4187..b873cd54 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ # Info -Rewatch is an alternative build system for the [Rescript Compiler](https://rescript-lang.org/) (which uses a combination of Ninja, OCaml and a Node.js script). It strives to deliver consistent and faster builds in monorepo setups. Bsb doesn't support a watch-mode in a monorepo setup, and when setting up a watcher that runs a global incremental compile it's consistent but very inefficient and thus slow. +Rewatch is an alternative build system for the [Rescript Compiler](https://rescript-lang.org/) (which uses a combination of Ninja, OCaml and a Node.js script). It strives to deliver consistent and faster builds in monorepo setups. Bsb doesn't support a watch-mode in a monorepo setup, and when setting up a watcher that runs a global incremental compile it's consistent but very inefficient and thus slow. We couldn't find a way to improve this without re-architecting the whole build system. The benefit of having a specialized build system is that it's possible to completely tailor it to ReScript and not being dependent of the constraints of a generic build system like Ninja. This allowed us to have significant performance improvements even in non-monorepo setups (30% to 3x improvements reported). @@ -14,27 +14,27 @@ This project should be considered in beta status. We run it in production at [Wa # Usage - 1. Install the package +1. Install the package - ``` - yarn add @rolandpeelen/rewatch - ``` +``` +yarn add @rolandpeelen/rewatch +``` - 2. Build / Clean / Watch +2. Build / Clean / Watch - ``` - yarn rewatch build - ``` +``` +yarn rewatch build +``` - ``` - yarn rewatch clean - ``` +``` +yarn rewatch clean +``` - ``` - yarn rewatch watch - ``` +``` +yarn rewatch watch +``` - You can pass in the folder as the second argument where the 'root' `bsconfig.json` lives. If you encounter a 'stale build error', either directly, or after a while, a `clean` may be needed to clean up some old compiler assets. +You can pass in the folder as the second argument where the 'root' `bsconfig.json` lives. If you encounter a 'stale build error', either directly, or after a while, a `clean` may be needed to clean up some old compiler assets. ## Full Options @@ -67,7 +67,7 @@ Options: -c, --create-sourcedirs This creates a source_dirs.json file at the root of the monorepo, which is needed when you want to use Reanalyze - + [possible values: true, false] --compiler-args @@ -88,16 +88,49 @@ Options: # Contributing - Pre-requisites: +Pre-requisites: + +- [Rust](https://rustup.rs/) +- [NodeJS](https://nodejs.org/en/) - For running testscripts only +- [Yarn](https://yarnpkg.com/) or [Npm](https://www.npmjs.com/) - Npm probably comes with your node installation + +1. `cd testrepo && yarn` (install dependencies for submodule) +2. `cargo run` - - [Rust](https://rustup.rs/) - - [NodeJS](https://nodejs.org/en/) - For running testscripts only - - [Yarn](https://yarnpkg.com/) or [Npm](https://www.npmjs.com/) - Npm probably comes with your node installation +Running tests: - 1. `cd testrepo && yarn` (install dependencies for submodule) - 2. `cargo run` +1. `cargo build --release` +2. `./tests/suite.sh` - Running tests: +### embed-lang - 1. `cargo build --release` - 2. `./tests/suite.sh` +- Parse -> MyModule.res + -> Rescript parser would generate (pass the embeds) + ``` + MyModule.embeds + [ + { + "name": "MyModule.graphql.0.embeds.res", + "content": "query MyQuery { ... }" + "hash": "123" <- fast hash of the rescript file + }, + { + "name": "MyModule.graphql.1.embeds.res", + "content": "query MyQuery { ... }" + } + ] + ``` + - after parsing everything, track all the embeds in the build state + - remove the embeds that are extraneous + - read the first line of the embed -> and mark dirty or not && see if rescript file is there + - track the embeds in the compiler state +- Run the embeds + - run the dirty embeds + STDIN -> rescript-code -> STDOUT / STDERR (exit code) + -> /lib/ocaml/**generated**/MyModule.graphql.0.res + -> /lib/ocaml/**generated**/MyModule.graphql.1.res + +-> Determine the dependency tree (and add the embeds as deps) +-> Run compiler + +-> Profit diff --git a/src/build/build_types.rs b/src/build/build_types.rs index 4dc6cfad..6a7554b2 100644 --- a/src/build/build_types.rs +++ b/src/build/build_types.rs @@ -35,10 +35,20 @@ pub struct Implementation { pub parse_dirty: bool, } +#[derive(Debug, Clone, PartialEq)] +pub struct Embed { + pub tag: String, + pub file_path: String, + pub content: String, + pub hash: String, + pub dirty: bool, +} + #[derive(Debug, Clone, PartialEq)] pub struct SourceFile { pub implementation: Implementation, pub interface: Option, + pub embeds: Vec, } #[derive(Debug, Clone, PartialEq)] From 868e57b74be78f1c5846ddf8b9a481b9d2c1fea7 Mon Sep 17 00:00:00 2001 From: Jaap Frolich Date: Fri, 13 Sep 2024 11:58:36 +0200 Subject: [PATCH 2/6] progress --- README.md | 18 +++++ src/bsconfig.rs | 10 +++ src/build.rs | 1 + src/build/build_types.rs | 46 ++++++++++- src/build/packages.rs | 8 ++ src/build/parse.rs | 171 +++++++++++++++++++++++++++++++++++++++ src/helpers.rs | 9 ++- 7 files changed, 255 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index b873cd54..714c28cc 100644 --- a/README.md +++ b/README.md @@ -130,7 +130,25 @@ Running tests: -> /lib/ocaml/**generated**/MyModule.graphql.0.res -> /lib/ocaml/**generated**/MyModule.graphql.1.res +-> Parse the outputs of the embeds -> Determine the dependency tree (and add the embeds as deps) -> Run compiler +#### configuration of embeds + +- bsconfig.json + +```json + { + "embed-generators": [ + { + "name": "graphql", + "tags": ["graphql"], + "path": "./path/to/graphql/embed" + "package": "my-generator-package" + } + ] + } +``` + -> Profit diff --git a/src/bsconfig.rs b/src/bsconfig.rs index 3abcf406..45d2dce9 100644 --- a/src/bsconfig.rs +++ b/src/bsconfig.rs @@ -157,6 +157,9 @@ pub struct Config { pub namespace: Option, pub jsx: Option, pub uncurried: Option, + #[serde(rename = "embed-generators")] + pub embed_generators: Option>, + // this is a new feature of rewatch, and it's not part of the bsconfig.json spec #[serde(rename = "namespace-entry")] pub namespace_entry: Option, @@ -164,6 +167,13 @@ pub struct Config { #[serde(rename = "allowed-dependents")] pub allowed_dependents: Option>, } +#[derive(Deserialize, Debug, Clone)] +pub struct EmbedGenerator { + pub name: String, + pub tags: Vec, + pub path: String, + pub package: Option, +} /// This flattens string flags pub fn flatten_flags(flags: &Option>>) -> Vec { diff --git a/src/build.rs b/src/build.rs index aece691c..c5c82098 100644 --- a/src/build.rs +++ b/src/build.rs @@ -278,6 +278,7 @@ pub fn incremental_build( let timing_ast = Instant::now(); let result_asts = parse::generate_asts(build_state, || pb.inc(1)); + let result_asts = parse::generate_asts(build_state, || pb.inc(1)); let timing_ast_elapsed = timing_ast.elapsed(); match result_asts { diff --git a/src/build/build_types.rs b/src/build/build_types.rs index 6a7554b2..c5904f83 100644 --- a/src/build/build_types.rs +++ b/src/build/build_types.rs @@ -1,5 +1,6 @@ use crate::build::packages::{Namespace, Package}; use ahash::{AHashMap, AHashSet}; +use serde::Deserialize; use std::time::SystemTime; #[derive(Debug, Clone, PartialEq)] @@ -35,11 +36,48 @@ pub struct Implementation { pub parse_dirty: bool, } +#[derive(Deserialize, Debug, Clone, PartialEq)] +pub struct Location { + line: u32, + col: u32, +} + +#[derive(Deserialize, Debug, Clone, PartialEq)] +pub struct EmbedLoc { + start: Location, + end: Location, +} + +// example of the *.embeds.json file +// [ +// { +// "tag": "sql.one", +// "filename": "Tst__sql_one_1.res", +// "contents": "\n SELECT * FROM tst.res\n WHERE id = 1\n", +// "loc": {"start": {"line": 1, "col": 22}, "end": {"line": 4, "col": 64}} +// }, +// { +// "tag": "sql.many", +// "filename": "Tst__sql_many_1.res", +// "contents": "\n SELECT * FROM tst.res\n WHERE id > 1\n", +// "loc": {"start": {"line": 6, "col": 86}, "end": {"line": 9, "col": 128}} +// }, +// { +// "tag": "sql.one", +// "filename": "Tst__sql_one_2.res", +// "contents": + +#[derive(Deserialize, Debug, Clone, PartialEq)] +pub struct EmbedJsonData { + pub tag: String, + pub filename: String, + pub contents: String, + pub loc: EmbedLoc, +} + #[derive(Debug, Clone, PartialEq)] pub struct Embed { - pub tag: String, - pub file_path: String, - pub content: String, + pub embed: EmbedJsonData, pub hash: String, pub dirty: bool, } @@ -48,7 +86,7 @@ pub struct Embed { pub struct SourceFile { pub implementation: Implementation, pub interface: Option, - pub embeds: Vec, + pub embeds: Vec, // Added embeds field } #[derive(Debug, Clone, PartialEq)] diff --git a/src/build/packages.rs b/src/build/packages.rs index b5b3ec10..f8c072b3 100644 --- a/src/build/packages.rs +++ b/src/build/packages.rs @@ -50,6 +50,7 @@ pub struct Package { pub name: String, pub bsconfig: bsconfig::Config, pub source_folders: AHashSet, + pub generated_file_folder: PathBuf, // these are the relative file paths (relative to the package root) pub source_files: Option>, pub namespace: Namespace, @@ -381,6 +382,11 @@ fn make_package( source_files: None, namespace: bsconfig.get_namespace(), modules: None, + generated_file_folder: match &bsconfig.sources { + bsconfig::OneOrMore::Single(source) => PathBuf::from(bsconfig::to_qualified_without_children(source, None).dir).join("__generated__"), + bsconfig::OneOrMore::Multiple(sources) if !sources.is_empty() => PathBuf::from(bsconfig::to_qualified_without_children(&sources[0], None).dir).join("__generated__"), + _ => panic!("Error: Invalid or empty sources configuration in bsconfig.json. Please ensure at least one valid source is specified."), + }, // we canonicalize the path name so it's always the same path: PathBuf::from(package_path) .canonicalize() @@ -652,6 +658,7 @@ pub fn parse_packages(build_state: &mut BuildState) { parse_dirty: true, }, interface: None, + embeds: vec![], }), deps: AHashSet::new(), dependents: AHashSet::new(), @@ -705,6 +712,7 @@ pub fn parse_packages(build_state: &mut BuildState) { last_modified: metadata.modified, parse_dirty: true, }), + embeds: vec![], }), deps: AHashSet::new(), dependents: AHashSet::new(), diff --git a/src/build/parse.rs b/src/build/parse.rs index 678a4a35..de6bcebe 100644 --- a/src/build/parse.rs +++ b/src/build/parse.rs @@ -10,6 +10,7 @@ use log::debug; use rayon::prelude::*; use std::path::{Path, PathBuf}; use std::process::Command; +use std::time::SystemTime; pub fn generate_asts( build_state: &mut BuildState, @@ -80,6 +81,15 @@ pub fn generate_asts( ) }; + // After generating ASTs, handle embeds + // Process embeds for the source file + if let Err(err) = + process_embeds(build_state, package, source_file, &build_state.workspace_root) + { + has_failure = true; + stderr.push_str(&err); + } + (module_name.to_owned(), ast_result, iast_result, dirty) } } @@ -370,6 +380,167 @@ fn path_to_ast_extension(path: &Path) -> &str { } } +// Function to process embeds +fn process_embeds( + build_state: &mut BuildState, + package: &packages::Package, + source_file: &mut SourceFile, + workspace_root: &Option, +) -> Result<(), String> { + let source_file_path = &source_file.implementation.path; + + let ast_path = Path::new(&package.get_ast_path(&source_file.implementation.path)); + let embeds_json_path = ast_path.with_extension("embeds.json"); + + // Read and parse the embeds JSON file + if embeds_json_path.exists() { + let embeds_json = helpers::read_file(&embeds_json_path).map_err(|e| e.to_string())?; + let embeds_data: Vec = + serde_json::from_str(&embeds_json).map_err(|e| e.to_string())?; + + // Process each embed + let embeds = embeds_data + .into_iter() + .map(|embed_data| { + let embed_path = package.generated_file_folder.join(&embed_data.filename); + let hash = helpers::compute_string_hash(&embed_data.contents); + let dirty = is_embed_dirty(&embed_path, &embed_data, &hash.to_string()); + // embed_path is the path of the generated rescript file, let's add this path to the build state + // Add the embed_path as a rescript source file to the build state + let relative_path = Path::new(&embed_path) + .strip_prefix(&package.path) + .unwrap() + .to_string_lossy(); + let module_name = helpers::file_path_to_module_name(&relative_path, &package.namespace); + let last_modified = std::fs::metadata(&embed_path) + .and_then(|metadata| metadata.modified()) + .unwrap_or(SystemTime::now()); + + if dirty { + // run the embed file + // Find the embed generator based on the tag + if let Some(embed_generator) = + package.bsconfig.embed_generators.as_ref().and_then(|generators| { + generators.iter().find(|gen| gen.tags.contains(&embed_data.tag)) + }) + { + // Prepare the command + // let mut command = if let Some(package_name) = &embed_generator.package { + // let node_modules_path = workspace_root + // .as_ref() + // .map(|root| Path::new(root).join("node_modules")) + // .unwrap_or_else(|| Path::new(&package.path).join("node_modules")); + // let generator_path = + // node_modules_path.join(package_name).join(&embed_generator.path); + // Command::new("node").arg(generator_path) + // } else { + // Command::new(&embed_generator.path) + // }; + let mut command = Command::new(&embed_generator.path); + + // Run the embed generator + let output = command + .stdin(std::process::Stdio::piped()) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::piped()) + .spawn() + .and_then(|mut child| { + use std::io::Write; + let contents = format!("{}\n{}", embed_data.tag, embed_data.contents); + child.stdin.as_mut().unwrap().write_all(contents.as_bytes())?; + child.wait_with_output() + }) + .map_err(|e| format!("Failed to run embed generator: {}", e))?; + + if !output.status.success() { + return Err(format!( + "Embed generator failed: {}", + String::from_utf8_lossy(&output.stderr) + )); + } + + // Write the output to the embed file + std::fs::write(&embed_path, output.stdout) + .map_err(|e| format!("Failed to write embed file: {}", e))?; + } else { + return Err(format!("No embed generator found for tag: {}", embed_data.tag)); + } + } + if !build_state.modules.contains_key(&module_name) { + let implementation = Implementation { + path: relative_path.to_string(), + parse_state: ParseState::Pending, + compile_state: CompileState::Pending, + last_modified, + parse_dirty: true, + }; + + let source_file = SourceFile { + implementation, + interface: None, + embeds: Vec::new(), + }; + + let module = Module { + source_type: SourceType::SourceFile(source_file), + deps: AHashSet::new(), + dependents: AHashSet::new(), + package_name: package.name.clone(), + compile_dirty: true, + last_compiled_cmi: None, + last_compiled_cmt: None, + }; + + build_state.insert_module(&module_name, module); + } else if dirty { + if let Some(module) = build_state.modules.get_mut(&module_name) { + if let SourceType::SourceFile(source_file) = &mut module.source_type { + source_file.implementation.parse_dirty = true; + } + } + } + + Ok(Embed { + hash: hash.to_string(), + embed: embed_data, + dirty, + }) + }) + .collect::>>(); + + // Update the source file's embeds + source_file.embeds = embeds.into_iter().filter_map(|result| result.ok()).collect(); + } + + Ok(()) +} + +fn is_embed_dirty(embed_path: &Path, embed_data: &EmbedJsonData, hash: &str) -> bool { + // Check if the embed file exists and compare hashes + // the first line of the generated rescript file is a comment with the following format: + // "// HASH: " + // if the hash is different from the hash in the embed_data, the embed is dirty + // if the file does not exist, the embed is dirty + // if the file exists but the hash is not present, the embed is dirty + // if the file exists but the hash is present but different from the hash in the embed_data, the embed is dirty + // if the file exists but the hash is present and the same as the hash in the embed_data, the embed is not dirty + if !embed_path.exists() { + return true; + } + + let first_line = match helpers::read_file(embed_path) { + Ok(contents) => contents.lines().next().unwrap_or("").to_string(), + Err(_) => return true, + }; + + if !first_line.starts_with("// HASH: ") { + return true; + } + + let file_hash = first_line.trim_start_matches("// HASH: "); + file_hash != hash +} + fn include_ppx(flag: &str, contents: &str) -> bool { if flag.contains("bisect") { return std::env::var("BISECT_ENABLE").is_ok(); diff --git a/src/helpers.rs b/src/helpers.rs index 0202f111..4584ef40 100644 --- a/src/helpers.rs +++ b/src/helpers.rs @@ -282,11 +282,12 @@ pub fn format_namespaced_module_name(module_name: &str) -> String { } } +pub fn compute_string_hash(str: &str) -> blake3::Hash { + blake3::hash(str.as_bytes()) +} + pub fn compute_file_hash(path: &str) -> Option { - match fs::read(path) { - Ok(str) => Some(blake3::hash(&str)), - Err(_) => None, - } + fs::read(path).map(|bytes| blake3::hash(&bytes)).ok() } fn has_rescript_config(path: &Path) -> bool { From 25170a63a3e975ad59d19ca7e0db017b2d32f3a3 Mon Sep 17 00:00:00 2001 From: Jaap Frolich Date: Wed, 18 Sep 2024 10:11:03 +0200 Subject: [PATCH 3/6] make it compile --- src/build.rs | 11 +++++- src/build/parse.rs | 84 +++++++++++++++++++++++++++++++--------------- 2 files changed, 67 insertions(+), 28 deletions(-) diff --git a/src/build.rs b/src/build.rs index c5c82098..4879540a 100644 --- a/src/build.rs +++ b/src/build.rs @@ -278,7 +278,16 @@ pub fn incremental_build( let timing_ast = Instant::now(); let result_asts = parse::generate_asts(build_state, || pb.inc(1)); - let result_asts = parse::generate_asts(build_state, || pb.inc(1)); + let result_asts = match result_asts { + Ok(err) => { + let result_asts = parse::generate_asts(build_state, || pb.inc(1)); + match result_asts { + Ok(new_err) => Ok(err + &new_err), + Err(new_err) => Err(err + &new_err), + } + } + Err(err) => Err(err), + }; let timing_ast_elapsed = timing_ast.elapsed(); match result_asts { diff --git a/src/build/parse.rs b/src/build/parse.rs index de6bcebe..cf186cf0 100644 --- a/src/build/parse.rs +++ b/src/build/parse.rs @@ -81,15 +81,6 @@ pub fn generate_asts( ) }; - // After generating ASTs, handle embeds - // Process embeds for the source file - if let Err(err) = - process_embeds(build_state, package, source_file, &build_state.workspace_root) - { - has_failure = true; - stderr.push_str(&err); - } - (module_name.to_owned(), ast_result, iast_result, dirty) } } @@ -102,21 +93,23 @@ pub fn generate_asts( )>>() .into_iter() .for_each(|(module_name, ast_result, iast_result, is_dirty)| { - if let Some(module) = build_state.modules.get_mut(&module_name) { + let result = if let Some(module) = build_state.modules.get_mut(&module_name) { // if the module is dirty, mark it also compile_dirty // do NOT set to false if the module is not parse_dirty, it needs to keep // the compile_dirty flag if it was set before if is_dirty { + // module.compile_dirty = true; module.compile_dirty = true; } let package = build_state .packages .get(&module.package_name) .expect("Package not found"); + if let SourceType::SourceFile(ref mut source_file) = module.source_type { // We get Err(x) when there is a parse error. When it's Ok(_, Some( // stderr_warnings )), the outputs are warnings - match ast_result { + let ast_new_result = match ast_result { // In case of a pinned (internal) dependency, we want to keep on // propagating the warning with every compile. So we mark it as dirty for // the next round @@ -128,6 +121,14 @@ pub fn generate_asts( } logs::append(package, &stderr_warnings); stderr.push_str(&stderr_warnings); + + // // After generating ASTs, handle embeds + // // Process embeds for the source file + // if let Err(err) = process_embeds(build_state, package, &module_name) { + // has_failure = true; + // stderr.push_str(&err); + // } + Ok(()) } Ok((_path, Some(_))) | Ok((_path, None)) => { // If we do have stderr_warnings here, the file is not a pinned @@ -137,6 +138,7 @@ pub fn generate_asts( if let Some(interface) = source_file.interface.as_mut() { interface.parse_dirty = false; } + Ok(()) } Err(err) => { // Some compilation error @@ -145,12 +147,13 @@ pub fn generate_asts( logs::append(package, &err); has_failure = true; stderr.push_str(&err); + Err(()) } }; // We get Err(x) when there is a parse error. When it's Ok(_, Some(( _path, // stderr_warnings ))), the outputs are warnings - match iast_result { + let iast_new_result = match iast_result { // In case of a pinned (internal) dependency, we want to keep on // propagating the warning with every compile. So we mark it as dirty for // the next round @@ -161,6 +164,7 @@ pub fn generate_asts( } logs::append(package, &stderr_warnings); stderr.push_str(&stderr_warnings); + Ok(()) } Ok(Some((_, None))) | Ok(Some((_, Some(_)))) => { // If we do have stderr_warnings here, the file is not a pinned @@ -169,6 +173,7 @@ pub fn generate_asts( interface.parse_state = ParseState::Success; interface.parse_dirty = false; } + Ok(()) } Err(err) => { // Some compilation error @@ -179,13 +184,31 @@ pub fn generate_asts( logs::append(package, &err); has_failure = true; stderr.push_str(&err); + Err(()) } Ok(None) => { // The file had no interface file associated - () + Ok(()) } + }; + match (ast_new_result, iast_new_result) { + (Ok(()), Ok(())) => Ok(()), + _ => Err(()), + } + } else { + Err(()) + } + } else { + Err(()) + }; + match result { + Ok(()) => { + if let Err(err) = process_embeds(build_state, &module_name) { + has_failure = true; + stderr.push_str(&err); } - }; + } + Err(()) => (), } }); @@ -381,15 +404,16 @@ fn path_to_ast_extension(path: &Path) -> &str { } // Function to process embeds -fn process_embeds( - build_state: &mut BuildState, - package: &packages::Package, - source_file: &mut SourceFile, - workspace_root: &Option, -) -> Result<(), String> { - let source_file_path = &source_file.implementation.path; +fn process_embeds(build_state: &mut BuildState, module_name: &str) -> Result<(), String> { + let module = build_state.modules.get(module_name).unwrap(); + let package = build_state.packages.get(&module.package_name).unwrap(); + let source_file = match &module.source_type { + SourceType::SourceFile(source_file) => source_file, + _ => panic!("Module {} is not a source file", module_name), + }; - let ast_path = Path::new(&package.get_ast_path(&source_file.implementation.path)); + let ast_path_str = package.get_ast_path(&source_file.implementation.path); + let ast_path = Path::new(&ast_path_str); let embeds_json_path = ast_path.with_extension("embeds.json"); // Read and parse the embeds JSON file @@ -404,7 +428,7 @@ fn process_embeds( .map(|embed_data| { let embed_path = package.generated_file_folder.join(&embed_data.filename); let hash = helpers::compute_string_hash(&embed_data.contents); - let dirty = is_embed_dirty(&embed_path, &embed_data, &hash.to_string()); + let dirty = is_embed_dirty(&embed_path, &hash.to_string()); // embed_path is the path of the generated rescript file, let's add this path to the build state // Add the embed_path as a rescript source file to the build state let relative_path = Path::new(&embed_path) @@ -491,7 +515,8 @@ fn process_embeds( last_compiled_cmt: None, }; - build_state.insert_module(&module_name, module); + build_state.modules.insert(module_name.to_string(), module); + build_state.module_names.insert(module_name.to_string()); } else if dirty { if let Some(module) = build_state.modules.get_mut(&module_name) { if let SourceType::SourceFile(source_file) = &mut module.source_type { @@ -508,14 +533,19 @@ fn process_embeds( }) .collect::>>(); - // Update the source file's embeds - source_file.embeds = embeds.into_iter().filter_map(|result| result.ok()).collect(); + let module = build_state.modules.get_mut(module_name).unwrap(); + match module.source_type { + SourceType::SourceFile(ref mut source_file) => { + source_file.embeds = embeds.into_iter().filter_map(|result| result.ok()).collect(); + } + _ => (), + }; } Ok(()) } -fn is_embed_dirty(embed_path: &Path, embed_data: &EmbedJsonData, hash: &str) -> bool { +fn is_embed_dirty(embed_path: &Path, hash: &str) -> bool { // Check if the embed file exists and compare hashes // the first line of the generated rescript file is a comment with the following format: // "// HASH: " From 6a9198ffccffb01a10c45adc50119488dcc7fd94 Mon Sep 17 00:00:00 2001 From: Gabriel Nordeborn Date: Wed, 18 Sep 2024 15:22:07 +0200 Subject: [PATCH 4/6] pass embed flags to bsc --- src/bsconfig.rs | 16 ++++++++++++++++ src/build/parse.rs | 25 ++++++++----------------- 2 files changed, 24 insertions(+), 17 deletions(-) diff --git a/src/bsconfig.rs b/src/bsconfig.rs index 45d2dce9..f0fef7bf 100644 --- a/src/bsconfig.rs +++ b/src/bsconfig.rs @@ -260,6 +260,22 @@ fn namespace_from_package_name(package_name: &str) -> String { .to_case(Case::Pascal) } +pub fn get_embed_generators_bsc_flags(config: &Config) -> Vec { + config + .embed_generators + .as_ref() + .unwrap_or(&vec![]) + .iter() + .flat_map(|generator| { + generator + .tags + .iter() + .map(|tag| vec![format!("-embed"), tag.to_string()]) + }) + .collect::>>() + .concat() +} + impl Config { pub fn get_namespace(&self) -> packages::Namespace { let namespace_from_package = namespace_from_package_name(&self.name); diff --git a/src/build/parse.rs b/src/build/parse.rs index cf186cf0..980b4e90 100644 --- a/src/build/parse.rs +++ b/src/build/parse.rs @@ -308,6 +308,7 @@ pub fn parser_args( let jsx_mode_args = root_config.get_jsx_mode_args(); let uncurried_args = root_config.get_uncurried_args(version); let bsc_flags = bsconfig::flatten_flags(&config.bsc_flags); + let embed_flags = bsconfig::get_embed_generators_bsc_flags(&config); let file = "../../".to_string() + file; ( @@ -327,6 +328,7 @@ pub fn parser_args( ast_path.to_string(), file, ], + embed_flags, ] .concat(), ) @@ -431,10 +433,7 @@ fn process_embeds(build_state: &mut BuildState, module_name: &str) -> Result<(), let dirty = is_embed_dirty(&embed_path, &hash.to_string()); // embed_path is the path of the generated rescript file, let's add this path to the build state // Add the embed_path as a rescript source file to the build state - let relative_path = Path::new(&embed_path) - .strip_prefix(&package.path) - .unwrap() - .to_string_lossy(); + let relative_path = Path::new(&embed_path).to_string_lossy(); let module_name = helpers::file_path_to_module_name(&relative_path, &package.namespace); let last_modified = std::fs::metadata(&embed_path) .and_then(|metadata| metadata.modified()) @@ -448,18 +447,7 @@ fn process_embeds(build_state: &mut BuildState, module_name: &str) -> Result<(), generators.iter().find(|gen| gen.tags.contains(&embed_data.tag)) }) { - // Prepare the command - // let mut command = if let Some(package_name) = &embed_generator.package { - // let node_modules_path = workspace_root - // .as_ref() - // .map(|root| Path::new(root).join("node_modules")) - // .unwrap_or_else(|| Path::new(&package.path).join("node_modules")); - // let generator_path = - // node_modules_path.join(package_name).join(&embed_generator.path); - // Command::new("node").arg(generator_path) - // } else { - // Command::new(&embed_generator.path) - // }; + // TODO(embed) Needs to be relative to package root? Join with package path? let mut command = Command::new(&embed_generator.path); // Run the embed generator @@ -483,8 +471,11 @@ fn process_embeds(build_state: &mut BuildState, module_name: &str) -> Result<(), )); } + let generated_content = String::from_utf8_lossy(&output.stdout).into_owned(); + let generated_file_contents = format!("// HASH: {}\n{}", hash, generated_content); + // Write the output to the embed file - std::fs::write(&embed_path, output.stdout) + std::fs::write(&embed_path, generated_file_contents) .map_err(|e| format!("Failed to write embed file: {}", e))?; } else { return Err(format!("No embed generator found for tag: {}", embed_data.tag)); From e107d4446e69a632320f56e2c1c568c3fc5cb787 Mon Sep 17 00:00:00 2001 From: Gabriel Nordeborn Date: Wed, 18 Sep 2024 20:29:01 +0200 Subject: [PATCH 5/6] use json for communication between generators and rewatch --- src/build/build_types.rs | 29 +++++++++++++++++--- src/build/parse.rs | 58 +++++++++++++++++++++++++++++++--------- 2 files changed, 72 insertions(+), 15 deletions(-) diff --git a/src/build/build_types.rs b/src/build/build_types.rs index c5904f83..191edf3a 100644 --- a/src/build/build_types.rs +++ b/src/build/build_types.rs @@ -1,6 +1,6 @@ use crate::build::packages::{Namespace, Package}; use ahash::{AHashMap, AHashSet}; -use serde::Deserialize; +use serde::{Deserialize, Serialize}; use std::time::SystemTime; #[derive(Debug, Clone, PartialEq)] @@ -36,13 +36,13 @@ pub struct Implementation { pub parse_dirty: bool, } -#[derive(Deserialize, Debug, Clone, PartialEq)] +#[derive(Deserialize, Serialize, Debug, Clone, PartialEq)] pub struct Location { line: u32, col: u32, } -#[derive(Deserialize, Debug, Clone, PartialEq)] +#[derive(Deserialize, Serialize, Debug, Clone, PartialEq)] pub struct EmbedLoc { start: Location, end: Location, @@ -82,6 +82,29 @@ pub struct Embed { pub dirty: bool, } +#[derive(Serialize, Clone, PartialEq, Eq)] +pub struct EmbedGeneratorConfig { + pub tag: String, + pub content: String, + pub source_file_path: String, +} + +#[derive(Serialize, Deserialize, Debug)] +pub struct EmbedGeneratorResponseOk { + pub content: String, +} + +#[derive(Serialize, Deserialize, Debug)] +pub struct EmbedGeneratorError { + pub message: String, + pub loc: EmbedLoc, +} + +#[derive(Serialize, Deserialize, Debug)] +pub struct EmbedGeneratorResponseError { + pub errors: Vec, +} + #[derive(Debug, Clone, PartialEq)] pub struct SourceFile { pub implementation: Implementation, diff --git a/src/build/parse.rs b/src/build/parse.rs index 980b4e90..f8ff2072 100644 --- a/src/build/parse.rs +++ b/src/build/parse.rs @@ -6,6 +6,7 @@ use crate::bsconfig; use crate::bsconfig::OneOrMore; use crate::helpers; use ahash::AHashSet; +use core::str; use log::debug; use rayon::prelude::*; use std::path::{Path, PathBuf}; @@ -413,6 +414,7 @@ fn process_embeds(build_state: &mut BuildState, module_name: &str) -> Result<(), SourceType::SourceFile(source_file) => source_file, _ => panic!("Module {} is not a source file", module_name), }; + let source_file_path = source_file.implementation.path.clone(); let ast_path_str = package.get_ast_path(&source_file.implementation.path); let ast_path = Path::new(&ast_path_str); @@ -424,6 +426,7 @@ fn process_embeds(build_state: &mut BuildState, module_name: &str) -> Result<(), let embeds_data: Vec = serde_json::from_str(&embeds_json).map_err(|e| e.to_string())?; + // TODO(embed) Run in parallel? // Process each embed let embeds = embeds_data .into_iter() @@ -447,7 +450,7 @@ fn process_embeds(build_state: &mut BuildState, module_name: &str) -> Result<(), generators.iter().find(|gen| gen.tags.contains(&embed_data.tag)) }) { - // TODO(embed) Needs to be relative to package root? Join with package path? + // TODO(embed) Needs to be relative to relevant package root? Join with package path? let mut command = Command::new(&embed_generator.path); // Run the embed generator @@ -458,26 +461,57 @@ fn process_embeds(build_state: &mut BuildState, module_name: &str) -> Result<(), .spawn() .and_then(|mut child| { use std::io::Write; - let contents = format!("{}\n{}", embed_data.tag, embed_data.contents); - child.stdin.as_mut().unwrap().write_all(contents.as_bytes())?; + let embed_generator_config = EmbedGeneratorConfig { + tag: embed_data.tag.clone(), + content: embed_data.contents.clone(), + source_file_path: source_file_path.clone(), + }; + let contents = serde_json::to_vec(&embed_generator_config).unwrap(); + child.stdin.as_mut().unwrap().write_all(&contents)?; child.wait_with_output() }) .map_err(|e| format!("Failed to run embed generator: {}", e))?; if !output.status.success() { - return Err(format!( - "Embed generator failed: {}", - String::from_utf8_lossy(&output.stderr) - )); + let stderr_str = str::from_utf8(&output.stderr).unwrap(); + let error_response: Result = + serde_json::from_str(stderr_str); + + match error_response { + Ok(_error_response) => { + // TODO(embeds) Pass along error properly here + return Err(format!("Embed generator failed: {}", stderr_str)); + } + Err(error_response) => { + // TODO(embeds) Proper error here + return Err(format!( + "Parsing JSON from embed generator error failed: {}", + error_response + )); + } + }; } - let generated_content = String::from_utf8_lossy(&output.stdout).into_owned(); - let generated_file_contents = format!("// HASH: {}\n{}", hash, generated_content); + let stdout_str = str::from_utf8(&output.stdout).unwrap(); + let success_response: Result = + serde_json::from_str(&stdout_str); - // Write the output to the embed file - std::fs::write(&embed_path, generated_file_contents) - .map_err(|e| format!("Failed to write embed file: {}", e))?; + match success_response { + Err(err) => { + // TODO(embeds) Proper error here + return Err(format!("Parsing JSON from embed generator failed: {}", err)); + } + Ok(success_response) => { + let generated_file_contents = + format!("// HASH: {}\n{}", hash, success_response.content); + + // Write the output to the embed file + std::fs::write(&embed_path, generated_file_contents) + .map_err(|e| format!("Failed to write embed file: {}", e))?; + } + }; } else { + // TODO(embeds) Proper error here return Err(format!("No embed generator found for tag: {}", embed_data.tag)); } } From 983edf3dc3c904f64fe826c7e3fa00767d8121c7 Mon Sep 17 00:00:00 2001 From: Gabriel Nordeborn Date: Wed, 18 Sep 2024 22:20:17 +0200 Subject: [PATCH 6/6] propagate errors --- src/build/build_types.rs | 36 ++++++- src/build/parse.rs | 214 ++++++++++++++++++++++++++++++++++----- 2 files changed, 218 insertions(+), 32 deletions(-) diff --git a/src/build/build_types.rs b/src/build/build_types.rs index 191edf3a..0f1f732f 100644 --- a/src/build/build_types.rs +++ b/src/build/build_types.rs @@ -38,14 +38,14 @@ pub struct Implementation { #[derive(Deserialize, Serialize, Debug, Clone, PartialEq)] pub struct Location { - line: u32, - col: u32, + pub line: u32, + pub col: u32, } #[derive(Deserialize, Serialize, Debug, Clone, PartialEq)] pub struct EmbedLoc { - start: Location, - end: Location, + pub start: Location, + pub end: Location, } // example of the *.embeds.json file @@ -94,7 +94,7 @@ pub struct EmbedGeneratorResponseOk { pub content: String, } -#[derive(Serialize, Deserialize, Debug)] +#[derive(Serialize, Deserialize, Debug, Clone)] pub struct EmbedGeneratorError { pub message: String, pub loc: EmbedLoc, @@ -105,6 +105,32 @@ pub struct EmbedGeneratorResponseError { pub errors: Vec, } +#[derive(Debug, Clone)] +pub struct GeneratorReturnedError { + pub errors: Vec, + pub source_file_path: String, + pub embed_data: EmbedJsonData, + pub package_name: String, +} + +#[derive(Debug, Clone)] +pub enum ProcessEmbedsErrorReason { + EmbedsJsonFileCouldNotBeRead(String), + EmbedsJsonDataParseError(String), + CouldNotWriteToGeneratorStdin(String, String), + RunningGeneratorCommandFailed(String, String), + GeneratorReturnedError(GeneratorReturnedError), + GeneratorReturnedInvalidJSON(String), + CouldNotWriteGeneratedFile(String), + NoEmbedGeneratorFoundForTag(String), +} + +#[derive(Debug, Clone)] +pub struct ProcessEmbedsError { + pub reason: ProcessEmbedsErrorReason, + pub generator_name: Option, +} + #[derive(Debug, Clone, PartialEq)] pub struct SourceFile { pub implementation: Implementation, diff --git a/src/build/parse.rs b/src/build/parse.rs index f8ff2072..a755ecb6 100644 --- a/src/build/parse.rs +++ b/src/build/parse.rs @@ -6,9 +6,12 @@ use crate::bsconfig; use crate::bsconfig::OneOrMore; use crate::helpers; use ahash::AHashSet; +use core::panic; use core::str; use log::debug; use rayon::prelude::*; +use std::cmp::max; +use std::cmp::min; use std::path::{Path, PathBuf}; use std::process::Command; use std::time::SystemTime; @@ -206,7 +209,100 @@ pub fn generate_asts( Ok(()) => { if let Err(err) = process_embeds(build_state, &module_name) { has_failure = true; - stderr.push_str(&err); + err.into_iter().for_each(|err: ProcessEmbedsError| { + let error_str = match &err.reason { + ProcessEmbedsErrorReason::CouldNotWriteGeneratedFile(err) => { + Some((String::from("[CouldNotWriteGeneratedFile]"), err)) + } + ProcessEmbedsErrorReason::CouldNotWriteToGeneratorStdin(_generator, err) => { + Some((String::from("[CouldNotWriteToGeneratorStdin]"), err)) + } + ProcessEmbedsErrorReason::GeneratorReturnedInvalidJSON(err) => { + Some((String::from("[GeneratorReturnedInvalidJSON]"), err)) + } + ProcessEmbedsErrorReason::NoEmbedGeneratorFoundForTag(err) => { + Some((String::from("[NoEmbedGeneratorFoundForTag]"), err)) + } + ProcessEmbedsErrorReason::RunningGeneratorCommandFailed(_generator, err) => { + Some((String::from("[RunningGeneratorCommandFailed]"), err)) + } + _ => None, + }; + match error_str { + Some((pre, err)) => stderr.push_str(format!("\n{}\n{}\n", pre, err).as_str()), + None => (), + } + + // Handle real embed errors, which should be transformed and pushed onto the compiler log + match err.reason { + ProcessEmbedsErrorReason::GeneratorReturnedError(generator_error) => { + generator_error.errors.iter().for_each(|error| { + // TODO(embeds) Figure out locs properly... + let transformed_loc = EmbedLoc { + start: Location { + line: max( + generator_error.embed_data.loc.start.line + + error.loc.start.line, + generator_error.embed_data.loc.start.line, + ), + col: min( + generator_error.embed_data.loc.start.col + + error.loc.start.col, + generator_error.embed_data.loc.start.col + 1, + ), + }, + end: Location { + line: max( + generator_error.embed_data.loc.end.line + + error.loc.end.line, + generator_error.embed_data.loc.end.line, + ), + col: min( + generator_error.embed_data.loc.end.col + + error.loc.end.col, + generator_error.embed_data.loc.end.col, + ), + }, + }; + + let package = build_state + .get_package(&generator_error.package_name) + .expect("Package not found"); + + // TODO(embeds) Figure out where the LSP off-by-one on line/col transform makes most sense to happen. + let error_output = format!( + " We've found a bug for you!\n{}:{}:{}-{}\n\n {}\n{}\n", + helpers::canonicalize_string_path( + &generator_error.source_file_path + ) + .unwrap(), + transformed_loc.start.line - 1, + transformed_loc.start.col, + if transformed_loc.start.line == transformed_loc.end.line { + format!("{}", transformed_loc.end.col) + } else { + format!( + "{}:{}", + transformed_loc.end.line - 1, + transformed_loc.end.col + ) + }, + match &err.generator_name { + Some(generator_name) => + format!(" Error from generator: {}\n", generator_name), + None => format!(" Error from generator\n"), + }, + error.message + ); + + logs::append(&package, &error_output); + + stderr.push_str(&error_output); + }); + } + _ => (), + } + }); } } Err(()) => (), @@ -407,7 +503,7 @@ fn path_to_ast_extension(path: &Path) -> &str { } // Function to process embeds -fn process_embeds(build_state: &mut BuildState, module_name: &str) -> Result<(), String> { +fn process_embeds(build_state: &mut BuildState, module_name: &str) -> Result<(), Vec> { let module = build_state.modules.get(module_name).unwrap(); let package = build_state.packages.get(&module.package_name).unwrap(); let source_file = match &module.source_type { @@ -422,11 +518,20 @@ fn process_embeds(build_state: &mut BuildState, module_name: &str) -> Result<(), // Read and parse the embeds JSON file if embeds_json_path.exists() { - let embeds_json = helpers::read_file(&embeds_json_path).map_err(|e| e.to_string())?; - let embeds_data: Vec = - serde_json::from_str(&embeds_json).map_err(|e| e.to_string())?; - - // TODO(embed) Run in parallel? + let embeds_json = helpers::read_file(&embeds_json_path).map_err(|e| { + vec![ProcessEmbedsError { + reason: ProcessEmbedsErrorReason::EmbedsJsonFileCouldNotBeRead(e.to_string()), + generator_name: None, + }] + })?; + let embeds_data: Vec = serde_json::from_str(&embeds_json).map_err(|e| { + vec![ProcessEmbedsError { + reason: ProcessEmbedsErrorReason::EmbedsJsonDataParseError(e.to_string()), + generator_name: None, + }] + })?; + + // TODO(embeds) Run in parallel? // Process each embed let embeds = embeds_data .into_iter() @@ -450,7 +555,7 @@ fn process_embeds(build_state: &mut BuildState, module_name: &str) -> Result<(), generators.iter().find(|gen| gen.tags.contains(&embed_data.tag)) }) { - // TODO(embed) Needs to be relative to relevant package root? Join with package path? + // TODO(embeds) Needs to be relative to relevant package root? Join with package path? let mut command = Command::new(&embed_generator.path); // Run the embed generator @@ -466,11 +571,31 @@ fn process_embeds(build_state: &mut BuildState, module_name: &str) -> Result<(), content: embed_data.contents.clone(), source_file_path: source_file_path.clone(), }; - let contents = serde_json::to_vec(&embed_generator_config).unwrap(); - child.stdin.as_mut().unwrap().write_all(&contents)?; + let contents = serde_json::to_vec(&embed_generator_config)?; + // TODO(embeds) These ? unwraps seems wrong... + child + .stdin + .as_mut() + .unwrap() + .write_all(&contents) + .map_err(|e| ProcessEmbedsError { + reason: ProcessEmbedsErrorReason::CouldNotWriteToGeneratorStdin( + embed_generator.name.clone(), + e.to_string(), + ), + generator_name: Some(embed_generator.name.clone()), + }) + .unwrap(); child.wait_with_output() }) - .map_err(|e| format!("Failed to run embed generator: {}", e))?; + .map_err(|e| ProcessEmbedsError { + reason: ProcessEmbedsErrorReason::RunningGeneratorCommandFailed( + embed_generator.name.clone(), + e.to_string(), + ), + generator_name: Some(embed_generator.name.clone()), + }) + .unwrap(); if !output.status.success() { let stderr_str = str::from_utf8(&output.stderr).unwrap(); @@ -478,16 +603,26 @@ fn process_embeds(build_state: &mut BuildState, module_name: &str) -> Result<(), serde_json::from_str(stderr_str); match error_response { - Ok(_error_response) => { - // TODO(embeds) Pass along error properly here - return Err(format!("Embed generator failed: {}", stderr_str)); + Ok(err) => { + return Err(ProcessEmbedsError { + reason: ProcessEmbedsErrorReason::GeneratorReturnedError( + GeneratorReturnedError { + package_name: package.name.clone(), + errors: err.errors, + embed_data: embed_data.clone(), + source_file_path: source_file_path.clone(), + }, + ), + generator_name: Some(embed_generator.name.clone()), + }); } - Err(error_response) => { - // TODO(embeds) Proper error here - return Err(format!( - "Parsing JSON from embed generator error failed: {}", - error_response - )); + Err(err) => { + return Err(ProcessEmbedsError { + reason: ProcessEmbedsErrorReason::GeneratorReturnedInvalidJSON( + format!("Error: {}\n\nRaw content:\n{}", err, stderr_str), + ), + generator_name: Some(embed_generator.name.clone()), + }); } }; } @@ -498,21 +633,33 @@ fn process_embeds(build_state: &mut BuildState, module_name: &str) -> Result<(), match success_response { Err(err) => { - // TODO(embeds) Proper error here - return Err(format!("Parsing JSON from embed generator failed: {}", err)); + return Err(ProcessEmbedsError { + reason: ProcessEmbedsErrorReason::GeneratorReturnedInvalidJSON( + err.to_string(), + ), + generator_name: Some(embed_generator.name.clone()), + }); } Ok(success_response) => { let generated_file_contents = format!("// HASH: {}\n{}", hash, success_response.content); // Write the output to the embed file - std::fs::write(&embed_path, generated_file_contents) - .map_err(|e| format!("Failed to write embed file: {}", e))?; + std::fs::write(&embed_path, generated_file_contents).map_err(|e| { + ProcessEmbedsError { + reason: ProcessEmbedsErrorReason::CouldNotWriteGeneratedFile( + e.to_string(), + ), + generator_name: Some(embed_generator.name.clone()), + } + })?; } }; } else { - // TODO(embeds) Proper error here - return Err(format!("No embed generator found for tag: {}", embed_data.tag)); + return Err(ProcessEmbedsError { + reason: ProcessEmbedsErrorReason::NoEmbedGeneratorFoundForTag(embed_data.tag), + generator_name: None, + }); } } if !build_state.modules.contains_key(&module_name) { @@ -556,15 +703,28 @@ fn process_embeds(build_state: &mut BuildState, module_name: &str) -> Result<(), dirty, }) }) - .collect::>>(); + .collect::>>(); let module = build_state.modules.get_mut(module_name).unwrap(); + + let embed_errors = embeds + .iter() + .filter_map(|result| match result { + Ok(_) => None, + Err(err) => Some(err.clone()), + }) + .collect::>(); + match module.source_type { SourceType::SourceFile(ref mut source_file) => { source_file.embeds = embeds.into_iter().filter_map(|result| result.ok()).collect(); } _ => (), }; + + if embed_errors.len() > 0 { + return Err(embed_errors); + } } Ok(())