diff --git a/README.ja.md b/README.ja.md index 68d17d3..a87cfaa 100644 --- a/README.ja.md +++ b/README.ja.md @@ -114,6 +114,28 @@ cd ../.. - **Calls** — 呼び出しクラスタと関数のつながりを追跡 4. ファイルを保存すると自動的に再解析される +## 検証方法 + +FlowMap には、実際の Git コミットペアをリプレイし、graph diff の検出方向が期待どおりかを確認する自動検証が含まれています。 + +- コミットリプレイ runner: `scripts/run_commit_replay.mjs` +- シナリオ runner(合成回帰セット): `scripts/run_sample_scenarios.mjs` +- リプレイ要約 builder: `scripts/build_replay_summary.mjs` + +例: + +```bash +node scripts/run_commit_replay.mjs \ + --repo /path/to/swift-repo \ + --count 100 \ + --report reports/replay-100.json +``` + +現在の統合検証アウトプット: + +- `reports/replay-validation-bundle.md` +- `reports/replay-validation-bundle.json` + ## ライセンス FlowMap は source-available モデルで提供されています。 diff --git a/README.ko.md b/README.ko.md index c9d2e93..a9aa5c9 100644 --- a/README.ko.md +++ b/README.ko.md @@ -114,6 +114,28 @@ cd ../.. - **Calls** — 호출 군집과 함수 연결 흐름 추적 4. 파일을 저장하면 자동으로 재분석됩니다 +## 검증 방식 + +FlowMap은 실제 Git 커밋 쌍을 리플레이해서 diff 검출 방향이 맞는지 확인하는 자동 검증을 제공합니다. + +- 커밋 리플레이 러너: `scripts/run_commit_replay.mjs` +- 시나리오 러너(합성 회귀 세트): `scripts/run_sample_scenarios.mjs` +- 리플레이 요약 빌더: `scripts/build_replay_summary.mjs` + +예시: + +```bash +node scripts/run_commit_replay.mjs \ + --repo /path/to/swift-repo \ + --count 100 \ + --report reports/replay-100.json +``` + +현재 통합 검증 산출물: + +- `reports/replay-validation-bundle.md` +- `reports/replay-validation-bundle.json` + ## 라이선스 FlowMap은 source-available 방식으로 제공됩니다. diff --git a/README.md b/README.md index 637a0f7..34785ad 100644 --- a/README.md +++ b/README.md @@ -114,6 +114,28 @@ cd ../.. - **Calls** — trace call clusters and follow how functions connect 4. Save a file to trigger automatic re-analysis +## Validation + +FlowMap includes automated replay validation that replays real Git commit pairs and checks whether graph diff changes are detected in the expected direction. 
+ +- Commit replay runner: `scripts/run_commit_replay.mjs` +- Scenario runner (synthetic regression set): `scripts/run_sample_scenarios.mjs` +- Replay summary builder: `scripts/build_replay_summary.mjs` + +Example: + +```bash +node scripts/run_commit_replay.mjs \ + --repo /path/to/swift-repo \ + --count 100 \ + --report reports/replay-100.json +``` + +Current bundled validation output: + +- `reports/replay-validation-bundle.md` +- `reports/replay-validation-bundle.json` + ## License FlowMap is source-available. diff --git a/crates/engine/src/git_diff.rs b/crates/engine/src/git_diff.rs index 3c7d524..4d5fec0 100644 --- a/crates/engine/src/git_diff.rs +++ b/crates/engine/src/git_diff.rs @@ -1,34 +1,65 @@ +use std::collections::BTreeSet; use std::path::{Path, PathBuf}; use std::process::Command; -/// Run `git diff --name-only HEAD` in `workspace_root` and return the -/// absolute paths of changed `*.swift` files that still exist on disk. +/// Return absolute paths of changed Swift files in `workspace_root`. /// -/// Returns an empty vec when: -/// - `workspace_root` is not inside a git repository -/// - the repository has no commits yet -/// - no Swift files have changed relative to HEAD -/// - git is not installed +/// Sources: +/// - tracked changes vs `HEAD` (modified/renamed/deleted) +/// - untracked Swift files (new files not yet committed) +/// +/// Deleted files are intentionally kept in the result so callers can +/// reconstruct the old fragment from `HEAD` and emit removed nodes/edges. pub fn changed_swift_files(workspace_root: &Path) -> Vec<PathBuf> { - let output = match Command::new("git") - .args(["diff", "--name-only", "HEAD"]) + let mut rel_paths: BTreeSet<PathBuf> = BTreeSet::new(); + + // Tracked deltas compared to HEAD. + // If HEAD does not exist yet, this command fails; that's okay because we + // still collect untracked files below. 
+ if let Ok(lines) = git_lines( + workspace_root, + &["diff", "--name-only", "HEAD", "--", "*.swift"], + ) { + rel_paths.extend(lines.into_iter().map(PathBuf::from)); + } + + // Newly created, untracked Swift files. + if let Ok(lines) = git_lines( + workspace_root, + &[ + "ls-files", + "--others", + "--exclude-standard", + "--", + "*.swift", + ], + ) { + rel_paths.extend(lines.into_iter().map(PathBuf::from)); + } + + rel_paths + .into_iter() + .map(|rel| workspace_root.join(rel)) + .collect() +} + +fn git_lines(workspace_root: &Path, args: &[&str]) -> Result<Vec<String>, ()> { + let output = Command::new("git") + .args(args) .current_dir(workspace_root) .output() - { - Ok(o) => o, - Err(_) => return Vec::new(), - }; + .map_err(|_| ())?; if !output.status.success() { - return Vec::new(); + return Err(()); } - String::from_utf8_lossy(&output.stdout) + Ok(String::from_utf8_lossy(&output.stdout) .lines() - .filter(|l| l.ends_with(".swift")) - .map(|l| workspace_root.join(l)) - .filter(|p| p.exists()) // skip deleted files (removed from disk) - .collect() + .map(|l| l.trim()) + .filter(|l| !l.is_empty()) + .map(|l| l.to_string()) + .collect()) } /// Fetch the content of `file_path` at `HEAD` via `git show HEAD:<path>`. 
@@ -57,8 +88,19 @@ pub fn head_content(workspace_root: &Path, file_path: &Path) -> Option { #[cfg(test)] mod tests { use super::*; + use std::fs; + use std::process::Command; use tempfile::TempDir; + fn git_ok(root: &Path, args: &[&str]) { + let status = Command::new("git") + .args(args) + .current_dir(root) + .status() + .expect("failed to run git"); + assert!(status.success(), "git {:?} failed", args); + } + #[test] fn test_changed_swift_files_non_git_dir() { // Directory that is not a git repo → must return empty, not panic @@ -67,6 +109,49 @@ mod tests { assert!(result.is_empty()); } + #[test] + fn test_changed_swift_files_includes_untracked_modified_deleted() { + let tmp = TempDir::new().unwrap(); + let root = tmp.path(); + + git_ok(root, &["init"]); + git_ok(root, &["config", "user.email", "flowmap-test@example.com"]); + git_ok(root, &["config", "user.name", "FlowMap Test"]); + + let modified = root.join("Modified.swift"); + let deleted = root.join("Deleted.swift"); + let untracked = root.join("Untracked.swift"); + + fs::write(&modified, "func a() {}\n").unwrap(); + fs::write(&deleted, "func b() {}\n").unwrap(); + git_ok(root, &["add", "."]); + git_ok(root, &["commit", "-m", "init"]); + + fs::write(&modified, "func a() { print(1) }\n").unwrap(); + fs::remove_file(&deleted).unwrap(); + fs::write(&untracked, "func c() {}\n").unwrap(); + fs::write(root.join("README.md"), "ignore\n").unwrap(); + + let result = changed_swift_files(root); + assert!(result.contains(&modified)); + assert!(result.contains(&deleted)); + assert!(result.contains(&untracked)); + assert!(!result.contains(&root.join("README.md"))); + } + + #[test] + fn test_changed_swift_files_in_repo_without_head_includes_untracked_swift() { + let tmp = TempDir::new().unwrap(); + let root = tmp.path(); + + git_ok(root, &["init"]); + let file = root.join("BrandNew.swift"); + fs::write(&file, "func brandNew() {}\n").unwrap(); + + let result = changed_swift_files(root); + assert!(result.contains(&file)); + } 
+ #[test] fn test_head_content_non_git_dir() { let tmp = TempDir::new().unwrap(); diff --git a/crates/engine/src/graph_builder.rs b/crates/engine/src/graph_builder.rs index b61700d..b0c971a 100644 --- a/crates/engine/src/graph_builder.rs +++ b/crates/engine/src/graph_builder.rs @@ -28,7 +28,7 @@ pub struct BuiltEdge { /// The complete workspace-level dependency graph produced by merging /// per-file `SwiftGraph`s. -#[derive(Debug, Default, Serialize)] +#[derive(Debug, Default, Clone, Serialize)] pub struct BuiltGraph { pub nodes: Vec<BuiltNode>, pub edges: Vec<BuiltEdge>, diff --git a/crates/engine/src/impact_analysis.rs b/crates/engine/src/impact_analysis.rs index 24abe65..a6d0c84 100644 --- a/crates/engine/src/impact_analysis.rs +++ b/crates/engine/src/impact_analysis.rs @@ -16,14 +16,19 @@ use std::collections::{HashMap, HashSet, VecDeque}; /// A → B → C /// impacted_nodes(graph, ["A"]) == [B, C] /// ``` -pub fn impacted_nodes(graph: &BuiltGraph, start_node_ids: &[&str]) -> Vec<BuiltNode> { - // Build adjacency map: from → [to] over "calls" edges only +pub fn impacted_nodes( + graph: &BuiltGraph, + start_node_ids: &[&str], + exclude_node_ids: &[&str], +) -> Vec<BuiltNode> { + // Build adjacency map: to → [from] over "calls" edges only. + // Changing a node impacts its callers, so we must traverse edges BACKWARDS. 
let mut adj: HashMap<&str, Vec<&str>> = HashMap::new(); for edge in &graph.edges { if edge.kind == "calls" { - adj.entry(edge.from.as_str()) + adj.entry(edge.to.as_str()) .or_default() - .push(edge.to.as_str()); + .push(edge.from.as_str()); } } @@ -55,11 +60,12 @@ pub fn impacted_nodes(graph: &BuiltGraph, start_node_ids: &[&str]) -> Vec = exclude_node_ids.iter().copied().collect(); + // Exclude the nodes explicitly requested to be excluded — only downstream dependants visited .into_iter() - .filter(|id| !starts.contains(id)) - .filter_map(|id| node_map.get(id).map(|&n| n.clone())) + .filter(|id| !excludes.contains(id)) + .filter_map(|id| node_map.get(id).copied().cloned()) .collect() } @@ -110,28 +116,34 @@ mod tests { #[test] fn test_linear_propagation() { - // A → B → C: changing A must impact B and C + // A → B → C: changing C impacts B and A (Callers are impacted) let g = build( vec![node("A"), node("B"), node("C")], vec![calls("A", "B"), calls("B", "C")], ); - assert_eq!(sorted_ids(impacted_nodes(&g, &["A"])), vec!["B", "C"]); + assert_eq!( + sorted_ids(impacted_nodes(&g, &["C"], &["C"])), + vec!["A", "B"] + ); } #[test] fn test_branching_propagation() { - // A → B, A → C + // B → A, C → A (B and C call A) let g = build( vec![node("A"), node("B"), node("C")], - vec![calls("A", "B"), calls("A", "C")], + vec![calls("B", "A"), calls("C", "A")], + ); + assert_eq!( + sorted_ids(impacted_nodes(&g, &["A"], &["A"])), + vec!["B", "C"] ); - assert_eq!(sorted_ids(impacted_nodes(&g, &["A"])), vec!["B", "C"]); } #[test] - fn test_no_outgoing_edges() { + fn test_no_incoming_edges() { let g = build(vec![node("A"), node("B")], vec![]); - assert!(impacted_nodes(&g, &["A"]).is_empty()); + assert!(impacted_nodes(&g, &["A"], &["A"]).is_empty()); } #[test] @@ -141,25 +153,26 @@ mod tests { vec![node("A"), node("B")], vec![calls("A", "B"), calls("B", "A")], ); - // Only B is downstream of A (A is the start and excluded from result) - assert_eq!(sorted_ids(impacted_nodes(&g, 
&["A"])), vec!["B"]); + // B and A call each other. B is impacted by A, A is impacted by B. + assert_eq!(sorted_ids(impacted_nodes(&g, &["A"], &["A"])), vec!["B"]); } #[test] fn test_only_calls_edges_followed() { // "contains" edge should NOT be traversed + // A contains B. Changing B should not impact A through contains. let g = build(vec![node("A"), node("B")], vec![contains_edge("A", "B")]); - assert!(impacted_nodes(&g, &["A"]).is_empty()); + assert!(impacted_nodes(&g, &["B"], &["B"]).is_empty()); } #[test] fn test_multiple_start_nodes() { - // Start from both A and D; B and C reachable from A; E reachable from D + // B -> A, C -> B, E -> D let g = build( vec![node("A"), node("B"), node("C"), node("D"), node("E")], - vec![calls("A", "B"), calls("B", "C"), calls("D", "E")], + vec![calls("B", "A"), calls("C", "B"), calls("E", "D")], ); - let mut ids = sorted_ids(impacted_nodes(&g, &["A", "D"])); + let mut ids = sorted_ids(impacted_nodes(&g, &["A", "D"], &["A", "D"])); ids.sort(); assert_eq!(ids, vec!["B", "C", "E"]); } @@ -167,6 +180,6 @@ mod tests { #[test] fn test_unknown_start_node_returns_empty() { let g = build(vec![node("A")], vec![]); - assert!(impacted_nodes(&g, &["Z"]).is_empty()); + assert!(impacted_nodes(&g, &["Z"], &["Z"]).is_empty()); } } diff --git a/crates/engine/src/incremental_graph.rs b/crates/engine/src/incremental_graph.rs index 08d069c..1876eb5 100644 --- a/crates/engine/src/incremental_graph.rs +++ b/crates/engine/src/incremental_graph.rs @@ -1,5 +1,6 @@ use crate::graph_builder::BuiltGraph; use crate::swift_bridge; +use crate::swift_bridge::UnresolvedCallSite; use std::collections::HashSet; use std::io::Write; use std::path::{Path, PathBuf}; @@ -16,7 +17,7 @@ pub fn build_old_fragment( workspace_root: &Path, changed_files: &[PathBuf], binary: &str, -) -> BuiltGraph { +) -> (BuiltGraph, Vec) { use rayon::prelude::*; let parsed_graphs: Vec = changed_files @@ -31,21 +32,54 @@ pub fn build_old_fragment( return None; } - 
swift_bridge::parse_swift_file(binary, tmp.path()) + let mut sg = swift_bridge::parse_swift_file(binary, tmp.path())?; + + let tmp_name = tmp.path().file_name()?.to_string_lossy(); + let real_name = file.file_name()?.to_string_lossy(); + let tmp_path_str = tmp.path().to_string_lossy(); + let real_path_str = file.to_string_lossy(); + + for node in &mut sg.nodes { + node.id = node.id.replace(&*tmp_name, &real_name); + if node.kind == "file" { + node.name = node.name.replace(&*tmp_name, &real_name); + } + if let Some(uri) = &mut node.uri { + *uri = uri.replace(&*tmp_path_str, &real_path_str); + } + } + for edge in &mut sg.edges { + edge.id = edge.id.replace(&*tmp_name, &real_name); + edge.source = edge.source.replace(&*tmp_name, &real_name); + edge.target = edge.target.replace(&*tmp_name, &real_name); + } + for site in &mut sg.call_sites { + site.caller_id = site.caller_id.replace(&*tmp_name, &real_name); + site.caller_file = site.caller_file.replace(&*tmp_path_str, &real_path_str); + } + + Some(sg) }) .collect(); let mut graph = BuiltGraph::default(); - for sg in parsed_graphs { + let mut sites = Vec::new(); + for mut sg in parsed_graphs { + for site in std::mem::take(&mut sg.call_sites) { + sites.push(site); + } graph.merge(sg); } - graph + (graph, sites) } /// Parse the **current working-tree** version of each file in `changed_files`, /// building a graph fragment that represents the files *after* the edits. 
-pub fn build_new_fragment(changed_files: &[PathBuf], binary: &str) -> BuiltGraph { +pub fn build_new_fragment( + changed_files: &[PathBuf], + binary: &str, +) -> (BuiltGraph, Vec<UnresolvedCallSite>) { use rayon::prelude::*; let parsed_graphs: Vec<swift_bridge::SwiftGraph> = changed_files @@ -54,11 +88,15 @@ pub fn build_new_fragment(changed_files: &[PathBuf], binary: &str) -> BuiltGraph .collect(); let mut graph = BuiltGraph::default(); - for sg in parsed_graphs { + let mut sites = Vec::new(); + for mut sg in parsed_graphs { + for site in std::mem::take(&mut sg.call_sites) { + sites.push(site); + } graph.merge(sg); } - graph + (graph, sites) } /// Apply an incremental update to `old_graph`: remove all nodes and edges that @@ -133,7 +171,7 @@ mod tests { // Binary does not exist → should return empty graph, not panic let result = build_new_fragment(&[file], "non_existent_binary_xyz"); - assert!(result.nodes.is_empty()); + assert!(result.0.nodes.is_empty()); } #[test] diff --git a/crates/engine/src/lib.rs b/crates/engine/src/lib.rs index 5517cec..4e600a9 100644 --- a/crates/engine/src/lib.rs +++ b/crates/engine/src/lib.rs @@ -12,7 +12,7 @@ use graph_diff::GraphDiff; use protocol::{RequestEnvelope, ResponseEnvelope}; use serde_json::json; use std::collections::HashSet; -use std::path::Path; +use std::path::{Path, PathBuf}; const ENGINE_VERSION: &str = "0.1.0"; @@ -72,34 +72,79 @@ fn handle_analyze(req: &RequestEnvelope) -> ResponseEnvelope { let changed_files = git_diff::changed_swift_files(root); // ── 3. 
Compute graph diff: HEAD fragment vs current fragment ────────── - let diff = if changed_files.is_empty() { + let mut diff = if changed_files.is_empty() { GraphDiff::default() } else { - let old_fragment = incremental_graph::build_old_fragment(root, &changed_files, &binary); - let new_fragment = incremental_graph::build_new_fragment(&changed_files, &binary); + let (mut old_fragment, old_sites) = + incremental_graph::build_old_fragment(root, &changed_files, &binary); + let (mut new_fragment, new_sites) = + incremental_graph::build_new_fragment(&changed_files, &binary); + + let old_context = build_resolution_context(&full_graph, &changed_files, &old_fragment); + let old_index = cross_file_resolver::SymbolIndex::build(&old_context); + let old_resolved = cross_file_resolver::resolve(&old_context, &old_sites, &old_index); + old_fragment.edges.extend(old_resolved); + + let new_context = build_resolution_context(&full_graph, &changed_files, &new_fragment); + let new_index = cross_file_resolver::SymbolIndex::build(&new_context); + let new_resolved = cross_file_resolver::resolve(&new_context, &new_sites, &new_index); + new_fragment.edges.extend(new_resolved); + graph_diff::diff_graphs(&old_fragment, &new_fragment) }; // ── 4. Impact analysis ──────────────────────────────────────────────── // Start nodes: added/changed nodes + any node in the full graph that // calls a removed node (its dependency was deleted → it is affected). 
- let removed_ids: HashSet<&str> = diff.removed_nodes.iter().map(|n| n.id.as_str()).collect(); + let removed_ids: HashSet<String> = diff.removed_nodes.iter().map(|n| n.id.clone()).collect(); - let mut start_ids: HashSet<&str> = diff + let mut start_ids: HashSet<String> = diff .added_nodes .iter() .chain(diff.changed_nodes.iter()) - .map(|n| n.id.as_str()) + .map(|n| n.id.clone()) .collect(); + // Preserve exactly what nodes changed intrinsically to exclude them from the orange 'impact' set + let mut primary_changed: HashSet<String> = start_ids.clone(); + for edge in &full_graph.edges { - if edge.kind == "calls" && removed_ids.contains(edge.to.as_str()) { - start_ids.insert(edge.from.as_str()); + if edge.kind == "calls" && removed_ids.contains(&edge.to) { + start_ids.insert(edge.from.clone()); + } + } + + // Also trace callers from the old graph: if an edge was broken because the + // target was removed, the caller is intrinsically impacted. + for edge in &diff.removed_edges { + if edge.kind == "calls" && removed_ids.contains(&edge.to) { + start_ids.insert(edge.from.clone()); + primary_changed.insert(edge.from.clone()); + } + } + + // Any new call edges? 
The caller should be impacted too + for edge in &diff.added_edges { + if edge.kind == "calls" { + start_ids.insert(edge.from.clone()); + primary_changed.insert(edge.from.clone()); } } - let start_refs: Vec<&str> = start_ids.into_iter().collect(); - let impacted = impact_analysis::impacted_nodes(&full_graph, &start_refs); + // Add implicitly changed nodes to diff.changed_nodes so they get painted yellow + let existing_changed_ids: HashSet<String> = + diff.changed_nodes.iter().map(|n| n.id.clone()).collect(); + for id in &primary_changed { + if !existing_changed_ids.contains(id) && !diff.added_nodes.iter().any(|n| &n.id == id) { + if let Some(node) = full_graph.nodes.iter().find(|n| n.id == *id) { + diff.changed_nodes.push(node.clone()); + } + } + } + + let start_refs: Vec<&str> = start_ids.iter().map(|s| s.as_str()).collect(); + let primary_changed_vec: Vec<&str> = primary_changed.iter().map(|s| s.as_str()).collect(); + let impacted = impact_analysis::impacted_nodes(&full_graph, &start_refs, &primary_changed_vec); let impact_ids: Vec<String> = impacted.into_iter().map(|n| n.id).collect(); // ── 5. Serialize and respond ────────────────────────────────────────── @@ -165,6 +210,55 @@ fn build_swift_graph(workspace_path: &str, binary: &str) -> BuiltGraph { global } +/// Build a symbol-resolution context for one side of an incremental diff. +/// +/// The context includes: +/// - all nodes from unchanged files (current graph) +/// - all nodes/edges from the target fragment (old or new side) +/// +/// This avoids mixing old/new versions of changed files while still allowing +/// cross-file resolution against unchanged workspace symbols. 
+fn build_resolution_context( + full_graph: &BuiltGraph, + changed_files: &[PathBuf], + fragment: &BuiltGraph, +) -> BuiltGraph { + let mut context = BuiltGraph::default(); + let mut seen_ids: HashSet<String> = HashSet::new(); + + for node in &full_graph.nodes { + if node + .uri + .as_deref() + .is_some_and(|uri| is_changed_file_uri(uri, changed_files)) + { + continue; + } + if seen_ids.insert(node.id.clone()) { + context.nodes.push(node.clone()); + } + } + + for node in &fragment.nodes { + if seen_ids.insert(node.id.clone()) { + context.nodes.push(node.clone()); + } + } + + // Deliberately use fragment edges only: + // resolver duplicate suppression should not hide edges just because they + // already exist in the full current workspace graph. + context.edges = fragment.edges.clone(); + context +} + +fn is_changed_file_uri(uri: &str, changed_files: &[PathBuf]) -> bool { + let uri_path = Path::new(uri); + changed_files.iter().any(|changed| { + uri_path == changed.as_path() || uri_path.ends_with(changed) || changed.ends_with(uri_path) + }) +} + /// Locate the `flowmap-swift-ast` binary. /// /// 1. Same directory as the running `flowmap` binary (covers `target/debug/`). 
@@ -199,6 +293,9 @@ fn which_in_path(name: &str) -> Option { #[cfg(test)] mod tests { use super::*; + use crate::graph_builder::{BuiltEdge, BuiltNode}; + use crate::swift_bridge::UnresolvedCallSite; + use std::path::PathBuf; fn make_req(cmd: &str) -> RequestEnvelope { RequestEnvelope { @@ -264,4 +361,135 @@ mod tests { assert!(!resp.ok); assert_eq!(resp.error.unwrap().code, "UNKNOWN_COMMAND"); } + + #[test] + fn test_is_changed_file_uri_handles_relative_absolute_mismatch() { + let changed_rel = vec![PathBuf::from("src/Changed.swift")]; + assert!(is_changed_file_uri( + "/repo/workspace/src/Changed.swift", + &changed_rel + )); + + let changed_abs = vec![PathBuf::from("/repo/workspace/src/Changed.swift")]; + assert!(is_changed_file_uri("src/Changed.swift", &changed_abs)); + } + + #[test] + fn test_build_resolution_context_drops_changed_nodes_from_full_graph() { + let full_graph = BuiltGraph { + nodes: vec![ + BuiltNode { + id: "src/Changed.swift.NewCaller()".to_string(), + kind: "func".to_string(), + name: "newCaller".to_string(), + uri: Some("src/Changed.swift".to_string()), + line: Some(10), + }, + BuiltNode { + id: "src/Shared.swift.target()".to_string(), + kind: "func".to_string(), + name: "target".to_string(), + uri: Some("src/Shared.swift".to_string()), + line: Some(3), + }, + ], + edges: vec![BuiltEdge { + id: "e1".to_string(), + from: "src/Changed.swift.NewCaller()".to_string(), + to: "src/Shared.swift.target()".to_string(), + kind: "calls".to_string(), + }], + }; + + let fragment = BuiltGraph { + nodes: vec![BuiltNode { + id: "src/Changed.swift.OldCaller()".to_string(), + kind: "func".to_string(), + name: "oldCaller".to_string(), + uri: Some("src/Changed.swift".to_string()), + line: Some(8), + }], + edges: vec![BuiltEdge { + id: "old_e1".to_string(), + from: "src/Changed.swift.OldCaller()".to_string(), + to: "src/Changed.swift.helper()".to_string(), + kind: "calls".to_string(), + }], + }; + + let context = build_resolution_context( + &full_graph, + 
&[PathBuf::from("src/Changed.swift")], + &fragment, + ); + + assert!(context + .nodes + .iter() + .all(|n| n.id != "src/Changed.swift.NewCaller()")); + assert!(context + .nodes + .iter() + .any(|n| n.id == "src/Changed.swift.OldCaller()")); + assert_eq!(context.edges.len(), 1); + assert_eq!(context.edges[0].id, "old_e1"); + } + + #[test] + fn test_resolve_not_suppressed_by_edges_outside_fragment() { + let full_graph = BuiltGraph { + nodes: vec![ + BuiltNode { + id: "src/Changed.swift.Caller()".to_string(), + kind: "func".to_string(), + name: "caller".to_string(), + uri: Some("src/Changed.swift".to_string()), + line: Some(1), + }, + BuiltNode { + id: "src/Shared.swift.target()".to_string(), + kind: "func".to_string(), + name: "target".to_string(), + uri: Some("src/Shared.swift".to_string()), + line: Some(2), + }, + ], + edges: vec![BuiltEdge { + id: "existing_full_edge".to_string(), + from: "src/Changed.swift.Caller()".to_string(), + to: "src/Shared.swift.target()".to_string(), + kind: "calls".to_string(), + }], + }; + + let fragment = BuiltGraph { + nodes: vec![BuiltNode { + id: "src/Changed.swift.Caller()".to_string(), + kind: "func".to_string(), + name: "caller".to_string(), + uri: Some("src/Changed.swift".to_string()), + line: Some(1), + }], + edges: vec![], + }; + + let context = build_resolution_context( + &full_graph, + &[PathBuf::from("src/Changed.swift")], + &fragment, + ); + let index = crate::cross_file_resolver::SymbolIndex::build(&context); + let sites = vec![UnresolvedCallSite { + caller_id: "src/Changed.swift.Caller()".to_string(), + callee_name: "target".to_string(), + callee_base: None, + caller_type: None, + caller_file: "src/Changed.swift".to_string(), + }]; + + let resolved = crate::cross_file_resolver::resolve(&context, &sites, &index); + assert_eq!(resolved.len(), 1); + assert_eq!(resolved[0].from, "src/Changed.swift.Caller()"); + assert_eq!(resolved[0].to, "src/Shared.swift.target()"); + } } diff --git a/editor/vscode/webview/graph.js 
b/editor/vscode/webview/graph.js index 94d2daf..a55cda3 100644 --- a/editor/vscode/webview/graph.js +++ b/editor/vscode/webview/graph.js @@ -121,6 +121,37 @@ containsIds.add(e.id); } }); + + // Bubble up impact to all ancestor nodes (types, files) + var newImpacts = []; + impactIds.forEach(function (id) { + var p = parentMap[id]; + while (p) { + if (!impactIds.has(p)) { + newImpacts.push(p); + } + p = parentMap[p]; + } + }); + newImpacts.forEach(function (id) { impactIds.add(id); }); + + // Bubble up diff states to ancestors so files show as changed/added/removed + var newChanged = []; + var allDiff = []; + addedNodeIds.forEach(function(i) { allDiff.push(i); }); + removedNodeIds.forEach(function(i) { allDiff.push(i); }); + changedNodeIds.forEach(function(i) { allDiff.push(i); }); + + allDiff.forEach(function (id) { + var p = parentMap[id]; + while (p) { + if (!changedNodeIds.has(p) && !addedNodeIds.has(p) && !removedNodeIds.has(p)) { + newChanged.push(p); + } + p = parentMap[p]; + } + }); + newChanged.forEach(function (id) { changedNodeIds.add(id); }); } /** @@ -630,6 +661,17 @@ padding: '20px', }, }, + // Contains edge for detail view hierarchy + { + selector: 'edge[kind = "contains"]', + style: { + width: 1.5, + 'line-color': 'rgba(100, 100, 100, 0.4)', + 'target-arrow-color': 'rgba(100, 100, 100, 0.4)', + 'target-arrow-shape': 'triangle', + 'curve-style': 'bezier', + }, + }, // ── Base edge ────────────────────────────────────────────────────── { selector: 'edge', @@ -899,8 +941,10 @@ // Apply per-node folder tint colors to file nodes (visual grouping by dir) cy.nodes('[kind = "file"]').forEach(function (n) { - var tint = getFolderTint(n.data('uri')); - if (tint) { n.style('background-color', tint); } + if (n.data('diffState') === 'unchanged') { + var tint = getFolderTint(n.data('uri')); + if (tint) { n.style('background-color', tint); } + } }); state.mode = 'overview'; @@ -971,9 +1015,37 @@ ' (types=' + typeNodes.length + ' free-funcs=' + freeNodes.length + ')' 
); - // ── Type nodes + their func compound children ────────────────────────── + // ── Type nodes + their func compound children + free funcs ───────────── const detailFuncIds = new Set(); + freeNodes.forEach(function (f) { + const fFull = f.name ?? f.id; + detailElements.push({ + data: { + id: f.id, + label: truncateLabel(fFull, 18), + fullLabel: fFull, + kind: 'func', + uri: f.uri ?? '', + line: typeof f.line === 'number' ? f.line : 0, + // free function is top-level + diffState: addedNodeIds.has(f.id) + ? 'added' : changedNodeIds.has(f.id) ? 'changed' : 'unchanged', + impacted: impactIds.has(f.id), + }, + }); + detailFuncIds.add(f.id); + + detailElements.push({ + data: { + id: 'contains_' + fileNodeId + '_' + f.id, + source: fileNodeId, + target: f.id, + kind: 'contains' + } + }); + }); + typeNodes.forEach(function (t) { const tFull = t.name ?? t.id; detailElements.push({ @@ -1009,13 +1081,31 @@ kind: 'func', uri: f.uri ?? '', line: typeof f.line === 'number' ? f.line : 0, - parent: t.id, // func is a compound child of its type + // no compound parent in Cytoscape - using structural edges diffState: addedNodeIds.has(f.id) ? 'added' : changedNodeIds.has(f.id) ? 
'changed' : 'unchanged', impacted: impactIds.has(f.id), }, }); detailFuncIds.add(f.id); + + detailElements.push({ + data: { + id: 'contains_' + t.id + '_' + f.id, + source: t.id, + target: f.id, + kind: 'contains' + } + }); + }); + + detailElements.push({ + data: { + id: 'contains_' + fileNodeId + '_' + t.id, + source: fileNodeId, + target: t.id, + kind: 'contains' + } }); }); @@ -1050,37 +1140,16 @@ return detailElements; } - // ── layoutDetailTypeNodes ───────────────────────────────────────────────── - // Positions the file context card (if present) at the top, then stacks - // visible type nodes below it in a strict vertical column: - // - // file-context (at y = 0) - // ——— GROUP_GAP/2 ——— - // type₀ (at y = fileHeight + GROUP_GAP/2) - // ——— GROUP_GAP ——— - // type₁ (at y = type₀.bottom + GROUP_GAP) - // ... - // - // All nodes share x = 0; deferredFit centres them in the viewport. + // Layout uses breadthfirst to build a proper tree structure function layoutDetailTypeNodes() { - var typeNodes = cy.nodes('[kind = "type"]').not('.hidden-node'); - if (typeNodes.length === 0) { return; } - - var curY = 0; - - // Position file context node above the type stack if present in detail view - var fileCtx = cy.nodes('[kind = "file"]'); - if (fileCtx.length > 0) { - var fh = Math.max(32, fileCtx.height() || 32); - fileCtx.position({ x: 0, y: curY + fh / 2 }); - curY += fh + Math.round(GROUP_GAP / 2); - } - - typeNodes.forEach(function (tn) { - var th = Math.max(32, tn.height() || 32); - tn.position({ x: 0, y: curY + th / 2 }); - curY += th + GROUP_GAP; - }); + cy.layout({ + name: 'breadthfirst', + directed: true, + spacingFactor: 1.5, + fit: true, + padding: 60, + roots: cy.nodes('[kind = "file"]') + }).run(); } // ── showFileDetail ──────────────────────────────────────────────────────── @@ -1118,6 +1187,9 @@ const typeCount = detailElements.filter(function (e) { return e.data.kind === 'type' && !e.data.parent; }).length; + const freeFuncCount = 
detailElements.filter(function (e) { + return e.data.kind === 'func' && !e.data.parent; + }).length; const funcCount = detailElements.filter(function (e) { return e.data.kind === 'func'; }).length; @@ -1131,9 +1203,9 @@ ' total=' + detailElements.length ); - if (typeCount === 0) { - // File has no type children — stay on overview (navigation to file still happened) - console.log('[FlowMapDebug] showFileDetail: no types in "' + fileNodeId + '" — staying in overview'); + if (typeCount === 0 && freeFuncCount === 0) { + // File has no type or free func children — stay on overview + console.log('[FlowMapDebug] showFileDetail: no children in "' + fileNodeId + '" — staying in overview'); return; } @@ -1157,49 +1229,9 @@ cy.elements().remove(); cy.add(detailElements); - // Log element counts immediately after cy.add to confirm rendering pipeline state - const cyTypesAfterAdd = cy.nodes('[kind = "type"]'); - const cyFuncsAfterAdd = cy.nodes('[kind = "func"]'); - console.log( - '[FlowMapDebug] showFileDetail: after cy.add — total cy nodes=' + cy.nodes().length + - ' type nodes=' + cyTypesAfterAdd.length + - ' func nodes=' + cyFuncsAfterAdd.length + - ' visible types=' + cyTypesAfterAdd.not('.hidden-node').length - ); - - // Func nodes start hidden — expand on type click - cy.nodes('[kind = "func"]').addClass('hidden-node'); - - // ── Defensive guard (Part 3) ───────────────────────────────────────────── - // If type nodes were built but Cytoscape shows none without hidden-node, - // force-remove the class so they can render. Guards against compound-node - // sizing edge cases where the class gets applied unexpectedly. 
/**
 * Toggle the visibility of a node's children.
 *
 * - In 'file-detail' mode only type nodes react: the targets of the
 *   "contains" edges leaving the type are hidden if any is currently visible,
 *   otherwise shown; the detail layout and calls edges are then refreshed.
 * - In 'calls' mode a file node toggles its direct children (hiding also
 *   hides every descendant) and a type node toggles its func children.
 * - In any other mode, or for an unknown node id, nothing happens.
 *
 * @param {string} nodeId id of the node whose children should be toggled
 */
function toggleExpand(nodeId) {
  const target = cy.getElementById(nodeId);
  if (!target || target.length === 0) { return; }
  const targetKind = target.data('kind');

  if (state.mode === 'file-detail') {
    if (targetKind !== 'type') { return; }

    // Members are found through "contains" edges rather than compound parenting.
    const members = cy.edges('[kind = "contains"]').filter(function (edge) {
      return edge.source().id() === target.id();
    }).targets();

    if (members.not('.hidden-node').length > 0) {
      members.addClass('hidden-node');
    } else {
      members.removeClass('hidden-node');
    }

    layoutDetailTypeNodes();
    syncCallsEdges();
    return;
  }

  if (state.mode !== 'calls') { return; }
  // Only file and type nodes are expandable in calls mode.
  if (targetKind !== 'file' && targetKind !== 'type') { return; }

  if (targetKind === 'file') {
    const children = target.children();
    const expanded = children.not('.hidden-node').length > 0;

    if (expanded) {
      target.descendants().addClass('hidden-node');
      children.addClass('hidden-node');
    } else {
      children.removeClass('hidden-node');
      if (children.length > 0) {
        layoutChildrenOf(target);
      }
    }
  } else {
    const funcs = target.children('[kind = "func"]');
    const expanded = funcs.not('.hidden-node').length > 0;

    if (expanded) {
      funcs.addClass('hidden-node');
    } else {
      funcs.removeClass('hidden-node');
      if (funcs.length > 0) {
        layoutChildrenOf(target);
      }
    }
  }

  syncCallsEdges();
}
like `NetworkManager.shared.connect()`. + // Lowercase roots (e.g. self.manager.connect(), service.api.call()) + // stay unresolved to avoid false same-type resolution in Rust. + calleeBase = baseRef.baseName.text } else { // Prevent treating complex chains or implicit `.foo()` as bare calls in Rust engine calleeBase = "" diff --git a/reports/replay-validation-bundle.json b/reports/replay-validation-bundle.json new file mode 100644 index 0000000..b63cb3c --- /dev/null +++ b/reports/replay-validation-bundle.json @@ -0,0 +1,79 @@ +{ + "created_at": "2026-03-07T08:09:16.408Z", + "total_reports": 3, + "total_pairs": 209, + "total_swift_pairs": 112, + "total_non_swift_pairs": 97, + "confusion": { + "tp": 106, + "tn": 97, + "fp": 0, + "fn": 6 + }, + "metrics": { + "non_swift_fp_rate": 0, + "swift_detection_rate": 0.9464, + "overall_match_rate": 0.9713 + }, + "totals": { + "failed_pairs": 0, + "warn_pairs": 6, + "error_pairs": 0 + }, + "rows": [ + { + "report": "replay-heavy-swift-full-strict.json", + "repo": "flowmap-heavy-swift-repo", + "strict_swift": true, + "pairs": 120, + "swift_pairs": 100, + "non_swift_pairs": 20, + "tp": 100, + "tn": 20, + "fp": 0, + "fn": 0, + "non_swift_fp_rate": 0, + "swift_detection_rate": 1, + "gate_passed": true, + "failed_pairs": 0, + "warn_pairs": 0, + "error_pairs": 0 + }, + { + "report": "replay-flowmap-75.json", + "repo": "FlowMap", + "strict_swift": false, + "pairs": 74, + "swift_pairs": 4, + "non_swift_pairs": 70, + "tp": 4, + "tn": 70, + "fp": 0, + "fn": 0, + "non_swift_fp_rate": 0, + "swift_detection_rate": 1, + "gate_passed": true, + "failed_pairs": 0, + "warn_pairs": 0, + "error_pairs": 0 + }, + { + "report": "replay-kdecoder.json", + "repo": "KDecoder", + "strict_swift": false, + "pairs": 15, + "swift_pairs": 8, + "non_swift_pairs": 7, + "tp": 2, + "tn": 7, + "fp": 0, + "fn": 6, + "non_swift_fp_rate": 0, + "swift_detection_rate": 0.25, + "gate_passed": true, + "failed_pairs": 0, + "warn_pairs": 6, + "error_pairs": 0 + } + ] +} 
diff --git a/reports/replay-validation-bundle.md b/reports/replay-validation-bundle.md new file mode 100644 index 0000000..7c1f93b --- /dev/null +++ b/reports/replay-validation-bundle.md @@ -0,0 +1,25 @@ +# Commit Replay Validation Summary + +- Generated: 2026-03-07T08:09:16.408Z +- Reports merged: 3 +- Total commit pairs: 209 +- Swift / Non-Swift pairs: 112 / 97 +- TP / TN / FP / FN: 106 / 97 / 0 / 6 +- Non-Swift FP Rate: 0% +- Swift Detection Rate: 94.64% +- Overall Match Rate: 97.13% + +## Per Repo + +| Repo | Strict | Pairs | Swift | Non-Swift | TP | TN | FP | FN | Non-Swift FP Rate | Swift Detect Rate | Gate | +|---|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:| +| flowmap-heavy-swift-repo | yes | 120 | 100 | 20 | 100 | 20 | 0 | 0 | 0% | 100% | PASS | +| FlowMap | no | 74 | 4 | 70 | 4 | 70 | 0 | 0 | 0% | 100% | PASS | +| KDecoder | no | 15 | 8 | 7 | 2 | 7 | 0 | 6 | 0% | 25% | PASS | + +## Interpretation + +- Non-Swift FP Rate measures hallucination risk on commits without Swift changes. +- Swift Detection Rate measures whether any graph diff was detected when Swift files changed. +- Non-strict reports treat Swift no-op commits as valid warnings rather than failures. 
+ diff --git a/scripts/build_replay_summary.mjs b/scripts/build_replay_summary.mjs new file mode 100644 index 0000000..2d3a762 --- /dev/null +++ b/scripts/build_replay_summary.mjs @@ -0,0 +1,174 @@ +#!/usr/bin/env node + +import fs from "fs"; +import path from "path"; + +function fail(msg) { + console.error(`ERROR: ${msg}`); + process.exit(1); +} + +function parseArgs(argv) { + const out = { + reports: [], + jsonOut: "", + mdOut: "", + }; + for (let i = 0; i < argv.length; i++) { + const a = argv[i]; + if (a === "--report") { + const v = argv[++i]; + if (!v) fail("missing value for --report"); + out.reports.push(path.resolve(v)); + continue; + } + if (a === "--json-out") { + const v = argv[++i]; + if (!v) fail("missing value for --json-out"); + out.jsonOut = path.resolve(v); + continue; + } + if (a === "--md-out") { + const v = argv[++i]; + if (!v) fail("missing value for --md-out"); + out.mdOut = path.resolve(v); + continue; + } + if (a === "--help" || a === "-h") { + console.log( + "Usage: node scripts/build_replay_summary.mjs --report [--report ...] 
/**
 * Derive a short display label (the last path segment) from a repo path.
 *
 * Both `/` and `\` are treated as separators, so a report produced on
 * Windows is labelled the same way when summarized on another platform.
 *
 * @param {string|undefined|null} repoPath repo path recorded in a replay report
 * @returns {string} last non-empty path segment; "unknown" when the path is
 *   missing or already "unknown"; the input itself when it has no segments
 *   (for example "/"), so the label is never empty
 */
function repoLabel(repoPath) {
  if (!repoPath || repoPath === "unknown") return "unknown";
  const raw = String(repoPath);
  // Splitting on runs of separators also discards trailing slashes.
  const segments = raw.split(/[\\/]+/).filter((segment) => segment.length > 0);
  return segments.length > 0 ? segments[segments.length - 1] : raw;
}
0, + }; + }); + + const totalPairs = rows.reduce((a, r) => a + r.pairs, 0); + const totalSwift = rows.reduce((a, r) => a + r.swift_pairs, 0); + const totalNonSwift = rows.reduce((a, r) => a + r.non_swift_pairs, 0); + const totalTp = rows.reduce((a, r) => a + r.tp, 0); + const totalTn = rows.reduce((a, r) => a + r.tn, 0); + const totalFp = rows.reduce((a, r) => a + r.fp, 0); + const totalFn = rows.reduce((a, r) => a + r.fn, 0); + const totalFailedPairs = rows.reduce((a, r) => a + r.failed_pairs, 0); + const totalWarnPairs = rows.reduce((a, r) => a + r.warn_pairs, 0); + const totalErrorPairs = rows.reduce((a, r) => a + r.error_pairs, 0); + + const summary = { + created_at: new Date().toISOString(), + total_reports: rows.length, + total_pairs: totalPairs, + total_swift_pairs: totalSwift, + total_non_swift_pairs: totalNonSwift, + confusion: { + tp: totalTp, + tn: totalTn, + fp: totalFp, + fn: totalFn, + }, + metrics: { + non_swift_fp_rate: totalNonSwift === 0 ? 0 : Number((totalFp / totalNonSwift).toFixed(4)), + swift_detection_rate: totalSwift === 0 ? 0 : Number((totalTp / totalSwift).toFixed(4)), + overall_match_rate: totalPairs === 0 ? 
0 : Number(((totalTp + totalTn) / totalPairs).toFixed(4)), + }, + totals: { + failed_pairs: totalFailedPairs, + warn_pairs: totalWarnPairs, + error_pairs: totalErrorPairs, + }, + rows, + }; + + const md = [ + "# Commit Replay Validation Summary", + "", + `- Generated: ${summary.created_at}`, + `- Reports merged: ${summary.total_reports}`, + `- Total commit pairs: ${summary.total_pairs}`, + `- Swift / Non-Swift pairs: ${summary.total_swift_pairs} / ${summary.total_non_swift_pairs}`, + `- TP / TN / FP / FN: ${totalTp} / ${totalTn} / ${totalFp} / ${totalFn}`, + `- Non-Swift FP Rate: ${pct(totalFp, summary.total_non_swift_pairs)}%`, + `- Swift Detection Rate: ${pct(totalTp, summary.total_swift_pairs)}%`, + `- Overall Match Rate: ${pct(totalTp + totalTn, summary.total_pairs)}%`, + "", + "## Per Repo", + "", + "| Repo | Strict | Pairs | Swift | Non-Swift | TP | TN | FP | FN | Non-Swift FP Rate | Swift Detect Rate | Gate |", + "|---|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|", + ...rows.map((r) => + `| ${r.repo} | ${r.strict_swift ? "yes" : "no"} | ${r.pairs} | ${r.swift_pairs} | ${r.non_swift_pairs} | ${r.tp} | ${r.tn} | ${r.fp} | ${r.fn} | ${pct(r.fp, r.non_swift_pairs)}% | ${pct(r.tp, r.swift_pairs)}% | ${ + r.gate_passed ? 
"PASS" : "FAIL" + } |` + ), + "", + "## Interpretation", + "", + "- Non-Swift FP Rate measures hallucination risk on commits without Swift changes.", + "- Swift Detection Rate measures whether any graph diff was detected when Swift files changed.", + "- Non-strict reports treat Swift no-op commits as valid warnings rather than failures.", + "", + ].join("\n"); + + ensureParent(args.jsonOut); + ensureParent(args.mdOut); + fs.writeFileSync(args.jsonOut, `${JSON.stringify(summary, null, 2)}\n`, "utf8"); + fs.writeFileSync(args.mdOut, `${md}\n`, "utf8"); + + console.log(`json: ${args.jsonOut}`); + console.log(`md: ${args.mdOut}`); + console.log(`overall match rate: ${pct(totalTp + totalTn, summary.total_pairs)}%`); +} + +main(); diff --git a/scripts/run_commit_replay.mjs b/scripts/run_commit_replay.mjs new file mode 100644 index 0000000..1af9ebf --- /dev/null +++ b/scripts/run_commit_replay.mjs @@ -0,0 +1,375 @@ +#!/usr/bin/env node + +import fs from "fs"; +import os from "os"; +import path from "path"; +import { fileURLToPath } from "url"; +import { spawnSync } from "child_process"; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = path.dirname(__filename); +const repoRoot = path.resolve(__dirname, ".."); +const flowmapBin = path.join(repoRoot, "target", "debug", "flowmap"); +const parserBin = path.join( + repoRoot, + "parsers", + "swift-ast", + ".build", + "debug", + "flowmap-swift-ast", +); + +function fail(msg) { + console.error(`ERROR: ${msg}`); + process.exit(1); +} + +function run(cmd, args, opts = {}) { + const result = spawnSync(cmd, args, { + cwd: opts.cwd ?? repoRoot, + input: opts.input, + encoding: "utf8", + env: opts.env ?? 
process.env, + maxBuffer: 20 * 1024 * 1024, + }); + if (result.status !== 0) { + const stderr = (result.stderr || "").trim(); + const stdout = (result.stdout || "").trim(); + throw new Error( + `Command failed: ${cmd} ${args.join(" ")}\n` + + `exit=${result.status}\n` + + `stdout=${stdout}\n` + + `stderr=${stderr}`, + ); + } + return result.stdout ?? ""; +} + +function git(cwd, args) { + return run("git", args, { cwd }); +} + +function ensureDirForFile(filePath) { + fs.mkdirSync(path.dirname(filePath), { recursive: true }); +} + +function writeJsonReport(reportPath, data) { + ensureDirForFile(reportPath); + fs.writeFileSync(reportPath, `${JSON.stringify(data, null, 2)}\n`, "utf8"); +} + +function parseArgs(argv) { + const out = { + repo: "", + count: 60, + rev: "HEAD", + strictSwift: false, + keepTemp: false, + report: path.join(repoRoot, "reports", "commit-replay-report.json"), + }; + + for (let i = 0; i < argv.length; i++) { + const a = argv[i]; + if (a === "--repo") { + out.repo = path.resolve(argv[++i] ?? ""); + if (!out.repo) fail("missing value for --repo"); + continue; + } + if (a === "--count") { + out.count = Number(argv[++i] ?? "0"); + if (!Number.isInteger(out.count) || out.count < 2) { + fail("--count must be an integer >= 2"); + } + continue; + } + if (a === "--rev") { + out.rev = argv[++i] ?? ""; + if (!out.rev) fail("missing value for --rev"); + continue; + } + if (a === "--report") { + out.report = path.resolve(argv[++i] ?? 
""); + if (!out.report) fail("missing value for --report"); + continue; + } + if (a === "--strict-swift") { + out.strictSwift = true; + continue; + } + if (a === "--keep-temp") { + out.keepTemp = true; + continue; + } + if (a === "--help" || a === "-h") { + console.log( + "Usage: node scripts/run_commit_replay.mjs --repo [--count 60] [--rev HEAD] [--strict-swift] [--report ] [--keep-temp]", + ); + process.exit(0); + } + fail(`unknown arg: ${a}`); + } + + if (!out.repo) fail("--repo is required"); + return out; +} + +function assertReadable(pathValue, label) { + if (!fs.existsSync(pathValue)) { + fail(`${label} not found: ${pathValue}`); + } +} + +function analyze(workspacePath) { + const req = JSON.stringify({ + protocolVersion: "0.1", + requestId: "replay", + cmd: "analyze", + payload: { path: workspacePath }, + }) + "\n"; + + const env = { + ...process.env, + PATH: `${path.dirname(parserBin)}:${process.env.PATH ?? ""}`, + }; + + const stdout = run(flowmapBin, [], { + cwd: repoRoot, + input: req, + env, + }); + + const line = stdout + .split(/\r?\n/) + .map((l) => l.trim()) + .find((l) => l.length > 0); + + if (!line) throw new Error("flowmap returned empty output"); + const json = JSON.parse(line); + if (!json.ok) throw new Error(`flowmap error: ${JSON.stringify(json.error)}`); + return json.payload; +} + +function extractCounts(payload) { + const d = payload.diff ?? {}; + return { + added_nodes: (d.added_nodes ?? []).length, + removed_nodes: (d.removed_nodes ?? []).length, + changed_nodes: (d.changed_nodes ?? []).length, + added_edges: (d.added_edges ?? []).length, + removed_edges: (d.removed_edges ?? []).length, + impact: (payload.impact ?? 
/**
 * Split command output into its non-blank lines.
 *
 * @param {string} stdout raw text, with LF or CRLF line endings
 * @returns {string[]} each line trimmed of surrounding whitespace, with
 *   lines that are empty after trimming dropped
 */
function linesToList(stdout) {
  const items = [];
  for (const raw of stdout.split(/\r?\n/)) {
    const line = raw.trim();
    if (line.length > 0) {
      items.push(line);
    }
  }
  return items;
}
+ const existingSwift = linesToList(git(cwd, ["ls-files"])).filter((p) => p.endsWith(".swift")); + for (const rel of existingSwift) { + fs.rmSync(path.join(cwd, rel), { force: true }); + } + + const currSwift = linesToList(git(cwd, ["ls-tree", "-r", "--name-only", curr])).filter((p) => + p.endsWith(".swift"), + ); + for (const rel of currSwift) { + ensureDirForFile(path.join(cwd, rel)); + git(cwd, ["checkout", "--quiet", curr, "--", rel]); + } + const payload = analyze(cwd); + const counts = extractCounts(payload); + return { swiftFiles, counts, engineChanged: hasAnyDiff(counts) }; +} + +function main() { + const args = parseArgs(process.argv.slice(2)); + assertReadable(flowmapBin, "flowmap binary"); + assertReadable(parserBin, "flowmap-swift-ast binary"); + assertReadable(args.repo, "repo path"); + + // Validate git repo early. + const inside = git(args.repo, ["rev-parse", "--is-inside-work-tree"]).trim(); + if (inside !== "true") { + fail(`not a git repository: ${args.repo}`); + } + + const commits = linesToList( + git(args.repo, ["rev-list", "--reverse", `--max-count=${args.count}`, args.rev]), + ); + if (commits.length < 2) { + fail(`not enough commits to replay under rev ${args.rev}`); + } + + const tempRoot = fs.mkdtempSync(path.join(os.tmpdir(), "flowmap-commit-replay-")); + const workRepo = path.join(tempRoot, "repo"); + git(repoRoot, ["clone", "--quiet", "--no-hardlinks", args.repo, workRepo]); + + const results = []; + let tp = 0; + let tn = 0; + let fp = 0; + let fn = 0; + let errors = 0; + let swiftPairs = 0; + let nonSwiftPairs = 0; + + try { + for (let i = 1; i < commits.length; i++) { + const prev = commits[i - 1]; + const curr = commits[i]; + const currMeta = getCommitMeta(workRepo, curr); + + try { + const { swiftFiles, counts, engineChanged } = replayPair(workRepo, prev, curr); + const swiftChanged = swiftFiles.length > 0; + const expectChanged = args.strictSwift ? swiftChanged : (swiftChanged ? 
null : false); + let status = "pass"; + + if (swiftChanged) { + swiftPairs++; + } else { + nonSwiftPairs++; + } + + if (swiftChanged && engineChanged) tp++; + if (swiftChanged && !engineChanged) fn++; + if (!swiftChanged && !engineChanged) tn++; + if (!swiftChanged && engineChanged) fp++; + + if (!swiftChanged && engineChanged) { + status = "fail_non_swift_fp"; + } else if (args.strictSwift && swiftChanged && !engineChanged) { + status = "fail_swift_fn"; + } else if (!args.strictSwift && swiftChanged && !engineChanged) { + status = "warn_swift_no_graph_change"; + } + + results.push({ + prev, + curr, + commit: currMeta, + swift_changed: swiftChanged, + swift_files: swiftFiles, + engine_changed: engineChanged, + expected_changed: expectChanged, + status, + counts, + }); + } catch (error) { + errors++; + results.push({ + prev, + curr, + commit: currMeta, + status: "error", + error: String(error.message ?? error), + }); + } + } + } finally { + if (!args.keepTemp) { + fs.rmSync(tempRoot, { recursive: true, force: true }); + } + } + + const totalPairs = results.length; + const failedPairs = results.filter((r) => r.status.startsWith("fail_") || r.status === "error").length; + const warnedPairs = results.filter((r) => r.status.startsWith("warn_")).length; + const passPairs = totalPairs - failedPairs - warnedPairs; + const nonSwiftFpRate = nonSwiftPairs === 0 ? 0 : fp / nonSwiftPairs; + const swiftDetectRate = swiftPairs === 0 ? 0 : tp / swiftPairs; + const gate = { + max_non_swift_fp_rate: 0.0, + max_errors: 0, + strict_swift_required: args.strictSwift, + }; + const gatePassed = nonSwiftFpRate <= gate.max_non_swift_fp_rate && errors <= gate.max_errors && + (args.strictSwift ? 
fn === 0 : true); + + const report = { + created_at: new Date().toISOString(), + mode: "commit-replay", + config: { + repo: args.repo, + rev: args.rev, + count: args.count, + strict_swift: args.strictSwift, + keep_temp: args.keepTemp, + report_path: args.report, + }, + binaries: { + flowmap: flowmapBin, + parser: parserBin, + }, + summary: { + total_pairs: totalPairs, + pass_pairs: passPairs, + warn_pairs: warnedPairs, + failed_pairs: failedPairs, + error_pairs: errors, + swift_pairs: swiftPairs, + non_swift_pairs: nonSwiftPairs, + confusion: { tp, tn, fp, fn }, + non_swift_fp_rate: Number(nonSwiftFpRate.toFixed(4)), + swift_detection_rate: Number(swiftDetectRate.toFixed(4)), + gate, + gate_passed: gatePassed, + }, + results, + }; + + writeJsonReport(args.report, report); + + console.log("=== FlowMap Commit Replay ==="); + console.log(`repo: ${args.repo}`); + console.log(`pairs: ${totalPairs}`); + console.log(`pass/warn/fail: ${passPairs}/${warnedPairs}/${failedPairs}`); + console.log(`swift/non-swift: ${swiftPairs}/${nonSwiftPairs}`); + console.log(`tp/tn/fp/fn: ${tp}/${tn}/${fp}/${fn}`); + console.log(`non-swift fp rate: ${report.summary.non_swift_fp_rate}`); + console.log(`swift detect rate: ${report.summary.swift_detection_rate}`); + console.log(`gate: ${gatePassed ? 
"PASS" : "FAIL"}`); + console.log(`report: ${args.report}`); + if (args.keepTemp) { + console.log(`temp clone kept at: ${tempRoot}`); + } + + if (!gatePassed) { + process.exitCode = 2; + } +} + +main(); diff --git a/scripts/run_sample_scenarios.mjs b/scripts/run_sample_scenarios.mjs new file mode 100644 index 0000000..1b32225 --- /dev/null +++ b/scripts/run_sample_scenarios.mjs @@ -0,0 +1,1406 @@ +#!/usr/bin/env node + +import fs from "fs"; +import os from "os"; +import path from "path"; +import { fileURLToPath } from "url"; +import { spawnSync } from "child_process"; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = path.dirname(__filename); +const repoRoot = path.resolve(__dirname, ".."); +const flowmapBin = path.join(repoRoot, "target", "debug", "flowmap"); +const parserBin = path.join( + repoRoot, + "parsers", + "swift-ast", + ".build", + "debug", + "flowmap-swift-ast", +); +const defaultReportPath = path.join( + repoRoot, + "reports", + "sample-scenarios-report.json", +); + +function parseArgs(argv) { + let reportPath = defaultReportPath; + for (let i = 0; i < argv.length; i++) { + const a = argv[i]; + if (a === "--report") { + const next = argv[i + 1]; + if (!next) { + fail("missing value for --report"); + } + reportPath = path.resolve(next); + i++; + continue; + } + if (a === "--help" || a === "-h") { + console.log("Usage: node scripts/run_sample_scenarios.mjs [--report ]"); + process.exit(0); + } + fail(`unknown arg: ${a}`); + } + return { reportPath }; +} + +function fail(msg) { + console.error(`ERROR: ${msg}`); + process.exit(1); +} + +function assertExists(p, label) { + if (!fs.existsSync(p)) { + fail(`${label} not found: ${p}`); + } +} + +function run(cmd, args, opts = {}) { + const result = spawnSync(cmd, args, { + cwd: opts.cwd ?? repoRoot, + input: opts.input, + encoding: "utf8", + env: opts.env ?? 
process.env, + maxBuffer: 10 * 1024 * 1024, + }); + if (result.status !== 0) { + const stderr = (result.stderr || "").trim(); + const stdout = (result.stdout || "").trim(); + throw new Error( + `Command failed: ${cmd} ${args.join(" ")}\n` + + `exit=${result.status}\n` + + `stdout=${stdout}\n` + + `stderr=${stderr}`, + ); + } + return result.stdout ?? ""; +} + +function git(cwd, args) { + return run("git", args, { cwd }); +} + +function ensureDirForFile(filePath) { + fs.mkdirSync(path.dirname(filePath), { recursive: true }); +} + +function writeFile(cwd, rel, content) { + const abs = path.join(cwd, rel); + ensureDirForFile(abs); + fs.writeFileSync(abs, content, "utf8"); +} + +function readFile(cwd, rel) { + return fs.readFileSync(path.join(cwd, rel), "utf8"); +} + +function replaceOnce(cwd, rel, from, to) { + const oldText = readFile(cwd, rel); + if (!oldText.includes(from)) { + throw new Error(`replaceOnce target not found in ${rel}: ${from}`); + } + const newText = oldText.replace(from, to); + fs.writeFileSync(path.join(cwd, rel), newText, "utf8"); +} + +function appendText(cwd, rel, text) { + fs.appendFileSync(path.join(cwd, rel), text, "utf8"); +} + +function removeSnippet(cwd, rel, snippet) { + replaceOnce(cwd, rel, snippet, ""); +} + +function deleteFile(cwd, rel) { + fs.rmSync(path.join(cwd, rel), { force: true }); +} + +function renameFile(cwd, fromRel, toRel) { + const from = path.join(cwd, fromRel); + const to = path.join(cwd, toRel); + ensureDirForFile(to); + fs.renameSync(from, to); +} + +function analyze(workspacePath) { + const req = JSON.stringify({ + protocolVersion: "0.1", + requestId: "sample", + cmd: "analyze", + payload: { path: workspacePath }, + }) + "\n"; + const env = { + ...process.env, + PATH: `${path.dirname(parserBin)}:${process.env.PATH ?? 
""}`, + }; + const stdout = run(flowmapBin, [], { cwd: repoRoot, input: req, env }); + const line = stdout + .split(/\r?\n/) + .map((l) => l.trim()) + .find((l) => l.length > 0); + if (!line) { + throw new Error("flowmap returned empty output"); + } + const json = JSON.parse(line); + if (!json.ok) { + throw new Error(`flowmap error response: ${JSON.stringify(json.error)}`); + } + return json.payload; +} + +function extractCounts(payload) { + const d = payload.diff ?? {}; + return { + added_nodes: (d.added_nodes ?? []).length, + removed_nodes: (d.removed_nodes ?? []).length, + changed_nodes: (d.changed_nodes ?? []).length, + added_edges: (d.added_edges ?? []).length, + removed_edges: (d.removed_edges ?? []).length, + impact: (payload.impact ?? []).length, + }; +} + +function rule(field, op) { + return { field, op }; +} + +function writeJsonReport(reportPath, data) { + ensureDirForFile(reportPath); + fs.writeFileSync(reportPath, `${JSON.stringify(data, null, 2)}\n`, "utf8"); +} + +function evalRule(counts, r) { + const v = counts[r.field] ?? 0; + if (r.op === "gt0") { + return { pass: v > 0, type: v > 0 ? "ok" : "fn" }; + } + if (r.op === "eq0") { + return { pass: v === 0, type: v === 0 ? 
"ok" : "fp" }; + } + throw new Error(`unknown rule op: ${r.op}`); +} + +function checkScenario(counts, rules) { + const results = rules.map((r) => ({ r, ...evalRule(counts, r) })); + const pass = results.every((x) => x.pass); + const fp = results.filter((x) => x.type === "fp").length; + const fn = results.filter((x) => x.type === "fn").length; + return { pass, fp, fn, results }; +} + +function createBaselineRepo() { + const root = fs.mkdtempSync(path.join(os.tmpdir(), "flowmap-scenarios-")); + git(root, ["init"]); + git(root, ["config", "user.email", "flowmap-scenarios@example.com"]); + git(root, ["config", "user.name", "FlowMap Scenario Runner"]); + + const files = { + "Sources/App/DemoApp.swift": `import Foundation + +@main +struct DemoApp { + static func main() async { + let controller = DemoViewController() + await controller.viewDidLoad() + await controller.refreshButtonTapped() + } +} +`, + "Sources/Core/Protocols.swift": `import Foundation + +protocol NetworkServiceProtocol { + func connectToNetwork() async throws -> String + func disconnect(reason: String) +} + +protocol ViewModelProtocol { + var networkService: NetworkServiceProtocol { get } + func fetchData() async + func refreshData() async +} +`, + "Sources/Models/NetworkError.swift": `import Foundation + +struct NetworkError: Error { + enum Kind { + case authenticationFail + case unknown + } + let kind: Kind +} +`, + "Sources/Services/NetworkService.swift": `import Foundation + +class NetworkService: NetworkServiceProtocol { + static let shared = NetworkService() + + func connectToNetwork() async throws -> String { + let success = Bool.random() + guard success else { + throw NetworkError(kind: .unknown) + } + return try await authenticate() + } + + func disconnect(reason: String) { + print("Disconnecting from network: \\(reason)") + } + + private func authenticate() async throws -> String { + let authSuccess = Bool.random() + guard authSuccess else { + throw NetworkError(kind: .authenticationFail) + } 
+ return "Token123" + } +} +`, + "Sources/Features/Demo/DemoViewModel.swift": `import Foundation + +class DemoViewModel: ViewModelProtocol { + let networkService: NetworkServiceProtocol + + init(networkService: NetworkServiceProtocol = NetworkService.shared) { + self.networkService = networkService + } + + func fetchData() async { + do { + let token = try await networkService.connectToNetwork() + print("Connected with token: \\(token)") + networkService.disconnect(reason: "Auto disconnect after success") + } catch let error as NetworkError { + print("Failed to connect: \\(error.kind)") + networkService.disconnect(reason: "Error cleanup") + } catch { + print("Unexpected error: \\(error)") + } + } + + func refreshData() async { + await fetchData() + } +} +`, + "Sources/Features/Demo/DemoViewController.swift": `import Foundation + +class DemoViewController { + private let viewModel: ViewModelProtocol + + init(viewModel: ViewModelProtocol = DemoViewModel()) { + self.viewModel = viewModel + } + + func viewDidLoad() async { + setupUI() + await viewModel.fetchData() + } + + func setupUI() { + print("Initializing UI") + } + + func refreshButtonTapped() async { + await viewModel.refreshData() + } +} +`, + }; + + for (const [rel, content] of Object.entries(files)) { + writeFile(root, rel, content); + } + + git(root, ["add", "."]); + git(root, ["commit", "-m", "baseline"]); + return root; +} + +const F = { + app: "Sources/App/DemoApp.swift", + protocols: "Sources/Core/Protocols.swift", + model: "Sources/Models/NetworkError.swift", + service: "Sources/Services/NetworkService.swift", + vm: "Sources/Features/Demo/DemoViewModel.swift", + vc: "Sources/Features/Demo/DemoViewController.swift", +}; + +const scenarios = [ + { + id: "S01", + name: "Clean workspace should have zero diff", + mutate() {}, + rules: [ + rule("added_nodes", "eq0"), + rule("removed_nodes", "eq0"), + rule("changed_nodes", "eq0"), + rule("added_edges", "eq0"), + rule("removed_edges", "eq0"), + ], + }, + { + id: 
"S02", + name: "Add new utility file", + mutate(cwd) { + writeFile( + cwd, + "Sources/Utils/Logger.swift", + `import Foundation +func logInfo(_ msg: String) { print(msg) } +`, + ); + }, + rules: [rule("added_nodes", "gt0"), rule("removed_nodes", "eq0")], + }, + { + id: "S03", + name: "Add new type file", + mutate(cwd) { + writeFile( + cwd, + "Sources/Utils/DateFormatterUtil.swift", + `import Foundation +class DateFormatterUtil { + func format(_ value: String) -> String { value } +} +`, + ); + }, + rules: [rule("added_nodes", "gt0"), rule("removed_nodes", "eq0")], + }, + { + id: "S04", + name: "Delete model file", + mutate(cwd) { + deleteFile(cwd, F.model); + }, + rules: [rule("removed_nodes", "gt0"), rule("added_nodes", "eq0")], + }, + { + id: "S05", + name: "Delete service file", + mutate(cwd) { + deleteFile(cwd, F.service); + }, + rules: [rule("removed_nodes", "gt0"), rule("removed_edges", "gt0")], + }, + { + id: "S06", + name: "String literal change should not change graph", + mutate(cwd) { + replaceOnce(cwd, F.vc, `print("Initializing UI")`, `print("Init UI")`); + }, + rules: [ + rule("added_nodes", "eq0"), + rule("removed_nodes", "eq0"), + rule("changed_nodes", "eq0"), + rule("added_edges", "eq0"), + rule("removed_edges", "eq0"), + ], + }, + { + id: "S07", + name: "Trailing comment should not change graph", + mutate(cwd) { + appendText(cwd, F.app, `\n// trailing comment\n`); + }, + rules: [ + rule("added_nodes", "eq0"), + rule("removed_nodes", "eq0"), + rule("changed_nodes", "eq0"), + rule("added_edges", "eq0"), + rule("removed_edges", "eq0"), + ], + }, + { + id: "S08", + name: "Rename service method", + mutate(cwd) { + replaceOnce( + cwd, + F.service, + `func connectToNetwork() async throws -> String {`, + `func connect() async throws -> String {`, + ); + }, + rules: [ + rule("added_nodes", "gt0"), + rule("removed_nodes", "gt0"), + rule("removed_edges", "gt0"), + ], + }, + { + id: "S09", + name: "Add helper method to view model", + mutate(cwd) { + 
replaceOnce( + cwd, + F.vm, + ` func refreshData() async { + await fetchData() + } +} +`, + ` func refreshData() async { + await fetchData() + } + + func debugLog() { + print("debug") + } +} +`, + ); + }, + rules: [rule("added_nodes", "gt0"), rule("removed_nodes", "eq0")], + }, + { + id: "S10", + name: "Remove method from view controller", + mutate(cwd) { + removeSnippet( + cwd, + F.vc, + ` func setupUI() { + print("Initializing UI") + } + +`, + ); + }, + rules: [rule("removed_nodes", "gt0")], + }, + { + id: "S11", + name: "Add extra call in viewDidLoad", + mutate(cwd) { + replaceOnce( + cwd, + F.vc, + ` await viewModel.fetchData()`, + ` await viewModel.fetchData() + await refreshButtonTapped()`, + ); + }, + rules: [rule("added_edges", "gt0")], + }, + { + id: "S12", + name: "Remove existing call in viewDidLoad", + mutate(cwd) { + replaceOnce( + cwd, + F.vc, + ` setupUI() +`, + ``, + ); + }, + rules: [rule("removed_edges", "gt0")], + }, + { + id: "S13", + name: "Change call target fetchData -> refreshData", + mutate(cwd) { + replaceOnce( + cwd, + F.vc, + ` await viewModel.fetchData()`, + ` await viewModel.refreshData()`, + ); + }, + // Instance-base protocol calls are intentionally not resolved yet. + // Changing fetchData <-> refreshData therefore should not change graph edges. 
+ rules: [ + rule("added_nodes", "eq0"), + rule("removed_nodes", "eq0"), + rule("changed_nodes", "eq0"), + rule("added_edges", "eq0"), + rule("removed_edges", "eq0"), + ], + }, + { + id: "S14", + name: "Add protocol requirement", + mutate(cwd) { + replaceOnce( + cwd, + F.protocols, + ` func refreshData() async`, + ` func refreshData() async + func ping() async`, + ); + }, + rules: [rule("added_nodes", "gt0")], + }, + { + id: "S15", + name: "Remove protocol requirement", + mutate(cwd) { + replaceOnce(cwd, F.protocols, ` func refreshData() async\n`, ``); + }, + rules: [rule("removed_nodes", "gt0")], + }, + { + id: "S16", + name: "Change protocol signature", + mutate(cwd) { + replaceOnce( + cwd, + F.protocols, + ` func disconnect(reason: String)`, + ` func disconnect()`, + ); + }, + rules: [rule("added_nodes", "gt0"), rule("removed_nodes", "gt0")], + }, + { + id: "S17", + name: "Add nested model type", + mutate(cwd) { + replaceOnce( + cwd, + F.model, + ` let kind: Kind`, + ` let kind: Kind + + struct RetryPolicy { + let maxRetries: Int + }`, + ); + }, + rules: [rule("added_nodes", "gt0")], + }, + { + id: "S18", + name: "Rename controller type", + mutate(cwd) { + replaceOnce(cwd, F.vc, `class DemoViewController {`, `class DemoController {`); + }, + rules: [rule("added_nodes", "gt0"), rule("removed_nodes", "gt0")], + }, + { + id: "S19", + name: "Move setupUI from controller to view model", + mutate(cwd) { + removeSnippet( + cwd, + F.vc, + ` func setupUI() { + print("Initializing UI") + } + +`, + ); + replaceOnce( + cwd, + F.vm, + ` func refreshData() async { + await fetchData() + } +`, + ` func refreshData() async { + await fetchData() + } + + func setupUI() { + print("vm setup") + } +`, + ); + }, + rules: [rule("added_nodes", "gt0"), rule("removed_nodes", "gt0")], + }, + { + id: "S20", + name: "Add uppercase singleton-chain call", + mutate(cwd) { + replaceOnce( + cwd, + F.app, + ` await controller.viewDidLoad()`, + ` await controller.viewDidLoad() + let _ = try? 
await NetworkService.shared.connectToNetwork()`, + ); + }, + rules: [rule("added_edges", "gt0")], + }, + { + id: "S21", + name: "Add duplicate call edge (should stay zero diff)", + mutate(cwd) { + replaceOnce( + cwd, + F.vm, + ` func refreshData() async { + await fetchData() + }`, + ` func refreshData() async { + await fetchData() + await fetchData() + }`, + ); + }, + rules: [ + rule("added_nodes", "eq0"), + rule("removed_nodes", "eq0"), + rule("changed_nodes", "eq0"), + rule("added_edges", "eq0"), + rule("removed_edges", "eq0"), + ], + }, + { + id: "S22", + name: "Line shift should produce changed nodes", + mutate(cwd) { + replaceOnce( + cwd, + F.service, + `import Foundation`, + `import Foundation + +`, + ); + }, + rules: [ + rule("changed_nodes", "gt0"), + rule("added_nodes", "eq0"), + rule("removed_nodes", "eq0"), + ], + }, + { + id: "S23", + name: "Add new view model file and call it", + mutate(cwd) { + writeFile( + cwd, + "Sources/Features/Demo/AltViewModel.swift", + `import Foundation +class AltViewModel { + func run() async {} +} +`, + ); + replaceOnce( + cwd, + F.app, + ` await controller.refreshButtonTapped()`, + ` await controller.refreshButtonTapped() + let vm = AltViewModel() + await vm.run()`, + ); + }, + rules: [rule("added_nodes", "gt0"), rule("added_edges", "gt0")], + }, + { + id: "S24", + name: "Delete controller file", + mutate(cwd) { + deleteFile(cwd, F.vc); + }, + rules: [rule("removed_nodes", "gt0"), rule("removed_edges", "gt0")], + }, + { + id: "S25", + name: "Replace known call with unknown symbol", + mutate(cwd) { + replaceOnce( + cwd, + F.vc, + ` await viewModel.fetchData()`, + ` await viewModel.reloadData()`, + ); + }, + // Same limitation as S13: lowercase instance-base call sites are unresolved. 
+ rules: [ + rule("added_nodes", "eq0"), + rule("removed_nodes", "eq0"), + rule("changed_nodes", "eq0"), + rule("added_edges", "eq0"), + rule("removed_edges", "eq0"), + ], + }, + { + id: "S26", + name: "Add file with unresolved call", + mutate(cwd) { + writeFile( + cwd, + "Sources/Utils/Diagnostics.swift", + `import Foundation +func runDiag() { missingFunc() } +`, + ); + }, + rules: [rule("added_nodes", "gt0"), rule("removed_nodes", "eq0")], + }, + { + id: "S27", + name: "Rename file path (delete+add semantics)", + mutate(cwd) { + renameFile(cwd, F.service, "Sources/Services/NetSvc.swift"); + }, + rules: [rule("added_nodes", "gt0"), rule("removed_nodes", "gt0")], + }, + { + id: "S28", + name: "Change service method signature", + mutate(cwd) { + replaceOnce( + cwd, + F.service, + ` func disconnect(reason: String) {`, + ` func disconnect() {`, + ); + replaceOnce( + cwd, + F.service, + `print("Disconnecting from network: \\(reason)")`, + `print("Disconnecting from network")`, + ); + }, + rules: [rule("added_nodes", "gt0"), rule("removed_nodes", "gt0")], + }, + { + id: "S29", + name: "Add nested state enum to controller", + mutate(cwd) { + replaceOnce( + cwd, + F.vc, + `class DemoViewController { +`, + `class DemoViewController { + enum State { + case idle + } +`, + ); + }, + rules: [rule("added_nodes", "gt0")], + }, + { + id: "S30", + name: "Add one file and delete one file", + mutate(cwd) { + writeFile( + cwd, + "Sources/Helpers/Tracer.swift", + `import Foundation +struct Tracer { static func mark() {} } +`, + ); + deleteFile(cwd, F.model); + }, + rules: [rule("added_nodes", "gt0"), rule("removed_nodes", "gt0")], + }, + { + id: "S31", + name: "Add utility file with two free functions", + mutate(cwd) { + writeFile( + cwd, + "Sources/Utils/MathUtil.swift", + `import Foundation +func sum(_ a: Int, _ b: Int) -> Int { a + b } +func mul(_ a: Int, _ b: Int) -> Int { a * b } +`, + ); + }, + rules: [rule("added_nodes", "gt0"), rule("removed_nodes", "eq0")], + }, + { + id: 
"S32", + name: "Add core feature flag type file", + mutate(cwd) { + writeFile( + cwd, + "Sources/Core/FeatureFlag.swift", + `import Foundation +struct FeatureFlag { + let name: String +} +`, + ); + }, + rules: [rule("added_nodes", "gt0")], + }, + { + id: "S33", + name: "Delete app file", + mutate(cwd) { + deleteFile(cwd, F.app); + }, + rules: [rule("removed_nodes", "gt0"), rule("added_nodes", "eq0")], + }, + { + id: "S34", + name: "Delete protocols file", + mutate(cwd) { + deleteFile(cwd, F.protocols); + }, + rules: [rule("removed_nodes", "gt0")], + }, + { + id: "S35", + name: "Rename view model file path", + mutate(cwd) { + renameFile(cwd, F.vm, "Sources/Features/Demo/MainViewModel.swift"); + }, + rules: [rule("added_nodes", "gt0"), rule("removed_nodes", "gt0")], + }, + { + id: "S36", + name: "Rename model type name", + mutate(cwd) { + replaceOnce(cwd, F.model, `struct NetworkError: Error {`, `struct AppNetworkError: Error {`); + }, + rules: [rule("added_nodes", "gt0"), rule("removed_nodes", "gt0")], + }, + { + id: "S37", + name: "Add helper method to service", + mutate(cwd) { + replaceOnce( + cwd, + F.service, + ` private func authenticate() async throws -> String {`, + ` func ping() -> Bool { + true + } + + private func authenticate() async throws -> String {`, + ); + }, + rules: [rule("added_nodes", "gt0"), rule("removed_nodes", "eq0")], + }, + { + id: "S38", + name: "Inline authenticate and remove method", + mutate(cwd) { + replaceOnce(cwd, F.service, ` return try await authenticate()`, ` return "Token123"`); + removeSnippet( + cwd, + F.service, + ` private func authenticate() async throws -> String { + let authSuccess = Bool.random() + guard authSuccess else { + throw NetworkError(kind: .authenticationFail) + } + return "Token123" + } +`, + ); + }, + rules: [rule("removed_nodes", "gt0"), rule("removed_edges", "gt0")], + }, + { + id: "S39", + name: "Add top-level helper and call it from service", + mutate(cwd) { + replaceOnce( + cwd, + F.service, + ` func 
disconnect(reason: String) { + print("Disconnecting from network: \\(reason)") + }`, + ` func disconnect(reason: String) { + print("Disconnecting from network: \\(reason)") + traceDisconnect() + }`, + ); + appendText( + cwd, + F.service, + ` +func traceDisconnect() { + print("trace") +} +`, + ); + }, + rules: [rule("added_nodes", "gt0"), rule("added_edges", "gt0")], + }, + { + id: "S40", + name: "Add top-level factory function to model file", + mutate(cwd) { + appendText( + cwd, + F.model, + ` +func makeUnknownError() -> NetworkError { + NetworkError(kind: .unknown) +} +`, + ); + }, + rules: [rule("added_nodes", "gt0")], + }, + { + id: "S41", + name: "EOF comment in controller should not change graph", + mutate(cwd) { + appendText(cwd, F.vc, `\n// ui note\n`); + }, + rules: [ + rule("added_nodes", "eq0"), + rule("removed_nodes", "eq0"), + rule("changed_nodes", "eq0"), + rule("added_edges", "eq0"), + rule("removed_edges", "eq0"), + ], + }, + { + id: "S42", + name: "Insert blank line near top of view model", + mutate(cwd) { + replaceOnce(cwd, F.vm, `import Foundation`, `import Foundation\n`); + }, + rules: [rule("changed_nodes", "gt0"), rule("added_nodes", "eq0"), rule("removed_nodes", "eq0")], + }, + { + id: "S43", + name: "Add enum-only file", + mutate(cwd) { + writeFile( + cwd, + "Sources/Utils/HTTPMethod.swift", + `import Foundation +enum HTTPMethod { + case get + case post +} +`, + ); + }, + rules: [rule("added_nodes", "gt0"), rule("removed_nodes", "eq0")], + }, + { + id: "S44", + name: "Rewrite service file with identical content", + mutate(cwd) { + const content = readFile(cwd, F.service); + writeFile(cwd, F.service, content); + }, + rules: [ + rule("added_nodes", "eq0"), + rule("removed_nodes", "eq0"), + rule("changed_nodes", "eq0"), + rule("added_edges", "eq0"), + rule("removed_edges", "eq0"), + ], + }, + { + id: "S45", + name: "Add second protocol requirement", + mutate(cwd) { + replaceOnce( + cwd, + F.protocols, + `protocol ViewModelProtocol { + var 
networkService: NetworkServiceProtocol { get } + func fetchData() async + func refreshData() async +}`, + `protocol ViewModelProtocol { + var networkService: NetworkServiceProtocol { get } + func fetchData() async + func refreshData() async + func boot() async +}`, + ); + }, + rules: [rule("added_nodes", "gt0")], + }, + { + id: "S46", + name: "Remove refreshData method in view model", + mutate(cwd) { + removeSnippet( + cwd, + F.vm, + ` func refreshData() async { + await fetchData() + } +`, + ); + }, + rules: [rule("removed_nodes", "gt0"), rule("removed_edges", "gt0")], + }, + { + id: "S47", + name: "Add nested enum inside service type", + mutate(cwd) { + replaceOnce( + cwd, + F.service, + `class NetworkService: NetworkServiceProtocol { + static let shared = NetworkService() +`, + `class NetworkService: NetworkServiceProtocol { + static let shared = NetworkService() + + enum State { + case idle + } +`, + ); + }, + rules: [rule("added_nodes", "gt0")], + }, + { + id: "S48", + name: "Change init parameter label in controller", + mutate(cwd) { + replaceOnce( + cwd, + F.vc, + ` init(viewModel: ViewModelProtocol = DemoViewModel()) {`, + ` init(vm: ViewModelProtocol = DemoViewModel()) {`, + ); + replaceOnce(cwd, F.vc, ` self.viewModel = viewModel`, ` self.viewModel = vm`); + }, + // Current parser normalizes initializer IDs to `.init` without params. + // Parameter-label-only edits are therefore graph-neutral today. 
+ rules: [ + rule("added_nodes", "eq0"), + rule("removed_nodes", "eq0"), + rule("changed_nodes", "eq0"), + rule("added_edges", "eq0"), + rule("removed_edges", "eq0"), + ], + }, + { + id: "S49", + name: "Rename service type", + mutate(cwd) { + replaceOnce(cwd, F.service, `class NetworkService: NetworkServiceProtocol {`, `class NetService: NetworkServiceProtocol {`); + }, + rules: [rule("added_nodes", "gt0"), rule("removed_nodes", "gt0")], + }, + { + id: "S50", + name: "Add nested-folder helper file", + mutate(cwd) { + writeFile( + cwd, + "Sources/Helpers/Format/StringFormatter.swift", + `import Foundation +struct StringFormatter { + func trim(_ s: String) -> String { s.trimmingCharacters(in: .whitespaces) } +} +`, + ); + }, + rules: [rule("added_nodes", "gt0")], + }, + { + id: "S51", + name: "Replace model file with new model", + mutate(cwd) { + deleteFile(cwd, F.model); + writeFile( + cwd, + "Sources/Models/RequestError.swift", + `import Foundation +struct RequestError: Error { + let code: Int +} +`, + ); + }, + rules: [rule("added_nodes", "gt0"), rule("removed_nodes", "gt0")], + }, + { + id: "S52", + name: "Service string literal update only", + mutate(cwd) { + replaceOnce(cwd, F.service, `return "Token123"`, `return "TokenABC"`); + }, + rules: [ + rule("added_nodes", "eq0"), + rule("removed_nodes", "eq0"), + rule("changed_nodes", "eq0"), + rule("added_edges", "eq0"), + rule("removed_edges", "eq0"), + ], + }, + { + id: "S53", + name: "Add logDisconnect method and call", + mutate(cwd) { + replaceOnce( + cwd, + F.service, + ` func disconnect(reason: String) { + print("Disconnecting from network: \\(reason)") + }`, + ` func disconnect(reason: String) { + print("Disconnecting from network: \\(reason)") + logDisconnect() + }`, + ); + replaceOnce( + cwd, + F.service, + ` private func authenticate() async throws -> String {`, + ` func logDisconnect() { + print("logged") + } + + private func authenticate() async throws -> String {`, + ); + }, + rules: [rule("added_nodes", 
"gt0"), rule("added_edges", "gt0")], + }, + { + id: "S54", + name: "Remove bare call in refreshData", + mutate(cwd) { + replaceOnce( + cwd, + F.vm, + ` func refreshData() async { + await fetchData() + }`, + ` func refreshData() async { + }`, + ); + }, + rules: [rule("removed_edges", "gt0")], + }, + { + id: "S55", + name: "Change bare call fetchData to debugLog", + mutate(cwd) { + replaceOnce( + cwd, + F.vm, + ` func refreshData() async { + await fetchData() + }`, + ` func refreshData() async { + await debugLog() + } + + func debugLog() async { + }`, + ); + }, + rules: [rule("added_nodes", "gt0"), rule("added_edges", "gt0"), rule("removed_edges", "gt0")], + }, + { + id: "S56", + name: "Add uppercase singleton chain call in view model", + mutate(cwd) { + replaceOnce( + cwd, + F.vm, + ` func fetchData() async { + do {`, + ` func fetchData() async { + let _ = try? await NetworkService.shared.connectToNetwork() + do {`, + ); + }, + rules: [rule("added_edges", "gt0")], + }, + { + id: "S57", + name: "Rename controller file path", + mutate(cwd) { + renameFile(cwd, F.vc, "Sources/Features/Demo/MainViewController.swift"); + }, + rules: [rule("added_nodes", "gt0"), rule("removed_nodes", "gt0")], + }, + { + id: "S58", + name: "Replace app file with alternate app file", + mutate(cwd) { + deleteFile(cwd, F.app); + writeFile( + cwd, + "Sources/App/MainApp.swift", + `import Foundation + +@main +struct MainApp { + static func main() async { + let controller = DemoViewController() + await controller.viewDidLoad() + } +} +`, + ); + }, + rules: [rule("added_nodes", "gt0"), rule("removed_nodes", "gt0")], + }, + { + id: "S59", + name: "Add extra protocol in core", + mutate(cwd) { + appendText( + cwd, + F.protocols, + ` +protocol LoggerProtocol { + func log(_ message: String) +} +`, + ); + }, + rules: [rule("added_nodes", "gt0")], + }, + { + id: "S60", + name: "Add file and change service signature together", + mutate(cwd) { + writeFile( + cwd, + "Sources/Utils/Env.swift", + `import 
Foundation +struct Env { + static let name = "dev" +} +`, + ); + replaceOnce( + cwd, + F.service, + ` func disconnect(reason: String) {`, + ` func disconnect() {`, + ); + replaceOnce( + cwd, + F.service, + `print("Disconnecting from network: \\(reason)")`, + `print("Disconnecting from network")`, + ); + }, + rules: [rule("added_nodes", "gt0"), rule("removed_nodes", "gt0")], + }, +]; + +function resetRepo(cwd) { + git(cwd, ["reset", "--hard", "HEAD"]); + git(cwd, ["clean", "-fd"]); +} + +function main() { + const { reportPath } = parseArgs(process.argv.slice(2)); + assertExists(flowmapBin, "flowmap binary"); + assertExists(parserBin, "flowmap-swift-ast binary"); + + const workspace = createBaselineRepo(); + const results = []; + let totalFp = 0; + let totalFn = 0; + + try { + // Baseline sanity + const baseline = extractCounts(analyze(workspace)); + if ( + baseline.added_nodes !== 0 || + baseline.removed_nodes !== 0 || + baseline.changed_nodes !== 0 || + baseline.added_edges !== 0 || + baseline.removed_edges !== 0 + ) { + throw new Error(`baseline is not clean: ${JSON.stringify(baseline)}`); + } + + for (const s of scenarios) { + resetRepo(workspace); + s.mutate(workspace); + const payload = analyze(workspace); + const counts = extractCounts(payload); + const judged = checkScenario(counts, s.rules); + totalFp += judged.fp; + totalFn += judged.fn; + results.push({ + id: s.id, + name: s.name, + pass: judged.pass, + fp: judged.fp, + fn: judged.fn, + counts, + failures: judged.results + .filter((r) => !r.pass) + .map((r) => `${r.r.field}:${r.r.op} (got ${counts[r.r.field]})`), + }); + } + } finally { + fs.rmSync(workspace, { recursive: true, force: true }); + } + + const passed = results.filter((r) => r.pass).length; + const failed = results.length - passed; + const scenarioCount = results.length; + const passRate = scenarioCount === 0 ? 
0 : passed / scenarioCount; + const gate = { + min_pass_rate: 1.0, + max_failed_scenarios: 0, + max_fp_total: 0, + max_fn_total: 0, + }; + const gatePassed = + passRate >= gate.min_pass_rate && + failed <= gate.max_failed_scenarios && + totalFp <= gate.max_fp_total && + totalFn <= gate.max_fn_total; + + const report = { + created_at: new Date().toISOString(), + tool: "FlowMap sample scenario runner", + workspace: repoRoot, + binaries: { + flowmap: flowmapBin, + parser: parserBin, + }, + summary: { + scenario_count: scenarioCount, + passed, + failed, + pass_rate: Number(passRate.toFixed(4)), + fp_total: totalFp, + fn_total: totalFn, + gate, + gate_passed: gatePassed, + }, + results, + }; + writeJsonReport(reportPath, report); + + console.log("=== FlowMap Sample Scenarios ==="); + console.log(`scenarios: ${scenarioCount}`); + console.log(`passed: ${passed}`); + console.log(`failed: ${failed}`); + console.log(`FP total: ${totalFp}`); + console.log(`FN total: ${totalFn}`); + console.log(`report: ${reportPath}`); + console.log(`gate: ${gatePassed ? "PASS" : "FAIL"}`); + console.log(""); + for (const r of results) { + const c = r.counts; + const tag = r.pass ? "PASS" : "FAIL"; + console.log( + `${tag} ${r.id} ${r.name} ` + + `| +N:${c.added_nodes} -N:${c.removed_nodes} ~N:${c.changed_nodes} ` + + `+E:${c.added_edges} -E:${c.removed_edges} impact:${c.impact}`, + ); + if (!r.pass) { + for (const f of r.failures) { + console.log(` - ${f}`); + } + } + } + + if (failed > 0) { + process.exitCode = 2; + } +} + +main();