diff --git a/src/differ.rs b/src/differ.rs index 5caef9a..e414730 100644 --- a/src/differ.rs +++ b/src/differ.rs @@ -1,4 +1,4 @@ -use sequencematcher::SequenceMatcher; +use sequencematcher::{SequenceMatcher, Tag}; use std::cmp; use utils::{count_leading, str_with_similar_chars}; @@ -22,8 +22,8 @@ impl Differ { let mut res = Vec::new(); for opcode in matcher.get_opcodes() { let mut gen = Vec::new(); - match opcode.tag.as_ref() { - "replace" => { + match opcode.tag { + Tag::Replace => { gen = self.fancy_replace( first_sequence, opcode.first_start, @@ -33,13 +33,13 @@ impl Differ { opcode.second_end, ) } - "delete" => { + Tag::Delete => { gen = self.dump("-", first_sequence, opcode.first_start, opcode.first_end) } - "insert" => { + Tag::Insert => { gen = self.dump("+", second_sequence, opcode.second_start, opcode.second_end) } - "equal" => { + Tag::Equal => { gen = self.dump(" ", first_sequence, opcode.first_start, opcode.first_end) } _ => {} @@ -147,8 +147,9 @@ impl Differ { second_sequence, second_start, second_end, - ).iter() - .cloned(), + ) + .iter() + .cloned(), ); return res; } @@ -165,8 +166,9 @@ impl Differ { second_sequence, second_start, best_j, - ).iter() - .cloned(), + ) + .iter() + .cloned(), ); let first_element = &first_sequence[best_i]; let second_element = &second_sequence[best_j]; @@ -181,18 +183,18 @@ impl Differ { opcode.first_end - opcode.first_start, opcode.second_end - opcode.second_start, ); - match opcode.tag.as_ref() { - "replace" => { + match opcode.tag { + Tag::Replace => { first_tag.push_str(&str_with_similar_chars('^', first_length)); second_tag.push_str(&str_with_similar_chars('^', second_length)); } - "delete" => { + Tag::Delete => { first_tag.push_str(&str_with_similar_chars('-', first_length)); } - "insert" => { + Tag::Insert => { second_tag.push_str(&str_with_similar_chars('+', second_length)); } - "equal" => { + Tag::Equal => { first_tag.push_str(&str_with_similar_chars(' ', first_length)); second_tag.push_str(&str_with_similar_chars(' ', second_length)); } @@ -217,8 +219,9 @@ impl Differ { second_sequence, best_j + 1, second_end, - ).iter() - .cloned(), + ) + .iter() + .cloned(), ); res } diff --git a/src/lib.rs b/src/lib.rs index ca6b1cc..fed731b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,7 +2,7 @@ pub mod differ; pub mod sequencematcher; mod utils; -use sequencematcher::{Sequence, SequenceMatcher}; +use sequencematcher::{Sequence, SequenceMatcher, Tag}; use std::collections::HashMap; use std::fmt::Display; use utils::{format_range_context, format_range_unified}; @@ -59,7 +59,7 @@ pub fn unified_diff( file1_range, file2_range, lineterm )); for code in group { - if code.tag == "equal" { + if code.tag == Tag::Equal { for item in first_sequence .iter() .take(code.first_end) @@ -69,7 +69,7 @@ pub fn unified_diff( } continue; } - if code.tag == "replace" || code.tag == "delete" { + if code.tag == Tag::Replace || code.tag == Tag::Delete { for item in first_sequence .iter() .take(code.first_end) @@ -78,7 +78,7 @@ pub fn unified_diff( res.push(format!("-{}", item)); } } - if code.tag == "replace" || code.tag == "insert" { + if code.tag == Tag::Replace || code.tag == Tag::Insert { for item in second_sequence .iter() .take(code.second_end) @@ -103,11 +103,11 @@ pub fn context_diff( ) -> Vec { let mut res = Vec::new(); let lineterm = '\n'; - let mut prefix: HashMap = HashMap::new(); - prefix.insert(String::from("insert"), String::from("+ ")); - prefix.insert(String::from("delete"), String::from("- ")); - prefix.insert(String::from("replace"), String::from("! ")); - prefix.insert(String::from("equal"), String::from(" ")); + let mut prefix: HashMap = HashMap::new(); + prefix.insert(Tag::Insert, String::from("+ ")); + prefix.insert(Tag::Delete, String::from("- ")); + prefix.insert(Tag::Replace, String::from("! ")); + prefix.insert(Tag::Equal, String::from(" ")); let mut started = false; let mut matcher = SequenceMatcher::new(first_sequence, second_sequence); for group in &matcher.get_grouped_opcodes(n) { @@ -124,14 +124,14 @@ pub fn context_diff( res.push(format!("*** {} ****{}", file1_range, lineterm)); let mut any = false; for opcode in group { - if opcode.tag == "replace" || opcode.tag == "delete" { + if opcode.tag == Tag::Replace || opcode.tag == Tag::Delete { any = true; break; } } if any { for opcode in group { - if opcode.tag != "insert" { + if opcode.tag != Tag::Insert { for item in first_sequence .iter() .take(opcode.first_end) @@ -146,14 +146,14 @@ pub fn context_diff( res.push(format!("--- {} ----{}", file2_range, lineterm)); any = false; for opcode in group { - if opcode.tag == "replace" || opcode.tag == "insert" { + if opcode.tag == Tag::Replace || opcode.tag == Tag::Insert { any = true; break; } } if any { for opcode in group { - if opcode.tag != "delete" { + if opcode.tag != Tag::Delete { for item in second_sequence .iter() .take(opcode.second_end) diff --git a/src/sequencematcher.rs b/src/sequencematcher.rs index 4157808..53feb0d 100644 --- a/src/sequencematcher.rs +++ b/src/sequencematcher.rs @@ -20,9 +20,28 @@ impl Match { } } +#[derive(Debug, Clone, PartialEq, Copy, Eq, Hash)] +pub enum Tag { + Insert, + Delete, + Replace, + Equal, +} + +impl Tag { + pub fn as_str(&self) -> &'static str { + match self { + Tag::Insert => "insert", + Tag::Delete => "delete", + Tag::Replace => "replace", + Tag::Equal => "equal", + } + } +} + #[derive(Debug, Clone, PartialEq)] pub struct Opcode { - pub tag: String, + pub tag: Tag, pub first_start: usize, pub first_end: usize, pub second_start: usize, @@ -30,8 +49,8 @@ pub struct Opcode { } impl Opcode { - fn new( - tag: String, + pub fn new( + tag: Tag, first_start: usize, first_end: usize, second_start: usize, @@ -129,10 +148,11 @@ impl<'a, T: Sequence> SequenceMatcher<'a, T> { let len = second_sequence.len(); if len >= 200 { let test_len = (len as f32 / 100.0).floor() as usize + 1; - second_sequence_elements = second_sequence_elements - .into_iter() - .filter(|&(_, ref indexes)| indexes.len() > test_len) - .collect(); + // second_sequence_elements = second_sequence_elements + // .into_iter() + // .filter(|&(_, ref indexes)| indexes.len() > test_len) + // .collect(); + second_sequence_elements.retain(|_, indexes| indexes.len() <= test_len); } self.second_sequence_elements = second_sequence_elements; } @@ -182,22 +202,52 @@ impl<'a, T: Sequence> SequenceMatcher<'a, T> { } j2len = new_j2len; } - for _ in 0..2 { - while best_i > first_start - && best_j > second_start - && first_sequence.get(best_i - 1) == second_sequence.get(best_j - 1) - { - best_i -= 1; - best_j -= 1; - best_size += 1; - } - while best_i + best_size < first_end - && best_j + best_size < second_end - && first_sequence.get(best_i + best_size) == second_sequence.get(best_j + best_size) - { - best_size += 1; - } + while best_i > first_start + && best_j > second_start + && first_sequence.get(best_i - 1) == second_sequence.get(best_j - 1) + { + best_i -= 1; + best_j -= 1; + best_size += 1; + } + while best_i + best_size < first_end + && best_j + best_size < second_end + && first_sequence.get(best_i + best_size) == second_sequence.get(best_j + best_size) + { + best_size += 1; + } + + while best_i > first_start + && best_j > second_start + && first_sequence.get(best_i - 1) == second_sequence.get(best_j - 1) + { + best_i -= 1; + best_j -= 1; + best_size += 1; + } + while best_i + best_size < first_end + && best_j + best_size < second_end + && first_sequence.get(best_i + best_size) == second_sequence.get(best_j + best_size) + { + best_size += 1; } + + // for _ in 0..2 { + // while best_i > first_start + // && best_j > second_start + // && first_sequence.get(best_i - 1) == second_sequence.get(best_j - 1) + // { + // best_i -= 1; + // best_j -= 1; + // best_size += 1; + // } + // while best_i + best_size < first_end + // && best_j + best_size < second_end + // && first_sequence.get(best_i + best_size) == second_sequence.get(best_j + best_size) + // { + // best_size += 1; + // } + // } Match::new(best_i, best_j, best_size) } @@ -259,28 +309,29 @@ impl<'a, T: Sequence> SequenceMatcher<'a, T> { let mut opcodes = Vec::new(); let (mut i, mut j) = (0, 0); for m in self.get_matching_blocks() { - let mut tag = String::new(); + let mut tag = None; + if i < m.first_start && j < m.second_start { - tag = String::from("replace"); + tag = Some(Tag::Replace); } else if i < m.first_start { - tag = String::from("delete"); + tag = Some(Tag::Delete); } else if j < m.second_start { - tag = String::from("insert"); + tag = Some(Tag::Insert); } - if !tag.is_empty() { - opcodes.push(Opcode::new(tag, i, m.first_start, j, m.second_start)); - } - i = m.first_start + m.size; - j = m.second_start + m.size; - if m.size != 0 { + if tag.is_some() { opcodes.push(Opcode::new( - String::from("equal"), - m.first_start, + tag.unwrap(), i, - m.second_start, + m.first_start, j, + m.second_start, )); } + i = m.first_start + m.size; + j = m.second_start + m.size; + if m.size != 0 { + opcodes.push(Opcode::new(Tag::Equal, m.first_start, i, m.second_start, j)); + } } self.opcodes = Some(opcodes); self.opcodes.as_ref().unwrap().clone() @@ -290,15 +341,15 @@ impl<'a, T: Sequence> SequenceMatcher<'a, T> { let mut res = Vec::new(); let mut codes = self.get_opcodes(); if codes.is_empty() { - codes.push(Opcode::new("equal".to_string(), 0, 1, 0, 1)); + codes.push(Opcode::new(Tag::Equal, 0, 1, 0, 1)); } - if codes.first().unwrap().tag == "equal" { + if codes.first().unwrap().tag == Tag::Equal { let opcode = codes.first_mut().unwrap(); opcode.first_start = max(opcode.first_start, opcode.first_end.saturating_sub(n)); opcode.second_start = max(opcode.second_start, opcode.second_end.saturating_sub(n)); } - if codes.last().unwrap().tag == "equal" { + if codes.last().unwrap().tag == Tag::Equal { let opcode = codes.last_mut().unwrap(); opcode.first_end = min(opcode.first_start + n, opcode.first_end); opcode.second_end = min(opcode.second_start + n, opcode.second_end); @@ -307,7 +358,7 @@ impl<'a, T: Sequence> SequenceMatcher<'a, T> { let mut group = Vec::new(); for code in &codes { let (mut first_start, mut second_start) = (code.first_start, code.second_start); - if code.tag == "equal" && code.first_end - code.first_start > nn { + if code.tag == Tag::Equal && code.first_end - code.first_start > nn { group.push(Opcode::new( code.tag.clone(), code.first_start, @@ -328,14 +379,15 @@ impl<'a, T: Sequence> SequenceMatcher<'a, T> { code.second_end, )); } - if !(group.len() == 1 && group.first().unwrap().tag == "equal") || group.is_empty() { + if !(group.len() == 1 && group.first().unwrap().tag == Tag::Equal) || group.is_empty() { res.push(group.clone()); } res } pub fn ratio(&mut self) -> f32 { - let matches = self.get_matching_blocks() + let matches = self + .get_matching_blocks() .iter() .fold(0, |res, &m| res + m.size); calculate_ratio( @@ -343,4 +395,27 @@ impl<'a, T: Sequence> SequenceMatcher<'a, T> { self.first_sequence.len() + self.second_sequence.len(), ) } + + pub fn quick_ratio(&mut self) -> f32 { + let mut fullbcount = HashMap::new(); + for elt in self.second_sequence.iter() { + fullbcount.entry(elt).and_modify(|x| *x += 1).or_insert(1); + } + let mut avail = HashMap::new(); + let mut matches = 0; + for elt in self.first_sequence.iter() { + let numb = avail + .get(&elt) + .map(|x| *x) + .unwrap_or(fullbcount.get(elt).map(|x| *x).unwrap_or(0)); + avail.insert(elt, numb - 1); + if numb > 0 { + matches += 1; + } + } + calculate_ratio( + matches, + self.first_sequence.len() + self.second_sequence.len(), + ) + } } diff --git a/tests/tests.rs b/tests/tests.rs index 8f49de9..c6aa3ab 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -1,7 +1,7 @@ extern crate difflib; use difflib::differ::Differ; -use difflib::sequencematcher::{Match, Opcode, SequenceMatcher}; +use difflib::sequencematcher::{Match, Opcode, SequenceMatcher, Tag}; #[test] fn test_longest_match() { @@ -41,35 +41,35 @@ fn test_get_opcodes() { let result = matcher.get_opcodes(); let mut expected_result = Vec::new(); expected_result.push(Opcode { - tag: "delete".to_string(), + tag: Tag::Delete, first_start: 0, first_end: 1, second_start: 0, second_end: 0, }); expected_result.push(Opcode { - tag: "equal".to_string(), + tag: Tag::Equal, first_start: 1, first_end: 3, second_start: 0, second_end: 2, }); expected_result.push(Opcode { - tag: "replace".to_string(), + tag: Tag::Replace, first_start: 3, first_end: 4, second_start: 2, second_end: 3, }); expected_result.push(Opcode { - tag: "equal".to_string(), + tag: Tag::Equal, first_start: 4, first_end: 6, second_start: 3, second_end: 5, }); expected_result.push(Opcode { - tag: "insert".to_string(), + tag: Tag::Insert, first_start: 6, first_end: 6, second_start: 5, @@ -145,7 +145,8 @@ fn test_unified_diff() { "2005-01-26 23:30:50", "2010-04-02 10:20:52", 3, - ).join(""); + ) + .join(""); assert_eq!( result, "--- Original\t2005-01-26 23:30:50\n+++ Current\t2010-04-02 10:20:52\n@@ -1,4 \ @@ -165,7 +166,8 @@ fn test_context_diff() { "2005-01-26 23:30:50", "2010-04-02 10:20:52", 3, - ).join(""); + ) + .join(""); assert_eq!( result, "*** Original\t2005-01-26 23:30:50\n--- Current\t2010-04-02 \