Skip to content

Commit d84f8bd

Browse files
committed
cosine similarity returns 0 if one of the strings is shorter than k
1 parent 988ed03 commit d84f8bd

File tree

3 files changed

+26
-31
lines changed

3 files changed

+26
-31
lines changed

pom.xml

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010

1111
<name>${project.artifactId}</name>
1212
<url>https://github.com/tdebatty/java-string-similarity</url>
13-
<description>Implementation of various string similarity and distance algorithms: Levenshtein, Jaro, Jaro-winkler, n-Gram, Q-Gram (Jaccard index), Longest Common Subsequence edit distance,...</description>
13+
<description>Implementation of various string similarity and distance algorithms: Levenshtein, Jaro-winkler, n-Gram, Q-Gram, Jaccard index, Longest Common Subsequence edit distance, cosine similarity...</description>
1414

1515
<properties>
1616
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
@@ -34,8 +34,8 @@
3434
<connection>scm:git:[email protected]:tdebatty/java-string-similarity.git</connection>
3535
<developerConnection>scm:git:[email protected]:tdebatty/java-string-similarity.git</developerConnection>
3636
<url>[email protected]:tdebatty/java-string-similarity.git</url>
37-
<tag>v0.7</tag>
38-
</scm>
37+
<tag>v0.7</tag>
38+
</scm>
3939

4040
<distributionManagement>
4141
<snapshotRepository>
@@ -120,7 +120,7 @@
120120
<groupId>org.apache.maven.plugins</groupId>
121121
<artifactId>maven-release-plugin</artifactId>
122122
<version>2.5.1</version>
123-
<configuration>
123+
<configuration>
124124
<tagNameFormat>v@{project.version}</tagNameFormat>
125125
</configuration>
126126
</plugin>
@@ -154,9 +154,9 @@
154154
</instrumentation>
155155
</configuration>
156156
</plugin>
157-
158157
</plugins>
159158
</build>
159+
160160
<dependencies>
161161
<dependency>
162162
<groupId>junit</groupId>

src/main/java/info/debatty/java/stringsimilarity/Cosine.java

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,15 +44,22 @@ public class Cosine extends ShingleBased implements
4444
*
4545
* @param k
4646
*/
47-
public Cosine(int k) {
47+
public Cosine(final int k) {
4848
super(k);
4949
}
5050

51+
/**
52+
*
53+
*/
5154
public Cosine() {
5255
super();
5356
}
5457

5558
public double similarity(String s1, String s2) {
59+
60+
if (s1.length() < k || s2.length() < k) {
61+
return 0;
62+
}
5663
KShingling ks = new KShingling(k);
5764
int[] profile1 = ks.getArrayProfile(s1);
5865
int[] profile2 = ks.getArrayProfile(s2);
@@ -61,7 +68,7 @@ public double similarity(String s1, String s2) {
6168
}
6269

6370
/**
64-
* Compute the norm L2 : sqrt(Sum_i( v_i²))
71+
* Compute the norm L2 : sqrt(Sum_i( v_i²)).
6572
*
6673
* @param profile
6774
* @return L2 norm

src/test/java/info/debatty/java/stringsimilarity/CosineTest.java

Lines changed: 12 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,6 @@
2424

2525
package info.debatty.java.stringsimilarity;
2626

27-
import org.junit.After;
28-
import org.junit.AfterClass;
29-
import org.junit.Before;
30-
import org.junit.BeforeClass;
3127
import org.junit.Test;
3228
import static org.junit.Assert.*;
3329

@@ -36,34 +32,26 @@
3632
* @author Thibault Debatty
3733
*/
3834
public class CosineTest {
39-
40-
public CosineTest() {
41-
}
42-
43-
@BeforeClass
44-
public static void setUpClass() {
45-
}
46-
47-
@AfterClass
48-
public static void tearDownClass() {
49-
}
50-
51-
@Before
52-
public void setUp() {
53-
}
54-
55-
@After
56-
public void tearDown() {
57-
}
5835

5936
/**
6037
* Test of similarity method, of class Cosine.
6138
*/
6239
@Test
63-
public void testSimilarity() {
40+
public final void testSimilarity() {
6441
System.out.println("similarity");
6542
Cosine instance = new Cosine();
6643
double result = instance.similarity("ABC", "ABCE");
6744
assertEquals(0.71, result, 0.01);
6845
}
46+
47+
/**
48+
* If one of the strings is smaller than k, the similarity should be 0.
49+
*/
50+
@Test
51+
public final void testSmallString() {
52+
System.out.println("test small string");
53+
Cosine instance = new Cosine(3);
54+
double result = instance.similarity("AB", "ABCE");
55+
assertEquals(0.0, result, 0.00001);
56+
}
6957
}

0 commit comments

Comments
 (0)