A Go package for clustering strings: given a slice of strings, a similarity metric, and a threshold, it groups the input strings into clusters according to their similarity.
Similarity metrics are provided by https://github.com/adrg/strutil.
go get github.com/JRI98/string-clusterer
clusterer := NewClusterer()
input := []string{"apple", "aple", "banana", "bananna", "orange", "ornge"}
result := clusterer.Cluster(input)
fmt.Println(result) // [[apple aple] [banana bananna] [orange ornge]]
NewHamming(caseSensitive bool)
NewJaccard(caseSensitive bool)
NewJaro(caseSensitive bool)
NewJaroWinkler(caseSensitive bool)
NewLevenshtein(caseSensitive bool)
NewOverlapCoefficient(caseSensitive bool)
NewSmithWatermanGotoh(caseSensitive bool)
NewSorensenDice(caseSensitive bool)
go test
go test -bench=. -run=^#
go test -fuzz=FuzzCluster -run=^#