-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdists.go
More file actions
137 lines (112 loc) · 3.76 KB
/
dists.go
File metadata and controls
137 lines (112 loc) · 3.76 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
/* Distance functions used to calculate distances between allele profiles in Go.
To supply different return types, a new type and an interface may need to be added, as unions are not an option in Go.
TODO offer int or float return
TODO treating missing alleles (zeroes) as missing should be optional
Matthew Wells: 2024-04-06
*/
package main
// DistFunc bundles a distance function with the metadata stored alongside it.
type DistFunc struct {
	// function computes the distance between two allele profiles.
	function func(*[]int, *[]int) float64
	// assignment is the numeric identifier of the metric.
	assignment int
	// help is a short human-readable description of the metric.
	help string
	// truncate states whether the output value is truncated to an integer
	// (true) or left as a float (false).
	truncate bool
}
// Available distance metrics and the package-wide default selection.
var (
	// ham is the Hamming distance that skips missing values.
	ham = DistFunc{
		function:   HammingDistance,
		assignment: 0,
		help:       "Hamming Distance skipping missing values",
		truncate:   true,
	}

	// ham_missing is the Hamming distance that treats missing values as alleles.
	ham_missing = DistFunc{
		function:   HammingDistanceMissing,
		assignment: 1,
		help:       "Hamming distance missing values treated as alleles.",
		truncate:   true,
	}

	// scaled is the scaled distance that skips missing values.
	scaled = DistFunc{
		function:   ScaledDistance,
		assignment: 2,
		help:       "Scaled Distance skipping missing values",
		truncate:   false,
	}

	// scaled_missing is the scaled distance that treats missing values as alleles.
	scaled_missing = DistFunc{
		function:   ScaledDistanceMissing,
		assignment: 3,
		help:       "Scaled distance missing values treated as alleles.",
		truncate:   false,
	}

	// distance_functions holds every metric; when adding one, keep its
	// position in the slice equal to its assignment value, because the
	// position is what callers use to select it.
	distance_functions = []DistFunc{ham, ham_missing, scaled, scaled_missing}

	// DIST_FUNC is the default distance function selection.
	DIST_FUNC = 0
)
// HammingDistance returns the number of positions at which the two allele
// profiles differ. Positions where either profile holds MissingAlleleValue
// are skipped and never counted as differences.
//
// Only the first len(*profile_1) positions are examined; *profile_2 must be
// at least that long, otherwise the function panics with an index out of
// range. The count is returned as a float64 so the function fits the
// func(*[]int, *[]int) float64 signature used for distance functions.
func HammingDistance(profile_1 *[]int, profile_2 *[]int) float64 {
	p1 := *profile_1
	p2 := *profile_2
	var count float64
	for idx, a := range p1 {
		b := p2[idx]
		// Compare each side with the sentinel directly. Multiplying the two
		// alleles and testing for zero wraps around to zero for large
		// non-missing values and is only valid while the sentinel is 0.
		if a == MissingAlleleValue || b == MissingAlleleValue {
			continue
		}
		if a != b {
			count++
		}
	}
	return count
}
// HammingDistanceEql returns the number of positions at which the two allele
// profiles hold the same allele. Positions where either profile holds
// MissingAlleleValue are skipped and never counted.
//
// NOTE(review): despite its name this counts matches, not differences, so it
// is the complement of HammingDistance over the compared positions. Confirm
// that this is the intended behaviour.
//
// Only the first len(*profile_1) positions are examined; *profile_2 must be
// at least that long, otherwise the function panics with an index out of
// range.
func HammingDistanceEql(profile_1 *[]int, profile_2 *[]int) float64 {
	p1 := *profile_1
	p2 := *profile_2
	var count float64
	for idx, a := range p1 {
		b := p2[idx]
		// Compare each side with the sentinel directly. Multiplying the two
		// alleles and testing for zero wraps around to zero for large
		// non-missing values and is only valid while the sentinel is 0.
		if a == MissingAlleleValue || b == MissingAlleleValue {
			continue
		}
		if a == b {
			count++
		}
	}
	return count
}
// HammingDistanceMissing returns the number of positions at which the two
// allele profiles differ, treating missing values as ordinary alleles: a
// missing value differs from any other value, while two missing values at
// the same position are equal and add nothing to the distance.
//
// Only the first len(*profile_1) positions are examined; *profile_2 must be
// at least that long, otherwise the function panics with an index out of
// range.
func HammingDistanceMissing(profile_1 *[]int, profile_2 *[]int) float64 {
	p1 := *profile_1
	p2 := *profile_2
	var count float64
	for idx, a := range p1 {
		// Plain inequality: whether two alleles differ must not depend on the
		// value chosen for the missing-allele sentinel.
		if a != p2[idx] {
			count++
		}
	}
	return count
}
// ScaledDistance returns the percentage (0-100) of compared positions at
// which the two allele profiles differ. Positions where either profile holds
// MissingAlleleValue are excluded from both the numerator and the
// denominator. When no position can be compared (empty profiles or every
// position missing) the maximum distance, 100, is returned.
//
// Only the first len(*profile_1) positions are examined; *profile_2 must be
// at least that long, otherwise the function panics with an index out of
// range.
func ScaledDistance(profile_1 *[]int, profile_2 *[]int) float64 {
	const maxDistance = 100.0
	p1 := *profile_1
	p2 := *profile_2
	compared := 0
	mismatched := 0
	for idx, a := range p1 {
		b := p2[idx]
		// Compare each side with the sentinel directly. Multiplying the two
		// alleles and testing for zero wraps around to zero for large
		// non-missing values and is only valid while the sentinel is 0.
		if a == MissingAlleleValue || b == MissingAlleleValue {
			continue
		}
		compared++
		if a != b {
			mismatched++
		}
	}
	if compared == 0 {
		// Nothing to compare: report the profiles as maximally distant.
		return maxDistance
	}
	return maxDistance * (float64(mismatched) / float64(compared))
}
// ScaledDistanceMissing returns the percentage (0-100) of positions at which
// the two allele profiles differ, treating missing values as ordinary
// alleles: every position of *profile_1 is part of the denominator, and a
// missing value only matches another missing value.
//
// An empty *profile_1 yields the maximum distance, 100, matching what
// ScaledDistance returns when nothing can be compared (the unguarded division
// would otherwise produce NaN).
//
// Only the first len(*profile_1) positions are examined; *profile_2 must be
// at least that long, otherwise the function panics with an index out of
// range.
func ScaledDistanceMissing(profile_1 *[]int, profile_2 *[]int) float64 {
	const maxDistance = 100.0
	p1 := *profile_1
	p2 := *profile_2
	if len(p1) == 0 {
		// Avoid 0/0: with no positions there is nothing to compare.
		return maxDistance
	}
	mismatched := 0
	for idx, a := range p1 {
		if a != p2[idx] {
			mismatched++
		}
	}
	return maxDistance * (float64(mismatched) / float64(len(p1)))
}