Skip to content

Commit 3730cac

Browse files
author
Miguel Angel
committed
🎉 Initializing the repo
0 parents  commit 3730cac

12 files changed

+507
-0
lines changed

Distances/cosine.py

+33
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
import numpy as np

# Cosine similarity: the cosine of the angle between two vectors,
# used in machine learning to measure how different two sample
# vectors are.
#
# The value lies in [-1, 1]: the larger the cosine, the smaller the
# angle between the vectors. It reaches 1 when the directions
# coincide and -1 when the directions are exactly opposite.

data = np.array([
    [5, 0, 3, 0, 2, 0, 0, 2, 0, 0],  # A
    [3, 0, 2, 0, 1, 1, 0, 1, 0, 1]   # B
])

vec_a = data[0, :]
vec_b = data[1, :]

# cos(theta) = (A . B) / (|A| * |B|)
dot = np.dot(vec_a, vec_b)
Amod = np.linalg.norm(vec_a)  # magnitude of A
Bmod = np.linalg.norm(vec_b)  # magnitude of B

cosine = dot / (Amod * Bmod)

print(cosine)

Distances/euclidean.py

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import numpy as np

# Euclidean distance (L2 norm): the most intuitive distance measure,
# derived from the straight-line distance formula between two points
# in Euclidean space.

objA = [22, 1, 42, 10]
objB = [20, 0, 36, 8]

npA = np.array(objA)
npB = np.array(objB)

# Square root of the sum of squared coordinate differences
# (equivalent to np.linalg.norm(npA - npB)).
euclidean = np.sqrt(np.square(npA - npB).sum())

print(euclidean)
22+

Distances/manhattan.py

+39
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
import numpy as np

# Manhattan distance (L1 norm), also called City Block distance:
# imagine driving between two intersections in Manhattan. You cannot
# drive in a straight line through the buildings, so the real driving
# distance is the sum of the absolute differences along each axis --
# hence the name.

objA = [22, 1, 42, 10]
objB = [20, 0, 36, 8]

npA = np.array(objA)
npB = np.array(objB)

# Sum of per-coordinate absolute differences
# (equivalent to np.linalg.norm(npA - npB, ord=1)).
manhattan = np.abs(npA - npB).sum()

print(manhattan)
24+
25+
26+
27+
28+
29+
30+
31+
32+
33+
34+
35+
36+
37+
38+
39+

Distances/minkowski.py

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
import numpy as np

# Minkowski distance: a distance/similarity measure between two points
# in a normed vector space (N-dimensional real space). It generalizes
# both the Euclidean distance (h = 2) and the Manhattan distance
# (h = 1) through the order parameter h.

objA = [22, 1, 42, 10]
objB = [20, 0, 36, 8]

# Order of the Minkowski distance.
h = 3

npA = np.array(objA)
npB = np.array(objB)

# (sum_i |a_i - b_i|^h) ^ (1/h)
minkowski = np.sum(np.abs(npA - npB) ** h) ** (1 / h)

print(minkowski)

Distances/superior.py

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
import numpy as np

# Chebyshev distance (maximum metric, L-infinity metric), named after
# Pafnuty Chebyshev: the distance between two vectors is the greatest
# of their absolute differences along any coordinate dimension.
#
# It is also known as chessboard distance: on a board with unit
# squares it equals the minimum number of moves a king needs to
# travel between two squares, since the king moves one step in any
# direction, including diagonals.

objA = [22, 1, 42, 10]
objB = [20, 0, 36, 8]

npA = np.array(objA)
npB = np.array(objB)

# Largest per-coordinate absolute difference
# (equivalent to np.linalg.norm(npA - npB, ord=np.inf)).
chebyshev = np.abs(npA - npB).max()

print(chebyshev)

Normalization/minmax.py

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
import numpy as np

# Min-Max normalization: one of the most common ways to rescale data.
# The minimum of the feature maps to new_min, the maximum maps to
# new_max, and every other value lands proportionally in between:
#
#     norm = (x - X.min) / (X.max - X.min) * (new_max - new_min) + new_min

data = [200, 400, 800, 1000, 2000]

# Target range for the normalized values.
nmin = 0
nmax = 10

nparray = np.array(data)

# ------------- Normalizing the data -------------------------- #
# Spread (max - min) and minimum of the original data.
diff = nparray.max() - nparray.min()
npmin = nparray.min()

ndata = (nparray - npmin) / diff * (nmax - nmin) + nmin

print(ndata)

Normalization/minmax_edad.py

+41
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
import numpy as np

# Min-Max normalization of an age list: the feature minimum maps to
# new_min, the maximum maps to new_max, and every other value lands
# proportionally in between:
#
#     norm = (x - X.min) / (X.max - X.min) * (new_max - new_min) + new_min


def minmax_norm(X, npmin, diff, nmax, nmin):
    """Rescale X from the range [npmin, npmin + diff] onto [nmin, nmax]."""
    return (X - npmin) / diff * (nmax - nmin) + nmin


# Ages to normalize.
data = [13, 15, 16, 16, 19, 20, 20, 21, 22, 22,
        25, 25, 25, 25, 30, 33, 33, 35, 35, 35,
        35, 36, 40, 45, 46, 52, 70]

# Target range for the normalized values.
nmin = 0
nmax = 1

nparray = np.array(data)

# ------------- Normalizing the data -------------------------- #
# Spread (max - min) and minimum of the original data.
diff = nparray.max() - nparray.min()
npmin = nparray.min()

ndata = minmax_norm(nparray, npmin, diff, nmax, nmin)

print(ndata)

# Normalized value for an age of 35, rounded to two decimals.
nvalue = minmax_norm(35, npmin, diff, nmax, nmin)
print(np.round(nvalue, 2))

Normalization/znorm.py

+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
import numpy as np

# Z-score normalization: z = (x - mu) / sigma.
#
# The absolute value of z is the distance between the raw score x and
# the population mean, measured in units of the standard deviation.
# z is negative when the raw score is below the mean and positive
# when it is above.

data = [200, 400, 800, 1000, 2000]

nparray = np.array(data)

# ------------- Normalizing the data -------------------------- #
# Mean and (population) standard deviation of the array.
u = nparray.mean()
std = nparray.std()

zdata = (nparray - u) / std

print(zdata)

ProximityMeasure/binary_proximity.py

+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import numpy as np
2+
import pandas as pd
3+
4+
'''
Asymmetric binary dissimilarity between patients described by binary
attributes (first column is the patient name, the rest are 0/1 flags;
the mixed numpy array stores everything as strings, so the flags are
compared as "0"/"1").

For each pair (i, j), with
    q = number of attributes equal to 1 in both,
    r = number equal to 1 in i but 0 in j,
    s = number equal to 0 in i but 1 in j,
the dissimilarity is d = (r + s) / (q + r + s). Matching 0-0
attributes are ignored, as is standard for asymmetric binary data.
'''

# Input the data
data = np.array([
    ["Jack", 1, 0, 1, 0, 0, 0],
    ["Mary", 1, 0, 1, 0, 1, 0],
    ["Jim", 1, 1, 0, 0, 0, 0]
])

patients = data.shape[0]

for i in range(patients-1):
    for j in range(i+1, patients):
        a = data[i, 1:]
        b = data[j, 1:]

        # Count the attribute combinations with boolean masks. This
        # replaces the previous pd.crosstab lookup, which was slow
        # (as its own comment noted) and raised KeyError whenever a
        # 0/1 combination did not occur in a pair.
        q = np.sum((a == "1") & (b == "1"))  # 1 in both
        r = np.sum((a == "1") & (b == "0"))  # 1 in i, 0 in j
        s = np.sum((a == "0") & (b == "1"))  # 0 in i, 1 in j

        d = (r + s)/(q + r + s)

        print("d({}, {})\t=\t{}".format(data[i,0], data[j,0], d))

0 commit comments

Comments
 (0)