Skip to content

Commit 3730cac

Browse files
author
Miguel Angel
committed
🎉 Initializing the repo
0 parents  commit 3730cac

12 files changed

+507
-0
lines changed

Distances/cosine.py

+33
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
import numpy as np

# Cosine similarity: the cosine of the angle between two vectors,
# used in machine learning to measure how different two sample
# vectors are.
#
# The value lies in [-1, 1]: the larger the cosine, the smaller the
# angle between the vectors. It reaches 1 when the directions
# coincide and -1 when the directions are exactly opposite.

data = np.array([
    [5, 0, 3, 0, 2, 0, 0, 2, 0, 0],  # A
    [3, 0, 2, 0, 1, 1, 0, 1, 0, 1]   # B
])

vec_a = data[0, :]
vec_b = data[1, :]

# cos(theta) = (A . B) / (|A| * |B|)
dot = np.dot(vec_a, vec_b)
Amod = np.linalg.norm(vec_a)  # magnitude of A
Bmod = np.linalg.norm(vec_b)  # magnitude of B

cosine = dot / (Amod * Bmod)

print(cosine)

Distances/euclidean.py

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import numpy as np

# Euclidean distance (L2 norm): the most intuitive distance measure,
# derived from the straight-line distance formula between two points
# in Euclidean space.

objA = [22, 1, 42, 10]
objB = [20, 0, 36, 8]

npA = np.array(objA)
npB = np.array(objB)

# Square root of the sum of squared coordinate differences
# (equivalent to np.linalg.norm(npA - npB)).
euclidean = np.sqrt(np.square(npA - npB).sum())

print(euclidean)
22+

Distances/manhattan.py

+39
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
import numpy as np

# Manhattan distance (L1 norm), also called City Block distance:
# imagine driving between two intersections in Manhattan. You cannot
# drive in a straight line through the buildings, so the real driving
# distance is the sum of the absolute differences along each axis --
# hence the name.

objA = [22, 1, 42, 10]
objB = [20, 0, 36, 8]

npA = np.array(objA)
npB = np.array(objB)

# Sum of per-coordinate absolute differences
# (equivalent to np.linalg.norm(npA - npB, ord=1)).
manhattan = np.abs(npA - npB).sum()

print(manhattan)
24+
25+
26+
27+
28+
29+
30+
31+
32+
33+
34+
35+
36+
37+
38+
39+

Distances/minkowski.py

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
import numpy as np

# Minkowski distance: a distance/similarity measure between two points
# in a normed vector space (N-dimensional real space). It generalizes
# both the Euclidean distance (h = 2) and the Manhattan distance
# (h = 1) through the order parameter h.

objA = [22, 1, 42, 10]
objB = [20, 0, 36, 8]

# Order of the Minkowski distance.
h = 3

npA = np.array(objA)
npB = np.array(objB)

# (sum_i |a_i - b_i|^h) ^ (1/h)
minkowski = np.sum(np.abs(npA - npB) ** h) ** (1 / h)

print(minkowski)

Distances/superior.py

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
import numpy as np

# Chebyshev distance (maximum metric, L-infinity metric), named after
# Pafnuty Chebyshev: the distance between two vectors is the greatest
# of their absolute differences along any coordinate dimension.
#
# It is also known as chessboard distance: on a board with unit
# squares it equals the minimum number of moves a king needs to
# travel between two squares, since the king moves one step in any
# direction, including diagonals.

objA = [22, 1, 42, 10]
objB = [20, 0, 36, 8]

npA = np.array(objA)
npB = np.array(objB)

# Largest per-coordinate absolute difference
# (equivalent to np.linalg.norm(npA - npB, ord=np.inf)).
chebyshev = np.abs(npA - npB).max()

print(chebyshev)

Normalization/minmax.py

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
import numpy as np

# Min-Max normalization: one of the most common ways to rescale data.
# The minimum of the feature maps to new_min, the maximum maps to
# new_max, and every other value lands proportionally in between:
#
#     norm = (x - X.min) / (X.max - X.min) * (new_max - new_min) + new_min

data = [200, 400, 800, 1000, 2000]

# Target range for the normalized values.
nmin = 0
nmax = 10

nparray = np.array(data)

# ------------- Normalizing the data -------------------------- #
# Spread (max - min) and minimum of the original data.
diff = nparray.max() - nparray.min()
npmin = nparray.min()

ndata = (nparray - npmin) / diff * (nmax - nmin) + nmin

print(ndata)

Normalization/minmax_edad.py

+41
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
import numpy as np

# Min-Max normalization of an age list: the feature minimum maps to
# new_min, the maximum maps to new_max, and every other value lands
# proportionally in between:
#
#     norm = (x - X.min) / (X.max - X.min) * (new_max - new_min) + new_min


def minmax_norm(X, npmin, diff, nmax, nmin):
    """Rescale X from the range [npmin, npmin + diff] onto [nmin, nmax]."""
    return (X - npmin) / diff * (nmax - nmin) + nmin


# Ages to normalize.
data = [13, 15, 16, 16, 19, 20, 20, 21, 22, 22,
        25, 25, 25, 25, 30, 33, 33, 35, 35, 35,
        35, 36, 40, 45, 46, 52, 70]

# Target range for the normalized values.
nmin = 0
nmax = 1

nparray = np.array(data)

# ------------- Normalizing the data -------------------------- #
# Spread (max - min) and minimum of the original data.
diff = nparray.max() - nparray.min()
npmin = nparray.min()

ndata = minmax_norm(nparray, npmin, diff, nmax, nmin)

print(ndata)

# Normalized value for an age of 35, rounded to two decimals.
nvalue = minmax_norm(35, npmin, diff, nmax, nmin)
print(np.round(nvalue, 2))

Normalization/znorm.py

+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
import numpy as np

# Z-score normalization: z = (x - mu) / sigma.
#
# The absolute value of z is the distance between the raw score x and
# the population mean, measured in units of the standard deviation.
# z is negative when the raw score is below the mean and positive
# when it is above.

data = [200, 400, 800, 1000, 2000]

nparray = np.array(data)

# ------------- Normalizing the data -------------------------- #
# Mean and (population) standard deviation of the array.
u = nparray.mean()
std = nparray.std()

zdata = (nparray - u) / std

print(zdata)

ProximityMeasure/binary_proximity.py

+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import numpy as np
2+
import pandas as pd
3+
4+
'''
Asymmetric binary dissimilarity between patients described by binary
attributes (first column is the patient name, the rest are 0/1 flags;
the mixed numpy array stores everything as strings, so the flags are
compared as "0"/"1").

For each pair (i, j), with
    q = number of attributes equal to 1 in both,
    r = number equal to 1 in i but 0 in j,
    s = number equal to 0 in i but 1 in j,
the dissimilarity is d = (r + s) / (q + r + s). Matching 0-0
attributes are ignored, as is standard for asymmetric binary data.
'''

# Input the data
data = np.array([
    ["Jack", 1, 0, 1, 0, 0, 0],
    ["Mary", 1, 0, 1, 0, 1, 0],
    ["Jim", 1, 1, 0, 0, 0, 0]
])

patients = data.shape[0]

for i in range(patients-1):
    for j in range(i+1, patients):
        a = data[i, 1:]
        b = data[j, 1:]

        # Count the attribute combinations with boolean masks. This
        # replaces the previous pd.crosstab lookup, which was slow
        # (as its own comment noted) and raised KeyError whenever a
        # 0/1 combination did not occur in a pair.
        q = np.sum((a == "1") & (b == "1"))  # 1 in both
        r = np.sum((a == "1") & (b == "0"))  # 1 in i, 0 in j
        s = np.sum((a == "0") & (b == "1"))  # 0 in i, 1 in j

        d = (r + s)/(q + r + s)

        print("d({}, {})\t=\t{}".format(data[i,0], data[j,0], d))

0 commit comments

Comments
 (0)