- '''
+ """
developed by: markmelnic
original repo: https://github.com/markmelnic/Scoring-Algorithm
-
+ pypi: https://pypi.org/project/scalg/
Analyse data using a range based percentual proximity algorithm
and calculate the linear maximum likelihood estimation.
The basic principle is that all values supplied will be broken
down to a range from 0 to 1 and each column's score will be added
up to get the total score.
-
==========
Example for data of vehicles
price|mileage|registration_year
20k |60k |2012
22k |50k |2011
23k |90k |2015
16k |210k |2010
-
We want the vehicle with the lowest price,
lowest mileage but newest registration year.
Thus the weights for each column are as follows:
[0, 0, 1]
-
- >>> procentual_proximity([[20, 60, 2012],[23, 90, 2015],[22, 50, 2011]], [0, 0, 1])
+ >>> score([[20, 60, 2012],[23, 90, 2015],[22, 50, 2011]], [0, 0, 1])
[[20, 60, 2012, 2.0], [23, 90, 2015, 1.0], [22, 50, 2011, 1.3333333333333335]]
- '''
-
-
- def procentual_proximity(source_data: list, weights: list) -> list:
-
-     '''
-     weights - int list
-     possible values - 0 / 1
-     0 if lower values have higher weight in the data set
-     1 if higher values have higher weight in the data set
-     '''
+ >>> score([[20, 60, 2012],[23, 90, 2015],[22, 50, 2011]], [0, 0, 1], 'scores')
+ [2.0, 1.0, 1.3333333333333335]
+ >>> score_columns([[20, 60, 2012],[23, 90, 2015],[22, 50, 2011]], [0, 2], [0, 0, 1])
+ [[20, 2012, 1.25], [23, 2015, 1.0], [22, 2011, 0.33333333333333337]]
+ """
+
+
+ def score(source_data: list, weights: list, *args) -> list:
+     """Analyse and score a dataset using a range based percentual proximity
+     algorithm and calculate the linear maximum likelihood estimation.
+     Args:
+         source_data (list): Data set to process.
+         weights (list): Weights corresponding to each column from the data set.
+             0 if lower values have higher weight in the data set,
+             1 if higher values have higher weight in the data set
+     Optional args:
+         "score_lists" (str): Returns a list with lists of each column scores.
+         "scores" (str): Returns only the final scores.
+     Raises:
+         ValueError: Weights can only be either 0 or 1 (int)
+     Returns:
+         list: Source data with the score of the set appended as the last element.
+     """

    # getting data
    data_lists = []
    for item in source_data:
-         for i in range(len(item)):
+         for i, val in enumerate(item):
            try:
-                 data_lists[i].append(float(item[i]))
+                 data_lists[i].append(float(val))
            except IndexError:
-                 # generate corresponding number of lists
                data_lists.append([])
-                 data_lists[i].append(float(item[i]))
+                 data_lists[i].append(float(val))

+     # calculating price score
    score_lists = []
-     # calculating each score
    for dlist, weight in zip(data_lists, weights):
        mind = min(dlist)
        maxd = max(dlist)

        score = []
-         # for weight 0 score is 1 - actual score
        if weight == 0:
            for item in dlist:
                try:
@@ -68,12 +75,15 @@ def procentual_proximity(source_data: list, weights: list) -> list:
                except ZeroDivisionError:
                    score.append(0)

-         # weight not 0 or 1
        else:
            raise ValueError("Invalid weight of %f provided" % (weight))

        score_lists.append(score)

+     # return score lists
+     if "score_lists" in args:
+         return score_lists
+
    # initialize final scores
    final_scores = [0 for i in range(len(score_lists[0]))]

@@ -82,8 +92,40 @@ def procentual_proximity(source_data: list, weights: list) -> list:
        for j, ele in enumerate(slist):
            final_scores[j] = final_scores[j] + ele

+     # return only scores
+     if "scores" in args:
+         return final_scores
+
    # append scores to source data
    for i, ele in enumerate(final_scores):
        source_data[i].append(ele)

    return source_data
+
+
+ def score_columns(source_data: list, columns: list, weights: list) -> list:
+     """Analyse data file using a range based procentual proximity
+     algorithm and calculate the linear maximum likelihood estimation.
+     Args:
+         source_data (list): Data set to process.
+         columns (list): Indexes of the source_data columns to be scored.
+         weights (list): Weights corresponding to each column from the data set.
+             0 if lower values have higher weight in the data set,
+             1 if higher values have higher weight in the data set
+     Raises:
+         ValueError: Weights can only be either 0 or 1 (int)
+     Returns:
+         list: Source data with the score of the set appended as the last element.
+     """
+
+     temp_data = []
+     for item in source_data:
+         temp_data.append([item[c] for c in columns])
+
+     if len(weights) > len(columns):
+         weights = [weights[item] for item in columns]
+
+     for i, sc in enumerate(score(temp_data, weights, "scores")):
+         source_data[i].append(sc)
+
+     return source_data
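
A minimal usage sketch of the updated API follows. It assumes the file above is the package's top-level module, so that `score` and `score_columns` can be imported from `scalg` (see the pypi link added in the docstring); the per-column arithmetic in the comments is worked out from the doctest values above and is illustrative only.

import copy

from scalg import score, score_columns  # assumes this file is installed as the scalg package

vehicles = [[20, 60, 2012], [23, 90, 2015], [22, 50, 2011]]
weights = [0, 0, 1]  # 0 = lower is better (price, mileage), 1 = higher is better (year)

# Each column is scaled to the 0..1 range as (value - min) / (max - min);
# a weight of 0 flips that to 1 - scaled value.  For the first vehicle:
# price 1.0 (cheapest), mileage 1 - 10/40 = 0.75, year 1/4 = 0.25, total 2.0.
# deepcopy is used because score() appends the result to the rows it is given.
print(score(copy.deepcopy(vehicles), weights))
# [[20, 60, 2012, 2.0], [23, 90, 2015, 1.0], [22, 50, 2011, 1.3333333333333335]]

# "scores" returns only the summed totals, "score_lists" the per-column scores.
print(score(copy.deepcopy(vehicles), weights, "scores"))
# [2.0, 1.0, 1.3333333333333335]

# score_columns scores only the selected column indexes (price and year here),
# trimming the weights to match, and appends each combined score to its row.
print(score_columns(copy.deepcopy(vehicles), [0, 2], weights))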