Skip to content

Commit 00d6ea3

Browse files
authored
Added score_columns method along with multiple fixes
1 parent f70a757 commit 00d6ea3

File tree

1 file changed

+66
-24
lines changed

1 file changed

+66
-24
lines changed

scalg.py

+66-24
Original file line numberDiff line numberDiff line change
@@ -1,59 +1,66 @@
1-
'''
1+
"""
22
developed by: markmelnic
33
original repo: https://github.com/markmelnic/Scoring-Algorithm
4-
4+
pypi: https://pypi.org/project/scalg/
55
Analyse data using a range based percentual proximity algorithm
66
and calculate the linear maximum likelihood estimation.
77
The basic principle is that all values supplied will be broken
88
down to a range from 0 to 1 and each column's score will be added
99
up to get the total score.
10-
1110
==========
1211
Example for data of vehicles
1312
price|mileage|registration_year
1413
20k |60k |2012
1514
22k |50k |2011
1615
23k |90k |2015
1716
16k |210k |2010
18-
1917
We want the vehicle with the lowest price,
2018
lowest mileage but newest registration year.
2119
Thus the weights for each column are as follows:
2220
[0, 0, 1]
23-
24-
>>> procentual_proximity([[20, 60, 2012],[23, 90, 2015],[22, 50, 2011]], [0, 0, 1])
21+
>>> score([[20, 60, 2012],[23, 90, 2015],[22, 50, 2011]], [0, 0, 1])
2522
[[20, 60, 2012, 2.0], [23, 90, 2015, 1.0], [22, 50, 2011, 1.3333333333333335]]
26-
'''
27-
28-
29-
def procentual_proximity(source_data : list, weights : list) -> list:
30-
31-
'''
32-
weights - int list
33-
possible values - 0 / 1
34-
0 if lower values have higher weight in the data set
35-
1 if higher values have higher weight in the data set
36-
'''
23+
>>> score([[20, 60, 2012],[23, 90, 2015],[22, 50, 2011]], [0, 0, 1], 'scores')
24+
[2.0, 1.0, 1.3333333333333335]
25+
>>> score_columns([[20, 60, 2012],[23, 90, 2015],[22, 50, 2011]], [0, 2], [0, 0, 1])
26+
[[20, 2012, 1.25], [23, 2015, 1.0], [22, 2011, 0.33333333333333337]]
27+
"""
28+
29+
30+
def score(source_data: list, weights: list, *args) -> list:
31+
"""Analyse and score a dataset using a range based percentual proximity
32+
algorithm and calculate the linear maximum likelihood estimation.
33+
Args:
34+
source_data (list): Data set to process.
35+
weights (list): Weights corresponding to each column from the data set.
36+
0 if lower values have higher weight in the data set,
37+
1 if higher values have higher weight in the data set
38+
Optional args:
39+
"score_lists" (str): Returns a list with lists of each column's scores.
40+
"scores" (str): Returns only the final scores.
41+
Raises:
42+
ValueError: Weights can only be either 0 or 1 (int)
43+
Returns:
44+
list: Source data with the score of the set appended as the last element.
45+
"""
3746

3847
# getting data
3948
data_lists = []
4049
for item in source_data:
41-
for i in range(len(item)):
50+
for i, val in enumerate(item):
4251
try:
43-
data_lists[i].append(float(item[i]))
52+
data_lists[i].append(float(val))
4453
except IndexError:
45-
# generate corresponding number of lists
4654
data_lists.append([])
47-
data_lists[i].append(float(item[i]))
55+
data_lists[i].append(float(val))
4856

57+
# calculating price score
4958
score_lists = []
50-
# calculating each score
5159
for dlist, weight in zip(data_lists, weights):
5260
mind = min(dlist)
5361
maxd = max(dlist)
5462

5563
score = []
56-
# for weight 0 score is 1 - actual score
5764
if weight == 0:
5865
for item in dlist:
5966
try:
@@ -68,12 +75,15 @@ def procentual_proximity(source_data : list, weights : list) -> list:
6875
except ZeroDivisionError:
6976
score.append(0)
7077

71-
# weight not 0 or 1
7278
else:
7379
raise ValueError("Invalid weight of %f provided" % (weight))
7480

7581
score_lists.append(score)
7682

83+
# return score lists
84+
if "score_lists" in args:
85+
return score_lists
86+
7787
# initialize final scores
7888
final_scores = [0 for i in range(len(score_lists[0]))]
7989

@@ -82,8 +92,40 @@ def procentual_proximity(source_data : list, weights : list) -> list:
8292
for j, ele in enumerate(slist):
8393
final_scores[j] = final_scores[j] + ele
8494

95+
# return only scores
96+
if "scores" in args:
97+
return final_scores
98+
8599
# append scores to source data
86100
for i, ele in enumerate(final_scores):
87101
source_data[i].append(ele)
88102

89103
return source_data
104+
105+
106+
def score_columns(source_data: list, columns: list, weights: list) -> list:
107+
"""Analyse data file using a range based percentual proximity
108+
algorithm and calculate the linear maximum likelihood estimation.
109+
Args:
110+
source_data (list): Data set to process.
111+
columns (list): Indexes of the source_data columns to be scored.
112+
weights (list): Weights corresponding to each column from the data set.
113+
0 if lower values have higher weight in the data set,
114+
1 if higher values have higher weight in the data set
115+
Raises:
116+
ValueError: Weights can only be either 0 or 1 (int)
117+
Returns:
118+
list: Source data with the score of the set appended as the last element.
119+
"""
120+
121+
temp_data = []
122+
for item in source_data:
123+
temp_data.append([item[c] for c in columns])
124+
125+
if len(weights) > len(columns):
126+
weights = [weights[item] for item in columns]
127+
128+
for i, sc in enumerate(score(temp_data, weights, "scores")):
129+
source_data[i].append(sc)
130+
131+
return source_data

0 commit comments

Comments
 (0)