-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathreader.py
163 lines (136 loc) · 6.07 KB
/
reader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
# Postponed annotation evaluation lets ExerciseType.all() name ExerciseType in its own return annotation
# see https://stackoverflow.com/questions/33533148/how-do-i-type-hint-a-method-with-the-type-of-the-enclosing-class
from __future__ import annotations
import os
import xlrd
from identify import get_uuid, get_team
from typing import List, Dict, NamedTuple
import paths
class ExerciseType(NamedTuple):
    """An exercise read from a workbook: its name and the index of the sheet that holds it."""
    # Name recorded on every evaluation read for this exercise
    name: str
    # Index of the workbook sheet holding this exercise
    sheet_index: int

    @staticmethod
    def all() -> List[ExerciseType]:
        """
        Return every exercise type the reader goes over, in the order they are read.
        """
        operations_research = ExerciseType(name='חקר ביצועים', sheet_index=1)
        solution = ExerciseType(name='Solution', sheet_index=0)
        return [operations_research, solution]
class Attribute(NamedTuple):
    """One evaluation attribute, made of a numeric part and a textual part."""
    # Numeric part, kept as a string
    num: str
    # Textual part
    text: str
class Evaluation(NamedTuple):
    """One evaluation of a candidate: the evaluator who made it, the exercise, and the evaluated attributes."""
    # Who made the evaluation
    evaluator_name: str
    # Exercise the evaluation was given for
    exercise_name: str
    # The evaluated attributes, each with a numeric and a textual part
    learning_ability: Attribute
    personal: Attribute
    interpersonal: Attribute
    leader: Attribute
    summary: Attribute
class Record(NamedTuple):
    """A raw row read from an excel file: the candidate's (private) name, the team, and one evaluation.
    A single candidate may appear in several such records."""
    # Candidate name as written in the sheet
    candidate_name: str
    # Team name the candidate was listed under
    team: str
    # The evaluation read for this candidate
    evaluation: Evaluation
class Candidate(NamedTuple):
    """Everything collected about a single candidate - one object per candidate.
    Always handled in the context of a team, so the team name is not stored here."""
    # Identifier of the candidate
    uuid: str
    # All evaluations gathered for the candidate
    evaluations: List[Evaluation]
def _sanitize_num(value: str) -> str:
"""Remove annoying '.0' from numeric values which should all be integers"""
return f"{int(value)}" if isinstance(value, float) else ""
def _sanitize_team(team_name: str) -> str:
"""Remove tick so that e.g. א and א' are interpreted the same (as א)"""
if isinstance(team_name, float):
coerce = ['א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח', 'ט', 'י'][int(team_name) - 1]
print(f"Team name sanitation: Coercing {team_name} into {coerce}")
return coerce
return team_name.replace("'", "").replace("׳", "")
def _read_attribute(sheet, row, col):
    """
    Build the Attribute whose numeric part sits at (row, col) and whose text sits one row below it
    """
    numeric_part = _sanitize_num(sheet.cell_value(row, col))
    textual_part = sheet.cell_value(row + 1, col)
    return Attribute(num=numeric_part, text=textual_part)
def _read_sheet(sheet: xlrd.sheet.Sheet, exercise_name: str) -> List[Record]:
    """
    Return all candidate records in given sheet.

    Cells read, as (row, column) indices passed to cell_value:
    - the evaluator name at (5, 1) and the team name at (5, 2), shared by every candidate;
    - up to 5 candidates, one every two rows starting at row 5, with the candidate name
      in column 4 and the attributes in columns 5, 10, 15, 20 and 25.

    Slots with an empty candidate name are skipped.

    Raises:
        Exception: if a candidate is present but the team name or the evaluator name is empty.
    """
    first_row = 5
    candidate_count = 5
    rows_per_candidate = 2

    # Evaluator and team sit in fixed cells, so read and sanitize them once per sheet
    # rather than once per candidate slot. This also keeps _sanitize_team from printing
    # the same coercion message for every slot.
    evaluator_name = sheet.cell_value(first_row, 1)
    team_name = _sanitize_team(sheet.cell_value(first_row, 2))

    records = []
    for candidate_index in range(candidate_count):
        row = first_row + candidate_index * rows_per_candidate
        candidate_name = sheet.cell_value(row, 4).strip()
        if not candidate_name:
            continue
        # Missing team / evaluator is only an error once a real candidate is found
        if not team_name:
            raise Exception("Error: No team name")
        if not evaluator_name:
            raise Exception("Error: No evaluator name")
        evaluation = Evaluation(
            evaluator_name=evaluator_name,
            exercise_name=exercise_name,
            learning_ability=_read_attribute(sheet, row, 5),
            personal=_read_attribute(sheet, row, 10),
            interpersonal=_read_attribute(sheet, row, 15),
            leader=_read_attribute(sheet, row, 20),
            summary=_read_attribute(sheet, row, 25),
        )
        records.append(Record(candidate_name, team_name, evaluation))
    return records
def _read_all_files(filepaths: List[str]) -> Dict[str, List[Evaluation]]:
"""
Reads evaluations from all specified file paths.
Returns a dictionary mapping uuid to "candidate tuple"s
A "candidate tuple" maps evaluator name to an AttributeSet objects (which holds all
"""
evaluations_for_candidate = {}
for filepath in filepaths:
print(f"Reading file {filepath}")
src_wb = xlrd.open_workbook(filepath)
for exercise in ExerciseType.all():
sheet = src_wb.sheet_by_index(exercise.sheet_index)
# This is a handy test - most evaluators tend to write their names in the name of the file they upload
# This check can help find files which have incorrect evaluator names
evaluator = sheet.cell_value(5, 1)
if evaluator not in filepath:
print(f"WARNING: evaluator name {evaluator} not in file path {filepath}\n\tIt may be incorrect")
for record in _read_sheet(sheet, exercise.name):
uuid = get_uuid(record.team, record.candidate_name, filepath)
if uuid is None:
continue # Candidate not found, identify module is responsible to report to user
if uuid not in evaluations_for_candidate:
evaluations_for_candidate[uuid] = []
evaluations_for_candidate[uuid].append(record.evaluation)
return evaluations_for_candidate
def _sort_to_teams(all_candidates: Dict[str, List[Evaluation]]) -> Dict[str, List[Candidate]]:
"""
Accepts a dictionary mapping uuid of a candidate to a list of evaluations for this candidate.
Returns a dictionary mapping name of a team to a list of candidates in this team.
"""
# Map team name to list of candidates in that team
teams = {}
for uuid, evaluations in all_candidates.items():
team_name = get_team(uuid)
if team_name not in teams:
teams[team_name] = []
teams[team_name].append(Candidate(uuid, evaluations))
# In each team, we want the candidates sorted by uuid
for team_name in teams:
teams[team_name].sort(key=lambda candidate: candidate.uuid)
return teams
def read() -> Dict[str, List[Candidate]]:
    """
    Read every file in the input directory (paths.INPUT) and return the candidates grouped by team.

    Files are processed in sorted filename order. os.listdir returns entries in arbitrary
    order, and each candidate's evaluations are collected in the order files are read,
    so sorting keeps the result reproducible between runs.
    """
    # Get all full file paths for files in the input directory
    filepaths = [os.path.join(paths.INPUT, filename) for filename in sorted(os.listdir(paths.INPUT))]
    all_candidates = _read_all_files(filepaths)
    print(f"Read totally {len(filepaths)} files")
    return _sort_to_teams(all_candidates)