Skip to content

Commit 18ea499

Browse files
Merge pull request #29 from mathysgrapotte/pipeline_integration
updated the Json schema to be column_name oriented instead of noise f…
2 parents 788cbd7 + 831a2d4 commit 18ea499

File tree

3 files changed

+61
-26
lines changed

3 files changed

+61
-26
lines changed

bin/json_schema.py

Lines changed: 55 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -7,52 +7,83 @@ class JsonSchema(ABC):
77
This class helps decode and work on a defined JSON schema used by the stimulus pipeline
88
"""
99
def __init__(self, schema: dict ) -> None:
10-
self.schema = schema
11-
self.noise_arg = schema.get('noise', [])
12-
self.split_arg = schema.get('split', [])
13-
self.custom_arg = schema.get('custom', [])
10+
self.schema = schema
11+
self.interpret_params_mode = schema.get('interpret_parmas_mode', "culumn_wise")
12+
self.experiment = schema.get('experiment', None)
13+
self.noise_arg = schema.get('noise', [])
14+
self.split_arg = schema.get('split', [])
15+
self.custom_arg = schema.get('custom', [])
1416

15-
# check that both noise and split have they're coherent number of parameters values
16-
self.number_noise_val = self._check_params_schema('noise')
17-
self.number_split_val = self._check_params_schema('split')
17+
# Send error if experiment name is missing
18+
if not self.experiment:
19+
raise ValueError(f"No experiment name given, the Json should always have a experiment:'ExperimentName' field")
1820

21+
# Raise an error if self.interpret_params_mode is not one of the allowed values
22+
if self.interpret_params_mode not in ["culumn_wise", "all_combinations"]:
23+
raise ValueError(f"interpret_params_mode value can only be one of the following keywords -> ['culumn_wise', 'all_combinations']")
1924

20-
def _check_params_schema(self, switch: Literal['noise', 'split']) -> int:
25+
# check that there are no repeated column_name values inside the noise dictionary; return the names, otherwise raise an error
26+
self.column_names = self._check_repeated_column_names()
27+
28+
29+
# check that the noise dictionary has a consistent number of parameter values when self.interpret_params_mode is column-wise
30+
self.number_culumn_wise_val = self._check_params_schema()
31+
32+
def _check_repeated_column_names(self) -> list:
33+
"""
34+
Helper function that ensures there are no repeated column_name values inside the noise dictionary
35+
"""
36+
37+
# in case there is no noise or split flag but a custom one instead
38+
if not self.noise_arg and self.custom_arg:
39+
return None
40+
41+
column_name_list = []
42+
for i, dictionary in enumerate(self.noise_arg):
43+
column_name = dictionary["column_name"]
44+
45+
# If already present as a name throw an error
46+
if column_name in column_name_list:
47+
raise ValueError(f"The column_name {column_name} is repeated. column_names should be unique.")
48+
else:
49+
column_name_list.append(column_name)
50+
return column_name_list
51+
52+
53+
54+
def _check_params_schema(self) -> int:
2155
"""
2256
Helper function to check that the number of values in params in the noise dictionary is consistent among all params.
23-
If there is {"Noisernmae" : { "params": [{"val1":[0, 1]}], "OtherNoiser" : { "params": [{"val1":[2, 3], "val3":[4]}]}}
57+
If there is {"NoiserName" : { "params": [{"val1":[0, 1]}], "OtherNoiser" : { "params": [{"val1":[2, 3], "val3":[4]}]}}
2458
it will raise error because the val3 has only a list of len() 1 instead of 2
2559
otherwise it returns that common len()
2660
"""
2761

28-
starting_list = self.noise_arg
29-
if switch == 'split':
30-
starting_list = self.split_arg
31-
32-
# in case there is no noise or split flag but a custom one instead
33-
if not starting_list and self.custom_arg:
62+
# in case there is no noise dictionary but a custom one instead or if interpret_params_mode is in all_combinations mode
63+
if (not self.noise_arg and self.custom_arg) or self.interpret_params_mode == 'all_combinations' :
3464
return None
3565

3666
num_params_list = []
37-
# Iterate through the given dictionary becuse more than one noising function could be specified for ex.
38-
for i, dictionary in enumerate(starting_list):
67+
# Iterate through the given dictionary because more than one column_name value could be specified, for example.
68+
for i, col_name_dictionary in enumerate(self.noise_arg):
3969

4070
# take into account that there could be the keyword default
41-
if dictionary["params"] == "default":
71+
if col_name_dictionary["params"] == "default":
4272
# TODO think what to do in this case
4373
continue
4474

45-
# iterate throught the possible multiple parmaeters
75+
# iterate through the possibly multiple parameters; some noisers could have more than one parameter flag
4676
else:
47-
for params_flag, params_list in dictionary["params"][0].items():
48-
num_params_list.append(len(params_list))
49-
77+
for k, params_dict in enumerate(col_name_dictionary["params"]):
78+
for params_flag, params_list in params_dict.items():
79+
num_params_list.append(len(params_list))
80+
5081
# check that all parameters values found are equal
5182
if len(set(num_params_list)) == 1:
5283
return num_params_list[0]
5384
else:
54-
raise ValueError(f"Expected the same number of values for all the params under {switch} flag, but received a discordant ammount input Json.")
55-
85+
raise ValueError(f"Expected the same number of values for all the params under noise value, but received a discordant ammount instead.")
86+
5687

5788

5889

bin/launch_interpret_json.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,16 @@ def interpret_json(input_json: dict) -> list:
2424

2525
# TODO handle no noise or splitter
2626

27-
# Initialize json schema
27+
# Initialize the JSON schema; it checks the correctness of the JSON architecture and its fields / values
2828
schema = JsonSchema(input_json)
2929

3030
#print("\nnoise_configurations :\n", schema.noise_arg, "\n", type(schema.noise_arg))
3131
#print("\nsplit_configurations :\n", schema.split_arg, "\n", type(schema.split_arg))
3232
#print("\ncustom_configurations :\n", schema.custom_arg, "\n", type(schema.custom_arg))
33-
print(schema.number_noise_val, schema.number_split_val)
33+
print(schema.number_culumn_wise_val)
34+
#print(schema.experiment, schema.interpret_params_mode, schema.column_names)
35+
36+
3437

3538

3639

modules/interpret_json.nf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
process INTERPRET_JSON {
33

44
container "python@sha256:a2d01031695ff170831430810ee30dd06d8413b08f72ad978b43fd10daa6b86e" // python 3.11.8-slim-bullseye
5+
label 'process_low'
56

67
input:
78
path user_json

0 commit comments

Comments
 (0)