-
Notifications
You must be signed in to change notification settings - Fork 12
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Firstpipe module #35
Firstpipe module #35
Changes from all commits
4f82127
ca0d693
407516b
469055f
3c7c6c9
8e03c6c
19cf640
42a47bb
f6833ab
d9b5d2a
c545ae8
e4fc975
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,13 @@ | ||
|
||
from abc import ABC, abstractmethod | ||
from typing import Literal | ||
from typing import Literal | ||
from itertools import product | ||
|
||
class JsonSchema(ABC): | ||
""" | ||
This class helps decode and work on a defined JSON schema used by the stimulus pipeline | ||
This class helps decode and work on a defined JSON schema used by the stimulus pipeline. | ||
TODO add Json.schema real library to control that each noise, split have the correct keys associated to them. | ||
link -> https://json-schema.org/learn/getting-started-step-by-step#create | ||
""" | ||
def __init__(self, schema: dict ) -> None: | ||
self.schema = schema | ||
|
@@ -25,10 +28,11 @@ def __init__(self, schema: dict ) -> None: | |
# check that inside noise dictionary there are no repeated column_nmae values and return them otherwise send error | ||
self.column_names = self._check_repeated_column_names() | ||
|
||
|
||
# check that noise dictionary have a coherent number of parameters values in case of column_wise for self.interpret_parmas_mode | ||
self.number_culumn_wise_val = self._check_params_schema() | ||
|
||
self.number_culumn_wise_val = self._check_noise_params_schema() | ||
|
||
|
||
|
||
def _check_repeated_column_names(self) -> list: | ||
""" | ||
Helper function that ensures that inside noise dictionary there are no column:names repeated values | ||
|
@@ -51,25 +55,24 @@ def _check_repeated_column_names(self) -> list: | |
|
||
|
||
|
||
def _check_params_schema(self) -> int: | ||
def _check_noise_params_schema(self) -> int: | ||
""" | ||
Help function to check if the number of values in params in the noise dictionary is consisten among all params. | ||
Help function to check if the number of values in params in the noise dictionary is consistent among all params. | ||
If there is {"NoiserName" : { "params": [{"val1":[0, 1]}], "OtherNoiser" : { "params": [{"val1":[2, 3], "val3":[4]}]}} | ||
it will raise error because the val3 has only a list of len() 1 instead of 2 | ||
otherwise it returns that common len() | ||
""" | ||
|
||
# in case there is no noise dictionary but a custom one instead or if interpret_params_mode is in all_combinations mode | ||
if (not self.noise_arg and self.custom_arg) or self.interpret_params_mode == 'all_combinations' : | ||
return None | ||
# in case there is no noise dictionary or if interpret_params_mode is in all_combinations mode | ||
if not self.noise_arg or self.interpret_params_mode == 'all_combinations' : | ||
return 0 | ||
|
||
num_params_list = [] | ||
# Iterate through the given dictionary becuse more than one column_name values could be specified for ex. | ||
for i, col_name_dictionary in enumerate(self.noise_arg): | ||
|
||
# take into account that there could be the keyword default | ||
if col_name_dictionary["params"] == "default": | ||
# TODO think what to do in this case | ||
continue | ||
|
||
# iterate throught the possible multiple parmaeters, some noisers could have more than one parameter flag | ||
|
@@ -83,7 +86,167 @@ def _check_params_schema(self) -> int: | |
return num_params_list[0] | ||
else: | ||
raise ValueError(f"Expected the same number of values for all the params under noise value, but received a discordant ammount instead.") | ||
|
||
|
||
|
||
def _transform_noise_dict(self) -> dict:
    """
    Helper function that reshapes the noise entries into a nested dictionary keyed by column name.

    Every entry of self.noise_arg is read through its "column_name", "name" and "params" keys.
    "name" can be a single string or a list of strings. "params" is either the string "default"
    or a sequence holding one parameter dictionary per noiser name, in the same order as "name".

    example ->

        self.noise_arg = [
            {"column_name": "col1", "name": ["noiser1", "noiser2"], "params": [{"p1": [1, 2]}, {"p1": [3, 4]}]},
            {"column_name": "col2", "name": "othernoiser", "params": "default"},
        ]

        returned value ->
        {
            "col1": {"noiser1": {"p1": [1, 2]}, "noiser2": {"p1": [3, 4]}},
            "col2": {"othernoiser": "default"},
        }

    When the same noiser name shows up more than once for the same column_name, the later
    occurrences are stored under "<name>-#<number>" so that no earlier entry gets overwritten.

    Returns a dictionary with structure {column_name: {noiser_name: params}}, which is empty
    when self.noise_arg is empty or None.
    """
    noise_dict = {}
    # a missing or empty noise section simply produces no entries
    for col_name_dictionary in self.noise_arg or []:
        names = col_name_dictionary['name']
        # "name" can be a plain string or a list of strings, always work on a list
        noiser_list = [names] if isinstance(names, str) else names
        params = col_name_dictionary['params']
        for k, noiser_name in enumerate(noiser_list):
            # "default" is shared by every noiser of the entry, otherwise params are positional
            params_to_be_added = "default" if params == "default" else params[k]
            column_noisers = noise_dict.setdefault(col_name_dictionary["column_name"], {})
            if noiser_name in column_noisers:
                # the name is already taken for this column: append a marker, starting from the
                # position in the list and moving forward until the resulting key is unused
                suffix = k
                candidate = noiser_name + '-#' + str(suffix)
                while candidate in column_noisers:
                    suffix += 1
                    candidate = noiser_name + '-#' + str(suffix)
                noiser_name = candidate
            column_noisers[noiser_name] = params_to_be_added
    return noise_dict
|
||
|
||
def _generate_cartesian_product_combinations(self, d: dict) -> list:
    """
    Helper function building every way of picking exactly one inner key per outer key.

    d is a nested dictionary {outer_key: {inner_key: value}}. The cartesian product is taken
    over the inner keys of each outer key, and every pick is returned as a dictionary
    {outer_key: {picked_inner_key: value}}.

    example ->
        {"col1": {"n1": 1, "n2": 2}, "col2": {"m": 3}}
    gives
        [{"col1": {"n1": 1}, "col2": {"m": 3}}, {"col1": {"n2": 2}, "col2": {"m": 3}}]
    """
    outer_keys = list(d)
    # iterating a dictionary yields its keys, so each pick is a tuple of inner keys
    return [
        {outer: {inner: d[outer][inner]} for outer, inner in zip(outer_keys, pick)}
        for pick in product(*d.values())
    ]
|
||
|
||
|
||
def _handle_parameter_selection(self, d: dict, param_index: int) -> dict:
    """
    Helper function that picks the param_index-th value of every parameter of a single noiser/splitter.

    d is expected to hold one item, {name: params}, where params is either the string "default"
    or a dictionary {param_name: list_of_values}. Only the first item of d is used.

    example ->
        d = {"noiser1": {"p1": [1, 2, 3], "p2": [4, 5, 6]}}, param_index = 1
    gives
        {"name": "noiser1", "params": {"p1": 2, "p2": 5}}

    A trailing "-#<number>" marker on the name (added to tell apart repeated noiser names of the
    same column_name) is removed. Only a trailing numeric marker is stripped, so a name that
    merely contains "-#" is left untouched.
    "default" as params gives {"name": name, "params": {}}.
    If d is empty nothing is selected and None is returned.
    """
    for key, param_dict in d.items():
        # strip the disambiguation marker only when it really is a trailing "-#<digits>"
        base_name, marker, tail = key.rpartition('-#')
        if marker and tail.isdigit():
            key = base_name
        # handle "default" as params value returning an empty dict
        if param_dict == 'default':
            return {"name": key, "params": {}}
        # one value per parameter, all taken at the same position
        selected = {param_name: param_value[param_index] for param_name, param_value in param_dict.items()}
        return {"name": key, "params": selected}
|
||
|
||
|
||
def noise_column_wise_combination(self) -> list:
    """
    Compute, from self.noise_arg, all column wise combinations of the specified noisers and parameters.

    Noisers are combined all against all across column_names, with exactly one noiser per
    column_name in each combination (never two noisers for the same column_name).
    example for noisers ->

        column_name : 1                       column_name : 2
        name : [noiser1, noiser2]             name : [othernoiser]

        combinations ->
            noiser1 - othernoiser
            noiser2 - othernoiser

    Each of those noiser combinations is then repeated once per parameter position, taking the
    values at the same position for every parameter of every noiser.
    example with parameters ->

        column_name : 1                                            column_name : 2
        name : [noiser1, noiser2]                                  name : [othernoiser]
        parameters : [{p1 : [1, 2, 3]}, {p1 : [1.5, 2.5, 3.5]}]    parameters : [{p1 : [4, 5, 6], p2 : [7, 8, 9]}]

        combinations ->
            noiser1 (p1 = 1)   - othernoiser (p1 = 4, p2 = 7)
            noiser1 (p1 = 2)   - othernoiser (p1 = 5, p2 = 8)
            noiser1 (p1 = 3)   - othernoiser (p1 = 6, p2 = 9)
            noiser2 (p1 = 1.5) - othernoiser (p1 = 4, p2 = 7)
            noiser2 (p1 = 2.5) - othernoiser (p1 = 5, p2 = 8)
            noiser2 (p1 = 3.5) - othernoiser (p1 = 6, p2 = 9)

    Returns a list with one element per combination, each element being a list of dictionaries
    with keys "column_name", "name" and "params" (one dictionary per column_name).
    """
    # nested view of the noise entry: {col_name: {noiser_name: {p1: [1]}}}
    per_column_noisers = self._transform_noise_dict()

    # every way of choosing one noiser per column_name
    noiser_selections = self._generate_cartesian_product_combinations(per_column_noisers)

    combinations = []
    for selection in noiser_selections:
        # one combination per parameter position
        for value_position in range(self.number_culumn_wise_val):
            entries = []
            for column, chosen_noiser in selection.items():
                picked = self._handle_parameter_selection(chosen_noiser, value_position)
                picked["column_name"] = column
                # keys in alphabetical order, for readability only
                entries.append(dict(sorted(picked.items())))
            combinations.append(entries)
    return combinations
|
||
|
||
|
||
def noise_all_combination(self) -> list:
    """
    Placeholder for the all against all combination of noisers and parameters of self.noise_arg.

    Not implemented yet: calling it always raises ValueError.
    TODO implement this function (a reviewer asked for an issue to track it).
    """
    not_implemented_message = "the function noise_all_combination for the flag interpret_parmas_mode : all_combinations is not implemented yet "
    raise ValueError(not_implemented_message)
|
||
|
||
|
||
def split_combination(self) -> list:
    """
    Decouple the split entry so that every returned element holds a single value per parameter.

    self.split_arg is iterated entry by entry; every entry is read through its "name" and
    "params" keys.

    - params equal to "default": one element {"split": [entry with "params" set to {}]} is
      produced. The entry is copied, self.split_arg itself is left untouched, so the method
      can be called more than once.
    - otherwise params[0] must be a dictionary {param_name: list_of_values} whose lists all have
      the same length; one element {"name": ..., "params": {param_name: value}} is produced per
      position in those lists.

    NOTE(review): the two branches produce differently shaped elements (wrapped in
    {"split": [...]} versus a bare dictionary). This is kept as found, confirm which shape the
    callers expect.

    Raises ValueError when the parameter lists of one splitter do not all have the same length
    (this includes a params[0] with no parameter at all).
    """
    list_split_comibinations = []
    for split_dict in self.split_arg:
        if split_dict['params'] == "default":
            # build a copy instead of overwriting split_dict['params'], otherwise a second call
            # would find {} here and fail while trying to read params[0]
            list_split_comibinations.append({"split": [{**split_dict, "params": {}}]})
            continue

        param_lists = split_dict['params'][0]
        # all the lists of values must share one single length
        distinct_lengths = {len(values) for values in param_lists.values()}
        if len(distinct_lengths) != 1:
            raise ValueError(f"All split params for teh same splitter have to have the same number of elements, this splitter does not: {split_dict['name']}.")

        (number_of_values,) = distinct_lengths
        for params_index in range(number_of_values):
            # shape the split as {name: params} so that _handle_parameter_selection can use it
            single_param_dict = self._handle_parameter_selection({split_dict['name']: param_lists}, params_index)
            list_split_comibinations.append(single_param_dict)
    return list_split_comibinations
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should be transform_noise_dict without "_" since this calls self.