-
Notifications
You must be signed in to change notification settings - Fork 12
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Firstpipe module #35
Firstpipe module #35
Changes from 8 commits
4f82127
ca0d693
407516b
469055f
3c7c6c9
8e03c6c
19cf640
42a47bb
f6833ab
d9b5d2a
c545ae8
e4fc975
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
|
||
from abc import ABC, abstractmethod | ||
from typing import Literal | ||
from typing import Literal | ||
from itertools import product | ||
|
||
class JsonSchema(ABC): | ||
""" | ||
|
@@ -83,7 +84,143 @@ def _check_params_schema(self) -> int: | |
return num_params_list[0] | ||
else: | ||
raise ValueError(f"Expected the same number of values for all the params under noise value, but received a discordant ammount instead.") | ||
|
||
|
||
def _transform_noise_dict(self):
    """
    Regroup the entries of ``self.noise_arg`` by column name.

    Every entry of ``self.noise_arg`` is read through three keys:

    * ``"column_name"`` : the column the noisers apply to.
    * ``"name"``        : a single noiser name (str) or a list of noiser names.
    * ``"params"``      : either the string ``"default"`` (applied to every
                          noiser of the entry) or a list with one parameter
                          set per noiser name, in the same order.

    Returns:
        dict: ``{column_name: {noiser_name: params}}`` where ``params`` is
        either ``"default"`` or the parameter set found at the noiser's
        position in ``"params"``.

    When a noiser name is already registered for a column, the new key gets a
    ``-#<n>`` suffix so that both parameter sets are kept. ``<n>`` starts at
    the position of the noiser inside its ``"name"`` list and is increased
    until the key is unused, so an earlier entry is never overwritten.
    ``_handle_parameter_selection`` strips everything from ``-#`` onwards to
    recover the real noiser name.

    Example:
        self.noise_arg = [
            {"column_name": "c1", "name": ["n1", "n1"],
             "params": [{"p": [1, 2]}, {"p": [3, 4]}]},
            {"column_name": "c2", "name": "n2", "params": "default"},
        ]

        returns

        {"c1": {"n1": {"p": [1, 2]}, "n1-#1": {"p": [3, 4]}},
         "c2": {"n2": "default"}}
    """
    noise_dict = {}
    for entry in self.noise_arg:
        # "name" may be a plain string or a list of strings: always work on a list.
        names = entry["name"]
        if isinstance(names, str):
            names = [names]
        params = entry["params"]

        for k, noiser_name in enumerate(names):
            # "params" may be the literal "default" instead of a per-noiser list.
            params_to_be_added = "default" if params == "default" else params[k]

            # The column is registered only once a noiser is actually stored for it.
            column_noisers = noise_dict.setdefault(entry["column_name"], {})

            if noiser_name in column_noisers:
                # Same noiser used more than once on this column: build a unique key.
                # The suffix starts at k and is bumped while the key is taken,
                # because a key such as "n1-#1" may already have been created by
                # a previous entry that targets the same column.
                suffix = k
                unique_name = f"{noiser_name}-#{suffix}"
                while unique_name in column_noisers:
                    suffix += 1
                    unique_name = f"{noiser_name}-#{suffix}"
                noiser_name = unique_name

            column_noisers[noiser_name] = params_to_be_added
    return noise_dict
|
||
|
||
def _generate_cartesian_product_combinations(self, d: dict) -> list:
    """
    Build every way of picking exactly one inner key per outer key of ``d``.

    ``d`` is a two level dictionary ``{outer_key: {inner_key: value}}``.
    Each element of the returned list is a dictionary with the same outer
    keys as ``d``, where every outer key maps to a one-item dictionary
    ``{inner_key: value}`` holding the inner key picked for it.

    Example:
        {"c1": {"a": 1, "b": 2}, "c2": {"x": 3}}

        returns

        [{"c1": {"a": 1}, "c2": {"x": 3}},
         {"c1": {"b": 2}, "c2": {"x": 3}}]
    """
    outer_keys = list(d)
    # Iterating an inner dictionary yields its keys, so product() enumerates
    # one inner key per outer key, in the order of the outer keys.
    return [
        {outer: {inner: d[outer][inner]} for outer, inner in zip(outer_keys, picks)}
        for picks in product(*d.values())
    ]
|
||
|
||
|
||
def _handle_parameter_selection(self, d: dict, param_index: int) -> dict:
    """
    Turn a ``{noiser_name: params}`` dictionary into a single noiser description.

    Only the first item of ``d`` is used. ``params`` is either the string
    ``"default"`` or a dictionary ``{param_name: list_of_values}``; in the
    latter case the value at position ``param_index`` is taken from each list.

    Returns:
        ``{"name": noiser_name, "params": selected_params}``, or ``None``
        when ``d`` is empty.
    """
    # NOTE(review): a reviewer on the pull request suggested dropping the
    # leading "_" from this method name.
    for raw_name, options in d.items():
        # Drop the "-#<n>" suffix that _transform_noise_dict appends to repeated
        # noiser names; names without the suffix are left untouched.
        noiser_name = raw_name.split('-#')[0]

        # "default" is passed through as it is.
        if options == 'default':
            return {"name": noiser_name, "params": options}

        # Otherwise keep, for every parameter, the value at the requested index.
        chosen = {
            option_name: option_values[param_index]
            for option_name, option_values in options.items()
        }
        return {"name": noiser_name, "params": chosen}
|
||
|
||
|
||
def noise_column_wise_combination(self) -> list:
    """
    Compute all column wise combinations of noisers and parameters from self.noise_arg.

    Noisers are combined all against all across columns, but a combination
    never holds two noisers for the same column_name and always holds one
    noiser for each column_name.

    Example of noiser selection ->

        column_name : 1                     column_name : 2
        name : [noiser1, noiser2]           name : [othernoiser]

        combinations ->
            noiser1 - othernoiser
            noiser2 - othernoiser

    Each of these selections is then repeated once per parameter position,
    taking the value at the same position from every parameter list ->

        column_name : 1                                             column_name : 2
        name : [noiser1, noiser2]                                   name : [othernoiser]
        parameters : [{p1 : [1, 2, 3]}, {p1 : [1.5, 2.5, 3.5]}]     parameters : [{p1 : [4, 5, 6], p2 : [7, 8, 9]}]

        combinations ->
            noiser1 (p1 = 1)   - othernoiser (p1 = 4, p2 = 7)
            noiser1 (p1 = 2)   - othernoiser (p1 = 5, p2 = 8)
            noiser1 (p1 = 3)   - othernoiser (p1 = 6, p2 = 9)
            noiser2 (p1 = 1.5) - othernoiser (p1 = 4, p2 = 7)
            noiser2 (p1 = 2.5) - othernoiser (p1 = 5, p2 = 8)
            noiser2 (p1 = 3.5) - othernoiser (p1 = 6, p2 = 9)

    Returns:
        list: one ``{"noise": [...]}`` dictionary per combination, where the
        list holds one ``{"column_name", "name", "params"}`` dictionary per column.
    """
    # Nested view of the noise entry: {column_name: {noiser_name: params}}
    per_column_noisers = self._transform_noise_dict()

    # One noiser picked per column, in every possible way
    noiser_selections = self._generate_cartesian_product_combinations(per_column_noisers)

    experiments = []
    for selection in noiser_selections:
        # Walk the parameter positions; the same position is used for every column
        for value_index in range(self.number_culumn_wise_val):
            entries = []
            for column, noiser in selection.items():
                entry = self._handle_parameter_selection(noiser, value_index)
                entry["column_name"] = column
                # Keys in alphabetical order, for readability only
                entries.append(dict(sorted(entry.items())))
            experiments.append({'noise': entries})
    return experiments
|
||
|
||
|
||
def noise_all_combination(self) -> list:
    """
    Placeholder for the all against all combination of noisers and parameters.

    Intended to work on self.noise_arg, but not implemented yet: calling it
    always raises.

    Raises:
        ValueError: on every call.
    """
    # NOTE(review): a reviewer on the pull request asked whether an issue could
    # be opened to track this missing implementation.
    # TODO implement this function
    not_implemented_message = "the function noise_all_combination for the flag interpret_parmas_mode : all_combinations is not implemented yet "
    raise ValueError(not_implemented_message)
|
||
|
||
|
||
def split_combination(self) -> list: | ||
""" | ||
TODO add description | ||
""" | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Possible to write an issue about this ? |
||
# iterate through the split entry and return a list of split possibilities, where each splitter_name has one/set of one parametyers |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,7 +7,8 @@ | |
|
||
def get_args(): | ||
|
||
"get the arguments when using from the commandline" | ||
"""get the arguments when using from the commandline | ||
TODO write help function description""" | ||
|
||
parser = argparse.ArgumentParser(description="") | ||
parser.add_argument("-j", "--json", type=str, required=True, metavar="FILE", help='The json config file that hold all parameter info') | ||
|
@@ -30,11 +31,18 @@ def interpret_json(input_json: dict) -> list: | |
#print("\nnoise_configurations :\n", schema.noise_arg, "\n", type(schema.noise_arg)) | ||
#print("\nsplit_configurations :\n", schema.split_arg, "\n", type(schema.split_arg)) | ||
#print("\ncustom_configurations :\n", schema.custom_arg, "\n", type(schema.custom_arg)) | ||
print(schema.number_culumn_wise_val) | ||
#print(schema.number_culumn_wise_val) | ||
#print(schema.experiment, schema.interpret_params_mode, schema.column_names) | ||
|
||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ideally, prints should not be used and replaced by a logger if you are using those to debug (see https://docs.python.org/3/howto/logging.html ) |
||
|
||
# compute all noise combinations | ||
# first set right fucntion call based on schema.interpret_params_mode, done like following because if are inefficient | ||
function_call_dict = {"culumn_wise": schema.noise_column_wise_combination, "all_combinations": schema.noise_all_combination} | ||
list_noise_combinations = function_call_dict[schema.interpret_params_mode]() | ||
print(list_noise_combinations, len(list_noise_combinations)) | ||
|
||
# compute all split combinations, this will only be all vs all because there is no concept of column_name | ||
list_split_combinations = schema.split_combination() | ||
print(list_split_combinations, len(list_split_combinations)) | ||
|
||
|
||
def main(config_json: str) -> str: | ||
|
@@ -44,7 +52,7 @@ def main(config_json: str) -> str: | |
with open(config_json, 'r') as in_json: | ||
config = json.load(in_json) | ||
|
||
# initialize the json scheme class | ||
# interpret the json | ||
interpret_json(config) | ||
|
||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should be transform_noise_dict without "_" since this calls self.