
Commit 788cbd7

Merge pull request #27 from mathysgrapotte/pipeline_integration
Pipeline integration
2 parents 984b3f0 + a61484b commit 788cbd7

16 files changed (+602, -1 lines)

.gitignore

Lines changed: 1 addition & 1 deletion
@@ -6,7 +6,7 @@ bin/**/__pycache__/
 /notebook/.ipynb_checkpoints
 .ipynb_checkpoints
 .nextflow*
-works
+/work/
 .DS_Store
 /singularity_cache
 /results/
Dockerfile

Lines changed: 11 additions & 0 deletions

# python 3.11.8-slim-bullseye
FROM python@sha256:a2d01031695ff170831430810ee30dd06d8413b08f72ad978b43fd10daa6b86e
LABEL maintainer="Alessio Vignoli" \
      name="alessiovignoli3/stimulus:torch_scikit_numpy" \
      description="Docker image containing python packages required for model-check pipeline"

# install the required python packages
RUN pip install \
    numpy==1.26.0 \
    pytorch-lightning==2.0.1 \
    scikit-learn==1.3.0
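
Because the base image is pinned by digest and the packages by exact version, the environment can be sanity-checked from inside the container. A minimal sketch (hypothetical script name; the assertions only restate the pins above):

# check_env.py - hypothetical sanity check, run inside the container
import numpy
import pytorch_lightning
import sklearn

# versions restate the pins from the RUN pip install above
assert numpy.__version__ == "1.26.0"
assert pytorch_lightning.__version__ == "2.0.1"
assert sklearn.__version__ == "1.3.0"
print("all pinned versions present")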

bin/json_schema.py

Lines changed: 58 additions & 0 deletions
from abc import ABC
from typing import Literal


class JsonSchema(ABC):
    """
    This class helps decode and work on a defined Json schema used by the stimulus pipeline.
    """
    def __init__(self, schema: dict) -> None:
        self.schema = schema
        self.noise_arg = schema.get('noise', [])
        self.split_arg = schema.get('split', [])
        self.custom_arg = schema.get('custom', [])

        # check that both noise and split have a coherent number of parameter values
        self.number_noise_val = self._check_params_schema('noise')
        self.number_split_val = self._check_params_schema('split')

    def _check_params_schema(self, switch: Literal['noise', 'split']) -> int | None:
        """
        Helper that checks whether the number of values under params is consistent among all params.
        Given {"NoiserName": {"params": [{"val1": [0, 1]}]}, "OtherNoiser": {"params": [{"val1": [2, 3], "val3": [4]}]}}
        it raises an error because val3 has a list of len() 1 instead of 2;
        otherwise it returns that len().
        """
        starting_list = self.noise_arg
        if switch == 'split':
            starting_list = self.split_arg

        # in case there is no noise or split flag but a custom one instead
        if not starting_list and self.custom_arg:
            return None

        num_params_list = []
        # iterate through the given list because more than one noising function could be specified
        for dictionary in starting_list:

            # take into account that there could be the keyword default
            if dictionary["params"] == "default":
                # TODO think about what to do in this case
                continue

            # iterate through the possibly multiple parameters
            for params_flag, params_list in dictionary["params"][0].items():
                num_params_list.append(len(params_list))

        # nothing to compare when every entry used the default keyword
        if not num_params_list:
            return None

        # check that all parameter value counts found are equal
        if len(set(num_params_list)) == 1:
            return num_params_list[0]
        else:
            raise ValueError(f"Expected the same number of values for all the params under the {switch} flag, but received a discordant amount in the input Json.")
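
For illustration, a hedged usage sketch of JsonSchema (the noiser names and params below are made up, but follow the layout the docstring describes):

# hypothetical schemas exercising the consistency check
from json_schema import JsonSchema

ok = {"noise": [{"name": "NoiserA", "params": [{"val1": [0, 1]}]},
                {"name": "NoiserB", "params": [{"val1": [2, 3], "val2": [4, 5]}]}]}
print(JsonSchema(ok).number_noise_val)    # 2: every param carries two values

bad = {"noise": [{"name": "NoiserA", "params": [{"val1": [0, 1]}]},
                 {"name": "NoiserB", "params": [{"val1": [2, 3], "val3": [4]}]}]}
JsonSchema(bad)                           # raises ValueError: discordant amount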

bin/launch_csv_handling.py

Lines changed: 38 additions & 0 deletions
#!/usr/bin/env python3

import argparse
from src.data.csv_parser import CSVParser
import src.data.experiments as exp


def get_args():
    """Get the arguments when used from the command line."""
    parser = argparse.ArgumentParser(description="")
    parser.add_argument("-c", "--csv", type=str, required=True, metavar="FILE", help='The file path for the csv containing all data')
    parser.add_argument("-j", "--json", type=str, required=True, metavar="FILE", help='The json config file that holds all the parameter info')

    args = parser.parse_args()
    return args


def main(data_csv, config_json):

    print(data_csv, config_json)

    # intended flow, still to be implemented:
    """
    experiment = exp.eval(json[experiment_name])
    data = CsvHandler(data_csv, experiment)
    data.noise(json[params])
    data.split(json[split_parameters])
    """


if __name__ == "__main__":
    args = get_args()
    main(args.csv, args.json)

bin/launch_interpret_json.py

Lines changed: 53 additions & 0 deletions
#!/usr/bin/env python3

import argparse
import json
from json_schema import JsonSchema


def get_args():
    """Get the arguments when used from the command line."""
    parser = argparse.ArgumentParser(description="")
    parser.add_argument("-j", "--json", type=str, required=True, metavar="FILE", help='The json config file that holds all the parameter info')

    args = parser.parse_args()
    return args


def interpret_json(input_json: dict) -> None:

    # TODO handle the case of no noise or splitter

    # initialize the json schema
    schema = JsonSchema(input_json)

    #print("\nnoise_configurations :\n", schema.noise_arg, "\n", type(schema.noise_arg))
    #print("\nsplit_configurations :\n", schema.split_arg, "\n", type(schema.split_arg))
    #print("\ncustom_configurations :\n", schema.custom_arg, "\n", type(schema.custom_arg))
    print(schema.number_noise_val, schema.number_split_val)


def main(config_json: str) -> None:

    # open and read the Json
    config = {}
    with open(config_json, 'r') as in_json:
        config = json.load(in_json)

    # interpret the json through the schema class
    interpret_json(config)


if __name__ == "__main__":
    args = get_args()
    main(args.json)

configs/crg.config

Lines changed: 42 additions & 0 deletions
params {
    config_profile_name        = 'CRG profile'
    config_profile_description = 'Configuration to run on CRG cluster'

    max_cpus   = 64
    max_memory = 100.GB
    max_time   = 48.h
}


process {
    executor      = 'crg'
    maxRetries    = params.max_retries
    errorStrategy = params.err_start

    withLabel:process_low {
        queue  = 'cn-el7,short-centos79'
        cpus   = { check_max( 1, 'cpus' ) }
        memory = { check_max( 4.GB * task.attempt, 'memory' ) }
        time   = { check_max( 1.h * task.attempt, 'time' ) }
    }
    withLabel:process_medium {
        queue  = 'cn-el7,short-centos79'
        cpus   = { check_max( 4, 'cpus' ) }
        memory = { check_max( 10.GB * task.attempt, 'memory' ) }
        time   = { check_max( 6.h * task.attempt, 'time' ) }
    }
    withLabel:process_medium_high {
        queue  = 'cn-el7'
        cpus   = { check_max( 12, 'cpus' ) }
        memory = { check_max( 50.GB * task.attempt, 'memory' ) }
        time   = { check_max( 12.h * task.attempt, 'time' ) }
    }
}


singularity {
    enabled  = true
    cacheDir = 'singularity_cache'
}
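
Note that check_max, params.max_retries and params.err_start are referenced here but not defined in any file of this commit; in nf-core-style pipelines check_max typically caps each request at the params.max_* ceilings. A minimal Python sketch of that capping idea, purely illustrative (the real function is Groovy and also handles memory and time units):

# illustrative only: mirrors the capping behaviour for the 'cpus' case
def check_max(value: int, max_cpus: int = 64) -> int:
    """Cap a requested cpu count at the configured ceiling (params.max_cpus)."""
    return min(value, max_cpus)

print(check_max(12))    # 12, under the 64-cpu ceiling
print(check_max(128))   # 64, capped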

configs/local.config

Lines changed: 36 additions & 0 deletions
params {
    config_profile_name        = 'Local profile'
    config_profile_description = 'Configuration to run on local machine'
}


process {
    maxRetries    = params.max_retries
    errorStrategy = params.err_start

    withLabel:process_low {
        cpus   = { check_max( 1, 'cpus' ) }
        memory = { check_max( 4.GB * task.attempt, 'memory' ) }
        time   = { check_max( 1.h * task.attempt, 'time' ) }
    }
    withLabel:process_medium {
        cpus   = { check_max( 4, 'cpus' ) }
        memory = { check_max( 10.GB * task.attempt, 'memory' ) }
        time   = { check_max( 6.h * task.attempt, 'time' ) }
    }
    withLabel:process_medium_high {
        cpus   = { check_max( 12, 'cpus' ) }
        memory = { check_max( 50.GB * task.attempt, 'memory' ) }
        time   = { check_max( 12.h * task.attempt, 'time' ) }
    }
}


docker {
    enabled  = true
    cacheDir = 'docker_cache'

    // prototype fix for a warning about Ray tuner using /tmp instead of /dev/shm
    //runOptions = '--shm-size=1.84gb'
}

configs/modules.config

Lines changed: 34 additions & 0 deletions
/* config file for defining DSL2 per module options and publishing paths
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Available keys to override module options:
        ext.args   = Additional arguments appended to command in module.
        ext.args2  = Second set of arguments appended to command in module (multi-tool modules).
        ext.args3  = Third set of arguments appended to command in module (multi-tool modules).
        ext.prefix = File name prefix for output files.
----------------------------------------------------------------------------------------
*/

process {

    withName: "GENERATE_FASTA|GENERATE_FROM_FASTA" {
        ext.args = { [ params.dna_seq_len   ? "-sl ${params.dna_seq_len}"  : '-sl 100',
                       params.motif_tag     ? "-t ${params.motif_tag}"     : '-t 5',
                       params.non_motif_tag ? "-u ${params.non_motif_tag}" : '-u 0',
                       params.num_seq       ? "-ns ${params.num_seq}"      : '',
                       params.motif_start   ? "-p ${params.motif_start}"   : ''
                     ].flatten().unique(false).join(' ').trim()
        }
        ext.prefix = { params.generated_fasta ? params.generated_fasta : null }

        // the outdir has to be the one the user specifies, plus a suffix that makes each run unique
        publishDir = [
            path: { "${params.outdir}/${workflow.runName}_" + "${workflow.start}".replaceAll('[-:]', '_').split('\\.')[0] },
            mode: params.publish_dir_mode,
            overwrite: true
        ]
    }
}
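
The ext.args closure assembles the FASTA generator's command-line flags from params, with fallbacks -sl 100, -t 5 and -u 0, and drops -ns/-p entirely when unset. The same assembly logic re-expressed as a Python sketch, for illustration only (the pipeline evaluates the Groovy closure above, not this):

# illustrative re-expression of the ext.args closure
def build_args(params: dict) -> str:
    parts = [
        f"-sl {params['dna_seq_len']}" if params.get('dna_seq_len') else "-sl 100",
        f"-t {params['motif_tag']}" if params.get('motif_tag') else "-t 5",
        f"-u {params['non_motif_tag']}" if params.get('non_motif_tag') else "-u 0",
        f"-ns {params['num_seq']}" if params.get('num_seq') else "",
        f"-p {params['motif_start']}" if params.get('motif_start') else "",
    ]
    return " ".join(p for p in parts if p).strip()

print(build_args({}))                  # -sl 100 -t 5 -u 0
print(build_args({"num_seq": 500}))    # -sl 100 -t 5 -u 0 -ns 500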

configs/test.config

Lines changed: 24 additions & 0 deletions
params {
    config_profile_name        = 'CRG profile'
    config_profile_description = 'Configuration to run on CRG cluster'

    max_cpus   = 64
    max_memory = 100.GB
    max_time   = 48.h
}


process {
    executor  = 'crg'
    queue     = 'cn-el7'
    cpus      = 1
    queueSize = 50
    memory    = '6.GB'
    time      = '1 h'
}


singularity {
    enabled  = true
    cacheDir = 'singularity_cache'
}

examples/pipeline_generated.json

Lines changed: 26 additions & 0 deletions
{
    "experiment": "DnaToFloatExperiment",
    "noise": [
        {
            "column_name": "input1",
            "name": "UniformTextMasker",
            "params": [{"probability": [0.1]}]
        },
        {
            "column_name": "input2",
            "name": "UniformTextMasker",
            "params": [{"probability": [0.4]}]
        },
        {
            "column_name": "label",
            "name": "GaussianNoise",
            "params": [{"mean": [0.5], "std": [0.1]}]
        }
    ],
    "split": [
        {
            "name": "RandomSplitter",
            "params": [{"split": [[0.6, 0.8]]}]
        }
    ]
}
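
This example is exactly the shape launch_interpret_json.py consumes. A hedged walk-through, assuming json_schema.py is importable and the file sits at examples/pipeline_generated.json relative to the working directory:

import json
from json_schema import JsonSchema

with open("examples/pipeline_generated.json") as in_json:
    config = json.load(in_json)

# every param above carries exactly one value, so both counts are 1
schema = JsonSchema(config)
print(schema.number_noise_val, schema.number_split_val)   # 1 1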
