From 50e44d735fdf1bfb8120c284c574e70d74a48689 Mon Sep 17 00:00:00 2001
From: jfallmann <fallmann.joerg@gmail.com>
Date: Wed, 22 Apr 2026 15:04:23 +0200
Subject: [PATCH 01/39] fgumi module added

---
 MONSDA/Workflows.py                  |  45 +++++++++---
 configs/template.json                |  10 ++-
 configs/template_base_commented.json |  15 +++-
 configs/template_clean.json          |  10 ++-
 containers/apptainer/fgumi.def       |  23 ++++++
 envs/fgumi.yaml                      |   9 +++
 workflows/fgumi.nf                   | 104 +++++++++++++++++++++++++++
 workflows/fgumi.smk                  |  76 ++++++++++++++++++++
 workflows/fgumi_dedup.nf             |  54 ++++++++++++++
 workflows/fgumi_dedup.smk            |  15 ++++
 10 files changed, 349 insertions(+), 12 deletions(-)
 create mode 100644 containers/apptainer/fgumi.def
 create mode 100644 envs/fgumi.yaml
 create mode 100644 workflows/fgumi.nf
 create mode 100644 workflows/fgumi.smk
 create mode 100644 workflows/fgumi_dedup.nf
 create mode 100644 workflows/fgumi_dedup.smk

diff --git a/MONSDA/Workflows.py b/MONSDA/Workflows.py
index 88aaf92a..709b95e1 100755
--- a/MONSDA/Workflows.py
+++ b/MONSDA/Workflows.py
@@ -901,7 +901,9 @@ def make_sub(
                             and "MAPPING" not in works
                             and toolenv != "rustqc"
                         ):
-                            if "DEDUP" in works and "umitools" in envs:
+                            if "DEDUP" in works and any(
+                                x in envs for x in ["umitools", "fgumi"]
+                            ):
                                 subname = toolenv + "_dedup_trim.smk"
                             else:
                                 subname = toolenv + "_trim.smk"
@@ -912,16 +914,18 @@ def make_sub(
                             and "MAPPING" not in works
                             and toolenv != "rustqc"
                         ):
-                            if "DEDUP" in subworkflows and "umitools" in envs:
+                            if "DEDUP" in subworkflows and any(
+                                x in envs for x in ["umitools", "fgumi"]
+                            ):
                                 subname = toolenv + "_dedup.smk"
                             else:
                                 subname = toolenv + "_raw.smk"
 
-                        # Picard tools can be extended here
+                        # Dedup tools can be extended here
                         if works[j] == "DEDUP" and toolenv == "picard":
                             subname = toolenv + "_dedup.smk"
                             subconf.pop("PREDEDUP", None)
-                        elif works[j] == "DEDUP" and toolenv == "umitools":
+                        elif works[j] == "DEDUP" and toolenv in ["umitools", "fgumi"]:
                             subconf["PREDEDUP"] = "enabled"
 
                         smkf = os.path.abspath(os.path.join(workflowpath, subname))
@@ -1090,11 +1094,11 @@ def make_sub(
                         else:
                             subname = toolenv + "_trim.smk"
 
-                    # Picard tools can be extended here
+                    # Dedup tools can be extended here
                     if subwork == "DEDUP" and toolenv == "picard":
                         subname = toolenv + "_dedup.smk"
                         subconf.pop("PREDEDUP", None)
-                    elif works[j] == "DEDUP" and toolenv == "umitools":
+                    elif subwork == "DEDUP" and toolenv in ["umitools", "fgumi"]:
                         subconf["PREDEDUP"] = "enabled"
                     # Add rulethemall based on chosen workflows
                     add.append(
@@ -2913,7 +2917,7 @@ def nf_make_sub(
                             else:
                                 if "DEDUP" in subworkflows:
                                     flowlist.append("QC_RAW")
-                                    if toolenv == "umitools":
+                                    if toolenv in ["umitools", "fgumi"]:
                                         flowlist.append("DEDUPEXTRACT")
                                     if "MAPPING" in works:
                                         flowlist.append("QC_MAPPING")
@@ -2928,7 +2932,7 @@ def nf_make_sub(
                             flowlist.append("TRIMMING")
 
                         if works[j] == "DEDUP":
-                            if toolenv == "umitools":
+                            if toolenv in ["umitools", "fgumi"]:
                                 flowlist.append("PREDEDUP")
                                 subconf["PREDEDUP"] = "enabled"
                                 if "QC" in flowlist:
@@ -3284,9 +3288,32 @@ def nf_make_sub(
 
                         subname = toolenv + ".nf"
 
-                    # Picard tools can be extended here
+                    # Dedup tools can be extended here
                     if subwork == "DEDUP" and toolenv == "picard":
                         subname = toolenv + "_dedup.nf"
+                    elif subwork == "DEDUP" and toolenv in ["umitools", "fgumi"]:
+                        flowlist.append("PREDEDUP")
+                        subconf["PREDEDUP"] = "enabled"
+                        if "QC" in flowlist:
+                            flowlist.append("QC_DEDUP")
+                        subname = toolenv + ".nf"
+                        nfi = os.path.abspath(os.path.join(workflowpath, subname))
+                        with open(nfi, "r") as nf:
+                            for line in mu.comment_remover(nf.readlines()):
+                                line = re.sub(condapath, 'conda "' + envpath, line)
+                                if "include {" in line:
+                                    line = fixinclude(
+                                        line,
+                                        loglevel,
+                                        condapath,
+                                        envpath,
+                                        workflowpath,
+                                        logfix,
+                                        "nfmode",
+                                    )
+                                subjobs.append(line)
+                            subjobs.append("\n\n")
+                        subname = toolenv + "_dedup.nf"
 
                     nfi = os.path.abspath(os.path.join(workflowpath, subname))
                     with open(nfi, "r") as nf:
diff --git a/configs/template.json b/configs/template.json
index 9ab0bb95..374e77b3 100644
--- a/configs/template.json
+++ b/configs/template.json
@@ -120,7 +120,8 @@
     "DEDUP": { #options for deduplication for each sample/condition
                "TOOLS": {
                    "umitools": "umi_tools",
-                   "picard": "picard"
+                   "picard": "picard",
+                   "fgumi": "fgumi"
                },
                "id": {
                    "condition": {
@@ -141,6 +142,13 @@
                                             "JAVA" : "",# options
                                             "DEDUP": ""  # dedup options
                                         }
+                                    },
+                                    "fgumi":{
+                                        "OPTIONS":
+                                        {
+                                            "EXTRACT": "",  # fgumi extract options, e.g. --read-structures 8M+T +T
+                                            "DEDUP": ""  # fgumi dedup options
+                                        }
                                     }
                                   }
                    }
diff --git a/configs/template_base_commented.json b/configs/template_base_commented.json
index 8b2c1388..7a47a674 100644
--- a/configs/template_base_commented.json
+++ b/configs/template_base_commented.json
@@ -123,7 +123,8 @@
   },
   "DEDUP": {
     "TOOLS": {
-      "umitools": "umi_tools"
+      "umitools": "umi_tools",
+      "fgumi": "fgumi"
     },
     "ENV" : "",
     "BIN" : "",
@@ -150,6 +151,18 @@
             "JAVA" : "",
             "DEDUP": ""
         }
+    },
+    "fgumi": {
+      "comment":
+        {
+            "EXTRACT": "fgumi extract options, e.g. --read-structures 8M+T +T",
+            "DEDUP": "fgumi dedup options"
+        },
+      "OPTIONS":
+        {
+            "EXTRACT": "",
+            "DEDUP": ""
+        }
     }
   },
   "MAPPING": {
diff --git a/configs/template_clean.json b/configs/template_clean.json
index 825e7335..a335c204 100644
--- a/configs/template_clean.json
+++ b/configs/template_clean.json
@@ -110,7 +110,8 @@
   },
   "DEDUP": {
     "TOOLS": {
-      "umitools": "umi_tools"
+      "umitools": "umi_tools",
+      "fgumi": "fgumi"
     },
     "id": {
       "condition": {
@@ -128,6 +129,13 @@
                 "JAVA" : "",
                 "DEDUP": ""
             }
+        },
+        "fgumi": {
+          "OPTIONS":
+          {
+              "EXTRACT": "",
+              "DEDUP": ""
+          }
         }
       }
     }
diff --git a/containers/apptainer/fgumi.def b/containers/apptainer/fgumi.def
new file mode 100644
index 00000000..4d305696
--- /dev/null
+++ b/containers/apptainer/fgumi.def
@@ -0,0 +1,23 @@
+Bootstrap: docker
+From: continuumio/miniconda3
+   
+%files
+    ${HOME}/MONSDA/envs/fgumi.yaml /opt/envs/
+    ${HOME}/MONSDA/scripts /opt/MONSDA/
+
+%environment
+   
+%post
+    ls -alrt /opt/envs
+    chmod -R +x /opt/envs/fgumi.yaml
+    # Activate the fgumi conda env automatically whenever the container starts.
+    ENV_NAME=fgumi
+    echo ". /opt/conda/etc/profile.d/conda.sh" >> $APPTAINER_ENVIRONMENT
+    echo "conda activate $ENV_NAME" >> $APPTAINER_ENVIRONMENT
+    # Create the env at the fixed prefix that the activation line above refers to.
+    . /opt/conda/etc/profile.d/conda.sh
+    conda env create -f /opt/envs/fgumi.yaml -p /opt/conda/envs/$ENV_NAME
+    conda clean --all --yes  # --yes: no confirmation prompt in the unattended build
+   
+%runscript
+    exec "$@"
diff --git a/envs/fgumi.yaml b/envs/fgumi.yaml
new file mode 100644
index 00000000..9bffb96d
--- /dev/null
+++ b/envs/fgumi.yaml
@@ -0,0 +1,9 @@
+name: fgumi
+channels:
+  - conda-forge
+  - bioconda
+  - nodefaults
+dependencies:
+  - samtools =1.21
+  - fgumi =0.1.3
+  - dateutils =0.6.12
diff --git a/workflows/fgumi.nf b/workflows/fgumi.nf
new file mode 100644
index 00000000..100de0f8
--- /dev/null
+++ b/workflows/fgumi.nf
@@ -0,0 +1,104 @@
+DEDUPENV=get_always('DEDUPENV')
+DEDUPBIN=get_always('DEDUPBIN')
+
+EXTRACTPARAMS = get_always('fgumi_params_EXTRACT') ?: ''
+DEDUPPARAMS = get_always('fgumi_params_DEDUP') ?: ''
+
+process extract_fq{
+    conda "$DEDUPENV"+".yaml"
+    container "oras://jfallmann/monsda:"+"$DEDUPENV"
+    cpus THREADS
+	cache 'lenient'
+
+    publishDir "${workflow.workDir}/../" , mode: 'link',
+    saveAs: {filename ->
+        if (filename.indexOf("_dedup.fastq.gz") > 0)      "DEDUP_FASTQ/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.fastq.gz"
+        else if (filename.indexOf("_fgumi_extract.bam") > 0) "TMP/FGEX/${COMBO}/${CONDITION}/${filename}"
+        else if (filename.indexOf("log") > 0)             "LOGS/${COMBO}/${CONDITION}/DEDUP/dedup_extract.log"
+        else null
+    }
+
+    input:
+    path samples
+
+    output:
+    path "*_dedup.fastq.gz", emit: extract
+    path "*_fgumi_extract.bam", emit: ubam
+    path "ex.log", emit: logs
+
+    script:
+    if (PAIRED == 'paired'){
+        r1 = samples[0]
+        r2 = samples[1]
+        sn = samples[0].getSimpleName().replace("_R1","")
+        ubam = sn+"_fgumi_extract.bam"
+        outf = samples[0].getSimpleName()+"_dedup.fastq.gz"
+        outf2 = samples[1].getSimpleName()+"_dedup.fastq.gz"
+        """
+            mkdir -p tmp && $DEDUPBIN extract $EXTRACTPARAMS --inputs $r1 $r2 --sample $sn --library $sn --output $ubam > ex.log 2>&1 && samtools fastq -n -1 $outf -2 $outf2 -0 /dev/null -s /dev/null $ubam >> ex.log 2>&1
+        """
+    }
+    else{
+        r1 = samples[0]
+        sn = samples[0].getSimpleName().replace(".fastq.gz","")
+        ubam = sn+"_fgumi_extract.bam"
+        outf = samples[0].getSimpleName()+"_dedup.fastq.gz"
+        """
+            mkdir -p tmp && $DEDUPBIN extract $EXTRACTPARAMS --inputs $r1 --sample $sn --library $sn --output $ubam > ex.log 2>&1 && samtools fastq -n $ubam | gzip -c > $outf && echo done >> ex.log
+        """
+    }
+}
+
+workflow DEDUPEXTRACT{
+    take:
+    collection
+
+    main:
+    //SAMPLE CHANNELS
+    if ( PREDEDUP == 'enabled' ){
+        if (PAIRED == 'paired'){
+            extract_fq(samples_ch.collate( 2 ))
+        } else{
+            extract_fq(samples_ch.collate( 1 ))
+        }
+    }else{
+        if (PAIRED == 'paired'){
+            extract_fq(collection.collate( 2 ))
+        } else{
+            extract_fq(collection.collate( 1 ))
+        }
+    }
+
+    emit:
+    extract = extract_fq.out.extract
+    ubam = extract_fq.out.ubam
+    logs = extract_fq.out.logs
+}
+
+process dedup_bam{
+    conda "$DEDUPENV"+".yaml"
+    container "oras://jfallmann/monsda:"+"$DEDUPENV"
+    cpus THREADS
+    cache 'lenient'
+
+    publishDir "${workflow.workDir}/../MAPPED/${COMBO}/${CONDITION}" , mode: 'link'
+
+    input:
+    path mapped_bam
+    path ubam
+
+    output:
+    path "${mapped_bam.baseName}_dedup.bam", emit: dedup_bam
+    path "${mapped_bam.baseName}_dedup.bam.bai", emit: dedup_idx
+    path "dedup.log", emit: logs
+
+    script:
+    """
+    mkdir -p tmp
+    $DEDUPBIN zipper --unmapped $ubam --aligned $mapped_bam --output tmp/zippered.bam > dedup.log 2>&1
+    $DEDUPBIN sort --order template-coordinate --input tmp/zippered.bam --output tmp/sorted.bam >> dedup.log 2>&1
+    $DEDUPBIN dedup $DEDUPPARAMS --input tmp/sorted.bam --output ${mapped_bam.baseName}_dedup.bam >> dedup.log 2>&1
+    samtools index ${mapped_bam.baseName}_dedup.bam >> dedup.log 2>&1
+    rm $ubam
+    """
+}
diff --git a/workflows/fgumi.smk b/workflows/fgumi.smk
new file mode 100644
index 00000000..9a5ef72e
--- /dev/null
+++ b/workflows/fgumi.smk
@@ -0,0 +1,76 @@
+DEDUPBIN, DEDUPENV = env_bin_from_config(config, 'DEDUP')
+
+wildcard_constraints:
+    type = "sorted|sorted_unique"
+
+if paired == 'paired':
+    rule extract:
+        input:  r1 = lambda wildcards: "FASTQ/{rawfile}_R1.fastq.gz".format(rawfile=[x for x in SAMPLES if x.split(os.sep)[-1] in wildcards.file][0]),
+                r2 = lambda wildcards: "FASTQ/{rawfile}_R2.fastq.gz".format(rawfile=[x for x in SAMPLES if x.split(os.sep)[-1] in wildcards.file][0])
+        output: o1 = "DEDUP_FASTQ/{combo}/{file}_R1_dedup.fastq.gz",
+                o2 = "DEDUP_FASTQ/{combo}/{file}_R2_dedup.fastq.gz",
+                ubam = temp("TMP/FGEX/{combo}/{file}_extracted.bam"),
+                td = temp(directory("TMP/FGEX/{combo}/{file}"))
+        log:   "LOGS/{combo}/{file}_dedup_extract.log"
+        conda: ""+DEDUPENV+".yaml"
+        container: "oras://jfallmann/monsda:"+DEDUPENV+""
+        threads: 1
+        params: epara = lambda wildcards: tool_params(wildcards.file, None, config, "DEDUP", DEDUPENV)['OPTIONS'].get('EXTRACT', ""),
+                dedup = DEDUPBIN,
+                sname = lambda wildcards: os.path.basename(wildcards.file)
+        shell:  "mkdir -p {output.td} && {params.dedup} extract {params.epara} --inputs {input.r1} {input.r2} --sample {params.sname} --library {params.sname} --output {output.ubam} > {log} 2>&1 && samtools fastq -n -1 {output.o1} -2 {output.o2} -0 /dev/null -s /dev/null {output.ubam} >> {log} 2>&1"
+else:
+    rule extract:
+        input:  r1 = lambda wildcards: "FASTQ/{rawfile}.fastq.gz".format(rawfile=[x for x in SAMPLES if x.split(os.sep)[-1] in wildcards.file][0])
+        output: o1 = "DEDUP_FASTQ/{combo}/{file}_dedup.fastq.gz",
+                ubam = temp("TMP/FGEX/{combo}/{file}_extracted.bam"),
+                td = temp(directory("TMP/FGEX/{combo}/{file}"))
+        log:   "LOGS/{combo}/{file}_dedup_extract.log"
+        conda: ""+DEDUPENV+".yaml"
+        container: "oras://jfallmann/monsda:"+DEDUPENV+""
+        threads: 1
+        params: epara = lambda wildcards: tool_params(wildcards.file, None, config, "DEDUP", DEDUPENV)['OPTIONS'].get('EXTRACT', ""),
+                dedup = DEDUPBIN,
+                sname = lambda wildcards: os.path.basename(wildcards.file)
+        shell:  "mkdir -p {output.td} && {params.dedup} extract {params.epara} --inputs {input.r1} --sample {params.sname} --library {params.sname} --output {output.ubam} > {log} 2>&1 && samtools fastq -n {output.ubam} | gzip -c > {output.o1} && echo done >> {log}"
+
+if paired == 'paired':
+    rule dedupbam:
+        input:  bam = "MAPPED/{combo}/{file}_mapped_{type}.bam",
+                ubam = "TMP/FGEX/{combo}/{file}_extracted.bam"
+        output: bam = report("MAPPED/{combo}/{file}_mapped_{type}_dedup.bam", category="DEDUP"),
+                bai = report("MAPPED/{combo}/{file}_mapped_{type}_dedup.bam.bai", category="DEDUP"),
+                td = temp(directory("TMP/UMIDD/{combo}/{file}_{type}"))
+        log:    "LOGS/{combo}/{file}_{type}/dedupbam.log"
+        conda:  ""+DEDUPENV+".yaml"
+        container: "oras://jfallmann/monsda:"+DEDUPENV+""
+        threads: 1
+        priority: 0               # This should be done after all mapping is done
+        params: dpara = lambda wildcards: tool_params(wildcards.file, None, config, "DEDUP", DEDUPENV)['OPTIONS'].get('DEDUP', ""),
+                dedup = DEDUPBIN
+        shell: """mkdir -p {output.td}
+{params.dedup} zipper --unmapped {input.ubam} --aligned {input.bam} --output {output.td}/zippered.bam > {log} 2>&1
+{params.dedup} sort --order template-coordinate --input {output.td}/zippered.bam --output {output.td}/sorted.bam >> {log} 2>&1
+{params.dedup} dedup {params.dpara} --input {output.td}/sorted.bam --output {output.bam} >> {log} 2>&1
+samtools index {output.bam} >> {log} 2>&1
+rm {input.ubam}"""
+else:
+    rule dedupbam:
+        input:  bam = "MAPPED/{combo}/{file}_mapped_{type}.bam",
+                ubam = "TMP/FGEX/{combo}/{file}_extracted.bam"
+        output: bam = report("MAPPED/{combo}/{file}_mapped_{type}_dedup.bam", category="DEDUP"),
+                bai = report("MAPPED/{combo}/{file}_mapped_{type}_dedup.bam.bai", category="DEDUP"),
+                td = temp(directory("TMP/UMIDD/{combo}/{file}_{type}"))
+        log:    "LOGS/{combo}/{file}_{type}/dedupbam.log"
+        conda:  ""+DEDUPENV+".yaml"
+        container: "oras://jfallmann/monsda:"+DEDUPENV+""
+        threads: 1
+        priority: 0               # This should be done after all mapping is done
+        params: dpara = lambda wildcards: tool_params(wildcards.file, None, config, "DEDUP", DEDUPENV)['OPTIONS'].get('DEDUP', ""),
+                dedup = DEDUPBIN
+        shell: """mkdir -p {output.td}
+{params.dedup} zipper --unmapped {input.ubam} --aligned {input.bam} --output {output.td}/zippered.bam > {log} 2>&1
+{params.dedup} sort --order template-coordinate --input {output.td}/zippered.bam --output {output.td}/sorted.bam >> {log} 2>&1
+{params.dedup} dedup {params.dpara} --input {output.td}/sorted.bam --output {output.bam} >> {log} 2>&1
+samtools index {output.bam} >> {log} 2>&1
+rm {input.ubam}"""
diff --git a/workflows/fgumi_dedup.nf b/workflows/fgumi_dedup.nf
new file mode 100644
index 00000000..05ff5ed1
--- /dev/null
+++ b/workflows/fgumi_dedup.nf
@@ -0,0 +1,54 @@
+DEDUPENV=get_always('DEDUPENV')
+DEDUPBIN=get_always('DEDUPBIN')
+
+DEDUPPARAMS = get_always('fgumi_params_DEDUP') ?: ''
+
+process dedup_bam{
+    conda "$DEDUPENV"+".yaml"
+    container "oras://jfallmann/monsda:"+"$DEDUPENV"
+    cpus THREADS
+	cache 'lenient'
+    //validExitStatus 0,1
+
+    publishDir "${workflow.workDir}/../" , mode: 'link',
+    saveAs: {filename ->
+        if (filename.endsWith("_dedup.bam"))              "MAPPED/${COMBO}/${CONDITION}/${file(filename).getName()}"
+        else if (filename.indexOf("_dedup.bam.bai") > 0)  "MAPPED/${COMBO}/${CONDITION}/${file(filename).getName()}"
+        else if (filename.indexOf("dedup.log") > 0)       "LOGS/${COMBO}/${CONDITION}/DEDUP/${file(filename).getName()}"
+        else null
+    }
+
+    input:
+    path todedup
+    path bami
+        
+    output:
+    path "*_dedup.bam", emit: bam
+    path "*_dedup.bam.bai", emit: bai
+    path "*_dedup.log", emit: logs
+
+    script:
+    bams = todedup[0]
+    bais = todedup[1]
+    outf = bams.getSimpleName()+"_dedup.bam"
+    outl = bams.getSimpleName()+"_dedup.log"
+    """
+    mkdir -p TMP && $DEDUPBIN dedup $DEDUPPARAMS --input $bams --output $outf &> $outl && samtools index $outf &>> $outl
+    """
+}
+
+workflow DEDUPBAM{
+    take:
+    map
+    mapi
+    mapu
+    mapui
+
+    main:
+    dedup_bam(map.concat(mapu), mapi.concat(mapui))
+
+    emit:
+    dedup = dedup_bam.out.bam
+    dedupbai = dedup_bam.out.bai
+    deduplog = dedup_bam.out.logs
+}
diff --git a/workflows/fgumi_dedup.smk b/workflows/fgumi_dedup.smk
new file mode 100644
index 00000000..49d54982
--- /dev/null
+++ b/workflows/fgumi_dedup.smk
@@ -0,0 +1,15 @@
+DEDUPBIN, DEDUPENV = env_bin_from_config(config, 'DEDUP')
+
+rule dedupbam:  # Deduplicate a mapped BAM with the configured DEDUP binary and index the result
+    input:  bam = "MAPPED/{combo}/{file}_mapped_{type}.bam"
+    output: bam = report("MAPPED/{combo}/{file}_mapped_{type}_dedup.bam", category="DEDUP"),
+            bai = report("MAPPED/{combo}/{file}_mapped_{type}_dedup.bam.bai", category="DEDUP"),
+            td = temp(directory("TMP/UMIDD/{combo}/{file}_{type}"))  # scratch directory, created by the shell command below
+    log:    "LOGS/{combo}/{file}_{type}/dedupbam.log"
+    conda:  ""+DEDUPENV+".yaml"
+    container: "oras://jfallmann/monsda:"+DEDUPENV+""
+    threads: 1
+    priority: 0               # This should be done after all mapping is done
+    params: dpara = lambda wildcards: tool_params(wildcards.file, None, config, "DEDUP", DEDUPENV)['OPTIONS'].get('DEDUP', ""),  # user-supplied DEDUP options, empty if unset
+            dedup = DEDUPBIN
+    shell: "mkdir -p {output.td} && {params.dedup} dedup {params.dpara} --input {input.bam} --output {output.bam} > {log} 2>&1 && samtools index {output.bam} >> {log} 2>&1"

From 410496496cc1cb1f9a075ea187638bcc85e8ab47 Mon Sep 17 00:00:00 2001
From: jfallmann <fallmann.joerg@gmail.com>
Date: Wed, 22 Apr 2026 15:06:44 +0200
Subject: [PATCH 02/39] fgumi module update

---
 workflows/fgumi.nf  | 1 -
 workflows/fgumi.smk | 4 ++--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/workflows/fgumi.nf b/workflows/fgumi.nf
index 100de0f8..227c386a 100644
--- a/workflows/fgumi.nf
+++ b/workflows/fgumi.nf
@@ -13,7 +13,6 @@ process extract_fq{
     publishDir "${workflow.workDir}/../" , mode: 'link',
     saveAs: {filename ->
         if (filename.indexOf("_dedup.fastq.gz") > 0)      "DEDUP_FASTQ/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.fastq.gz"
-        else if (filename.indexOf("_fgumi_extract.bam") > 0) "TMP/FGEX/${COMBO}/${CONDITION}/${filename}"
         else if (filename.indexOf("log") > 0)             "LOGS/${COMBO}/${CONDITION}/DEDUP/dedup_extract.log"
         else null
     }
diff --git a/workflows/fgumi.smk b/workflows/fgumi.smk
index 9a5ef72e..60a197e2 100644
--- a/workflows/fgumi.smk
+++ b/workflows/fgumi.smk
@@ -9,7 +9,7 @@ if paired == 'paired':
                 r2 = lambda wildcards: "FASTQ/{rawfile}_R2.fastq.gz".format(rawfile=[x for x in SAMPLES if x.split(os.sep)[-1] in wildcards.file][0])
         output: o1 = "DEDUP_FASTQ/{combo}/{file}_R1_dedup.fastq.gz",
                 o2 = "DEDUP_FASTQ/{combo}/{file}_R2_dedup.fastq.gz",
-                ubam = temp("TMP/FGEX/{combo}/{file}_extracted.bam"),
+                ubam = "TMP/FGEX/{combo}/{file}_extracted.bam",
                 td = temp(directory("TMP/FGEX/{combo}/{file}"))
         log:   "LOGS/{combo}/{file}_dedup_extract.log"
         conda: ""+DEDUPENV+".yaml"
@@ -23,7 +23,7 @@ else:
     rule extract:
         input:  r1 = lambda wildcards: "FASTQ/{rawfile}.fastq.gz".format(rawfile=[x for x in SAMPLES if x.split(os.sep)[-1] in wildcards.file][0])
         output: o1 = "DEDUP_FASTQ/{combo}/{file}_dedup.fastq.gz",
-                ubam = temp("TMP/FGEX/{combo}/{file}_extracted.bam"),
+                ubam = "TMP/FGEX/{combo}/{file}_extracted.bam",
                 td = temp(directory("TMP/FGEX/{combo}/{file}"))
         log:   "LOGS/{combo}/{file}_dedup_extract.log"
         conda: ""+DEDUPENV+".yaml"

From 38213d0dc21f6b8f8b8f9bb26b7e11b56a3e57ba Mon Sep 17 00:00:00 2001
From: jfallmann <fallmann.joerg@gmail.com>
Date: Thu, 23 Apr 2026 11:01:38 +0200
Subject: [PATCH 03/39] tests and documentation

---
 README.md                            |  7 ++++
 docs/source/config.rst               |  5 +++
 docs/source/tutorial.rst             | 18 ++++-----
 docs/source/workflows.rst            |  8 ++++
 tests/data/README_fgumi_test_data.md | 27 +++++++++++++
 tests/data/config_fgumi_test.json    | 47 ++++++++++++++++++++++
 tests/data/make_fgumi_umi_fixture.py | 60 ++++++++++++++++++++++++++++
 tests/test_fgumi_smoke.sh            | 22 ++++++++++
 8 files changed, 185 insertions(+), 9 deletions(-)
 create mode 100644 tests/data/README_fgumi_test_data.md
 create mode 100644 tests/data/config_fgumi_test.json
 create mode 100644 tests/data/make_fgumi_umi_fixture.py
 create mode 100644 tests/test_fgumi_smoke.sh

diff --git a/README.md b/README.md
index b3dfedfc..a442b81c 100644
--- a/README.md
+++ b/README.md
@@ -43,6 +43,13 @@ pip install MONSDA
 
 More information can be found in the official [documentation](https://monsda.readthedocs.io/en/latest/?badge=latest)
 
+## Notes on newly available tools
+
+- **rustqc** is available as an alternative QC backend and is designed for fast, integrated RNA-seq QC reporting with MultiQC-compatible outputs.
+- **fgumi** is available as an additional UMI-aware deduplication backend in the `DEDUP` step.
+
+Both tools are available through the shipped conda environments in `envs/` and can be selected through the regular config `TOOLS` sections.
+
 
 ## How does it work
 
diff --git a/docs/source/config.rst b/docs/source/config.rst
index 022fe09d..6be4d1ac 100644
--- a/docs/source/config.rst
+++ b/docs/source/config.rst
@@ -34,6 +34,11 @@ You can always define differing ENV/BIN keys for each condition-tree leaf separa
 
 The next key-level is the *OPTIONS* key which is where you can define additional parameters for each tool. It is not needed to define anything related to *single-/paired-* end or *singlecell* sequencing, this is done automatically.  To add parameters simply add the *OPTION* key which defines a dict where you can set parameters for each defined subworkflow-step. Parameters are here defined as key/value pairs corresponding to the subworkflow-step, e.g. 'INDEX' to generate an index file for mapping and all settings similar to a command line call as values. This should become clear having a look at the different processing steps in the template json.  If there are no options just leave the 'OPTIONS' dict empty.
 
+For newly available tools:
+
+- **QC/rustqc** can be selected with ``"rustqc": "rustqc"`` in the ``QC -> TOOLS`` section and configured via the regular ``OPTIONS`` keys (e.g. ``QC`` and ``MULTI`` entries in tutorial configs).
+- **DEDUP/fgumi** can be selected with ``"fgumi": "fgumi"`` in the ``DEDUP -> TOOLS`` section and supports dedicated options under ``OPTIONS`` using ``EXTRACT`` and ``DEDUP`` keys.
+
 
 .. literalinclude:: ../../configs/template.json
     :language: json
diff --git a/docs/source/tutorial.rst b/docs/source/tutorial.rst
index 2d1afe53..ebd55264 100644
--- a/docs/source/tutorial.rst
+++ b/docs/source/tutorial.rst
@@ -57,10 +57,10 @@ This slightly more complex use case involves multiple input files, two condition
 Workflows include: 
 
     - FETCH: Download from SRA
-    - QC: FASTQC of input and output
+    - QC: FASTQC and RustQC of input and output
     - TRIMMING: Adaptor removal with cutadapt/trimgalore
     - MAPPING: Read mapping with STAR, hisat2, bwa, segemehl3 and minimap2 
-    - DEDUP: Read deduplication with umi_tools and picard
+    - DEDUP: Read deduplication with umi_tools, fgumi and picard
 
 The more complex config for this analysis follows
 
@@ -74,7 +74,7 @@ Starting the run with 12 cores (defining more will be capped by the config file
 
     monsda -j 12 -c ${CONDA_PREFIX}/share/MONSDA/configs/tutorial_toolmix.json --directory ${PWD}
 
-Will start the run in the current directory and generate a "FASTQ" sub-directory containing the downloaded sample, a "GENOME/INDICES" directory containing the built index, a "QC" directory containing all *fastqc* reports and *multiqc* output, a "TRIMMED_FASTQ" directory for *trimgalore* and *cutadapt* output, a "DEDUP" directory for *umi_tools* (runs before trimming and after mapping) and *picard* (runs after mapping) output and a "MAPPING" directory containing the mapped files. Furthermore, a "DE" directory will be created which will hold output from counting with featurecounts and DE input and output from *EdgeR* and *DESeq2*. Again, **MONSDA** will create a "LOG" directory containing it's own log, as well as logs of all executed jobs and the "JOBS" directory with all command-line calls. 
+Will start the run in the current directory and generate a "FASTQ" sub-directory containing the downloaded sample, a "GENOME/INDICES" directory containing the built index, a "QC" directory containing all *fastqc*/*rustqc* reports and *multiqc* output, a "TRIMMED_FASTQ" directory for *trimgalore* and *cutadapt* output, a "DEDUP" directory for *umi_tools*/*fgumi* (runs before trimming and after mapping) and *picard* (runs after mapping) output and a "MAPPING" directory containing the mapped files. Furthermore, a "DE" directory will be created which will hold output from counting with featurecounts and DE input and output from *EdgeR* and *DESeq2*. Again, **MONSDA** will create a "LOG" directory containing its own log, as well as logs of all executed jobs and the "JOBS" directory with all command-line calls. 
 
 A successful run will show the message 'Workflow finished, no error' at the end.
 
@@ -86,10 +86,10 @@ This postprocessing use case involves multiple input files, two conditions (WT/K
 Workflows include: 
 
     - FETCH: Download from SRA
-    - QC: FASTQC of input and output
+    - QC: FASTQC and RustQC of input and output
     - TRIMMING: Adaptor removal with cutadapt/trimgalore
     - MAPPING: Read mapping with STAR, hisat2, bwa, segemehl3 and minimap2 
-    - DEDUP: Read deduplication with umi_tools and picard
+    - DEDUP: Read deduplication with umi_tools, fgumi and picard
     - DE: Differential Expression analysis with EdgeR and DESeq2
     - DEU: Differential Exon Usage analysis with EdgeR (DEXSeq skipped, runtime)
     - COUNTING: Read counting with FeatureCounts
@@ -108,7 +108,7 @@ Starting the run with 12 cores (defining more will be capped by the config file
 
     monsda -j 12 -c ${CONDA_PREFIX}/share/MONSDA/configs/tutorial_postprocess.json --directory ${PWD}
 
-Will start the run in the current directory and generate a "FASTQ" sub-directory containing the downloaded sample, a "GENOME/Ecoli/INDICES" directory containing the built indices, including the one built for *salmon* later on, a "QC" directory containing all *FastQC* reports and *MultiQC* output, a "TRIMMED_FASTQ" directory for *trimgalore* and *cutadapt* output, a "DEDUP" directory for *umi_tools* (runs before trimming and after mapping) and *picard* (runs after mapping) output and a "MAPPING" directory containing the mapped files. Furthermore, "DE/DEU" directories will be created which will hold output from counting with FeatureCounts and DE/DEU input and output from *EdgeR* and *DESeq2* respectively. Again, **MONSDA** will create a "LOG" directory containing it's own log, as well as logs of all executed jobs and again a "JOBS" directory for command-line calls. 
+Will start the run in the current directory and generate a "FASTQ" sub-directory containing the downloaded sample, a "GENOME/Ecoli/INDICES" directory containing the built indices, including the one built for *salmon* later on, a "QC" directory containing all *FastQC*/*RustQC* reports and *MultiQC* output, a "TRIMMED_FASTQ" directory for *trimgalore* and *cutadapt* output, a "DEDUP" directory for *umi_tools*/*fgumi* (runs before trimming and after mapping) and *picard* (runs after mapping) output and a "MAPPING" directory containing the mapped files. Furthermore, "DE/DEU" directories will be created which will hold output from counting with FeatureCounts and DE/DEU input and output from *EdgeR* and *DESeq2* respectively. Again, **MONSDA** will create a "LOG" directory containing its own log, as well as logs of all executed jobs and again a "JOBS" directory for command-line calls. 
 
 A successful run will show the message 'Workflow finished, no error'. Be aware that this is indeed an exhaustive workflow and will require a decent amount of disk-space, memory and compute-time, depending on the hardware at your disposal.
 
@@ -122,10 +122,10 @@ This exhaustive use case involves multiple input files, two conditions (WT/KO) a
 Workflows include: 
 
     - FETCH: Download from SRA
-    - QC: FASTQC of input and output
+    - QC: FASTQC and RustQC of input and output
     - TRIMMING: Adaptor removal with cutadapt/trimgalore
     - MAPPING: Read mapping with STAR, hisat2, bwa, segemehl3 and minimap2 
-    - DEDUP: Read deduplication with umi_tools and picard
+    - DEDUP: Read deduplication with umi_tools, fgumi and picard
     - DE: Differential Expression analysis with EdgeR and DESeq2
     - DEU: Differential Exon Usage analysis with EdgeR (DEXSeq skipped, runtime)
     - DAS: Differential Alternative Splicing analysis with EdgeR and DIEGO 
@@ -186,6 +186,6 @@ Starting the run with 12 cores (defining more will be capped by the config file
 
     monsda -j 12 -c ${CONDA_PREFIX}/share/MONSDA/configs/tutorial_exhaustive.json --directory ${PWD}
 
-Will start the run in the current directory and generate a "FASTQ" sub-directory containing the downloaded sample, a "GENOME/Ecoli/INDICES" directory containing the built indices, including the one built for salmon later on, a "QC" directory containing all FASTQC reports and MULTIQC output, a "TRIMMED_FASTQ" directory for trimgalore and cutadapt output, a "DEDUP" directory for umi_tools (runs before trimming and after mapping) and picard (runs after mapping) output and a "MAPPING" directory containing the mapped files. Furthermore, "DE/DEU/DAS/DTU" directories will be created which will hold output from counting with FeatureCounts (or salmon for DTU) and DE/DEU/DAS/DTU input and output from EDGER, DESeq2, Diego and DrimSeq respectively. Again, **MONSDA** will create a "LOG" directory containing it's own log, as well as logs of all executed jobs and again a "JOBS" directory for command-line calls. 
+Will start the run in the current directory and generate a "FASTQ" sub-directory containing the downloaded sample, a "GENOME/Ecoli/INDICES" directory containing the built indices, including the one built for salmon later on, a "QC" directory containing all FASTQC/RustQC reports and MULTIQC output, a "TRIMMED_FASTQ" directory for trimgalore and cutadapt output, a "DEDUP" directory for umi_tools/fgumi (runs before trimming and after mapping) and picard (runs after mapping) output and a "MAPPING" directory containing the mapped files. Furthermore, "DE/DEU/DAS/DTU" directories will be created which will hold output from counting with FeatureCounts (or salmon for DTU) and DE/DEU/DAS/DTU input and output from EDGER, DESeq2, Diego and DrimSeq respectively. Again, **MONSDA** will create a "LOG" directory containing its own log, as well as logs of all executed jobs and again a "JOBS" directory for command-line calls. 
 
 A successful run will show the message 'Workflow finished, no error'. Be aware that this is indeed an exhaustive workflow and will require a decent amount of disk-space, memory and compute-time, depending on the hardware at your disposal.
\ No newline at end of file
diff --git a/docs/source/workflows.rst b/docs/source/workflows.rst
index cccf07e6..3295b4f5 100644
--- a/docs/source/workflows.rst
+++ b/docs/source/workflows.rst
@@ -75,6 +75,8 @@ QUALITY CONTROL I
 
 This workflow step can be run as preprocessing step if none of the processing workflows is defined in the config.json.
 
+*rustqc* is intended for mapped BAM-level QC and is therefore generally most useful in processing mode after mapping outputs are available.
+
 .. table:: 
   :widths: 10, 40, 10, 10, 10, 10, 10
   :class: tight-table
@@ -84,6 +86,9 @@ This workflow step can be run as preprocessing step if none of the processing wo
   +============================+============================================================+=========+=========+=========================================================================+============+===========+
   | FASTQC (includes MULTIQC)  | A quality control tool for high throughput sequence data.  | fastqc  | fastqc  | `fastqc <https://www.bioinformatics.babraham.ac.uk/projects/fastqc/>`_  | FASTQ/BAM  | ZIP/HTML  |
   +----------------------------+------------------------------------------------------------+---------+---------+-------------------------------------------------------------------------+------------+-----------+
+  | RustQC (includes MULTIQC)  | High-performance RNA-seq QC suite with MultiQC-compatible  | rustqc  | rustqc  | `rustqc <https://github.com/seqeralabs/RustQC>`_                        | BAM        | TEXT/TSV/ |
+  |                            | outputs.                                                   |         |         |                                                                         |            | LOG       |
+  +----------------------------+------------------------------------------------------------+---------+---------+-------------------------------------------------------------------------+------------+-----------+
   
 
 PROCESSING
@@ -103,6 +107,9 @@ If any of the below listed processing steps is defined in the config.json, quali
   +============================+============================================================+=========+=========+=========================================================================+============+===========+
   | FASTQC (includes MULTIQC)  | A quality control tool for high throughput sequence data.  | fastqc  | fastqc  | `fastqc <https://www.bioinformatics.babraham.ac.uk/projects/fastqc/>`_  | FASTQ/BAM  | ZIP/HTML  |
   +----------------------------+------------------------------------------------------------+---------+---------+-------------------------------------------------------------------------+------------+-----------+
+  | RustQC (includes MULTIQC)  | High-performance RNA-seq QC suite with MultiQC-compatible  | rustqc  | rustqc  | `rustqc <https://github.com/seqeralabs/RustQC>`_                        | BAM        | TEXT/TSV/ |
+  |                            | outputs.                                                   |         |         |                                                                         |            | LOG       |
+  +----------------------------+------------------------------------------------------------+---------+---------+-------------------------------------------------------------------------+------------+-----------+
 
 
 Trimming
@@ -173,6 +179,8 @@ Deduplicate reads by UMI or based on mapping position and CIGAR string
   +===============+====================================================================================================================================================+===========+============+====================================================================================================+======================+============+
   | UMI-tools     | UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes.  | umitools  | umi_tools  | `umitools <https://umi-tools.readthedocs.io/en/latest/>`_                                          | FASTQ/TRIMMED_FASTQ  | FASTQ/BAM  |
   +---------------+----------------------------------------------------------------------------------------------------------------------------------------------------+-----------+------------+----------------------------------------------------------------------------------------------------+----------------------+------------+
+  | fgumi         | High-performance tools for UMI-tagged sequencing data including UMI extraction and UMI-aware deduplication.                                        | fgumi     | fgumi      | `fgumi <https://github.com/fulcrumgenomics/fgumi>`_                                                | FASTQ/TRIMMED_FASTQ  | FASTQ/BAM  |
+  +---------------+----------------------------------------------------------------------------------------------------------------------------------------------------+-----------+------------+----------------------------------------------------------------------------------------------------+----------------------+------------+
   | Picard tools  | A better duplication marking algorithm that handles all cases including clipped and gapped alignments.                                             | picard    | picard     | `picard <https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard->`_  | BAM                  | BAM        |
   +---------------+----------------------------------------------------------------------------------------------------------------------------------------------------+-----------+------------+----------------------------------------------------------------------------------------------------+----------------------+------------+
 
diff --git a/tests/data/README_fgumi_test_data.md b/tests/data/README_fgumi_test_data.md
new file mode 100644
index 00000000..4902a8c1
--- /dev/null
+++ b/tests/data/README_fgumi_test_data.md
@@ -0,0 +1,27 @@
+# fgumi UMI smoke-test data
+
+This directory contains a tiny synthetic paired-end fixture generator for `fgumi`:
+
+- `make_fgumi_umi_fixture.py` writes
+  - `FASTQ/Test/umi/FGUMI01_R1.fastq.gz`
+  - `FASTQ/Test/umi/FGUMI01_R2.fastq.gz`
+- `config_fgumi_test.json` is a minimal tutorial-style MONSDA config that enables only `DEDUP` with `fgumi`.
+
+## Why synthetic data?
+
+Most publicly referenced UMI tutorial datasets are not truly small enough for quick CI-style smoke tests.
+The synthetic fixture keeps runtime tiny and deterministic while still exercising UMI extraction behavior.
+
+## External UMI datasets (if you want real data)
+
+- Galaxy Training (CEL-Seq2 UMI tutorial data on Zenodo):
+  - https://zenodo.org/record/2573177
+  - Files:
+    - `test_barcodes_celseq2_R1.fastq.gz` (~243.7 MB)
+    - `test_barcodes_celseq2_R2.fastq.gz` (~594.9 MB)
+- Galaxy tutorial page:
+  - https://training.galaxyproject.org/training-material/topics/single-cell/tutorials/scrna-umis/tutorial.html
+
+For nf-core test datasets, use branch discovery/search tooling first:
+- https://github.com/nf-core/test-datasets
+- https://github.com/nf-core/test-datasets/blob/master/docs/USE_EXISTING_DATA.md
diff --git a/tests/data/config_fgumi_test.json b/tests/data/config_fgumi_test.json
new file mode 100644
index 00000000..a19d1bc7
--- /dev/null
+++ b/tests/data/config_fgumi_test.json
@@ -0,0 +1,47 @@
+{
+  "WORKFLOWS": "DEDUP",
+  "BINS": "",
+  "MAXTHREADS": "2",
+  "VERSION": "FIXME",
+  "SETTINGS": {
+    "Test": {
+      "umi": {
+        "SAMPLES": [
+          "FGUMI01"
+        ],
+        "SEQUENCING": "paired",
+        "REFERENCE": "GENOME/genome.fa.gz",
+        "ANNOTATION": {
+          "GTF": "GENOME/genomic.gtf.gz",
+          "GFF": "GENOME/genomic.gff.gz"
+        },
+        "GROUPS": [
+          "umi"
+        ],
+        "TYPES": [
+          "fgumi_smoke"
+        ],
+        "BATCHES": [
+          "1"
+        ],
+        "INDEX": "",
+        "PREFIX": ""
+      }
+    }
+  },
+  "DEDUP": {
+    "TOOLS": {
+      "fgumi": "fgumi"
+    },
+    "Test": {
+      "umi": {
+        "fgumi": {
+          "OPTIONS": {
+            "EXTRACT": "--read-structures 6M+T +T",
+            "DEDUP": ""
+          }
+        }
+      }
+    }
+  }
+}
diff --git a/tests/data/make_fgumi_umi_fixture.py b/tests/data/make_fgumi_umi_fixture.py
new file mode 100644
index 00000000..79d9dbf6
--- /dev/null
+++ b/tests/data/make_fgumi_umi_fixture.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python3
+"""Create a tiny paired-end FASTQ fixture with synthetic UMIs for fgumi tests.
+
+This script writes gzipped FASTQ files into:
+  FASTQ/Test/umi/FGUMI01_R1.fastq.gz
+  FASTQ/Test/umi/FGUMI01_R2.fastq.gz
+
+R1 layout is intentionally compatible with a simple fgumi read structure like:
+  --read-structures 6M+T +T
+where the first 6 bases in R1 are a mock UMI.
+"""
+
+from __future__ import annotations
+
+import gzip
+from pathlib import Path
+
+OUTDIR = Path("FASTQ") / "Test" / "umi"
+SAMPLE = "FGUMI01"
+
+# (umi6, r1_suffix, r2_sequence)
+READS = [
+    ("ACGTAA", "TTTTTTTTTTTTAACCGGTTCCAA", "GATTACAGATTACAGATTACAGATTACA"),
+    ("ACGTAA", "TTTTTTTTTTTTAACCGGTTCCAA", "GATTACAGATTACAGATTACAGATTACA"),  # duplicate
+    ("TGCACT", "TTTTTTTTTTTTGGCCAATTGGCC", "CGTACGTACGTACGTACGTACGTACGTAC"),
+    (
+        "TGCACT",
+        "TTTTTTTTTTTTGGCCAATTGGCC",
+        "CGTACGTACGTACGTACGTACGTACGTAC",
+    ),  # duplicate
+    ("GGAACC", "TTTTTTTTTTTTCCGGAATTCCGG", "TTGCAATTGCAATTGCAATTGCAATTGCA"),
+    ("CCTTGG", "TTTTTTTTTTTTAATTCCGGAATT", "AACCGGTTAACCGGTTAACCGGTTAACCG"),
+    ("TTAAGG", "TTTTTTTTTTTTGGAATTCCGGAA", "GGCCAATTGGCCAATTGGCCAATTGGCCA"),
+    ("CCGGTT", "TTTTTTTTTTTTAACCTTGGAACC", "ATATCGCGATATCGCGATATCGCGATATC"),
+]
+
+
+def _fq_record(name: str, seq: str, qual_char: str = "I") -> str:  # build one four-line FASTQ record as text
+    return f"@{name}\n{seq}\n+\n{qual_char * len(seq)}\n"  # quality line repeats qual_char once per base
+
+
+def main() -> None:  # write the R1/R2 fixture files for SAMPLE and print their paths
+    OUTDIR.mkdir(parents=True, exist_ok=True)  # OUTDIR is a relative path, so it is created under the current working directory
+
+    r1_path = OUTDIR / f"{SAMPLE}_R1.fastq.gz"
+    r2_path = OUTDIR / f"{SAMPLE}_R2.fastq.gz"
+
+    with gzip.open(r1_path, "wt") as r1h, gzip.open(r2_path, "wt") as r2h:  # text mode, gzip-compressed
+        for i, (umi, r1_suffix, r2_seq) in enumerate(READS, start=1):
+            read_id = f"{SAMPLE}:{i}"  # both mates share this ID; only the " 1:"/" 2:" suffix differs
+            r1_seq = umi + r1_suffix  # the UMI is prepended, so it occupies the first bases of R1
+            r1h.write(_fq_record(read_id + " 1:N:0:TEST", r1_seq))
+            r2h.write(_fq_record(read_id + " 2:N:0:TEST", r2_seq))
+
+    print(f"Wrote {r1_path}")
+    print(f"Wrote {r2_path}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/test_fgumi_smoke.sh b/tests/test_fgumi_smoke.sh
new file mode 100644
index 00000000..fabfd17c
--- /dev/null
+++ b/tests/test_fgumi_smoke.sh
@@ -0,0 +1,22 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+SCRIPT_DIR=$(dirname "$(realpath "$0")")
+cd "${SCRIPT_DIR}"
+
+# Mirror existing integration style: symlink everything in data/ into this directory as working inputs
+ln -fs data/* .
+
+# Generate tiny gzipped UMI FASTQ inputs (the generator writes to FASTQ/Test/umi/ below the current directory)
+python data/make_fgumi_umi_fixture.py
+
+# Replace the "FIXME" VERSION placeholder in the test config with the version reported by the installed MONSDA
+VERSION=$(monsda --version 2>&1 | sed 's/MONSDA version //g')
+sed -i "s/\"VERSION\": \"FIXME\"/\"VERSION\": \"${VERSION}\"/g" config_fgumi_test.json
+
+mkdir -p CONDALIB
+
+# --save keeps this as a lightweight workflow generation smoke test
+monsda -j 2 -c config_fgumi_test.json --directory "${PWD}" --use-conda --conda-prefix CONDALIB --save
+
+echo "FGUMI smoke test workflow generation completed."

From 71297dbe89ce43c57f1aa541aadbb3a7fa9bd22d Mon Sep 17 00:00:00 2001
From: jfallmann <fallmann.joerg@gmail.com>
Date: Thu, 23 Apr 2026 11:13:25 +0200
Subject: [PATCH 04/39] gracefully exit if no samples found

---
 MONSDA/Params.py | 12 ++++++++++++
 MONSDA/Utils.py  | 16 ++++++++++++++++
 2 files changed, 28 insertions(+)

diff --git a/MONSDA/Params.py b/MONSDA/Params.py
index 71182733..e59c00ef 100644
--- a/MONSDA/Params.py
+++ b/MONSDA/Params.py
@@ -424,6 +424,18 @@ def get_samples_from_dir(search: str, config: dict, nocheck: str = None) -> list
                         clean.append(c)
                         break
             log.debug(logid + "checkclean: " + str(clean))
+            
+            # Check if any samples were found in the clean list
+            if not clean:
+                search_dir = os.sep.join(["FASTQ"] + search)
+                error_msg = (
+                    f"No sample files found for condition {os.sep.join(search)}. "
+                    f"Expected to find files matching samples {samples} "
+                    f"in directory: {search_dir}"
+                )
+                log.error(logid + error_msg)
+                raise ValueError(error_msg)
+            
             paired = checkpaired(
                 [os.sep.join([os.sep.join(search), clean[0].split(os.sep)[-1]])], config
             )
diff --git a/MONSDA/Utils.py b/MONSDA/Utils.py
index 29a8c4d2..3a8bae03 100644
--- a/MONSDA/Utils.py
+++ b/MONSDA/Utils.py
@@ -182,6 +182,22 @@ def func_wrapper(*args, **kwargs):
         try:
             return func(*args, **kwargs)
 
+        except ValueError as e:
+            # Missing-sample errors end the run with a short message instead of a
+            # traceback. Params.get_samples_from_dir words it "No sample files found
+            # ...", which contains no literal "not found", so match that phrase too.
+            error_msg = str(e)
+            msg_lc = error_msg.lower()
+            if "sample" in msg_lc and (
+                "not found" in msg_lc or "no sample files found" in msg_lc
+            ):
+                log.error(logid + error_msg)
+                log.error(logid + "STOPPING: Processing stopped due to missing samples")
+                sys.exit(1)
+            else:
+                # Any other ValueError is logged with its full traceback, not re-raised
+                tbe = tb.TracebackException(*sys.exc_info())
+                log.error(logid + "".join(tbe.format()))
         except Exception:
             exc_type, exc_value, exc_tb = sys.exc_info()
             tbe = tb.TracebackException(

From 888d4d86f7d4450220037f43bdc5e60d9db4dca6 Mon Sep 17 00:00:00 2001
From: jfallmann <fallmann.joerg@gmail.com>
Date: Thu, 23 Apr 2026 11:47:05 +0200
Subject: [PATCH 05/39] docs update

---
 README.md              | 7 -------
 docs/source/config.rst | 5 -----
 2 files changed, 12 deletions(-)

diff --git a/README.md b/README.md
index a442b81c..b3dfedfc 100644
--- a/README.md
+++ b/README.md
@@ -43,13 +43,6 @@ pip install MONSDA
 
 More information can be found in the official [documentation](https://monsda.readthedocs.io/en/latest/?badge=latest)
 
-## Notes on newly available tools
-
-- **rustqc** is available as an alternative QC backend and is designed for fast, integrated RNA-seq QC reporting with MultiQC-compatible outputs.
-- **fgumi** is available as an additional UMI-aware deduplication backend in the `DEDUP` step.
-
-Both tools are available through the shipped conda environments in `envs/` and can be selected through the regular config `TOOLS` sections.
-
 
 ## How does it work
 
diff --git a/docs/source/config.rst b/docs/source/config.rst
index 6be4d1ac..022fe09d 100644
--- a/docs/source/config.rst
+++ b/docs/source/config.rst
@@ -34,11 +34,6 @@ You can always define differing ENV/BIN keys for each condition-tree leaf separa
 
 The next key-level is the *OPTIONS* key which is where you can define additional parameters for each tool. It is not needed to define anything related to *single-/paired-* end or *singlecell* sequencing, this is done automatically.  To add parameters simply add the *OPTION* key which defines a dict where you can set parameters for each defined subworkflow-step. Parameters are here defined as key/value pairs corresponding to the subworkflow-step, e.g. 'INDEX' to generate an index file for mapping and all settings similar to a command line call as values. This should become clear having a look at the different processing steps in the template json.  If there are no options just leave the 'OPTIONS' dict empty.
 
-For newly available tools:
-
-- **QC/rustqc** can be selected with ``"rustqc": "rustqc"`` in the ``QC -> TOOLS`` section and configured via the regular ``OPTIONS`` keys (e.g. ``QC`` and ``MULTI`` entries in tutorial configs).
-- **DEDUP/fgumi** can be selected with ``"fgumi": "fgumi"`` in the ``DEDUP -> TOOLS`` section and supports dedicated options under ``OPTIONS`` using ``EXTRACT`` and ``DEDUP`` keys.
-
 
 .. literalinclude:: ../../configs/template.json
     :language: json

From 1d43280422de0bce3bbc760104805b10e72b3405 Mon Sep 17 00:00:00 2001
From: jfallmann <fallmann.joerg@gmail.com>
Date: Thu, 23 Apr 2026 11:47:14 +0200
Subject: [PATCH 06/39] fgumi zipper fix

---
 workflows/fgumi.nf  | 21 ++++++++++++++++++++-
 workflows/fgumi.smk | 10 ++++++----
 2 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/workflows/fgumi.nf b/workflows/fgumi.nf
index 227c386a..78b2776d 100644
--- a/workflows/fgumi.nf
+++ b/workflows/fgumi.nf
@@ -85,6 +85,7 @@ process dedup_bam{
     input:
     path mapped_bam
     path ubam
+    path ref
 
     output:
     path "${mapped_bam.baseName}_dedup.bam", emit: dedup_bam
@@ -94,10 +95,28 @@ process dedup_bam{
     script:
     """
     mkdir -p tmp
-    $DEDUPBIN zipper --unmapped $ubam --aligned $mapped_bam --output tmp/zippered.bam > dedup.log 2>&1
+    $DEDUPBIN zipper --unmapped $ubam --input $mapped_bam --reference $ref --output tmp/zippered.bam > dedup.log 2>&1
     $DEDUPBIN sort --order template-coordinate --input tmp/zippered.bam --output tmp/sorted.bam >> dedup.log 2>&1
     $DEDUPBIN dedup $DEDUPPARAMS --input tmp/sorted.bam --output ${mapped_bam.baseName}_dedup.bam >> dedup.log 2>&1
     samtools index ${mapped_bam.baseName}_dedup.bam >> dedup.log 2>&1
     rm $ubam
     """
 }
+
+workflow DEDUPBAM{
+    take:
+    map     // mapped BAMs
+    mapi    // not referenced in main
+    mapu    // second BAM channel, concatenated with map below
+    mapui   // not referenced in main
+    ubam    // unmapped BAMs passed to dedup_bam
+
+    main:
+    ref_ch = Channel.fromPath(REFERENCE).first()  // value channel: a one-item queue channel would be consumed by the first task, so only one BAM would be deduplicated
+    dedup_bam(map.concat(mapu), ubam, ref_ch)
+
+    emit:
+    dedup = dedup_bam.out.dedup_bam
+    dedupbai = dedup_bam.out.dedup_idx
+    deduplog = dedup_bam.out.logs
+}
diff --git a/workflows/fgumi.smk b/workflows/fgumi.smk
index 60a197e2..8483f96e 100644
--- a/workflows/fgumi.smk
+++ b/workflows/fgumi.smk
@@ -47,9 +47,10 @@ if paired == 'paired':
         threads: 1
         priority: 0               # This should be done after all mapping is done
         params: dpara = lambda wildcards: tool_params(wildcards.file, None, config, "DEDUP", DEDUPENV)['OPTIONS'].get('DEDUP', ""),
-                dedup = DEDUPBIN
+                dedup = DEDUPBIN,
+                ref = REFERENCE
         shell: """mkdir -p {output.td}
-{params.dedup} zipper --unmapped {input.ubam} --aligned {input.bam} --output {output.td}/zippered.bam > {log} 2>&1
+{params.dedup} zipper --unmapped {input.ubam} --input {input.bam} --reference {params.ref} --output {output.td}/zippered.bam > {log} 2>&1
 {params.dedup} sort --order template-coordinate --input {output.td}/zippered.bam --output {output.td}/sorted.bam >> {log} 2>&1
 {params.dedup} dedup {params.dpara} --input {output.td}/sorted.bam --output {output.bam} >> {log} 2>&1
 samtools index {output.bam} >> {log} 2>&1
@@ -67,9 +68,10 @@ else:
         threads: 1
         priority: 0               # This should be done after all mapping is done
         params: dpara = lambda wildcards: tool_params(wildcards.file, None, config, "DEDUP", DEDUPENV)['OPTIONS'].get('DEDUP', ""),
-                dedup = DEDUPBIN
+                dedup = DEDUPBIN,
+                ref = REFERENCE
         shell: """mkdir -p {output.td}
-{params.dedup} zipper --unmapped {input.ubam} --aligned {input.bam} --output {output.td}/zippered.bam > {log} 2>&1
+{params.dedup} zipper --unmapped {input.ubam} --input {input.bam} --reference {params.ref} --output {output.td}/zippered.bam > {log} 2>&1
 {params.dedup} sort --order template-coordinate --input {output.td}/zippered.bam --output {output.td}/sorted.bam >> {log} 2>&1
 {params.dedup} dedup {params.dpara} --input {output.td}/sorted.bam --output {output.bam} >> {log} 2>&1
 samtools index {output.bam} >> {log} 2>&1

From 900fdb6979222a3324d7c9452c0db1572f871fc2 Mon Sep 17 00:00:00 2001
From: jfallmann <fallmann.joerg@gmail.com>
Date: Thu, 23 Apr 2026 13:36:03 +0200
Subject: [PATCH 07/39] fgumi ref dict if not available

---
 workflows/fgumi.nf  |  4 +++-
 workflows/fgumi.smk | 12 ++++++++----
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/workflows/fgumi.nf b/workflows/fgumi.nf
index 78b2776d..e9cd0907 100644
--- a/workflows/fgumi.nf
+++ b/workflows/fgumi.nf
@@ -95,7 +95,9 @@ process dedup_bam{
     script:
     """
     mkdir -p tmp
-    $DEDUPBIN zipper --unmapped $ubam --input $mapped_bam --reference $ref --output tmp/zippered.bam > dedup.log 2>&1
+    ref_dict=\$(basename $ref .gz).dict
+    if [[ ! -f "\${ref_dict}" ]]; then samtools dict $ref -o \${ref_dict} >> dedup.log 2>&1; fi
+    $DEDUPBIN zipper --unmapped $ubam --input $mapped_bam --reference $ref --output tmp/zippered.bam >> dedup.log 2>&1
     $DEDUPBIN sort --order template-coordinate --input tmp/zippered.bam --output tmp/sorted.bam >> dedup.log 2>&1
     $DEDUPBIN dedup $DEDUPPARAMS --input tmp/sorted.bam --output ${mapped_bam.baseName}_dedup.bam >> dedup.log 2>&1
     samtools index ${mapped_bam.baseName}_dedup.bam >> dedup.log 2>&1
diff --git a/workflows/fgumi.smk b/workflows/fgumi.smk
index 8483f96e..efb4d6b0 100644
--- a/workflows/fgumi.smk
+++ b/workflows/fgumi.smk
@@ -48,9 +48,11 @@ if paired == 'paired':
         priority: 0               # This should be done after all mapping is done
         params: dpara = lambda wildcards: tool_params(wildcards.file, None, config, "DEDUP", DEDUPENV)['OPTIONS'].get('DEDUP', ""),
                 dedup = DEDUPBIN,
-                ref = REFERENCE
+                ref = REFERENCE,
+                ref_dict = (REFERENCE[:-3] if REFERENCE.endswith('.gz') else REFERENCE) + ".dict"
         shell: """mkdir -p {output.td}
-{params.dedup} zipper --unmapped {input.ubam} --input {input.bam} --reference {params.ref} --output {output.td}/zippered.bam > {log} 2>&1
+[[ -f "{params.ref_dict}" ]] || samtools dict {params.ref} -o {params.ref_dict} >> {log} 2>&1
+{params.dedup} zipper --unmapped {input.ubam} --input {input.bam} --reference {params.ref} --output {output.td}/zippered.bam >> {log} 2>&1
 {params.dedup} sort --order template-coordinate --input {output.td}/zippered.bam --output {output.td}/sorted.bam >> {log} 2>&1
 {params.dedup} dedup {params.dpara} --input {output.td}/sorted.bam --output {output.bam} >> {log} 2>&1
 samtools index {output.bam} >> {log} 2>&1
@@ -69,9 +71,11 @@ else:
         priority: 0               # This should be done after all mapping is done
         params: dpara = lambda wildcards: tool_params(wildcards.file, None, config, "DEDUP", DEDUPENV)['OPTIONS'].get('DEDUP', ""),
                 dedup = DEDUPBIN,
-                ref = REFERENCE
+                ref = REFERENCE,
+                ref_dict = (REFERENCE[:-3] if REFERENCE.endswith('.gz') else REFERENCE) + ".dict"
         shell: """mkdir -p {output.td}
-{params.dedup} zipper --unmapped {input.ubam} --input {input.bam} --reference {params.ref} --output {output.td}/zippered.bam > {log} 2>&1
+[[ -f "{params.ref_dict}" ]] || samtools dict {params.ref} -o {params.ref_dict} >> {log} 2>&1
+{params.dedup} zipper --unmapped {input.ubam} --input {input.bam} --reference {params.ref} --output {output.td}/zippered.bam >> {log} 2>&1
 {params.dedup} sort --order template-coordinate --input {output.td}/zippered.bam --output {output.td}/sorted.bam >> {log} 2>&1
 {params.dedup} dedup {params.dpara} --input {output.td}/sorted.bam --output {output.bam} >> {log} 2>&1
 samtools index {output.bam} >> {log} 2>&1

From 84ec08f6737c4e872e3869b209a204a52a3e84f8 Mon Sep 17 00:00:00 2001
From: jfallmann <fallmann.joerg@gmail.com>
Date: Thu, 23 Apr 2026 13:41:35 +0200
Subject: [PATCH 08/39] sorting by name before fgumi zipper

---
 workflows/fgumi.nf  |  6 ++++--
 workflows/fgumi.smk | 12 ++++++++----
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/workflows/fgumi.nf b/workflows/fgumi.nf
index e9cd0907..b0e097ee 100644
--- a/workflows/fgumi.nf
+++ b/workflows/fgumi.nf
@@ -77,7 +77,7 @@ workflow DEDUPEXTRACT{
 process dedup_bam{
     conda "$DEDUPENV"+".yaml"
     container "oras://jfallmann/monsda:"+"$DEDUPENV"
-    cpus THREADS
+    cpus 4
     cache 'lenient'
 
     publishDir "${workflow.workDir}/../MAPPED/${COMBO}/${CONDITION}" , mode: 'link'
@@ -97,7 +97,9 @@ process dedup_bam{
     mkdir -p tmp
     ref_dict=\$(basename $ref .gz).dict
     if [[ ! -f "\${ref_dict}" ]]; then samtools dict $ref -o \${ref_dict} >> dedup.log 2>&1; fi
-    $DEDUPBIN zipper --unmapped $ubam --input $mapped_bam --reference $ref --output tmp/zippered.bam >> dedup.log 2>&1
+    samtools sort -n -@ ${task.cpus} -o tmp/ubam_qn.bam $ubam >> dedup.log 2>&1
+    samtools sort -n -@ ${task.cpus} -o tmp/mapped_qn.bam $mapped_bam >> dedup.log 2>&1
+    $DEDUPBIN zipper --unmapped tmp/ubam_qn.bam --input tmp/mapped_qn.bam --reference $ref --output tmp/zippered.bam >> dedup.log 2>&1
     $DEDUPBIN sort --order template-coordinate --input tmp/zippered.bam --output tmp/sorted.bam >> dedup.log 2>&1
     $DEDUPBIN dedup $DEDUPPARAMS --input tmp/sorted.bam --output ${mapped_bam.baseName}_dedup.bam >> dedup.log 2>&1
     samtools index ${mapped_bam.baseName}_dedup.bam >> dedup.log 2>&1
diff --git a/workflows/fgumi.smk b/workflows/fgumi.smk
index efb4d6b0..06b61ee2 100644
--- a/workflows/fgumi.smk
+++ b/workflows/fgumi.smk
@@ -44,7 +44,7 @@ if paired == 'paired':
         log:    "LOGS/{combo}/{file}_{type}/dedupbam.log"
         conda:  ""+DEDUPENV+".yaml"
         container: "oras://jfallmann/monsda:"+DEDUPENV+""
-        threads: 1
+        threads: 4
         priority: 0               # This should be done after all mapping is done
         params: dpara = lambda wildcards: tool_params(wildcards.file, None, config, "DEDUP", DEDUPENV)['OPTIONS'].get('DEDUP', ""),
                 dedup = DEDUPBIN,
@@ -52,7 +52,9 @@ if paired == 'paired':
                 ref_dict = (REFERENCE[:-3] if REFERENCE.endswith('.gz') else REFERENCE) + ".dict"
         shell: """mkdir -p {output.td}
 [[ -f "{params.ref_dict}" ]] || samtools dict {params.ref} -o {params.ref_dict} >> {log} 2>&1
-{params.dedup} zipper --unmapped {input.ubam} --input {input.bam} --reference {params.ref} --output {output.td}/zippered.bam >> {log} 2>&1
+samtools sort -n -@ {threads} -o {output.td}/ubam_qn.bam {input.ubam} >> {log} 2>&1
+samtools sort -n -@ {threads} -o {output.td}/mapped_qn.bam {input.bam} >> {log} 2>&1
+{params.dedup} zipper --unmapped {output.td}/ubam_qn.bam --input {output.td}/mapped_qn.bam --reference {params.ref} --output {output.td}/zippered.bam >> {log} 2>&1
 {params.dedup} sort --order template-coordinate --input {output.td}/zippered.bam --output {output.td}/sorted.bam >> {log} 2>&1
 {params.dedup} dedup {params.dpara} --input {output.td}/sorted.bam --output {output.bam} >> {log} 2>&1
 samtools index {output.bam} >> {log} 2>&1
@@ -67,7 +69,7 @@ else:
         log:    "LOGS/{combo}/{file}_{type}/dedupbam.log"
         conda:  ""+DEDUPENV+".yaml"
         container: "oras://jfallmann/monsda:"+DEDUPENV+""
-        threads: 1
+        threads: 4
         priority: 0               # This should be done after all mapping is done
         params: dpara = lambda wildcards: tool_params(wildcards.file, None, config, "DEDUP", DEDUPENV)['OPTIONS'].get('DEDUP', ""),
                 dedup = DEDUPBIN,
@@ -75,7 +77,9 @@ else:
                 ref_dict = (REFERENCE[:-3] if REFERENCE.endswith('.gz') else REFERENCE) + ".dict"
         shell: """mkdir -p {output.td}
 [[ -f "{params.ref_dict}" ]] || samtools dict {params.ref} -o {params.ref_dict} >> {log} 2>&1
-{params.dedup} zipper --unmapped {input.ubam} --input {input.bam} --reference {params.ref} --output {output.td}/zippered.bam >> {log} 2>&1
+samtools sort -n -@ {threads} -o {output.td}/ubam_qn.bam {input.ubam} >> {log} 2>&1
+samtools sort -n -@ {threads} -o {output.td}/mapped_qn.bam {input.bam} >> {log} 2>&1
+{params.dedup} zipper --unmapped {output.td}/ubam_qn.bam --input {output.td}/mapped_qn.bam --reference {params.ref} --output {output.td}/zippered.bam >> {log} 2>&1
 {params.dedup} sort --order template-coordinate --input {output.td}/zippered.bam --output {output.td}/sorted.bam >> {log} 2>&1
 {params.dedup} dedup {params.dpara} --input {output.td}/sorted.bam --output {output.bam} >> {log} 2>&1
 samtools index {output.bam} >> {log} 2>&1

From c1c0cc528906d76d68d85be3dd28faede007fdd9 Mon Sep 17 00:00:00 2001
From: jfallmann <fallmann.joerg@gmail.com>
Date: Thu, 23 Apr 2026 14:23:08 +0200
Subject: [PATCH 09/39] sorting before index

---
 workflows/fgumi.nf  | 3 ++-
 workflows/fgumi.smk | 6 ++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/workflows/fgumi.nf b/workflows/fgumi.nf
index b0e097ee..0f32f9a6 100644
--- a/workflows/fgumi.nf
+++ b/workflows/fgumi.nf
@@ -101,7 +101,8 @@ process dedup_bam{
     samtools sort -n -@ ${task.cpus} -o tmp/mapped_qn.bam $mapped_bam >> dedup.log 2>&1
     $DEDUPBIN zipper --unmapped tmp/ubam_qn.bam --input tmp/mapped_qn.bam --reference $ref --output tmp/zippered.bam >> dedup.log 2>&1
     $DEDUPBIN sort --order template-coordinate --input tmp/zippered.bam --output tmp/sorted.bam >> dedup.log 2>&1
-    $DEDUPBIN dedup $DEDUPPARAMS --input tmp/sorted.bam --output ${mapped_bam.baseName}_dedup.bam >> dedup.log 2>&1
+    $DEDUPBIN dedup $DEDUPPARAMS --input tmp/sorted.bam --output tmp/dedup.bam >> dedup.log 2>&1
+    samtools sort -@ ${task.cpus} -o ${mapped_bam.baseName}_dedup.bam tmp/dedup.bam >> dedup.log 2>&1
     samtools index ${mapped_bam.baseName}_dedup.bam >> dedup.log 2>&1
     rm $ubam
     """
diff --git a/workflows/fgumi.smk b/workflows/fgumi.smk
index 06b61ee2..0eb1fe64 100644
--- a/workflows/fgumi.smk
+++ b/workflows/fgumi.smk
@@ -56,7 +56,8 @@ samtools sort -n -@ {threads} -o {output.td}/ubam_qn.bam {input.ubam} >> {log} 2
 samtools sort -n -@ {threads} -o {output.td}/mapped_qn.bam {input.bam} >> {log} 2>&1
 {params.dedup} zipper --unmapped {output.td}/ubam_qn.bam --input {output.td}/mapped_qn.bam --reference {params.ref} --output {output.td}/zippered.bam >> {log} 2>&1
 {params.dedup} sort --order template-coordinate --input {output.td}/zippered.bam --output {output.td}/sorted.bam >> {log} 2>&1
-{params.dedup} dedup {params.dpara} --input {output.td}/sorted.bam --output {output.bam} >> {log} 2>&1
+{params.dedup} dedup {params.dpara} --input {output.td}/sorted.bam --output {output.td}/dedup.bam >> {log} 2>&1
+samtools sort -@ {threads} -o {output.bam} {output.td}/dedup.bam >> {log} 2>&1
 samtools index {output.bam} >> {log} 2>&1
 rm {input.ubam}"""
 else:
@@ -81,6 +82,7 @@ samtools sort -n -@ {threads} -o {output.td}/ubam_qn.bam {input.ubam} >> {log} 2
 samtools sort -n -@ {threads} -o {output.td}/mapped_qn.bam {input.bam} >> {log} 2>&1
 {params.dedup} zipper --unmapped {output.td}/ubam_qn.bam --input {output.td}/mapped_qn.bam --reference {params.ref} --output {output.td}/zippered.bam >> {log} 2>&1
 {params.dedup} sort --order template-coordinate --input {output.td}/zippered.bam --output {output.td}/sorted.bam >> {log} 2>&1
-{params.dedup} dedup {params.dpara} --input {output.td}/sorted.bam --output {output.bam} >> {log} 2>&1
+{params.dedup} dedup {params.dpara} --input {output.td}/sorted.bam --output {output.td}/dedup.bam >> {log} 2>&1
+samtools sort -@ {threads} -o {output.bam} {output.td}/dedup.bam >> {log} 2>&1
 samtools index {output.bam} >> {log} 2>&1
 rm {input.ubam}"""

From b07955b311fcf1a9fd209415b8ab121e41e0de2e Mon Sep 17 00:00:00 2001
From: jfallmann <fallmann.joerg@gmail.com>
Date: Fri, 24 Apr 2026 14:31:14 +0200
Subject: [PATCH 10/39] fgumi update

---
 workflows/fgumi.nf  |  8 ++++----
 workflows/fgumi.smk | 16 ++++++++--------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/workflows/fgumi.nf b/workflows/fgumi.nf
index 0f32f9a6..566abfc3 100644
--- a/workflows/fgumi.nf
+++ b/workflows/fgumi.nf
@@ -99,11 +99,11 @@ process dedup_bam{
     if [[ ! -f "\${ref_dict}" ]]; then samtools dict $ref -o \${ref_dict} >> dedup.log 2>&1; fi
     samtools sort -n -@ ${task.cpus} -o tmp/ubam_qn.bam $ubam >> dedup.log 2>&1
     samtools sort -n -@ ${task.cpus} -o tmp/mapped_qn.bam $mapped_bam >> dedup.log 2>&1
-    $DEDUPBIN zipper --unmapped tmp/ubam_qn.bam --input tmp/mapped_qn.bam --reference $ref --output tmp/zippered.bam >> dedup.log 2>&1
-    $DEDUPBIN sort --order template-coordinate --input tmp/zippered.bam --output tmp/sorted.bam >> dedup.log 2>&1
+    samtools view -h tmp/mapped_qn.bam | awk 'BEGIN{FS=OFS="\t"} /^@/{print; next} {f=\$2+0; if (!and(f,256) && !and(f,2048)) {k=\$1":"(and(f,64)?1:0)":"(and(f,128)?1:0); if (seen[k]++) next} print}' | samtools view -b -o tmp/mapped_qn_primaryuniq.bam - >> dedup.log 2>&1
+    $DEDUPBIN zipper --unmapped tmp/ubam_qn.bam --input tmp/mapped_qn_primaryuniq.bam --reference $ref --output tmp/zippered.bam --threads ${task.cpus} --compression-level 1 >> dedup.log 2>&1
+    $DEDUPBIN sort --order template-coordinate --input tmp/zippered.bam --output tmp/sorted.bam --threads ${task.cpus} --compression-level 1 >> dedup.log 2>&1
     $DEDUPBIN dedup $DEDUPPARAMS --input tmp/sorted.bam --output tmp/dedup.bam >> dedup.log 2>&1
-    samtools sort -@ ${task.cpus} -o ${mapped_bam.baseName}_dedup.bam tmp/dedup.bam >> dedup.log 2>&1
-    samtools index ${mapped_bam.baseName}_dedup.bam >> dedup.log 2>&1
+    $DEDUPBIN sort --order coordinate --input tmp/dedup.bam --output ${mapped_bam.baseName}_dedup.bam --write-index --threads ${task.cpus} --compression-level 1 >> dedup.log 2>&1
     rm $ubam
     """
 }
diff --git a/workflows/fgumi.smk b/workflows/fgumi.smk
index 0eb1fe64..04d25ba1 100644
--- a/workflows/fgumi.smk
+++ b/workflows/fgumi.smk
@@ -54,11 +54,11 @@ if paired == 'paired':
 [[ -f "{params.ref_dict}" ]] || samtools dict {params.ref} -o {params.ref_dict} >> {log} 2>&1
 samtools sort -n -@ {threads} -o {output.td}/ubam_qn.bam {input.ubam} >> {log} 2>&1
 samtools sort -n -@ {threads} -o {output.td}/mapped_qn.bam {input.bam} >> {log} 2>&1
-{params.dedup} zipper --unmapped {output.td}/ubam_qn.bam --input {output.td}/mapped_qn.bam --reference {params.ref} --output {output.td}/zippered.bam >> {log} 2>&1
-{params.dedup} sort --order template-coordinate --input {output.td}/zippered.bam --output {output.td}/sorted.bam >> {log} 2>&1
+samtools view -h {output.td}/mapped_qn.bam | awk 'BEGIN{{FS=OFS="\t"}} /^@/{{print; next}} {{f=$2+0; if (!and(f,256) && !and(f,2048)) {{k=$1":"(and(f,64)?1:0)":"(and(f,128)?1:0); if (seen[k]++) next}} print}}' | samtools view -b -o {output.td}/mapped_qn_primaryuniq.bam - >> {log} 2>&1
+{params.dedup} zipper --unmapped {output.td}/ubam_qn.bam --input {output.td}/mapped_qn_primaryuniq.bam --reference {params.ref} --output {output.td}/zippered.bam --threads {threads} --compression-level 1 >> {log} 2>&1
+{params.dedup} sort --order template-coordinate --input {output.td}/zippered.bam --output {output.td}/sorted.bam --threads {threads} --compression-level 1 >> {log} 2>&1
 {params.dedup} dedup {params.dpara} --input {output.td}/sorted.bam --output {output.td}/dedup.bam >> {log} 2>&1
-samtools sort -@ {threads} -o {output.bam} {output.td}/dedup.bam >> {log} 2>&1
-samtools index {output.bam} >> {log} 2>&1
+{params.dedup} sort --order coordinate --input {output.td}/dedup.bam --output {output.bam} --write-index --threads {threads} --compression-level 1 >> {log} 2>&1
 rm {input.ubam}"""
 else:
     rule dedupbam:
@@ -80,9 +80,9 @@ else:
 [[ -f "{params.ref_dict}" ]] || samtools dict {params.ref} -o {params.ref_dict} >> {log} 2>&1
 samtools sort -n -@ {threads} -o {output.td}/ubam_qn.bam {input.ubam} >> {log} 2>&1
 samtools sort -n -@ {threads} -o {output.td}/mapped_qn.bam {input.bam} >> {log} 2>&1
-{params.dedup} zipper --unmapped {output.td}/ubam_qn.bam --input {output.td}/mapped_qn.bam --reference {params.ref} --output {output.td}/zippered.bam >> {log} 2>&1
-{params.dedup} sort --order template-coordinate --input {output.td}/zippered.bam --output {output.td}/sorted.bam >> {log} 2>&1
+samtools view -h {output.td}/mapped_qn.bam | awk 'BEGIN{{FS=OFS="\t"}} /^@/{{print; next}} {{f=$2+0; if (!and(f,256) && !and(f,2048)) {{k=$1":"(and(f,64)?1:0)":"(and(f,128)?1:0); if (seen[k]++) next}} print}}' | samtools view -b -o {output.td}/mapped_qn_primaryuniq.bam - >> {log} 2>&1
+{params.dedup} zipper --unmapped {output.td}/ubam_qn.bam --input {output.td}/mapped_qn_primaryuniq.bam --reference {params.ref} --output {output.td}/zippered.bam --threads {threads} --compression-level 1 >> {log} 2>&1
+{params.dedup} sort --order template-coordinate --input {output.td}/zippered.bam --output {output.td}/sorted.bam --threads {threads} --compression-level 1 >> {log} 2>&1
 {params.dedup} dedup {params.dpara} --input {output.td}/sorted.bam --output {output.td}/dedup.bam >> {log} 2>&1
-samtools sort -@ {threads} -o {output.bam} {output.td}/dedup.bam >> {log} 2>&1
-samtools index {output.bam} >> {log} 2>&1
+{params.dedup} sort --order coordinate --input {output.td}/dedup.bam --output {output.bam} --write-index --threads {threads} --compression-level 1 >> {log} 2>&1
 rm {input.ubam}"""

From 0d8460f87dbc68a19ae158e67ee8cbaa1fc74afe Mon Sep 17 00:00:00 2001
From: jfallmann <fallmann.joerg@gmail.com>
Date: Fri, 24 Apr 2026 14:52:07 +0200
Subject: [PATCH 11/39] fgumi update

---
 workflows/fgumi.nf  | 1 -
 workflows/fgumi.smk | 4 ++--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/workflows/fgumi.nf b/workflows/fgumi.nf
index 566abfc3..190c583f 100644
--- a/workflows/fgumi.nf
+++ b/workflows/fgumi.nf
@@ -104,7 +104,6 @@ process dedup_bam{
     $DEDUPBIN sort --order template-coordinate --input tmp/zippered.bam --output tmp/sorted.bam --threads ${task.cpus} --compression-level 1 >> dedup.log 2>&1
     $DEDUPBIN dedup $DEDUPPARAMS --input tmp/sorted.bam --output tmp/dedup.bam >> dedup.log 2>&1
     $DEDUPBIN sort --order coordinate --input tmp/dedup.bam --output ${mapped_bam.baseName}_dedup.bam --write-index --threads ${task.cpus} --compression-level 1 >> dedup.log 2>&1
-    rm $ubam
     """
 }
 
diff --git a/workflows/fgumi.smk b/workflows/fgumi.smk
index 04d25ba1..edeacf1e 100644
--- a/workflows/fgumi.smk
+++ b/workflows/fgumi.smk
@@ -59,7 +59,7 @@ samtools view -h {output.td}/mapped_qn.bam | awk 'BEGIN{{FS=OFS="\t"}} /^@/{{pri
 {params.dedup} sort --order template-coordinate --input {output.td}/zippered.bam --output {output.td}/sorted.bam --threads {threads} --compression-level 1 >> {log} 2>&1
 {params.dedup} dedup {params.dpara} --input {output.td}/sorted.bam --output {output.td}/dedup.bam >> {log} 2>&1
 {params.dedup} sort --order coordinate --input {output.td}/dedup.bam --output {output.bam} --write-index --threads {threads} --compression-level 1 >> {log} 2>&1
-rm {input.ubam}"""
+"""
 else:
     rule dedupbam:
         input:  bam = "MAPPED/{combo}/{file}_mapped_{type}.bam",
@@ -85,4 +85,4 @@ samtools view -h {output.td}/mapped_qn.bam | awk 'BEGIN{{FS=OFS="\t"}} /^@/{{pri
 {params.dedup} sort --order template-coordinate --input {output.td}/zippered.bam --output {output.td}/sorted.bam --threads {threads} --compression-level 1 >> {log} 2>&1
 {params.dedup} dedup {params.dpara} --input {output.td}/sorted.bam --output {output.td}/dedup.bam >> {log} 2>&1
 {params.dedup} sort --order coordinate --input {output.td}/dedup.bam --output {output.bam} --write-index --threads {threads} --compression-level 1 >> {log} 2>&1
-rm {input.ubam}"""
+"""

From ffb8e3dd5121e24e823d3248b77f9a8ba1692183 Mon Sep 17 00:00:00 2001
From: jfallmann <fallmann.joerg@gmail.com>
Date: Fri, 24 Apr 2026 15:04:21 +0200
Subject: [PATCH 12/39] fgumi nf split/update

---
 MONSDA/Workflows.py      | 40 ++++++++++++++++++++++++--------
 workflows/fgumi.nf       | 50 ----------------------------------------
 workflows/fgumi_dedup.nf | 45 ++++++++++++++++++++++++++++++------
 3 files changed, 68 insertions(+), 67 deletions(-)

diff --git a/MONSDA/Workflows.py b/MONSDA/Workflows.py
index 709b95e1..3b93851f 100755
--- a/MONSDA/Workflows.py
+++ b/MONSDA/Workflows.py
@@ -2712,6 +2712,9 @@ def nf_make_pre(
                         subjobs.append(line)
                     subjobs.append("\n\n")
 
+                if "DEDUP" in works:
+                    flowlist.append("DEDUPBAM")
+
                 tp.append(
                     nf_tool_params(
                         subsamples[0],
@@ -2932,6 +2935,7 @@ def nf_make_sub(
                             flowlist.append("TRIMMING")
 
                         if works[j] == "DEDUP":
+                            deduptool = toolenv
                             if toolenv in ["umitools", "fgumi"]:
                                 flowlist.append("PREDEDUP")
                                 subconf["PREDEDUP"] = "enabled"
@@ -3101,11 +3105,18 @@ def nf_make_sub(
                                 " " * 4 + "POSTMAPPING(MAPPING.out.mapped)\n"
                             )
                         elif w == "DEDUPBAM":
-                            subjobs.append(
-                                " " * 4
-                                + w
-                                + "(POSTMAPPING.out.postmap, POSTMAPPING.out.postbai, POSTMAPPING.out.postmapuni, POSTMAPPING.out.postunibai)\n"
-                            )
+                            if deduptool == "fgumi":
+                                subjobs.append(
+                                    " " * 4
+                                    + w
+                                    + "(POSTMAPPING.out.postmap, POSTMAPPING.out.postbai, POSTMAPPING.out.postmapuni, POSTMAPPING.out.postunibai, DEDUPEXTRACT.out.ubam)\n"
+                                )
+                            else:
+                                subjobs.append(
+                                    " " * 4
+                                    + w
+                                    + "(POSTMAPPING.out.postmap, POSTMAPPING.out.postbai, POSTMAPPING.out.postmapuni, POSTMAPPING.out.postunibai)\n"
+                                )
                         elif w == "QC_MAPPING":
                             if "DEDUPBAM" in flowlist:
                                 subjobs.append(
@@ -3205,6 +3216,7 @@ def nf_make_sub(
             subjobs = list()
             subconf = mu.NestedDefaultDict()
             tp = list()
+            deduptool = None
 
             for subwork in subworkflows:
                 log.debug(logid + "PREPARING " + str(subwork) + " " + str(condition))
@@ -3292,6 +3304,7 @@ def nf_make_sub(
                     if subwork == "DEDUP" and toolenv == "picard":
                         subname = toolenv + "_dedup.nf"
                     elif subwork == "DEDUP" and toolenv in ["umitools", "fgumi"]:
+                        deduptool = toolenv
                         flowlist.append("PREDEDUP")
                         subconf["PREDEDUP"] = "enabled"
                         if "QC" in flowlist:
@@ -3440,11 +3453,18 @@ def nf_make_sub(
                         subjobs.append(" " * 4 + w + "(TRIMMING.out.trimmed)\n")
                         subjobs.append(" " * 4 + "POSTMAPPING(MAPPING.out.mapped)\n")
                     elif w == "DEDUPBAM":
-                        subjobs.append(
-                            " " * 4
-                            + w
-                            + "(POSTMAPPING.out.postmap, POSTMAPPING.out.postbai, POSTMAPPING.out.postmapuni, POSTMAPPING.out.postunibai)\n"
-                        )
+                        if deduptool == "fgumi":
+                            subjobs.append(
+                                " " * 4
+                                + w
+                                + "(POSTMAPPING.out.postmap, POSTMAPPING.out.postbai, POSTMAPPING.out.postmapuni, POSTMAPPING.out.postunibai, DEDUPEXTRACT.out.ubam)\n"
+                            )
+                        else:
+                            subjobs.append(
+                                " " * 4
+                                + w
+                                + "(POSTMAPPING.out.postmap, POSTMAPPING.out.postbai, POSTMAPPING.out.postmapuni, POSTMAPPING.out.postunibai)\n"
+                            )
                     elif w == "QC_MAPPING":
                         if "DEDUPBAM" in flowlist:
                             subjobs.append(
diff --git a/workflows/fgumi.nf b/workflows/fgumi.nf
index 190c583f..9e9547b9 100644
--- a/workflows/fgumi.nf
+++ b/workflows/fgumi.nf
@@ -74,53 +74,3 @@ workflow DEDUPEXTRACT{
     logs = extract_fq.out.logs
 }
 
-process dedup_bam{
-    conda "$DEDUPENV"+".yaml"
-    container "oras://jfallmann/monsda:"+"$DEDUPENV"
-    cpus 4
-    cache 'lenient'
-
-    publishDir "${workflow.workDir}/../MAPPED/${COMBO}/${CONDITION}" , mode: 'link'
-
-    input:
-    path mapped_bam
-    path ubam
-    path ref
-
-    output:
-    path "${mapped_bam.baseName}_dedup.bam", emit: dedup_bam
-    path "${mapped_bam.baseName}_dedup.bam.bai", emit: dedup_idx
-    path "dedup.log", emit: logs
-
-    script:
-    """
-    mkdir -p tmp
-    ref_dict=\$(basename $ref .gz).dict
-    if [[ ! -f "\${ref_dict}" ]]; then samtools dict $ref -o \${ref_dict} >> dedup.log 2>&1; fi
-    samtools sort -n -@ ${task.cpus} -o tmp/ubam_qn.bam $ubam >> dedup.log 2>&1
-    samtools sort -n -@ ${task.cpus} -o tmp/mapped_qn.bam $mapped_bam >> dedup.log 2>&1
-    samtools view -h tmp/mapped_qn.bam | awk 'BEGIN{FS=OFS="\t"} /^@/{print; next} {f=\$2+0; if (!and(f,256) && !and(f,2048)) {k=\$1":"(and(f,64)?1:0)":"(and(f,128)?1:0); if (seen[k]++) next} print}' | samtools view -b -o tmp/mapped_qn_primaryuniq.bam - >> dedup.log 2>&1
-    $DEDUPBIN zipper --unmapped tmp/ubam_qn.bam --input tmp/mapped_qn_primaryuniq.bam --reference $ref --output tmp/zippered.bam --threads ${task.cpus} --compression-level 1 >> dedup.log 2>&1
-    $DEDUPBIN sort --order template-coordinate --input tmp/zippered.bam --output tmp/sorted.bam --threads ${task.cpus} --compression-level 1 >> dedup.log 2>&1
-    $DEDUPBIN dedup $DEDUPPARAMS --input tmp/sorted.bam --output tmp/dedup.bam >> dedup.log 2>&1
-    $DEDUPBIN sort --order coordinate --input tmp/dedup.bam --output ${mapped_bam.baseName}_dedup.bam --write-index --threads ${task.cpus} --compression-level 1 >> dedup.log 2>&1
-    """
-}
-
-workflow DEDUPBAM{
-    take:
-    map
-    mapi
-    mapu
-    mapui
-    ubam
-
-    main:
-    ref_ch = Channel.fromPath(REFERENCE)
-    dedup_bam(map.concat(mapu), ubam, ref_ch)
-
-    emit:
-    dedup = dedup_bam.out.dedup_bam
-    dedupbai = dedup_bam.out.dedup_idx
-    deduplog = dedup_bam.out.logs
-}
diff --git a/workflows/fgumi_dedup.nf b/workflows/fgumi_dedup.nf
index 05ff5ed1..608b288b 100644
--- a/workflows/fgumi_dedup.nf
+++ b/workflows/fgumi_dedup.nf
@@ -6,7 +6,7 @@ DEDUPPARAMS = get_always('fgumi_params_DEDUP') ?: ''
 process dedup_bam{
     conda "$DEDUPENV"+".yaml"
     container "oras://jfallmann/monsda:"+"$DEDUPENV"
-    cpus THREADS
+    cpus 4
 	cache 'lenient'
     //validExitStatus 0,1
 
@@ -19,8 +19,8 @@ process dedup_bam{
     }
 
     input:
-    path todedup
-    path bami
+    tuple val(sample_id), path(mapped_bam), path(ubam)
+    path ref
         
     output:
     path "*_dedup.bam", emit: bam
@@ -28,12 +28,20 @@ process dedup_bam{
     path "*_dedup.log", emit: logs
 
     script:
-    bams = todedup[0]
-    bais = todedup[1]
+    bams = mapped_bam
     outf = bams.getSimpleName()+"_dedup.bam"
     outl = bams.getSimpleName()+"_dedup.log"
     """
-    mkdir -p TMP && $DEDUPBIN dedup $DEDUPPARAMS --input $bams --output $outf &> $outl && samtools index $outf &>> $outl
+    mkdir -p TMP
+    ref_dict=\$(basename $ref .gz).dict
+    if [[ ! -f "\${ref_dict}" ]]; then samtools dict $ref -o \${ref_dict} >> $outl 2>&1; fi
+    samtools sort -n -@ ${task.cpus} -o TMP/ubam_qn.bam $ubam >> $outl 2>&1
+    samtools sort -n -@ ${task.cpus} -o TMP/mapped_qn.bam $bams >> $outl 2>&1
+    samtools view -h TMP/mapped_qn.bam | awk 'BEGIN{FS=OFS="\t"} /^@/{print; next} {f=\$2+0; if (!and(f,256) && !and(f,2048)) {k=\$1":"(and(f,64)?1:0)":"(and(f,128)?1:0); if (seen[k]++) next} print}' | samtools view -b -o TMP/mapped_qn_primaryuniq.bam - >> $outl 2>&1
+    $DEDUPBIN zipper --unmapped TMP/ubam_qn.bam --input TMP/mapped_qn_primaryuniq.bam --reference $ref --output TMP/zippered.bam --threads ${task.cpus} --compression-level 1 >> $outl 2>&1
+    $DEDUPBIN sort --order template-coordinate --input TMP/zippered.bam --output TMP/sorted.bam --threads ${task.cpus} --compression-level 1 >> $outl 2>&1
+    $DEDUPBIN dedup $DEDUPPARAMS --input TMP/sorted.bam --output TMP/dedup.bam >> $outl 2>&1
+    $DEDUPBIN sort --order coordinate --input TMP/dedup.bam --output $outf --write-index --threads ${task.cpus} --compression-level 1 >> $outl 2>&1
     """
 }
 
@@ -43,9 +51,32 @@ workflow DEDUPBAM{
     mapi
     mapu
     mapui
+    ubam
 
     main:
-    dedup_bam(map.concat(mapu), mapi.concat(mapui))
+    mapped_ch = map.concat(mapu).map { b ->
+        def n = file(b).getName()
+        def key = n
+            .replaceFirst(/_mapped_sorted_unique\.bam$/, '')
+            .replaceFirst(/_mapped_sorted\.bam$/, '')
+            .replaceFirst(/_R1_dedup_trimmed$/, '')
+            .replaceFirst(/_dedup_trimmed$/, '')
+            .replaceFirst(/_R1_trimmed$/, '')
+            .replaceFirst(/_trimmed$/, '')
+        tuple(key, b)
+    }
+    ubam_ch = ubam.map { u ->
+        def n = file(u).getName()
+        def key = n
+            .replaceFirst(/_fgumi_extract\.bam$/, '')
+            .replaceFirst(/_extracted\.bam$/, '')
+            .replaceFirst(/_R1$/, '')
+        tuple(key, u)
+    }
+    paired_ch = mapped_ch.combine(ubam_ch, by: 0).map { key, mb, ub -> tuple(key, mb, ub) }
+
+    ref_ch = channel.value(file(REFERENCE))
+    dedup_bam(paired_ch, ref_ch)
 
     emit:
     dedup = dedup_bam.out.bam

From cbc335fe1af4bc80e415aef2f145290ff7ba7e6e Mon Sep 17 00:00:00 2001
From: jfallmann <fallmann.joerg@gmail.com>
Date: Fri, 24 Apr 2026 15:52:40 +0200
Subject: [PATCH 13/39] counttable verbosity fix

---
 scripts/Analysis/build_count_table.py | 4 ++++
 workflows/bwa.nf                      | 4 ++--
 workflows/hisat2.nf                   | 4 ++--
 workflows/star.nf                     | 6 +++---
 4 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/scripts/Analysis/build_count_table.py b/scripts/Analysis/build_count_table.py
index 64401669..5a220a8a 100755
--- a/scripts/Analysis/build_count_table.py
+++ b/scripts/Analysis/build_count_table.py
@@ -383,6 +383,8 @@ def prepare_table(
             exc_tb,
         )
         log.error(logid + "".join(tbe.format()))
+        print("".join(tbe.format()), file=sys.stderr)
+        raise SystemExit(1)
 
 
 def make_sample_list(group_name):
@@ -433,6 +435,8 @@ def make_sample_list(group_name):
             exc_tb,
         )
         log.error(logid + "".join(tbe.format()))
+        print("".join(tbe.format()), file=sys.stderr)
+        raise SystemExit(1)
 
 #
 # build_count_table_simple.py ends here
diff --git a/workflows/bwa.nf b/workflows/bwa.nf
index be07f149..e436f6f0 100644
--- a/workflows/bwa.nf
+++ b/workflows/bwa.nf
@@ -89,7 +89,7 @@ process bwa_mapping{
     if (PAIRED == 'paired'){
         r1 = reads[1]
         r2 = reads[2]
-        fn = file(r1).getSimpleName().replaceAll(/\Q_R1_trimmed\E/,"")
+        fn = file(r1).getSimpleName().replaceAll(/_R1(_dedup)?_trimmed$/,"")
         pf = fn+"_mapped.sam.gz"
         uf1 = fn+"_R1_unmapped.fastq.gz"
         uf2 = fn+"_R2_unmapped.fastq.gz"
@@ -99,7 +99,7 @@ process bwa_mapping{
         """
     }else{
         read = reads[1]
-        fn = file(reads[1]).getSimpleName().replaceAll(/\Q_trimmed\E/,"")
+        fn = file(reads[1]).getSimpleName().replaceAll(/(_dedup)?_trimmed$/,"")
         pf = fn+"_mapped.sam.gz"
         uf = fn+"_unmapped.fastq.gz"
         lf = "bwa_"+fn+".log"
diff --git a/workflows/hisat2.nf b/workflows/hisat2.nf
index 55bcffc4..cf3d3f89 100644
--- a/workflows/hisat2.nf
+++ b/workflows/hisat2.nf
@@ -105,7 +105,7 @@ process hisat2_mapping{
 
         r1 = reads[1]
         r2 = reads[2]
-        fn = file(r1).getSimpleName().replaceAll(/\Q_R1_trimmed\E/,"")
+        fn = file(r1).getSimpleName().replaceAll(/_R1(_dedup)?_trimmed$/,"")
         pf = fn+"_mapped.sam"
         ufo = fn+"_R1_unmapped.fastq.gz"
         uft = fn+"_R2_unmapped.fastq.gz"
@@ -122,7 +122,7 @@ process hisat2_mapping{
             stranded = ''
         }
         read = reads[1]
-        fn = file(reads[1]).getSimpleName().replaceAll(/\Q_trimmed\E/,"")
+        fn = file(reads[1]).getSimpleName().replaceAll(/(_dedup)?_trimmed$/,"")
         pf = fn+"_mapped.sam"
         uf = fn+"_unmapped.fastq.gz"
         lf = "hisat2_"+fn+".log"
diff --git a/workflows/star.nf b/workflows/star.nf
index 83681e95..62c3563a 100644
--- a/workflows/star.nf
+++ b/workflows/star.nf
@@ -96,7 +96,7 @@ process star_mapping{
         r1 = reads[1]
         r2 = reads[2]
         a = "Trimming_report.txt"
-        fn = file(r1).getSimpleName().replaceAll(/\Q_R1_trimmed\E/,"")
+        fn = file(r1).getSimpleName().replaceAll(/_R1(_dedup)?_trimmed$/,"")
         of = fn+'.Aligned.out.sam'
         gf = of.replaceAll(/\Q.Aligned.out.sam\E/,"_mapped.sam.gz")
         """
@@ -106,7 +106,7 @@ process star_mapping{
     else{
         if (PAIRED != 'singlecell'){
             read = reads[1]
-            fn = file(reads[1]).getSimpleName().replaceAll(/\Q_trimmed\E/,"")+"."
+            fn = file(reads[1]).getSimpleName().replaceAll(/(_dedup)?_trimmed$/,"")+"."
             of = fn+'Aligned.out.sam'
             gf = of.replaceAll(/\Q.Aligned.out.sam\E/,"_mapped.sam.gz")
             """
@@ -122,7 +122,7 @@ process star_mapping{
                 stranded = '--soloStrand Unstranded'
             }
             r1 = reads[1]
-            fn = file(r1).getSimpleName().replaceAll(/\Q_R1_trimmed\E/,"")
+            fn = file(r1).getSimpleName().replaceAll(/_R1(_dedup)?_trimmed$/,"")
             r2 = "${workflow.workDir}/../FASTQ/${CONDITION}/"+file(reads[2]).getSimpleName().replaceAll(/\QR2_trimmed\E/,"R2.fastq.gz")
             if (MAPPARAMS.contains('--soloBarcodeMate 1')){
                 t = r2

From 2e13a34283dfba3bc685e7f0229fbe99ce1856a6 Mon Sep 17 00:00:00 2001
From: jfallmann <fallmann.joerg@gmail.com>
Date: Mon, 27 Apr 2026 13:33:46 +0200
Subject: [PATCH 14/39] docs fix

---
 docs/source/workflows.rst | 2 --
 1 file changed, 2 deletions(-)

diff --git a/docs/source/workflows.rst b/docs/source/workflows.rst
index 3295b4f5..008756bc 100644
--- a/docs/source/workflows.rst
+++ b/docs/source/workflows.rst
@@ -75,8 +75,6 @@ QUALITY CONTROL I
 
 This workflow step can be run as preprocessing step if none of the processing workflows is defined in the config.json.
 
-*rustqc* is intended for mapped BAM-level QC and is therefore generally most useful in processing mode after mapping outputs are available.
-
 .. table:: 
   :widths: 10, 40, 10, 10, 10, 10, 10
   :class: tight-table

From 6a0d2aa77644f8b445d454fc3a1c02c12b5a3a75 Mon Sep 17 00:00:00 2001
From: jfallmann <fallmann.joerg@gmail.com>
Date: Mon, 27 Apr 2026 13:34:01 +0200
Subject: [PATCH 15/39] counttable build from nf dedup fix

---
 workflows/countreads.nf | 3 ++-
 workflows/deseq2_DE.nf  | 3 ++-
 workflows/dexseq_DEU.nf | 3 ++-
 workflows/edger_DAS.nf  | 3 ++-
 workflows/edger_DE.nf   | 3 ++-
 workflows/edger_DEU.nf  | 3 ++-
 6 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/workflows/countreads.nf b/workflows/countreads.nf
index b542cfab..b54cf7b2 100644
--- a/workflows/countreads.nf
+++ b/workflows/countreads.nf
@@ -252,7 +252,8 @@ process prepare_count_table{
 
     script:
     """
-    ${BINS}/Analysis/build_count_table.py $DEREPS --table COUNTS.gz --anno ANNOTATION.gz --nextflow 2> log
+    reps_csv=\$(for f in $reps; do basename "\$f"; done | paste -sd, -)
+    ${BINS}/Analysis/build_count_table.py $DEREPS -r \$reps_csv --table COUNTS.gz --anno ANNOTATION.gz --nextflow 2> log
     """
 }
 
diff --git a/workflows/deseq2_DE.nf b/workflows/deseq2_DE.nf
index e39a645c..d555c913 100644
--- a/workflows/deseq2_DE.nf
+++ b/workflows/deseq2_DE.nf
@@ -94,7 +94,8 @@ process prepare_count_table{
 
     script:
     """
-    ${BINS}/Analysis/build_count_table.py $DEREPS --table COUNTS.gz --anno ANNOTATION.gz --nextflow 2> log
+    reps_csv=\$(for f in $reps; do basename "\$f"; done | paste -sd, -)
+    ${BINS}/Analysis/build_count_table.py $DEREPS -r \$reps_csv --table COUNTS.gz --anno ANNOTATION.gz --nextflow 2> log
     """
 }
 
diff --git a/workflows/dexseq_DEU.nf b/workflows/dexseq_DEU.nf
index 8c954541..8d938a17 100644
--- a/workflows/dexseq_DEU.nf
+++ b/workflows/dexseq_DEU.nf
@@ -133,7 +133,8 @@ process prepare_count_table{
 
     script:
     """
-    ${BINS}/Analysis/build_count_table.py $DEUREPS --table COUNTS.gz --anno ANNOTATION.gz --nextflow 2> log
+    reps_csv=\$(for f in $reps; do basename "\$f"; done | paste -sd, -)
+    ${BINS}/Analysis/build_count_table.py $DEUREPS -r \$reps_csv --table COUNTS.gz --anno ANNOTATION.gz --nextflow 2> log
     """
 }
 
diff --git a/workflows/edger_DAS.nf b/workflows/edger_DAS.nf
index 9fa62d2b..22806a0a 100644
--- a/workflows/edger_DAS.nf
+++ b/workflows/edger_DAS.nf
@@ -94,7 +94,8 @@ process prepare_count_table{
 
     script:
     """
-    ${BINS}/Analysis/build_count_table.py $DASREPS --ids --table COUNTS.gz --anno ANNOTATION.gz --nextflow 2> log
+    reps_csv=\$(for f in $reps; do basename "\$f"; done | paste -sd, -)
+    ${BINS}/Analysis/build_count_table.py $DASREPS --ids -r \$reps_csv --table COUNTS.gz --anno ANNOTATION.gz --nextflow 2> log
     """
 }
 
diff --git a/workflows/edger_DE.nf b/workflows/edger_DE.nf
index 43e0160e..a4683376 100644
--- a/workflows/edger_DE.nf
+++ b/workflows/edger_DE.nf
@@ -94,7 +94,8 @@ process prepare_count_table{
 
     script:
     """
-    ${BINS}/Analysis/build_count_table.py $DEREPS --table COUNTS.gz --anno ANNOTATION.gz --nextflow 2> log
+    reps_csv=\$(for f in $reps; do basename "\$f"; done | paste -sd, -)
+    ${BINS}/Analysis/build_count_table.py $DEREPS -r \$reps_csv --table COUNTS.gz --anno ANNOTATION.gz --nextflow 2> log
     """
 }
 
diff --git a/workflows/edger_DEU.nf b/workflows/edger_DEU.nf
index 7891d95d..aa1e5146 100644
--- a/workflows/edger_DEU.nf
+++ b/workflows/edger_DEU.nf
@@ -94,7 +94,8 @@ process prepare_count_table{
 
     script:
     """
-    ${BINS}/Analysis/build_count_table.py $DEUREPS --ids --table COUNTS.gz --anno ANNOTATION.gz --nextflow 2> log
+    reps_csv=\$(for f in $reps; do basename "\$f"; done | paste -sd, -)
+    ${BINS}/Analysis/build_count_table.py $DEUREPS --ids -r \$reps_csv --table COUNTS.gz --anno ANNOTATION.gz --nextflow 2> log
     """
 }
 

From 6ad833bb8755cc1e0bd5f8a26b89dc0aa38b6829 Mon Sep 17 00:00:00 2001
From: jfallmann <fallmann.joerg@gmail.com>
Date: Mon, 27 Apr 2026 15:05:50 +0200
Subject: [PATCH 16/39] fix in output path generation, we never split QC

---
 MONSDA/Params.py    | 10 +++++++---
 MONSDA/Workflows.py |  7 ++++++-
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/MONSDA/Params.py b/MONSDA/Params.py
index e59c00ef..f342292e 100644
--- a/MONSDA/Params.py
+++ b/MONSDA/Params.py
@@ -1558,9 +1558,13 @@ def get_combo_name(combinations: list) -> mu.NestedDefaultDict:
             envs = list()
             works = list()
             for step in combi:
-                for work, env in step.items():
-                    envs.append(env)
-                    works.append(work)
+                # A step can be a single dict (default) or a list of dicts
+                # (QC all-tools grouping from get_combo).
+                grouped_steps = step if isinstance(step, list) else [step]
+                for grouped_step in grouped_steps:
+                    for work, env in grouped_step.items():
+                        envs.append(env)
+                        works.append(work)
             combname[condition]["envs"].append(str.join("-", envs))
             combname[condition]["works"].append(str.join("-", works))
 
diff --git a/MONSDA/Workflows.py b/MONSDA/Workflows.py
index 3b93851f..b2f01153 100755
--- a/MONSDA/Workflows.py
+++ b/MONSDA/Workflows.py
@@ -192,7 +192,12 @@ def get_combo(wfs, config, conditions):
                     + " with Tool: "
                     + str(tools)
                 )
-            ret.append(tools)
+            # For QC, run all configured QC tools in the same workflow combo
+            # (instead of creating one combo per QC tool).
+            if subwork == "QC":
+                ret.append([tools])
+            else:
+                ret.append(tools)
 
         log.debug(f"{logid} Itertools {ret}")
         combos[condition] = itertools.product(*ret)

From 1a2bc9f157fb7433ae5deb987ede0628eab3a7f1 Mon Sep 17 00:00:00 2001
From: jfallmann <fallmann.joerg@gmail.com>
Date: Mon, 27 Apr 2026 20:04:11 +0200
Subject: [PATCH 17/39] split in pre and postqc workflows

---
 MONSDA/Params.py                |  10 +---
 MONSDA/Workflows.py             | 101 ++++++++++++++++++++++++--------
 workflows/fastqc.nf             |   4 +-
 workflows/fastqc.smk            |   2 +-
 workflows/fastqc_dedup.nf       |   4 +-
 workflows/fastqc_dedup.smk      |   2 +-
 workflows/fastqc_dedup_trim.nf  |   4 +-
 workflows/fastqc_dedup_trim.smk |   2 +-
 workflows/fastqc_raw.nf         |   4 +-
 workflows/fastqc_raw.smk        |   2 +-
 workflows/fastqc_trim.nf        |   4 +-
 workflows/fastqc_trim.smk       |   2 +-
 workflows/multiqc.nf            |   4 +-
 workflows/multiqc_rustqc.nf     |  65 ++++++++++++++++++++
 workflows/rustqc.nf             |   4 +-
 workflows/rustqc.smk            |   2 +-
 16 files changed, 165 insertions(+), 51 deletions(-)
 create mode 100644 workflows/multiqc_rustqc.nf

diff --git a/MONSDA/Params.py b/MONSDA/Params.py
index f342292e..e59c00ef 100644
--- a/MONSDA/Params.py
+++ b/MONSDA/Params.py
@@ -1558,13 +1558,9 @@ def get_combo_name(combinations: list) -> mu.NestedDefaultDict:
             envs = list()
             works = list()
             for step in combi:
-                # A step can be a single dict (default) or a list of dicts
-                # (QC all-tools grouping from get_combo).
-                grouped_steps = step if isinstance(step, list) else [step]
-                for grouped_step in grouped_steps:
-                    for work, env in grouped_step.items():
-                        envs.append(env)
-                        works.append(work)
+                for work, env in step.items():
+                    envs.append(env)
+                    works.append(work)
             combname[condition]["envs"].append(str.join("-", envs))
             combname[condition]["works"].append(str.join("-", works))
 
diff --git a/MONSDA/Workflows.py b/MONSDA/Workflows.py
index b2f01153..4eae67c3 100755
--- a/MONSDA/Workflows.py
+++ b/MONSDA/Workflows.py
@@ -192,12 +192,7 @@ def get_combo(wfs, config, conditions):
                     + " with Tool: "
                     + str(tools)
                 )
-            # For QC, run all configured QC tools in the same workflow combo
-            # (instead of creating one combo per QC tool).
-            if subwork == "QC":
-                ret.append([tools])
-            else:
-                ret.append(tools)
+            ret.append(tools)
 
         log.debug(f"{logid} Itertools {ret}")
         combos[condition] = itertools.product(*ret)
@@ -569,8 +564,15 @@ def make_pre(
                         toolenv, toolbin = map(str, listoftools[a])
                         if toolenv != envs[j] or toolbin is None:
                             continue
-                        sconf[works[j] + "ENV"] = toolenv
-                        sconf[works[j] + "BIN"] = toolbin
+                        if works[j] == "QC":
+                            _qc_prefix = "POSTQC" if toolenv == "rustqc" else "PREQC"
+                            sconf[_qc_prefix + "ENV"] = toolenv
+                            sconf[_qc_prefix + "BIN"] = toolbin
+                            sconf.pop("QCENV", None)
+                            sconf.pop("QCBIN", None)
+                        else:
+                            sconf[works[j] + "ENV"] = toolenv
+                            sconf[works[j] + "BIN"] = toolbin
                         subconf.update(sconf)
                         subname = toolenv + ".smk"
 
@@ -695,8 +697,15 @@ def make_pre(
                 if toolenv is None or toolbin is None:
                     continue
                 subconf = mu.NestedDefaultDict()
-                sconf[subwork + "ENV"] = toolenv
-                sconf[subwork + "BIN"] = toolbin
+                if subwork == "QC":
+                    _qc_prefix = "POSTQC" if toolenv == "rustqc" else "PREQC"
+                    sconf[_qc_prefix + "ENV"] = toolenv
+                    sconf[_qc_prefix + "BIN"] = toolbin
+                    sconf.pop("QCENV", None)
+                    sconf.pop("QCBIN", None)
+                else:
+                    sconf[subwork + "ENV"] = toolenv
+                    sconf[subwork + "BIN"] = toolbin
                 subconf.update(sconf)
                 subname = toolenv + ".smk"
 
@@ -884,8 +893,15 @@ def make_sub(
                                 toolenv.replace("bisulfite", "_bisulfite") + ".smk"
                             )
 
-                        sconf[works[j] + "ENV"] = toolenv
-                        sconf[works[j] + "BIN"] = toolbin
+                        if works[j] == "QC":
+                            _qc_prefix = "POSTQC" if toolenv == "rustqc" else "PREQC"
+                            sconf[_qc_prefix + "ENV"] = toolenv
+                            sconf[_qc_prefix + "BIN"] = toolbin
+                            sconf.pop("QCENV", None)
+                            sconf.pop("QCBIN", None)
+                        else:
+                            sconf[works[j] + "ENV"] = toolenv
+                            sconf[works[j] + "BIN"] = toolbin
 
                         subconf.update(sconf)
                         log.debug(logid + f"SCONF:{sconf}, SUBCONF:{subconf}")
@@ -1071,8 +1087,15 @@ def make_sub(
                     ):  # Here we can add tool specific extra cases, like e.g segehmehl bisulfite mode
                         subname = toolenv.replace("bisulfite", "_bisulfite") + ".smk"
                     subconf = mu.NestedDefaultDict()
-                    sconf[subwork + "ENV"] = toolenv
-                    sconf[subwork + "BIN"] = toolbin
+                    if subwork == "QC":
+                        _qc_prefix = "POSTQC" if toolenv == "rustqc" else "PREQC"
+                        sconf[_qc_prefix + "ENV"] = toolenv
+                        sconf[_qc_prefix + "BIN"] = toolbin
+                        sconf.pop("QCENV", None)
+                        sconf.pop("QCBIN", None)
+                    else:
+                        sconf[subwork + "ENV"] = toolenv
+                        sconf[subwork + "BIN"] = toolbin
                     subconf.update(sconf)
 
                     # RustQC is post-alignment QC only; skip if no MAPPING
@@ -2475,8 +2498,15 @@ def nf_make_pre(
                             continue
 
                         subsamples = mp.get_samples(sconf)
-                        sconf[works[j] + "ENV"] = toolenv
-                        sconf[works[j] + "BIN"] = toolbin
+                        if works[j] == "QC":
+                            _qc_prefix = "POSTQC" if toolenv == "rustqc" else "PREQC"
+                            sconf[_qc_prefix + "ENV"] = toolenv
+                            sconf[_qc_prefix + "BIN"] = toolbin
+                            sconf.pop("QCENV", None)
+                            sconf.pop("QCBIN", None)
+                        else:
+                            sconf[works[j] + "ENV"] = toolenv
+                            sconf[works[j] + "BIN"] = toolbin
                         subconf.merge(sconf)
 
                         subconf[works[j]] = mu.add_to_innermost_key_by_list(
@@ -2666,8 +2696,15 @@ def nf_make_pre(
                 toolenv, toolbin = map(str, listoftools[i])
                 if toolenv is None or toolbin is None:
                     continue
-                sconf[subwork + "ENV"] = toolenv
-                sconf[subwork + "BIN"] = toolbin
+                if subwork == "QC":
+                    _qc_prefix = "POSTQC" if toolenv == "rustqc" else "PREQC"
+                    sconf[_qc_prefix + "ENV"] = toolenv
+                    sconf[_qc_prefix + "BIN"] = toolbin
+                    sconf.pop("QCENV", None)
+                    sconf.pop("QCBIN", None)
+                else:
+                    sconf[subwork + "ENV"] = toolenv
+                    sconf[subwork + "BIN"] = toolbin
                 subconf.merge(sconf)
 
                 subconf[subwork] = mu.add_to_innermost_key_by_list(
@@ -2888,8 +2925,15 @@ def nf_make_sub(
                         ):  # Here we can add tool specific extra cases, like e.g segehmehl bisulfite mode
                             subname = toolenv.replace("bisulfite", "_bisulfite") + ".nf"
                         subsamples = mp.get_samples(sconf)
-                        sconf[works[j] + "ENV"] = toolenv
-                        sconf[works[j] + "BIN"] = toolbin
+                        if works[j] == "QC":
+                            _qc_prefix = "POSTQC" if toolenv == "rustqc" else "PREQC"
+                            sconf[_qc_prefix + "ENV"] = toolenv
+                            sconf[_qc_prefix + "BIN"] = toolbin
+                            sconf.pop("QCENV", None)
+                            sconf.pop("QCBIN", None)
+                        else:
+                            sconf[works[j] + "ENV"] = toolenv
+                            sconf[works[j] + "BIN"] = toolbin
                         subconf.merge(sconf)
 
                         subconf[works[j]] = mu.add_to_innermost_key_by_list(
@@ -3054,7 +3098,8 @@ def nf_make_sub(
 
                 if "QC" in works:
                     flowlist.append("MULTIQC")
-                    nfi = os.path.abspath(os.path.join(workflowpath, "multiqc.nf"))
+                    _mqc_nf = "multiqc_rustqc.nf" if "QC_MAPPING" in flowlist and "QC_RAW" not in flowlist else "multiqc.nf"
+                    nfi = os.path.abspath(os.path.join(workflowpath, _mqc_nf))
                     with open(nfi, "r") as nf:
                         for line in mu.comment_remover(nf.readlines()):
                             line = re.sub(condapath, 'conda "' + envpath, line)
@@ -3250,8 +3295,15 @@ def nf_make_sub(
                     ):  # Here we can add tool specific extra cases, like e.g segehmehl bisulfite mode
                         subname = toolenv.replace("bisulfite", "_bisulfite") + ".nf"
 
-                    sconf[subwork + "ENV"] = toolenv
-                    sconf[subwork + "BIN"] = toolbin
+                    if subwork == "QC":
+                        _qc_prefix = "POSTQC" if toolenv == "rustqc" else "PREQC"
+                        sconf[_qc_prefix + "ENV"] = toolenv
+                        sconf[_qc_prefix + "BIN"] = toolbin
+                        sconf.pop("QCENV", None)
+                        sconf.pop("QCBIN", None)
+                    else:
+                        sconf[subwork + "ENV"] = toolenv
+                        sconf[subwork + "BIN"] = toolbin
                     subconf.merge(sconf)
 
                     subconf[subwork] = mu.add_to_innermost_key_by_list(
@@ -3406,7 +3458,8 @@ def nf_make_sub(
 
             if "QC" in subworkflows:
                 flowlist.append("MULTIQC")
-                nfi = os.path.abspath(os.path.join(workflowpath, "multiqc.nf"))
+                _mqc_nf = "multiqc_rustqc.nf" if "QC_MAPPING" in flowlist and "QC_RAW" not in flowlist else "multiqc.nf"
+                nfi = os.path.abspath(os.path.join(workflowpath, _mqc_nf))
                 with open(nfi, "r") as nf:
                     for line in mu.comment_remover(nf.readlines()):
                         line = re.sub(condapath, 'conda "' + envpath, line)
diff --git a/workflows/fastqc.nf b/workflows/fastqc.nf
index 9ef084e1..77bd87d0 100644
--- a/workflows/fastqc.nf
+++ b/workflows/fastqc.nf
@@ -1,5 +1,5 @@
-QCENV=get_always('QCENV')
-QCBIN=get_always('QCBIN')
+QCENV=get_always('PREQCENV')
+QCBIN=get_always('PREQCBIN')
 QCPARAMS = get_always('fastqc_params_QC') ?: ''
 
 //QC RAW
diff --git a/workflows/fastqc.smk b/workflows/fastqc.smk
index b25b1787..7694bbaa 100644
--- a/workflows/fastqc.smk
+++ b/workflows/fastqc.smk
@@ -1,4 +1,4 @@
-QCBIN, QCENV = env_bin_from_config(config, 'QC')
+QCBIN, QCENV = env_bin_from_config(config, 'PREQC')
 
 if paired == 'paired':
     log.info('Running paired mode QC')
diff --git a/workflows/fastqc_dedup.nf b/workflows/fastqc_dedup.nf
index 62cf54a3..85665236 100644
--- a/workflows/fastqc_dedup.nf
+++ b/workflows/fastqc_dedup.nf
@@ -1,5 +1,5 @@
-QCENV=get_always('QCENV')
-QCBIN=get_always('QCBIN')
+QCENV=get_always('PREQCENV')
+QCBIN=get_always('PREQCBIN')
 QCPARAMS = get_always('fastqc_params_QC') ?: ''
 
 // RAW QC
diff --git a/workflows/fastqc_dedup.smk b/workflows/fastqc_dedup.smk
index b9f8676e..923115c0 100644
--- a/workflows/fastqc_dedup.smk
+++ b/workflows/fastqc_dedup.smk
@@ -1,4 +1,4 @@
-QCBIN, QCENV = env_bin_from_config(config, 'QC')
+QCBIN, QCENV = env_bin_from_config(config, 'PREQC')
 #outdir = 'QC/'+str(QCENV)+'/'
 #moutdir = 'QC/Multi/'+str(QCENV)+'/'
 
diff --git a/workflows/fastqc_dedup_trim.nf b/workflows/fastqc_dedup_trim.nf
index d875ba3e..2e53b7f2 100644
--- a/workflows/fastqc_dedup_trim.nf
+++ b/workflows/fastqc_dedup_trim.nf
@@ -1,5 +1,5 @@
-QCENV=get_always('QCENV')
-QCBIN=get_always('QCBIN')
+QCENV=get_always('PREQCENV')
+QCBIN=get_always('PREQCBIN')
 QCPARAMS = get_always('fastqc_params_QC') ?: ''
 
 //QC RAW
diff --git a/workflows/fastqc_dedup_trim.smk b/workflows/fastqc_dedup_trim.smk
index c5b12743..08d5f183 100644
--- a/workflows/fastqc_dedup_trim.smk
+++ b/workflows/fastqc_dedup_trim.smk
@@ -1,4 +1,4 @@
-QCBIN, QCENV = env_bin_from_config( config, 'QC')
+QCBIN, QCENV = env_bin_from_config(config, 'PREQC')
 #outdir = 'QC/'+str(QCENV)+'/'
 #moutdir = 'QC/Multi/'+str(QCENV)+'/'
 
diff --git a/workflows/fastqc_raw.nf b/workflows/fastqc_raw.nf
index d83e612c..70e1bb81 100644
--- a/workflows/fastqc_raw.nf
+++ b/workflows/fastqc_raw.nf
@@ -1,5 +1,5 @@
-QCENV=get_always('QCENV')
-QCBIN=get_always('QCBIN')
+QCENV=get_always('PREQCENV')
+QCBIN=get_always('PREQCBIN')
 QCPARAMS = get_always('fastqc_params_QC') ?: ''
 
 process qc_raw{
diff --git a/workflows/fastqc_raw.smk b/workflows/fastqc_raw.smk
index b9dfe662..1fe7ab37 100644
--- a/workflows/fastqc_raw.smk
+++ b/workflows/fastqc_raw.smk
@@ -1,4 +1,4 @@
-QCBIN, QCENV = env_bin_from_config(config, 'QC')
+QCBIN, QCENV = env_bin_from_config(config, 'PREQC')
 
 if paired == 'paired':
     log.info('Running paired mode QC')
diff --git a/workflows/fastqc_trim.nf b/workflows/fastqc_trim.nf
index 5aa1f262..badcea20 100644
--- a/workflows/fastqc_trim.nf
+++ b/workflows/fastqc_trim.nf
@@ -1,5 +1,5 @@
-QCENV=get_always('QCENV')
-QCBIN=get_always('QCBIN')
+QCENV=get_always('PREQCENV')
+QCBIN=get_always('PREQCBIN')
 QCPARAMS = get_always('fastqc_params_QC') ?: ''
 
 // RAW QC
diff --git a/workflows/fastqc_trim.smk b/workflows/fastqc_trim.smk
index c4088665..5446e889 100644
--- a/workflows/fastqc_trim.smk
+++ b/workflows/fastqc_trim.smk
@@ -1,4 +1,4 @@
-QCBIN, QCENV = env_bin_from_config( config, 'QC')
+QCBIN, QCENV = env_bin_from_config(config, 'PREQC')
 #outdir = 'QC/'+str(QCENV)+'/'
 #moutdir = 'QC/Multi/'+str(QCENV)+'/'
 
diff --git a/workflows/multiqc.nf b/workflows/multiqc.nf
index e82010ce..f51c1c0d 100644
--- a/workflows/multiqc.nf
+++ b/workflows/multiqc.nf
@@ -1,5 +1,5 @@
-QCENV=get_always('QCENV')
-QCBIN=get_always('QCBIN')
+QCENV=get_always('PREQCENV')
+QCBIN=get_always('PREQCBIN')
 QCPARAMS = get_always('fastqc_params_MULTI') ?: ''
 
 process mqc{
diff --git a/workflows/multiqc_rustqc.nf b/workflows/multiqc_rustqc.nf
new file mode 100644
index 00000000..71cfe752
--- /dev/null
+++ b/workflows/multiqc_rustqc.nf
@@ -0,0 +1,65 @@
+QCENV=get_always('POSTQCENV')
+QCBIN=get_always('POSTQCBIN')
+QCPARAMS = get_always('rustqc_params_MULTI') ?: ''
+
+process mqc{
+    conda "$QCENV"+".yaml"
+    container "oras://jfallmann/monsda:"+"$QCENV"
+    cpus THREADS
+	cache 'lenient'
+    //validExitStatus 0,1
+
+    publishDir "${workflow.workDir}/../" , mode: 'link',
+    saveAs: {filename ->
+        if (filename.indexOf("zip") > 0)          "QC/Multi/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.zip"
+        else if (filename.indexOf("html") > 0)    "QC/Multi/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.html"
+        else "QC/Multi/${COMBO}/${CONDITION}/${file(filename).getName()}"
+    }
+
+    input:
+    path others, stageAs: 'mqc_input??/*'
+    //path samples
+
+    output:
+    path "*.zip", emit: mqc
+    path "*.html", emit: html
+
+    script:
+    """
+    touch $others
+    OUT=\${PWD}
+    LIST=multiqc_inputs.txt
+    TMP_LIST=multiqc_inputs_unique.txt
+    BASE_QC_DIR="${workflow.workDir}/../QC"
+    COMBO_VAL="${COMBO}"
+    CONDITION_VAL="${CONDITION}"
+
+    for i in $others; do
+        dirname "\$i" >> "\$LIST"
+    done
+
+    # If the corresponding fastqc combo exists, include its output in the MultiQC report.
+    FQ_COMBO="\${COMBO_VAL/rustqc/fastqc}"
+    FQ_DIR="\${BASE_QC_DIR}/\${FQ_COMBO}/\${CONDITION_VAL}"
+    if [[ -d "\$FQ_DIR" ]]; then
+        echo "\$FQ_DIR" >> "\$LIST"
+    fi
+
+    sort -u "\$LIST" > "\$TMP_LIST"
+    # multiqc needs a UTF-8 locale; LC_ALL is set once because a later export overrides earlier ones.
+    export LC_ALL=C.UTF-8
+    multiqc -f --exclude picard --exclude gatk -k json -z -s -o "\$OUT" -l "\$TMP_LIST"
+    """
+}
+
+workflow MULTIQC{
+    take:
+    otherqcs
+    
+    main:
+    
+    mqc(otherqcs.collect())
+
+    emit:
+    mqcres = mqc.out.mqc
+}
diff --git a/workflows/rustqc.nf b/workflows/rustqc.nf
index 1505b0a3..7808bf23 100644
--- a/workflows/rustqc.nf
+++ b/workflows/rustqc.nf
@@ -1,5 +1,5 @@
-QCENV = get_always('QCENV')
-QCBIN = get_always('QCBIN')
+QCENV = get_always('POSTQCENV')
+QCBIN = get_always('POSTQCBIN')
 QCPARAMS = get_always('rustqc_params_QC') ?: ''
 
 MAPANNO = get_always('MAPPINGANNO')
diff --git a/workflows/rustqc.smk b/workflows/rustqc.smk
index 67fa3e93..d80863e5 100644
--- a/workflows/rustqc.smk
+++ b/workflows/rustqc.smk
@@ -1,4 +1,4 @@
-QCBIN, QCENV = env_bin_from_config(config, 'QC')
+QCBIN, QCENV = env_bin_from_config(config, 'POSTQC')
 
 # Map MONSDA strandedness to RustQC strandedness
 def rustqc_stranded(stranded):

From 4e2e2081c41a7a58a1744ca6cb59a9f521d85f48 Mon Sep 17 00:00:00 2001
From: jfallmann <fallmann.joerg@gmail.com>
Date: Mon, 27 Apr 2026 20:15:47 +0200
Subject: [PATCH 18/39] merge QC into one stage

---
 MONSDA/Params.py    | 12 +++++++++---
 MONSDA/Workflows.py |  7 ++++++-
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/MONSDA/Params.py b/MONSDA/Params.py
index e59c00ef..dfa66e28 100644
--- a/MONSDA/Params.py
+++ b/MONSDA/Params.py
@@ -1558,9 +1558,15 @@ def get_combo_name(combinations: list) -> mu.NestedDefaultDict:
             envs = list()
             works = list()
             for step in combi:
-                for work, env in step.items():
-                    envs.append(env)
-                    works.append(work)
+                if isinstance(step, list):
+                    for substep in step:
+                        for work, env in substep.items():
+                            envs.append(env)
+                            works.append(work)
+                else:
+                    for work, env in step.items():
+                        envs.append(env)
+                        works.append(work)
             combname[condition]["envs"].append(str.join("-", envs))
             combname[condition]["works"].append(str.join("-", works))
 
diff --git a/MONSDA/Workflows.py b/MONSDA/Workflows.py
index 4eae67c3..6e3f2938 100755
--- a/MONSDA/Workflows.py
+++ b/MONSDA/Workflows.py
@@ -192,7 +192,12 @@ def get_combo(wfs, config, conditions):
                     + " with Tool: "
                     + str(tools)
                 )
-            ret.append(tools)
+            # Group all QC tools into a single combo position so pre-QC
+            # (fastqc) and post-QC (rustqc) both appear in one combo name.
+            if subwork == "QC" and len(tools) > 1:
+                ret.append([tools])  # one option = the entire list of QC tools
+            else:
+                ret.append(tools)
 
         log.debug(f"{logid} Itertools {ret}")
         combos[condition] = itertools.product(*ret)

From 3e8d4ee98d978a16270de8f8105043500a096ba1 Mon Sep 17 00:00:00 2001
From: jfallmann <fallmann.joerg@gmail.com>
Date: Mon, 27 Apr 2026 20:25:21 +0200
Subject: [PATCH 19/39] nf merge qc fix

---
 MONSDA/Workflows.py | 95 +++++++++++++++++++++++++++++++++++++--------
 workflows/rustqc.nf |  2 +-
 2 files changed, 80 insertions(+), 17 deletions(-)

diff --git a/MONSDA/Workflows.py b/MONSDA/Workflows.py
index 6e3f2938..650d612e 100755
--- a/MONSDA/Workflows.py
+++ b/MONSDA/Workflows.py
@@ -2958,7 +2958,7 @@ def nf_make_sub(
                             if toolenv == "rustqc":
                                 # RustQC is post-alignment QC only
                                 if "MAPPING" in works:
-                                    flowlist.append("QC_MAPPING")
+                                    flowlist.append("RUSTQC_MAPPING")
                                 else:
                                     log.warning(
                                         logid
@@ -3103,7 +3103,14 @@ def nf_make_sub(
 
                 if "QC" in works:
                     flowlist.append("MULTIQC")
-                    _mqc_nf = "multiqc_rustqc.nf" if "QC_MAPPING" in flowlist and "QC_RAW" not in flowlist else "multiqc.nf"
+                    _mqc_nf = (
+                        "multiqc_rustqc.nf"
+                        if (
+                            ("QC_MAPPING" in flowlist or "RUSTQC_MAPPING" in flowlist)
+                            and "QC_RAW" not in flowlist
+                        )
+                        else "multiqc.nf"
+                    )
                     nfi = os.path.abspath(os.path.join(workflowpath, _mqc_nf))
                     with open(nfi, "r") as nf:
                         for line in mu.comment_remover(nf.readlines()):
@@ -3124,6 +3131,14 @@ def nf_make_sub(
                 # workflow merger
                 log.debug("FLOWLIST: " + str(flowlist))
 
+                map_qc_chan = "QC_MAPPING.out.qc"
+                if "RUSTQC_MAPPING" in flowlist:
+                    map_qc_chan = (
+                        "QC_MAPPING.out.qc.concat(RUSTQC_MAPPING.out.qc)"
+                        if "QC_MAPPING" in flowlist
+                        else "RUSTQC_MAPPING.out.qc"
+                    )
+
                 subjobs.append("\n\n" + "workflow {\n")
                 for w in [
                     "QC_RAW",
@@ -3134,6 +3149,7 @@ def nf_make_sub(
                     "MAPPING",
                     "DEDUPBAM",
                     "QC_MAPPING",
+                    "RUSTQC_MAPPING",
                     "MULTIQC",
                 ]:
                     if w in flowlist:
@@ -3185,17 +3201,33 @@ def nf_make_sub(
                                     + w
                                     + "(POSTMAPPING.out.postmap.concat(POSTMAPPING.out.postmapuni))\n"
                                 )
+                        elif w == "RUSTQC_MAPPING":
+                            if "DEDUPBAM" in flowlist:
+                                subjobs.append(
+                                    " " * 4
+                                    + w
+                                    + "(POSTMAPPING.out.postmap.concat(POSTMAPPING.out.postmapuni.concat(DEDUPBAM.out.dedup)))\n"
+                                )
+                            else:
+                                subjobs.append(
+                                    " " * 4
+                                    + w
+                                    + "(POSTMAPPING.out.postmap.concat(POSTMAPPING.out.postmapuni))\n"
+                                )
                         elif w == "MULTIQC":
-                            if "QC_RAW" not in flowlist and "QC_MAPPING" in flowlist:
+                            if "QC_RAW" not in flowlist and (
+                                "QC_MAPPING" in flowlist
+                                or "RUSTQC_MAPPING" in flowlist
+                            ):
                                 # RustQC: only BAM-level QC, no FASTQ QC
                                 subjobs.append(
-                                    " " * 4 + w + "(QC_MAPPING.out.qc.collect())\n"
+                                    " " * 4 + w + f"({map_qc_chan}.collect())\n"
                                 )
                             elif "DEDUPBAM" in flowlist and "QC_TRIMMING" in flowlist:
                                 subjobs.append(
                                     " " * 4
                                     + w
-                                    + "(QC_RAW.out.qc.concat(QC_TRIMMING.out.qc.concat(QC_MAPPING.out.qc.concat(MAPPING.out.logs))).collect())\n"
+                                    + f"(QC_RAW.out.qc.concat(QC_TRIMMING.out.qc.concat({map_qc_chan}.concat(MAPPING.out.logs))).collect())\n"
                                 )
                             elif (
                                 "DEDUPBAM" in flowlist and "QC_TRIMMING" not in flowlist
@@ -3203,13 +3235,13 @@ def nf_make_sub(
                                 subjobs.append(
                                     " " * 4
                                     + w
-                                    + "(QC_RAW.out.qc.concat(QC_MAPPING.out.qc.concat(MAPPING.out.logs)).collect())\n"
+                                    + f"(QC_RAW.out.qc.concat({map_qc_chan}.concat(MAPPING.out.logs)).collect())\n"
                                 )
                             elif "MAPPING" in flowlist and "QC_TRIMMING" in flowlist:
                                 subjobs.append(
                                     " " * 4
                                     + w
-                                    + "(QC_RAW.out.qc.concat(QC_TRIMMING.out.qc.concat(QC_MAPPING.out.qc.concat(POSTMAPPING.out.postmapuni))).collect())\n"
+                                    + f"(QC_RAW.out.qc.concat(QC_TRIMMING.out.qc.concat({map_qc_chan}.concat(POSTMAPPING.out.postmapuni))).collect())\n"
                                 )
                             elif (
                                 "MAPPING" in flowlist and "QC_TRIMMING" not in flowlist
@@ -3217,7 +3249,7 @@ def nf_make_sub(
                                 subjobs.append(
                                     " " * 4
                                     + w
-                                    + "(QC_RAW.out.qc.concat(QC_MAPPING.out.qc.concat(POSTMAPPING.out.postmapuni)).collect())\n"
+                                    + f"(QC_RAW.out.qc.concat({map_qc_chan}.concat(POSTMAPPING.out.postmapuni)).collect())\n"
                                 )
                             elif "TRIMMING" in flowlist and "QC_TRIMMING" in flowlist:
                                 subjobs.append(
@@ -3328,7 +3360,7 @@ def nf_make_sub(
                         if toolenv == "rustqc":
                             # RustQC is post-alignment QC only
                             if "MAPPING" in subworkflows:
-                                flowlist.append("QC_MAPPING")
+                                flowlist.append("RUSTQC_MAPPING")
                             else:
                                 log.warning(
                                     logid
@@ -3463,7 +3495,14 @@ def nf_make_sub(
 
             if "QC" in subworkflows:
                 flowlist.append("MULTIQC")
-                _mqc_nf = "multiqc_rustqc.nf" if "QC_MAPPING" in flowlist and "QC_RAW" not in flowlist else "multiqc.nf"
+                _mqc_nf = (
+                    "multiqc_rustqc.nf"
+                    if (
+                        ("QC_MAPPING" in flowlist or "RUSTQC_MAPPING" in flowlist)
+                        and "QC_RAW" not in flowlist
+                    )
+                    else "multiqc.nf"
+                )
                 nfi = os.path.abspath(os.path.join(workflowpath, _mqc_nf))
                 with open(nfi, "r") as nf:
                     for line in mu.comment_remover(nf.readlines()):
@@ -3484,6 +3523,14 @@ def nf_make_sub(
             # workflow merger
             log.debug("FLOWLIST: " + str(flowlist))
 
+            map_qc_chan = "QC_MAPPING.out.qc"
+            if "RUSTQC_MAPPING" in flowlist:
+                map_qc_chan = (
+                    "QC_MAPPING.out.qc.concat(RUSTQC_MAPPING.out.qc)"
+                    if "QC_MAPPING" in flowlist
+                    else "RUSTQC_MAPPING.out.qc"
+                )
+
             subjobs.append("\n\n" + "workflow {\n")
             for w in [
                 "QC_RAW",
@@ -3494,6 +3541,7 @@ def nf_make_sub(
                 "MAPPING",
                 "DEDUPBAM",
                 "QC_MAPPING",
+                "RUSTQC_MAPPING",
                 "MULTIQC",
             ]:
                 if w in flowlist:
@@ -3541,35 +3589,50 @@ def nf_make_sub(
                                 + w
                                 + "(POSTMAPPING.out.postmap.concat(POSTMAPPING.out.postmapuni))\n"
                             )
+                    elif w == "RUSTQC_MAPPING":
+                        if "DEDUPBAM" in flowlist:
+                            subjobs.append(
+                                " " * 4
+                                + w
+                                + "(POSTMAPPING.out.postmap.concat(POSTMAPPING.out.postmapuni.concat(DEDUPBAM.out.dedup)))\n"
+                            )
+                        else:
+                            subjobs.append(
+                                " " * 4
+                                + w
+                                + "(POSTMAPPING.out.postmap.concat(POSTMAPPING.out.postmapuni))\n"
+                            )
                     elif w == "MULTIQC":
-                        if "QC_RAW" not in flowlist and "QC_MAPPING" in flowlist:
+                        if "QC_RAW" not in flowlist and (
+                            "QC_MAPPING" in flowlist or "RUSTQC_MAPPING" in flowlist
+                        ):
                             # RustQC: only BAM-level QC, no FASTQ QC
                             subjobs.append(
-                                " " * 4 + w + "(QC_MAPPING.out.qc.collect())\n"
+                                " " * 4 + w + f"({map_qc_chan}.collect())\n"
                             )
                         elif "DEDUPBAM" in flowlist and "QC_TRIMMING" in flowlist:
                             subjobs.append(
                                 " " * 4
                                 + w
-                                + "(QC_RAW.out.qc.concat(QC_TRIMMING.out.qc.concat(QC_MAPPING.out.qc.concat(MAPPING.out.logs))).collect())\n"
+                                + f"(QC_RAW.out.qc.concat(QC_TRIMMING.out.qc.concat({map_qc_chan}.concat(MAPPING.out.logs))).collect())\n"
                             )
                         elif "DEDUPBAM" in flowlist and "QC_TRIMMING" not in flowlist:
                             subjobs.append(
                                 " " * 4
                                 + w
-                                + "(QC_RAW.out.qc.concat(QC_MAPPING.out.qc.concat(MAPPING.out.logs)).collect())\n"
+                                + f"(QC_RAW.out.qc.concat({map_qc_chan}.concat(MAPPING.out.logs)).collect())\n"
                             )
                         elif "MAPPING" in flowlist and "QC_TRIMMING" in flowlist:
                             subjobs.append(
                                 " " * 4
                                 + w
-                                + "(QC_RAW.out.qc.concat(QC_TRIMMING.out.qc.concat(QC_MAPPING.out.qc.concat(POSTMAPPING.out.postmapuni))).collect())\n"
+                                + f"(QC_RAW.out.qc.concat(QC_TRIMMING.out.qc.concat({map_qc_chan}.concat(POSTMAPPING.out.postmapuni))).collect())\n"
                             )
                         elif "MAPPING" in flowlist and "QC_TRIMMING" not in flowlist:
                             subjobs.append(
                                 " " * 4
                                 + w
-                                + "(QC_RAW.out.qc.concat(QC_MAPPING.out.qc.concat(POSTMAPPING.out.postmapuni)).collect())\n"
+                                + f"(QC_RAW.out.qc.concat({map_qc_chan}.concat(POSTMAPPING.out.postmapuni)).collect())\n"
                             )
                         elif "TRIMMING" in flowlist and "QC_TRIMMING" in flowlist:
                             subjobs.append(
diff --git a/workflows/rustqc.nf b/workflows/rustqc.nf
index 7808bf23..914cd8c4 100644
--- a/workflows/rustqc.nf
+++ b/workflows/rustqc.nf
@@ -44,7 +44,7 @@ process rustqc_mapped{
     """
 }
 
-workflow QC_MAPPING{
+workflow RUSTQC_MAPPING{
     take: collection
 
     main:

From 411fcc9a216252c19067af2289d1f1abd7de0cd6 Mon Sep 17 00:00:00 2001
From: jfallmann <fallmann.joerg@gmail.com>
Date: Mon, 27 Apr 2026 20:29:15 +0200
Subject: [PATCH 20/39] deterministic qc order

---
 MONSDA/Workflows.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/MONSDA/Workflows.py b/MONSDA/Workflows.py
index 650d612e..e07c630c 100755
--- a/MONSDA/Workflows.py
+++ b/MONSDA/Workflows.py
@@ -195,6 +195,14 @@ def get_combo(wfs, config, conditions):
             # Group all QC tools into a single combo position so pre-QC
             # (fastqc) and post-QC (rustqc) both appear in one combo name.
             if subwork == "QC" and len(tools) > 1:
+                qc_prio = {"fastqc": 0, "rustqc": 1}
+                tools = sorted(
+                    tools,
+                    key=lambda item: (
+                        qc_prio.get(list(item.values())[0], 99),
+                        list(item.values())[0],
+                    ),
+                )
                 ret.append([tools])  # one option = the entire list of QC tools
             else:
                 ret.append(tools)

From 490b27b6f08c8f7c9246742fbfa4fce71dbd9b95 Mon Sep 17 00:00:00 2001
From: jfallmann <fallmann.joerg@gmail.com>
Date: Mon, 27 Apr 2026 20:36:59 +0200
Subject: [PATCH 21/39] nf key fix

---
 MONSDA/Workflows.py | 66 ++++++++++++++++++++++++++++++++++-----------
 workflows/rustqc.nf | 12 ++++-----
 2 files changed, 57 insertions(+), 21 deletions(-)

diff --git a/MONSDA/Workflows.py b/MONSDA/Workflows.py
index e07c630c..af51f7e3 100755
--- a/MONSDA/Workflows.py
+++ b/MONSDA/Workflows.py
@@ -2291,13 +2291,35 @@ def nf_tool_params(
             if toolenv
             else mu.sub_dict(config[subwork], x)["OPTIONS"]
         )
-        tp.append(
-            "--" + subwork + "ENV " + toolenv + " --" + subwork + "BIN " + toolbin + " "
-        )
+        if subwork == "QC":
+            qcprefix = "POSTQC" if toolenv == "rustqc" else "PREQC"
+            tp.append(
+                "--"
+                + qcprefix
+                + "ENV "
+                + toolenv
+                + " --"
+                + qcprefix
+                + "BIN "
+                + toolbin
+                + " "
+            )
+        else:
+            tp.append(
+                "--"
+                + subwork
+                + "ENV "
+                + toolenv
+                + " --"
+                + subwork
+                + "BIN "
+                + toolbin
+                + " "
+            )
 
         toolpar = list()
         if "star" in [toolenv, toolbin]:
-                np['INDEX'] = mp.fixRunParameters(config, toolenv, sample, None, 'MAPPING', 'INDEX', "--sjdbGTFfile", "--sjdbGTFfile tmp_anno")
+            np['INDEX'] = mp.fixRunParameters(config, toolenv, sample, None, 'MAPPING', 'INDEX', "--sjdbGTFfile", "--sjdbGTFfile tmp_anno")
         for key, val in np.items():
             pars = val if val and val != "" else None
             if pars:
@@ -2310,17 +2332,31 @@ def nf_tool_params(
                 toolenv, toolbin = map(str, [sd["ENV"], sd["BIN"]])
 
             np = sd[toolenv.split("_")[0]]["OPTIONS"] if toolenv else sd["OPTIONS"]
-            tp.append(
-                "--"
-                + subwork
-                + "ENV "
-                + toolenv
-                + " --"
-                + subwork
-                + "BIN "
-                + toolbin
-                + " "
-            )
+            if subwork == "QC":
+                qcprefix = "POSTQC" if toolenv == "rustqc" else "PREQC"
+                tp.append(
+                    "--"
+                    + qcprefix
+                    + "ENV "
+                    + toolenv
+                    + " --"
+                    + qcprefix
+                    + "BIN "
+                    + toolbin
+                    + " "
+                )
+            else:
+                tp.append(
+                    "--"
+                    + subwork
+                    + "ENV "
+                    + toolenv
+                    + " --"
+                    + subwork
+                    + "BIN "
+                    + toolbin
+                    + " "
+                )
 
             toolpar = list()
             for key, val in np.items():
diff --git a/workflows/rustqc.nf b/workflows/rustqc.nf
index 914cd8c4..9ce3a806 100644
--- a/workflows/rustqc.nf
+++ b/workflows/rustqc.nf
@@ -1,6 +1,6 @@
-QCENV = get_always('POSTQCENV')
-QCBIN = get_always('POSTQCBIN')
-QCPARAMS = get_always('rustqc_params_QC') ?: ''
+RUSTQCENV = get_always('POSTQCENV')
+RUSTQCBIN = get_always('POSTQCBIN')
+RUSTQCPARAMS = get_always('rustqc_params_QC') ?: ''
 
 MAPANNO = get_always('MAPPINGANNO')
 
@@ -17,8 +17,8 @@ RUSTQC_PAIRED = (PAIRED == 'paired') ? '-p' : ''
 //RUSTQC on mapped BAMs
 
 process rustqc_mapped{
-    conda "$QCENV"+".yaml"
-    container "oras://jfallmann/monsda:"+"$QCENV"
+    conda "$RUSTQCENV"+".yaml"
+    container "oras://jfallmann/monsda:"+"$RUSTQCENV"
     cpus THREADS
     cache 'lenient'
     label 'big_mem'
@@ -40,7 +40,7 @@ process rustqc_mapped{
     fn = file(bam).getSimpleName()
     anno = file("${workflow.workDir}/../${MAPANNO}")
     """
-    rustqc rna $bam --gtf $anno -t ${task.cpus} $RUSTQC_PAIRED -s $RUSTQC_STRANDED --skip-dup-check -j results/rustqc_summary.json -o results/$fn $QCPARAMS
+    $RUSTQCBIN rna $bam --gtf $anno -t ${task.cpus} $RUSTQC_PAIRED -s $RUSTQC_STRANDED --skip-dup-check -j results/rustqc_summary.json -o results/$fn $RUSTQCPARAMS
     """
 }
 

From 160b107971773303f42e2ccaad72b9692800ba36 Mon Sep 17 00:00:00 2001
From: jfallmann <fallmann.joerg@gmail.com>
Date: Mon, 27 Apr 2026 21:16:16 +0200
Subject: [PATCH 22/39] enable samplesheet injection into config json

---
 MONSDA/Params.py                 | 192 +++++++++++++++++++++++++++++++
 MONSDA/RunMONSDA.py              |  16 +++
 configs/samplesheet_template.csv |   7 ++
 3 files changed, 215 insertions(+)
 create mode 100644 configs/samplesheet_template.csv

diff --git a/MONSDA/Params.py b/MONSDA/Params.py
index dfa66e28..c592146d 100644
--- a/MONSDA/Params.py
+++ b/MONSDA/Params.py
@@ -61,10 +61,12 @@
 # # __file__ fails if someone does os.chdir() before.
 # # sys.argv[0] also fails, because it doesn't not always contain the path.
 
+import csv
 import datetime
 import glob
 import inspect
 import itertools
+import json
 import os
 import re
 import shutil
@@ -73,6 +75,7 @@
 from collections import OrderedDict, defaultdict
 
 from natsort import natsorted
+from snakemake.common.configfile import load_configfile as _load_configfile
 
 import MONSDA.Utils as mu
 from MONSDA.Utils import check_run as check_run
@@ -101,6 +104,195 @@
     print("".join(tbe.format()), file=sys.stderr)
 
 
+def samplesheet_to_settings(samplesheet_path: str) -> dict:
+    """Read a CSV/TSV samplesheet and return a SETTINGS dict compatible with MONSDA config.
+
+    Expected columns (case-insensitive header row):
+      CONDITION  - slash-separated condition path, e.g. ``Ecoli/WT`` or ``Ecoli/WT/dummylevel``
+      SAMPLE     - sample name / accession
+      GROUP      - group label for differential analysis
+      SEQUENCING - e.g. ``paired`` or ``single``
+      REFERENCE  - path to genome FASTA (.fa.gz)
+      GTF        - path to GTF annotation (.gtf.gz)  [optional]
+      GFF        - path to GFF annotation (.gff.gz)  [optional]
+      INDEX      - path to pre-built index            [optional]
+      PREFIX     - mapper index prefix                [optional]
+      DECOY      - path to decoy file                 [optional]
+      TYPE       - sample type label                  [optional]
+      BATCH      - batch label                        [optional]
+      IP         - IP protocol info (for PEAKS)       [optional]
+
+    Per-condition metadata (SEQUENCING, REFERENCE, …) only needs to be present
+    on the first row for that condition; subsequent rows may leave those cells
+    empty (fill-down behaviour).
+
+    Parameters
+    ----------
+    samplesheet_path : str
+        Path to the samplesheet file (UTF-8; a leading byte-order mark is tolerated).
+
+    Returns
+    -------
+    dict
+        Nested dict suitable for assigning to ``config["SETTINGS"]``.
+    """
+    logid = scriptname + ".samplesheet_to_settings: "
+
+    # --- detect delimiter (utf-8-sig drops the BOM that Excel prepends to CSV exports) ---
+    with open(samplesheet_path, newline="", encoding="utf-8-sig") as fh:
+        sample = fh.read(4096)
+
+    delimiter = None
+    # 1. trust the file extension
+    ext = os.path.splitext(samplesheet_path)[1].lower()
+    if ext in (".tsv", ".txt"):
+        delimiter = "\t"
+    elif ext == ".csv":
+        delimiter = ","
+    else:
+        # 2. try the sniffer
+        try:
+            dialect = csv.Sniffer().sniff(sample, delimiters=",\t;")
+            delimiter = dialect.delimiter
+        except csv.Error:
+            pass
+        # 3. manual probe: whichever candidate appears more on the first line
+        if delimiter is None:
+            first_line = sample.splitlines()[0] if sample else ""
+            delimiter = "\t" if first_line.count("\t") >= first_line.count(",") else ","
+
+    settings: dict = {}
+
+    # per-condition accumulator for fill-down metadata
+    cond_meta: dict = {}
+
+    with open(samplesheet_path, newline="", encoding="utf-8-sig") as fh:
+        reader = csv.DictReader(fh, delimiter=delimiter)
+        # normalise header keys to upper-case
+        if reader.fieldnames is None:
+            raise ValueError(
+                "Samplesheet appears to be empty or has no header row: "
+                + samplesheet_path
+            )
+        reader.fieldnames = [f.strip().upper() for f in reader.fieldnames]
+
+        for row in reader:
+            row = {
+                k.strip().upper(): (v.strip() if v is not None else "")
+                for k, v in row.items()
+                if k
+            }
+
+            condition_str = row.get("CONDITION", "").strip()
+            sample_name = row.get("SAMPLE", "").strip()
+            if not condition_str or not sample_name:
+                log.warning(
+                    logid + "Skipping row with missing CONDITION or SAMPLE: " + str(row)
+                )
+                continue
+
+            # fill-down: carry over non-empty metadata from previous rows of same condition
+            if condition_str not in cond_meta:
+                cond_meta[condition_str] = {}
+            for key in (
+                "SEQUENCING",
+                "REFERENCE",
+                "GTF",
+                "GFF",
+                "INDEX",
+                "PREFIX",
+                "DECOY",
+                "IP",
+            ):
+                val = row.get(key, "")
+                if val:
+                    cond_meta[condition_str][key] = val
+
+            meta = cond_meta[condition_str]
+
+            # --- navigate / create nested dict path ---
+            path_parts = [p.strip() for p in condition_str.split("/") if p.strip()]
+            node = settings
+            for part in path_parts:
+                node = node.setdefault(part, {})
+
+            # --- initialise leaf node on first encounter ---
+            if "SAMPLES" not in node:
+                node["SAMPLES"] = []
+                node["GROUPS"] = []
+                node["TYPES"] = []
+                node["BATCHES"] = []
+                node["SEQUENCING"] = meta.get("SEQUENCING", "")
+                node["REFERENCE"] = meta.get("REFERENCE", "")
+                node["INDEX"] = meta.get("INDEX", "")
+                node["PREFIX"] = meta.get("PREFIX", "")
+                node["IP"] = meta.get("IP", "")
+                gtf = meta.get("GTF", "")
+                gff = meta.get("GFF", "")
+                node["ANNOTATION"] = {"GTF": gtf, "GFF": gff}
+                decoy = meta.get("DECOY", "")
+                node["DECOY"] = {decoy: ""} if decoy else {}
+
+            node["SAMPLES"].append(sample_name)
+            node["GROUPS"].append(row.get("GROUP", ""))
+            node["TYPES"].append(row.get("TYPE", ""))
+            node["BATCHES"].append(row.get("BATCH", ""))
+
+    log.info(logid + "Built SETTINGS from samplesheet: " + str(list(settings.keys())))
+    return settings
+
+
+def inject_samplesheet_settings(configfile: str, samplesheet_path: str) -> str:
+    """Load *configfile*, populate ``SETTINGS`` from *samplesheet_path* if absent,
+    write the augmented config to ``<base>_with_settings.json`` and return that path.
+
+    Parameters
+    ----------
+    configfile : str
+        Path to the original MONSDA JSON config.
+    samplesheet_path : str
+        Path to the CSV/TSV samplesheet.
+
+    Returns
+    -------
+    str
+        Path to the augmented config, or *configfile* if it already holds samples.
+    """
+    logid = scriptname + ".inject_samplesheet_settings: "
+
+    config = _load_configfile(configfile)
+
+    # Search for a SAMPLES list at any depth rather than at one fixed level:
+    # condition paths may be nested one, two or more levels deep (e.g.
+    # ``Ecoli/WT/dummylevel``), and a fixed-depth probe would miss existing
+    # sample data and let the samplesheet replace it.
+    pending = [config.get("SETTINGS") or {}]
+    has_samples = False
+    while pending and not has_samples:
+        node = pending.pop()
+        if not isinstance(node, dict):
+            continue
+        has_samples = isinstance(node.get("SAMPLES"), list)
+        pending.extend(node.values())
+
+    if has_samples:
+        log.info(
+            logid
+            + "Config already contains SETTINGS with sample data; samplesheet will be ignored."
+        )
+        return configfile
+
+    log.info(logid + "Populating SETTINGS from samplesheet: " + samplesheet_path)
+    config["SETTINGS"] = samplesheet_to_settings(samplesheet_path)
+
+    base, ext = os.path.splitext(configfile)
+    out_path = base + "_with_settings" + (ext if ext else ".json")
+    with open(out_path, "w") as fh:
+        json.dump(config, fh, indent=4)
+    log.info(logid + "Augmented config written to: " + out_path)
+    return out_path
+
+
 @check_run
 def get_samples(config: dict) -> list():
     """Check and return samples according to sample list on config.json
diff --git a/MONSDA/RunMONSDA.py b/MONSDA/RunMONSDA.py
index 571fde34..9635a1f7 100755
--- a/MONSDA/RunMONSDA.py
+++ b/MONSDA/RunMONSDA.py
@@ -129,6 +129,15 @@ def parseargs():
         action="store_true",
         help="Print version and exit",
     )
+    parser.add_argument(
+        "--samplesheet",
+        type=str,
+        default=None,
+        metavar="FILE",
+        help="CSV or TSV samplesheet to populate the SETTINGS section. "
+             "Used when the config JSON lacks a SETTINGS block. "
+             "On the first run an augmented config (<config>_with_settings.json) is written for reuse.",
+    )
 
     if len(sys.argv) == 1:
         parser.print_help(sys.stderr)
@@ -923,6 +932,13 @@ def main():
             for i in range(1, len(knownargs.config)):
                 optionalargs[0].extend(list(["-c", str(knownargs.config[i].pop())]))
 
+        # --- samplesheet injection (before any other config use) ---
+        if knownargs.samplesheet:
+            knownargs.configfile = mp.inject_samplesheet_settings(
+                knownargs.configfile,
+                os.path.abspath(knownargs.samplesheet),
+            )
+
         log.debug(
             f"{logid} ARGS: {args} {type(args)} KNOWNARGS: {knownargs} {type(knownargs)} OPTIONALARGS: {optionalargs} {type(optionalargs)}"
         )
diff --git a/configs/samplesheet_template.csv b/configs/samplesheet_template.csv
new file mode 100644
index 00000000..2e95bea4
--- /dev/null
+++ b/configs/samplesheet_template.csv
@@ -0,0 +1,7 @@
+CONDITION,SAMPLE,GROUP,SEQUENCING,REFERENCE,GTF,GFF,INDEX,PREFIX,DECOY,TYPE,BATCH,IP
+Ecoli/WT,SRR16324019,ctrl,paired,GENOMES/Ecoli/ecoli.fa.gz,GENOMES/Ecoli/ecoli.gtf.gz,GENOMES/Ecoli/ecoli.gff.gz,,,,,,
+Ecoli/WT,SRR16324018,ctrl,,,,,,,,,
+Ecoli/WT,SRR16324017,ctrl,,,,,,,,,
+Ecoli/KO,SRR16324016,ko,paired,GENOMES/Ecoli/ecoli.fa.gz,GENOMES/Ecoli/ecoli.gtf.gz,GENOMES/Ecoli/ecoli.gff.gz,,,,,,
+Ecoli/KO,SRR16324015,ko,,,,,,,,,
+Ecoli/KO,SRR16324014,ko,,,,,,,,,

From d7fc1041b27b0d708b2ebb7365675db4d89b02d5 Mon Sep 17 00:00:00 2001
From: jfallmann <fallmann.joerg@gmail.com>
Date: Mon, 4 May 2026 14:41:51 +0200
Subject: [PATCH 23/39] samplesheet update

---
 configs/samplesheet_template.csv | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/configs/samplesheet_template.csv b/configs/samplesheet_template.csv
index 2e95bea4..0d553361 100644
--- a/configs/samplesheet_template.csv
+++ b/configs/samplesheet_template.csv
@@ -1,7 +1,7 @@
 CONDITION,SAMPLE,GROUP,SEQUENCING,REFERENCE,GTF,GFF,INDEX,PREFIX,DECOY,TYPE,BATCH,IP
-Ecoli/WT,SRR16324019,ctrl,paired,GENOMES/Ecoli/ecoli.fa.gz,GENOMES/Ecoli/ecoli.gtf.gz,GENOMES/Ecoli/ecoli.gff.gz,,,,,,
-Ecoli/WT,SRR16324018,ctrl,,,,,,,,,
-Ecoli/WT,SRR16324017,ctrl,,,,,,,,,
-Ecoli/KO,SRR16324016,ko,paired,GENOMES/Ecoli/ecoli.fa.gz,GENOMES/Ecoli/ecoli.gtf.gz,GENOMES/Ecoli/ecoli.gff.gz,,,,,,
-Ecoli/KO,SRR16324015,ko,,,,,,,,,
-Ecoli/KO,SRR16324014,ko,,,,,,,,,
+FGUMI/WT/dummylevel,Sample1,ctrl,paired,GENOMES/Ecoli/ecoli.fa.gz,GENOMES/Ecoli/ecoli.gtf.gz,GENOMES/Ecoli/ecoli.gff.gz,,,,,,
+FGUMI/WT/dummylevel,Sample2,ctrl,,,,,,,,,
+FGUMI/WT/dummylevel,Sample3,ctrl,,,,,,,,,
+FGUMI/KO,Sample4,ko,paired,GENOMES/Ecoli/ecoli.fa.gz,GENOMES/Ecoli/ecoli.gtf.gz,GENOMES/Ecoli/ecoli.gff.gz,,,,,,
+FGUMI/KO,Sample5,ko,,,,,,,,,
+FGUMI/KO,Sample6,ko,,,,,,,,,

From 9499a4393f685a0bba4b3a44aebf2815372a76c0 Mon Sep 17 00:00:00 2001
From: jfallmann <fallmann.joerg@gmail.com>
Date: Mon, 4 May 2026 14:57:54 +0200
Subject: [PATCH 24/39] config updates

---
 configs/template_base_commented.json | 44 ++++++++++++++++++++++++----
 configs/tutorial_exhaustive.json     | 15 +++++++++-
 configs/tutorial_postprocess.json    | 15 +++++++++-
 configs/tutorial_toolmix.json        | 15 +++++++++-
 4 files changed, 81 insertions(+), 8 deletions(-)

diff --git a/configs/template_base_commented.json b/configs/template_base_commented.json
index 7a47a674..0c526f7e 100644
--- a/configs/template_base_commented.json
+++ b/configs/template_base_commented.json
@@ -58,22 +58,36 @@
   },
   "BASECALL": {
     "TOOLS": {
-      "guppy": "~/.local/bin/guppy-cpu/bin/guppy_basecaller"
+      "guppy": "~/.local/bin/guppy-cpu/bin/guppy_basecaller",
+      "dorado": "dorado"
     },
     "ENV" : "",
     "BIN" : "",
     "guppy": {
       "comment": {
-        "BASECALL": "Guppy options here if any, paired is not required, will be resolved by rules"
+        "BASECALL": "Guppy caller options here if any",
+        "MODEL": "Guppy model options here if any"
       },
       "OPTIONS": {
-        "BASECALL": ""
+        "BASECALL": "",
+        "MODEL": ""
+      }
+    },
+    "dorado": {
+      "comment": {
+        "CALLER": "Dorado caller options here if any",
+        "MODEL": "Dorado model options here if any"
+      },
+      "OPTIONS": {
+        "CALLER": "",
+        "MODEL": ""
       }
     }
   },
   "QC": {
     "TOOLS": {
-      "fastqc": "fastqc"
+      "fastqc": "fastqc",
+      "rustqc": "rustqc"
     },
     "ENV" : "",
     "BIN" : "",
@@ -86,13 +100,24 @@
         "QC": "",
         "MULTI": ""
       }
+    },
+    "rustqc": {
+      "comment": {
+        "QC": "RustQC options here if any, post-alignment QC",
+        "MULTI": "MultiQC options for rustqc if any"
+      },
+      "OPTIONS": {
+        "QC": "",
+        "MULTI": ""
+      }
     }
   },
   "TRIMMING": {
     "TOOLS": {
       "trimgalore": "trim_galore",
       "cutadapt": "cutadapt",
-      "bbduk": "bbmap"
+      "bbduk": "bbmap",
+      "fastp": "fastp"
     },
     "ENV" : "",
     "BIN" : "",
@@ -119,11 +144,20 @@
       "OPTIONS": {
         "TRIM": "-q 15 --length 8 -e 0.15"
       }
+    },
+    "fastp": {
+      "comment": {
+        "TRIM": "Trimming options here, --paired is not required, will be resolved by rules"
+      },
+      "OPTIONS": {
+        "TRIM": "-q 15 -l 8"
+      }
     }
   },
   "DEDUP": {
     "TOOLS": {
       "umitools": "umi_tools",
+      "picard": "picard",
       "fgumi": "fgumi"
     },
     "ENV" : "",
diff --git a/configs/tutorial_exhaustive.json b/configs/tutorial_exhaustive.json
index e811dec3..da2cecac 100644
--- a/configs/tutorial_exhaustive.json
+++ b/configs/tutorial_exhaustive.json
@@ -128,7 +128,8 @@
         "TOOLS" :
         {
             "trimgalore": "trim_galore",
-            "cutadapt": "cutadapt"
+            "cutadapt": "cutadapt",
+            "fastp": "fastp"
         },
         "Ecoli": {
             "KO": {
@@ -143,6 +144,12 @@
                     {
                         "TRIM": "-m 8 -e 0.15"  # trimming options here, --KO is not required, will be resolved by rules
                     }
+                },
+                "fastp":{
+                    "OPTIONS":
+                    {
+                        "TRIM": "-q 15 -l 8"  # trimming options here, --KO is not required, will be resolved by rules
+                    }
                 }
             },
             "WT": {
@@ -158,6 +165,12 @@
                         {
                             "TRIM": "-m 8 -e 0.15"  # trimming options here, --KO is not required, will be resolved by rules
                         }
+                    },
+                    "fastp":{
+                        "OPTIONS":
+                        {
+                            "TRIM": "-q 15 -l 8"  # trimming options here, --KO is not required, will be resolved by rules
+                        }
                     }
                 }
             }
diff --git a/configs/tutorial_postprocess.json b/configs/tutorial_postprocess.json
index aa5ba370..79ca138d 100644
--- a/configs/tutorial_postprocess.json
+++ b/configs/tutorial_postprocess.json
@@ -122,7 +122,8 @@
         "TOOLS" :
         {
             "trimgalore": "trim_galore",
-            "cutadapt": "cutadapt"
+            "cutadapt": "cutadapt",
+            "fastp": "fastp"
         },
         "Ecoli": {
             "KO": {
@@ -137,6 +138,12 @@
                     {
                         "TRIM": "-m 8 -e 0.15"  # trimming options here, --KO is not required, will be resolved by rules
                     }
+                },
+                "fastp":{
+                    "OPTIONS":
+                    {
+                        "TRIM": "-q 15 -l 8"  # trimming options here, --KO is not required, will be resolved by rules
+                    }
                 }
             },
             "WT": {
@@ -152,6 +159,12 @@
                         {
                             "TRIM": "-m 8 -e 0.15"  # trimming options here, --KO is not required, will be resolved by rules
                         }
+                    },
+                    "fastp":{
+                        "OPTIONS":
+                        {
+                            "TRIM": "-q 15 -l 8"  # trimming options here, --KO is not required, will be resolved by rules
+                        }
                     }
                 }
             }
diff --git a/configs/tutorial_toolmix.json b/configs/tutorial_toolmix.json
index e3b080fa..2f91f471 100644
--- a/configs/tutorial_toolmix.json
+++ b/configs/tutorial_toolmix.json
@@ -120,7 +120,8 @@
         "TOOLS" :
         {
             "trimgalore": "trim_galore",
-            "cutadapt": "cutadapt"
+            "cutadapt": "cutadapt",
+            "fastp": "fastp"
         },
         "Ecoli": {
             "KO": {
@@ -135,6 +136,12 @@
                     {
                         "TRIM": "-m 8 -e 0.15"  # trimming options here, --KO is not required, will be resolved by rules
                     }
+                },
+                "fastp":{
+                    "OPTIONS":
+                    {
+                        "TRIM": "-q 15 -l 8"  # trimming options here, --KO is not required, will be resolved by rules
+                    }
                 }
             },
             "WT": {
@@ -150,6 +157,12 @@
                         {
                             "TRIM": "-m 8 -e 0.15"  # trimming options here, --KO is not required, will be resolved by rules
                         }
+                    },
+                    "fastp":{
+                        "OPTIONS":
+                        {
+                            "TRIM": "-q 15 -l 8"  # trimming options here, --KO is not required, will be resolved by rules
+                        }
                     }
                 }
             }

From f272103c1ea4621bec5e6f1ea6280ae24e312795 Mon Sep 17 00:00:00 2001
From: jfallmann <fallmann.joerg@gmail.com>
Date: Mon, 4 May 2026 15:01:35 +0200
Subject: [PATCH 25/39] configurator update wip

---
 MONSDA/Configurator.py     |  88 ++++-
 MONSDA/Utils.py            |   1 +
 MONSDA/web_configurator.py | 778 ++++++++++++++++++++++++++++---------
 3 files changed, 688 insertions(+), 179 deletions(-)

diff --git a/MONSDA/Configurator.py b/MONSDA/Configurator.py
index 847d9f65..97bd7be8 100755
--- a/MONSDA/Configurator.py
+++ b/MONSDA/Configurator.py
@@ -28,12 +28,10 @@
         os.sep.join(["lib", pythonversion, "site-packages", "MONSDA"]), "share"
     )
 except:
-    installpath = os.path.cwd()
+    installpath = os.getcwd()
 
 configpath = os.path.join(installpath, "MONSDA", "configs")
 current_path = os.getcwd()
-dir_path = os.path.dirname(os.path.realpath(__file__))
-os.chdir(dir_path)
 
 template = load_configfile(os.sep.join([configpath, "template_base_commented.json"]))
 none_workflow_keys = ["WORKFLOWS", "BINS", "MAXTHREADS", "SETTINGS", "VERSION"]
@@ -70,6 +68,13 @@
     help="takes configuration file to modify",
 )
 
+parser.add_argument(
+    "--samplesheet",
+    type=str,
+    default=False,
+    help="CSV or TSV samplesheet to populate SETTINGS and condition tree for new configs/projects",
+)
+
 args = parser.parse_args()
 
 
@@ -315,6 +320,21 @@ def get_conditions_from_dict(root, keylist=[]):
         keylist.pop()
 
 
+def get_conditions_from_settings(root, keylist=None):
+    """Yield ``:``-joined condition paths of SETTINGS leaves that hold a SAMPLES list."""
+    # Start from a fresh list per call: a shared mutable default would keep stale
+    # keys whenever a consumer abandons the generator before it finishes unwinding.
+    keylist = [] if keylist is None else keylist
+    if not isinstance(root, dict):
+        return
+    if isinstance(root.get("SAMPLES"), list):
+        yield ":".join(keylist)
+        return
+    for k, v in root.items():
+        if isinstance(v, dict):
+            yield from get_conditions_from_settings(v, keylist + [k])
+
+
 def getPathesFromDict(d, value=None):
     def yield_func(d):
         q = [(d, [])]
@@ -790,6 +810,8 @@ def create_condition_tree():
 def add_sample_dirs(only_conditions=None):
     pickle_unfinished("add_sample_dirs")
     # project.current_func_arg = only_conditions
+    if args.samplesheet and only_conditions is None and project.mode in ["project", "config"]:
+        return assign_samplesheet()
     if "FETCH" in project.workflowsDict.keys():
         return assign_SRA(only_conditions)
     print("\n  FASTQ files:")
@@ -876,6 +898,58 @@ def add_sample_dirs(only_conditions=None):
     return assign_samples(only_conditions)
 
 
+def assign_samplesheet():
+    pickle_unfinished("assign_samplesheet")
+    prCyan("\n  Sample Assignment: samplesheet\n")
+
+    samplesheet = str(args.samplesheet)
+    if not os.path.isfile(samplesheet):
+        prRed(f"Could not find samplesheet file: {samplesheet}")
+        exit(1)
+
+    cwd = os.getcwd()
+    try:
+        # Params initializes logging at import time via Utils.setup_logger and expects
+        # a writable LOGS/ directory in the current working directory.
+        os.makedirs(os.path.join(current_path, "LOGS"), exist_ok=True)
+        os.chdir(current_path)
+        from .Params import samplesheet_to_settings
+
+        sheet_settings = samplesheet_to_settings(samplesheet)
+    except Exception as e:
+        prRed(f"Failed to parse samplesheet '{samplesheet}': {e}")
+        exit(1)
+    finally:
+        os.chdir(cwd)
+
+    if not sheet_settings:
+        prRed(f"Samplesheet '{samplesheet}' did not produce SETTINGS entries")
+        exit(1)
+
+    project.settingsDict = decouple(sheet_settings)
+    project.conditionsDict = NestedDefaultDict()
+    project.samplesDict = NestedDefaultDict()
+
+    condition_paths = [
+        x.split(":") for x in get_conditions_from_settings(project.settingsDict)
+    ]
+    if not condition_paths:
+        prRed(
+            "No condition leaves with SAMPLES found in parsed samplesheet SETTINGS"
+        )
+        exit(1)
+
+    for path in condition_paths:
+        setInDict(project.conditionsDict, path, {})
+
+    prGreen("Loaded condition tree and SETTINGS from samplesheet:")
+    print_dict(project.conditionsDict, gap="      ")
+    print("")
+    print_dict(project.settingsDict, gap="      ")
+    show_settings()
+    return select_conditioning()
+
+
 def assign_SRA(only_conditions=None):
     pickle_unfinished("assign_SRA")
     prCyan("\n  Sample Assignment:  SRA Accession Numbers\n")
@@ -1882,6 +1956,14 @@ def main():
     global guide
     project = PROJECT()
     guide = GUIDE()
+    if args.samplesheet:
+        if not str(args.samplesheet).lower().endswith((".csv", ".tsv", ".txt")):
+            print("Samplesheet flag requires a .csv/.tsv/.txt file")
+            exit()
+        args.samplesheet = os.path.abspath(args.samplesheet)
+        if not os.path.isfile(args.samplesheet):
+            print(f"Samplesheet file not found: {args.samplesheet}")
+            exit()
     if args.test:
         guide.testing = True
     if args.config:
diff --git a/MONSDA/Utils.py b/MONSDA/Utils.py
index 3a8bae03..fd2f4d4b 100644
--- a/MONSDA/Utils.py
+++ b/MONSDA/Utils.py
@@ -90,6 +90,7 @@ def setup_logger(scriptname):
     for handler in log.handlers[:]:
         handler.close()
         log.removeHandler(handler)
+    os.makedirs("LOGS", exist_ok=True)
     handler = logging.FileHandler("LOGS/MONSDA.log", mode="a")
     handler.setFormatter(
         logging.Formatter(
diff --git a/MONSDA/web_configurator.py b/MONSDA/web_configurator.py
index 7f8425a2..474f1bca 100644
--- a/MONSDA/web_configurator.py
+++ b/MONSDA/web_configurator.py
@@ -1,19 +1,23 @@
+import copy
 import json
 import os
-from typing import Any, Dict
+from typing import Any, Dict, List, Optional
 
 from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import FileResponse
-from pydantic import BaseModel
+from fastapi.responses import HTMLResponse
+from pydantic import BaseModel, Field
+from snakemake.common.configfile import load_configfile
+
+from .Params import samplesheet_to_settings
 
 TEMPLATE_PATH = os.path.join(
     os.path.dirname(__file__), "../configs/template_base_commented.json"
 )
+NONE_WORKFLOW_KEYS = ["WORKFLOWS", "BINS", "MAXTHREADS", "SETTINGS", "VERSION"]
 
-app = FastAPI(title="MONSDA Configurator Web Service")
+app = FastAPI(title="MONSDA Configurator Web")
 
-# Allow CORS for local development
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
@@ -23,224 +27,646 @@
 )
 
 
-class ConfigRequest(BaseModel):
+class SamplesheetRequest(BaseModel):
+    samplesheet_path: str
+
+
+class BuildConfigRequest(BaseModel):
+    config_name: str
+    output_dir: str
+    workflows: List[str] = Field(default_factory=list)
+    tools: Dict[str, List[str]] = Field(default_factory=dict)
+    maxthreads: str = "16"
+    settings: Optional[Dict[str, Any]] = None
+    samplesheet_path: Optional[str] = None
+
+
+class ProjectRequest(BaseModel):
+    project_dir: str
+    workflows: List[str] = Field(default_factory=list)
+
+
+class SaveConfigRequest(BaseModel):
     config_name: str
-    config: Dict[str, Any]
     output_dir: str
+    config: Dict[str, Any]
 
 
 def load_template() -> Dict[str, Any]:
-    with open(TEMPLATE_PATH, "r") as f:
-        return json.load(f)
+    return load_configfile(TEMPLATE_PATH)
 
 
-def strip_comments(d):
+def strip_comments(d: Any) -> Any:
     if isinstance(d, dict):
         return {k: strip_comments(v) for k, v in d.items() if k != "comment"}
-    elif isinstance(d, list):
+    if isinstance(d, list):
         return [strip_comments(x) for x in d]
+    return d
+
+
+def set_in_path(root: Dict[str, Any], path: List[str], value: Any) -> None:
+    node = root
+    for key in path[:-1]:
+        if key not in node or not isinstance(node[key], dict):
+            node[key] = {}
+        node = node[key]
+    node[path[-1]] = value
+
+
+def get_condition_paths_from_settings(settings: Dict[str, Any]) -> List[List[str]]:
+    out: List[List[str]] = []
+
+    def _walk(node: Any, path: List[str]) -> None:
+        if not isinstance(node, dict):
+            return
+        if "SAMPLES" in node and isinstance(node.get("SAMPLES"), list):
+            out.append(path.copy())
+            return
+        for k, v in node.items():
+            if isinstance(v, dict):
+                _walk(v, path + [k])
+
+    _walk(settings, [])
+    return out
+
+
+def build_workflow_block(
+    workflow_name: str,
+    workflow_template: Dict[str, Any],
+    condition_paths: List[List[str]],
+    selected_tools: List[str],
+) -> Dict[str, Any]:
+    block: Dict[str, Any] = {}
+
+    all_tools = workflow_template.get("TOOLS", {})
+    if not selected_tools:
+        selected_tools = list(all_tools.keys())
+
+    if all_tools:
+        block["TOOLS"] = {k: all_tools[k] for k in selected_tools if k in all_tools}
+
+    # carry workflow-level defaults if present
+    for passthrough in ["FEATURES", "CUTOFFS", "COMPARABLE", "EXCLUDE"]:
+        if passthrough in workflow_template:
+            block[passthrough] = copy.deepcopy(workflow_template[passthrough])
+
+    # populate per-condition tool settings like CLI configurator does
+    for cond_path in condition_paths:
+        for tool in selected_tools:
+            tool_def = workflow_template.get(tool)
+            if not isinstance(tool_def, dict):
+                continue
+            tool_def_no_comment = strip_comments(tool_def)
+            if not tool_def_no_comment:
+                continue
+            set_in_path(block, cond_path + [tool], copy.deepcopy(tool_def_no_comment))
+
+    return block
+
+
+def build_config(req: BuildConfigRequest) -> Dict[str, Any]:
+    template = strip_comments(load_template())
+
+    if req.settings is not None:
+        settings = req.settings
+    elif req.samplesheet_path:
+        samplesheet_path = os.path.abspath(req.samplesheet_path)
+        if not os.path.isfile(samplesheet_path):
+            raise HTTPException(
+                status_code=400,
+                detail=f"Samplesheet does not exist: {samplesheet_path}",
+            )
+        settings = samplesheet_to_settings(samplesheet_path)
     else:
-        return d
+        raise HTTPException(
+            status_code=400,
+            detail="Provide either settings JSON or samplesheet_path.",
+        )
 
-    config_name: str
-    config: Dict[str, Any]
-    output_dir: str
+    condition_paths = get_condition_paths_from_settings(settings)
+    if not condition_paths:
+        raise HTTPException(
+            status_code=400,
+            detail="No condition leaves with SAMPLES found in SETTINGS.",
+        )
+
+    workflows = req.workflows or []
+    if not workflows:
+        raise HTTPException(
+            status_code=400,
+            detail="At least one workflow must be selected.",
+        )
+
+    invalid = [w for w in workflows if w not in template or w in NONE_WORKFLOW_KEYS]
+    if invalid:
+        raise HTTPException(
+            status_code=400,
+            detail=f"Unknown workflow(s): {', '.join(invalid)}",
+        )
+
+    final_config: Dict[str, Any] = {
+        "WORKFLOWS": ", ".join(workflows),
+        "BINS": template.get("BINS", ""),
+        "MAXTHREADS": str(req.maxthreads),
+        "VERSION": template.get("VERSION", ""),
+        "SETTINGS": settings,
+    }
+
+    for wf in workflows:
+        wf_template = template.get(wf, {})
+        final_config[wf] = build_workflow_block(
+            wf,
+            wf_template,
+            condition_paths,
+            req.tools.get(wf, []),
+        )
+
+    return final_config
 
 
 @app.get("/template", response_model=Dict[str, Any])
-def get_template():
-    """Get the config template (with comments)."""
-    return load_template()
+def get_template() -> Dict[str, Any]:
+    try:
+        return load_template()
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Failed to load template: {e}")
 
 
 @app.get("/template/fields", response_model=Dict[str, Any])
-def get_template_fields():
-    """Get the config template (without comments)."""
-    return strip_comments(load_template())
+def get_template_fields() -> Dict[str, Any]:
+    try:
+        return strip_comments(load_template())
+    except Exception as e:
+        raise HTTPException(
+            status_code=500, detail=f"Failed to load template fields: {e}"
+        )
+
+
+def _normalize_path(path: str) -> str:
+    if path:
+        return os.path.abspath(os.path.expanduser(path.strip()))
+    return os.getcwd()
+
+
+@app.get("/fs/roots", response_model=Dict[str, Any])
+def fs_roots() -> Dict[str, Any]:
+    cwd = os.getcwd()
+    home = os.path.expanduser("~")
+    roots = []
+    for p in [cwd, home, os.path.sep]:
+        if p and p not in roots and os.path.isdir(p):
+            roots.append(p)
+    return {"roots": roots}
+
+
+@app.get("/fs/list", response_model=Dict[str, Any])
+def fs_list(path: str = "", mode: str = "dirs") -> Dict[str, Any]:
+    mode = (mode or "dirs").lower()
+    if mode not in {"dirs", "all", "samplesheets"}:
+        raise HTTPException(
+            status_code=400, detail="mode must be dirs, all, or samplesheets"
+        )
+
+    current = _normalize_path(path)
+    if not os.path.isdir(current):
+        raise HTTPException(status_code=400, detail=f"Not a directory: {current}")
+
+    try:
+        entries = list(os.scandir(current))
+    except PermissionError:
+        raise HTTPException(status_code=403, detail=f"Permission denied: {current}")
+
+    dirs = []
+    files = []
+    for e in sorted(entries, key=lambda x: x.name.lower()):
+        if e.name in {".", ".."}:
+            continue
+        if e.is_dir(follow_symlinks=False):
+            dirs.append({"name": e.name, "path": e.path})
+            continue
+        if not e.is_file(follow_symlinks=False):
+            continue
+        if mode in {"all", "samplesheets"}:
+            if mode == "samplesheets":
+                lower = e.name.lower()
+                if not lower.endswith((".csv", ".tsv", ".txt")):
+                    continue
+            files.append({"name": e.name, "path": e.path})
+
+    parent = (
+        os.path.dirname(current.rstrip(os.sep))
+        if current != os.path.sep
+        else os.path.sep
+    )
+    return {
+        "current": current,
+        "parent": parent,
+        "dirs": dirs,
+        "files": files,
+        "mode": mode,
+    }
+
+
+@app.post("/samplesheet/parse", response_model=Dict[str, Any])
+def parse_samplesheet(req: SamplesheetRequest) -> Dict[str, Any]:
+    path = os.path.abspath(req.samplesheet_path.strip())
+    if not os.path.isfile(path):
+        raise HTTPException(status_code=400, detail=f"Samplesheet not found: {path}")
+    settings = samplesheet_to_settings(path)
+    return {
+        "samplesheet": path,
+        "settings": settings,
+        "conditions": [
+            "/".join(p) for p in get_condition_paths_from_settings(settings)
+        ],
+    }
+
+
+@app.post("/config/preview", response_model=Dict[str, Any])
+def preview_config(req: BuildConfigRequest) -> Dict[str, Any]:
+    return {"config": build_config(req)}
 
 
-@app.post("/generate_config")
-def generate_config(req: ConfigRequest):
-    """Generate a config file from user input. User specifies output_dir."""
+@app.post("/config/save", response_model=Dict[str, Any])
+def save_config(req: SaveConfigRequest) -> Dict[str, Any]:
     config_name = req.config_name.strip()
-    output_dir = os.path.abspath(req.output_dir.strip())
     if not config_name or any(c in config_name for c in "/\\"):
         raise HTTPException(status_code=400, detail="Invalid config name.")
-    if not output_dir or not os.path.isdir(output_dir):
-        raise HTTPException(status_code=400, detail="Invalid output directory.")
-    config_path = os.path.join(output_dir, f"config_{config_name}.json")
-    with open(config_path, "w") as f:
-        json.dump(req.config, f, indent=4)
-    return {"message": "Config generated.", "path": config_path}
-
-
-# Download config by full path (for security, restrict to files under allowed parent dir)
-@app.get("/download_config/")
-def download_config(config_path: str):
-    config_path = os.path.abspath(config_path)
-    if not os.path.exists(config_path):
-        raise HTTPException(status_code=404, detail="Config not found.")
-    if not config_path.endswith(".json"):
-        raise HTTPException(status_code=400, detail="Only .json files allowed.")
-    return FileResponse(config_path, filename=os.path.basename(config_path))
-
-
-class DirRequest(BaseModel):
-    config: Dict[str, Any]
-    project_dir: str
 
+    output_dir = os.path.abspath(req.output_dir.strip())
+    if not os.path.isdir(output_dir):
+        raise HTTPException(status_code=400, detail="Output directory does not exist.")
 
-def safe_makedirs(path):
-    os.makedirs(path, exist_ok=True)
+    path = os.path.join(output_dir, f"config_{config_name}.json")
+    with open(path, "w") as fh:
+        json.dump(req.config, fh, indent=4)
 
+    return {"message": "Config written.", "path": path}
 
-def create_project_structure(config: Dict[str, Any], project_dir: str):
-    # Example: create folders for workflows, logs, counts, etc.
-    safe_makedirs(project_dir)
-    for wf in config.get("WORKFLOWS", "").split(","):
-        wf = wf.strip()
-        if wf:
-            safe_makedirs(os.path.join(project_dir, wf))
-    safe_makedirs(os.path.join(project_dir, "LOGS"))
-    safe_makedirs(os.path.join(project_dir, "COUNTS"))
-    # Add more as needed
 
+@app.post("/project/create", response_model=Dict[str, Any])
+def create_project(req: ProjectRequest) -> Dict[str, Any]:
+    project_dir = os.path.abspath(req.project_dir.strip())
+    os.makedirs(project_dir, exist_ok=True)
 
-@app.post("/generate_project_dir")
-def generate_project_dir(req: DirRequest):
-    project_dir = os.path.abspath(req.project_dir)
-    if not project_dir.startswith(os.getcwd()):
-        raise HTTPException(
-            status_code=400, detail="Project dir must be inside working directory."
-        )
-    if os.path.exists(project_dir) and os.listdir(project_dir):
-        raise HTTPException(
-            status_code=400, detail="Project dir already exists and is not empty."
-        )
-    create_project_structure(req.config, project_dir)
-    return {"message": "Project directory structure created.", "path": project_dir}
+    # Basic MONSDA project skeleton
+    os.makedirs(os.path.join(project_dir, "FASTQ"), exist_ok=True)
+    os.makedirs(os.path.join(project_dir, "GENOMES"), exist_ok=True)
+    os.makedirs(os.path.join(project_dir, "LOGS"), exist_ok=True)
 
+    for wf in req.workflows:
+        os.makedirs(os.path.join(project_dir, wf), exist_ok=True)
 
-from fastapi.responses import HTMLResponse
+    return {"message": "Project directory prepared.", "path": project_dir}
 
 
 @app.get("/", response_class=HTMLResponse)
-def root():
+def root() -> str:
     return """
 <!DOCTYPE html>
 <html lang=\"en\">
 <head>
-    <meta charset=\"UTF-8\">
-    <title>MONSDA Configurator</title>
-    <style>
-        body { font-family: sans-serif; margin: 2em; background: #f8f8fa; }
-        h1 { color: #2c3e50; }
-        textarea, input, select { width: 100%; margin: 0.5em 0; }
-        .section { background: #fff; border-radius: 8px; box-shadow: 0 2px 8px #0001; padding: 1.5em; margin-bottom: 2em; }
-        button { background: #2c3e50; color: #fff; border: none; padding: 0.7em 1.5em; border-radius: 4px; cursor: pointer; }
-        button:disabled { background: #aaa; }
-        .success { color: green; }
-        .error { color: red; }
-        .field-label { font-weight: bold; margin-top: 1em; }
-    </style>
+  <meta charset=\"UTF-8\" />
+  <title>MONSDA Web Configurator</title>
+  <style>
+    body { font-family: system-ui, sans-serif; margin: 24px; background: #f7f8fb; color: #1f2937; }
+    h1 { margin-bottom: 8px; }
+    .card { background: #fff; border: 1px solid #e5e7eb; border-radius: 10px; padding: 16px; margin-bottom: 16px; }
+    .row { display: grid; grid-template-columns: 1fr 1fr; gap: 12px; }
+    input, textarea, select, button { width: 100%; box-sizing: border-box; margin-top: 6px; margin-bottom: 10px; }
+    textarea { min-height: 140px; font-family: ui-monospace, monospace; }
+    .ok { color: #065f46; }
+    .err { color: #991b1b; }
+    .muted { color: #6b7280; font-size: 0.9em; }
+    .pill { display: inline-block; padding: 2px 8px; border-radius: 999px; background: #eef2ff; margin-right: 6px; margin-bottom: 6px; }
+    .pathline { display: flex; gap: 8px; align-items: center; }
+    .pathline input { flex: 1; }
+    .pathline button { width: auto; white-space: nowrap; }
+    .browse-list { max-height: 260px; overflow: auto; border: 1px solid #e5e7eb; border-radius: 8px; padding: 8px; background: #fafafa; }
+    .browse-item { display: flex; gap: 8px; align-items: center; margin-bottom: 6px; }
+    .browse-item button { width: auto; margin: 0; }
+  </style>
 </head>
 <body>
-    <h1>MONSDA Configurator</h1>
-    <div class=\"section\">
-        <h2>Step 1: Edit Configuration</h2>
-        <button onclick=\"loadTemplate()\">Load Template</button>
-        <span id=\"template-status\"></span>
-        <textarea id=\"config-editor\" rows=\"24\" placeholder=\"Config JSON will appear here...\"></textarea>
+  <h1>MONSDA Web Configurator</h1>
+  <p class=\"muted\">Interactive builder for MONSDA configs with samplesheet support.</p>
+
+  <div class=\"card\">
+    <h3>1) Samplesheet → SETTINGS</h3>
+    <label>Samplesheet path (CSV/TSV)</label>
+    <div class="pathline">
+      <input id="samplesheetPath" placeholder="/abs/path/to/samplesheet.csv" />
+      <button onclick="openPathBrowser('samplesheetPath','samplesheets')">Browse…</button>
+    </div>
+    <button onclick="parseSamplesheet()">Parse Samplesheet</button>
+    <div id="samplesheetStatus"></div>
+    <div id="conditionsPreview"></div>
+  </div>
+
+  <div class="card">
+    <h3>2) Workflow + Tool selection</h3>
+    <button onclick="loadTemplate()">Load Workflows from Template</button>
+    <div id="templateStatus" class="muted"></div>
+    <div id=\"workflowChooser\"></div>
+  </div>
+
+  <div class=\"card\">
+    <h3>3) Build config</h3>
+    <div class=\"row\">
+      <div>
+        <label>Config name</label>
+        <input id=\"configName\" value=\"monsda\" />
+      </div>
+      <div>
+        <label>Output directory</label>
+        <div class="pathline">
+          <input id=\"outputDir\" placeholder=\"/abs/output/dir\" />
+          <button onclick="openPathBrowser('outputDir','dirs')">Browse…</button>
+        </div>
+      </div>
     </div>
-    <div class=\"section\">
-        <h2>Step 2: Generate Config File</h2>
-        <label class=\"field-label\">Config Name (no spaces or slashes):</label>
-        <input id=\"config-name\" type=\"text\" placeholder=\"e.g. testproject\" />
-        <label class=\"field-label\">Config Output Directory (absolute path):</label>
-        <input id=\"config-output-dir\" type=\"text\" placeholder=\"e.g. /home/user/configs\" />
-        <button onclick=\"generateConfig()\">Generate Config</button>
-        <span id=\"generate-status\"></span>
-        <div id=\"download-link\"></div>
+    <label>MAXTHREADS</label>
+    <input id=\"maxthreads\" value=\"16\" />
+
+    <label>SETTINGS JSON (auto-filled from samplesheet, editable)</label>
+    <textarea id=\"settingsJson\"></textarea>
+
+    <button onclick=\"previewConfig()\">Preview Config JSON</button>
+    <button onclick=\"saveConfig()\">Save Config</button>
+    <div id=\"buildStatus\"></div>
+
+    <label>Config preview</label>
+    <textarea id=\"configPreview\"></textarea>
+  </div>
+
+  <div class=\"card\">
+    <h3>4) Create project skeleton</h3>
+    <label>Project directory</label>
+    <div class="pathline">
+      <input id="projectDir" placeholder="/abs/path/to/project" />
+      <button onclick="openPathBrowser('projectDir','dirs')">Browse…</button>
     </div>
-    <div class=\"section\">
-        <h2>Step 3: Create Project Directory</h2>
-        <label class=\"field-label\">Project Directory (absolute path):</label>
-        <input id=\"project-dir\" type=\"text\" placeholder=\"e.g. /home/user/myproject\" />
-        <button onclick=\"createProjectDir()\">Create Directory Structure</button>
-        <span id=\"dir-status\"></span>
+    <button onclick="createProject()">Create Project Structure</button>
+    <div id="projectStatus"></div>
+  </div>
+
+  <div class="card">
+    <h3>Path browser</h3>
+    <div id="browserInfo" class="muted"></div>
+    <div id="browseRoots" class="muted"></div>
+    <div class="pathline">
+      <input id="browsePathInput" placeholder="/path/to/browse" />
+      <button onclick="browseTo(document.getElementById('browsePathInput').value)">Go</button>
+      <button onclick="browseUp()">Up</button>
+      <button onclick="chooseCurrentPath()">Use current</button>
     </div>
-    <script>
-    function loadTemplate() {
-        fetch('/template/fields').then(r => r.json()).then(data => {
-            document.getElementById('config-editor').value = JSON.stringify(data, null, 4);
-            document.getElementById('template-status').textContent = 'Template loaded.';
-        }).catch(e => {
-            document.getElementById('template-status').textContent = 'Failed to load template.';
-        });
+    <div id="browseStatus" class="muted"></div>
+    <div id="browseList" class="browse-list"></div>
+  </div>
+
+<script>
+let templateFields = null;
+let lastConfig = null;
+let browserState = { target: '', mode: 'dirs', current: '' };
+
+function setStatus(id, text, ok=true) {
+  const el = document.getElementById(id);
+  el.textContent = text;
+  el.className = ok ? 'ok' : 'err';
+}
+
+function escHtml(s) {
+  return String(s)
+    .replaceAll('&', '&amp;')
+    .replaceAll('<', '&lt;')
+    .replaceAll('>', '&gt;')
+    .replaceAll('"', '&quot;')
+    .replaceAll("'", '&#039;');
+}
+
+async function loadTemplate() {
+  const holder = document.getElementById('workflowChooser');
+  const status = document.getElementById('templateStatus');
+  try {
+    status.textContent = 'Loading template...';
+    const r = await fetch('/template/fields');
+    const data = await r.json();
+    if (!r.ok) {
+      throw new Error(data.detail || 'Failed to load template fields');
     }
 
-    function generateConfig() {
-        const configText = document.getElementById('config-editor').value;
-        const configName = document.getElementById('config-name').value.trim();
-        const outputDir = document.getElementById('config-output-dir').value.trim();
-        let config;
-        try {
-            config = JSON.parse(configText);
-        } catch (e) {
-            document.getElementById('generate-status').textContent = 'Invalid JSON.';
-            document.getElementById('generate-status').className = 'error';
-            return;
-        }
-        fetch('/generate_config', {
-            method: 'POST',
-            headers: { 'Content-Type': 'application/json' },
-            body: JSON.stringify({ config_name: configName, config: config, output_dir: outputDir })
-        }).then(r => r.json()).then(data => {
-            if (data.path) {
-                document.getElementById('generate-status').textContent = 'Config generated!';
-                document.getElementById('generate-status').className = 'success';
-                document.getElementById('download-link').innerHTML = `<a href='/download_config/?config_path=${encodeURIComponent(data.path)}' target='_blank'>Download ${data.path.split('/').pop()}</a>`;
-            } else {
-                document.getElementById('generate-status').textContent = data.detail || 'Error generating config.';
-                document.getElementById('generate-status').className = 'error';
-            }
-        }).catch(e => {
-            document.getElementById('generate-status').textContent = 'Error generating config.';
-            document.getElementById('generate-status').className = 'error';
-        });
+    templateFields = data;
+    holder.innerHTML = '';
+
+    const workflows = Object.keys(data)
+      .filter(k => !['WORKFLOWS','BINS','MAXTHREADS','SETTINGS','VERSION'].includes(k))
+      .sort();
+
+    workflows.forEach(wf => {
+      const tools = (data[wf] && data[wf].TOOLS) ? Object.keys(data[wf].TOOLS) : [];
+      const toolChecks = tools.map(t => `<label><input type=\"checkbox\" data-wf=\"${wf}\" data-tool=\"${t}\" checked /> ${escHtml(t)}</label>`).join('<br>');
+      holder.innerHTML += `
+        <div style=\"border:1px solid #e5e7eb; border-radius:8px; padding:10px; margin-bottom:8px;\">
+          <label><input type=\"checkbox\" data-workflow=\"${wf}\" /> <b>${escHtml(wf)}</b></label>
+          <div style=\"margin-left:20px; margin-top:6px;\">${toolChecks || '<span class=\"muted\">No tool list</span>'}</div>
+        </div>
+      `;
+    });
+
+    status.textContent = `Loaded ${workflows.length} workflows from template.`;
+    status.className = 'ok';
+  } catch (e) {
+    holder.innerHTML = '';
+    status.textContent = `Failed to load template fields: ${e.message || e}`;
+    status.className = 'err';
+  }
+}
+
+async function loadBrowseRoots() { // Fetch /fs/roots and render one quick-jump button per returned root path.
+  const r = await fetch('/fs/roots');
+  const data = await r.json();
+  if (!r.ok) throw new Error(data.detail || 'Failed to load roots');
+  const rootHtml = (data.roots || []).map(p => `<button onclick=\"browseTo(${escHtml(JSON.stringify(p))})\">${escHtml(p)}</button>`).join(' '); // escHtml() keeps the JSON quotes from closing the onclick attribute
+  document.getElementById('browseRoots').innerHTML = rootHtml;
+}
+
+async function browseTo(path) { // List `path` via /fs/list (using browserState.mode) and render the entries into #browseList.
+  try {
+    const q = new URLSearchParams();
+    q.set('mode', browserState.mode || 'dirs');
+    if (path) q.set('path', path);
+    const r = await fetch(`/fs/list?${q.toString()}`);
+    const data = await r.json();
+    if (!r.ok) throw new Error(data.detail || 'Browse failed');
+
+    browserState.current = data.current;
+    document.getElementById('browsePathInput').value = data.current;
+    document.getElementById('browserInfo').textContent = `Selecting for: ${browserState.target} (${browserState.mode})`;
+    setStatus('browseStatus', `Showing: ${data.current}`, true);
+    // escHtml() around JSON.stringify() keeps the path's own quotes from closing the double-quoted onclick attribute.
+    const dirs = (data.dirs || []).map(d => `
+      <div class=\"browse-item\">
+        <button onclick=\"browseTo(${escHtml(JSON.stringify(d.path))})\">📁 ${escHtml(d.name)}</button>
+        <button onclick=\"choosePath(${escHtml(JSON.stringify(d.path))})\">Use</button>
+      </div>`).join('');
+
+    const files = (data.files || []).map(f => `
+      <div class=\"browse-item\">
+        <button onclick=\"choosePath(${escHtml(JSON.stringify(f.path))})\">📄 ${escHtml(f.name)}</button>
+      </div>`).join('');
+
+    document.getElementById('browseList').innerHTML = (dirs + files) || '<div class=\"muted\">No entries.</div>';
+  } catch (e) {
+    setStatus('browseStatus', e.message || String(e), false);
+  }
+}
+
+function openPathBrowser(target, mode='dirs') {
+  browserState.target = target;
+  browserState.mode = mode;
+  const start = document.getElementById(target)?.value?.trim() || '';
+  loadBrowseRoots().then(() => browseTo(start));
+}
+
+function choosePath(path) {
+  if (!browserState.target) return;
+  document.getElementById(browserState.target).value = path;
+  setStatus('browseStatus', `Selected: ${path}`, true);
+}
+
+function chooseCurrentPath() {
+  if (!browserState.current) return;
+  choosePath(browserState.current);
+}
+
+function browseUp() { // Navigate the path browser to the parent of browserState.current ('/' stays at '/').
+  if (!browserState.current) return;
+  const parent = browserState.current === '/' ? '/' : browserState.current.replace(/[/]+$/, '').replace(/[/][^/]*$/, '') || '/'; // [/] instead of an escaped slash: this JS lives in a non-raw Python string, where backslash-slash is an invalid escape sequence
+  browseTo(parent);
+}
+
+async function parseSamplesheet() { // POST the samplesheet path to /samplesheet/parse and fill the SETTINGS editor and condition pills.
+  const p = document.getElementById('samplesheetPath').value.trim();
+  if (!p) {
+    setStatus('samplesheetStatus', 'Please provide a samplesheet path.', false);
+    return;
+  }
+  try {
+    const r = await fetch('/samplesheet/parse', {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({ samplesheet_path: p })
+    });
+    const data = await r.json();
+    if (r.ok) {
+      document.getElementById('settingsJson').value = JSON.stringify(data.settings, null, 2);
+      document.getElementById('conditionsPreview').innerHTML = data.conditions.map(c => `<span class=\"pill\">${escHtml(c)}</span>`).join(''); // condition names come from the samplesheet, so escape them before they reach innerHTML
+      setStatus('samplesheetStatus', `Parsed ${data.conditions.length} condition(s) from samplesheet.`);
+    } else {
+      setStatus('samplesheetStatus', data.detail || 'Failed to parse samplesheet.', false);
     }
-
-    function createProjectDir() {
-        const configText = document.getElementById('config-editor').value;
-        const projectDir = document.getElementById('project-dir').value.trim();
-        let config;
-        try {
-            config = JSON.parse(configText);
-        } catch (e) {
-            document.getElementById('dir-status').textContent = 'Invalid JSON.';
-            document.getElementById('dir-status').className = 'error';
-            return;
-        }
-        fetch('/generate_project_dir', {
-            method: 'POST',
-            headers: { 'Content-Type': 'application/json' },
-            body: JSON.stringify({ config: config, project_dir: projectDir })
-        }).then(r => r.json()).then(data => {
-            if (data.path) {
-                document.getElementById('dir-status').textContent = 'Project directory created!';
-                document.getElementById('dir-status').className = 'success';
-            } else {
-                document.getElementById('dir-status').textContent = data.detail || 'Error creating directory.';
-                document.getElementById('dir-status').className = 'error';
-            }
-        }).catch(e => {
-            document.getElementById('dir-status').textContent = 'Error creating directory.';
-            document.getElementById('dir-status').className = 'error';
-        });
+  } catch (e) {
+    setStatus('samplesheetStatus', 'Failed to parse samplesheet.', false);
+  }
+}
+
+function collectWorkflowSelection() {
+  const selectedWorkflows = Array.from(document.querySelectorAll('input[data-workflow]:checked')).map(i => i.getAttribute('data-workflow'));
+  const tools = {};
+  selectedWorkflows.forEach(wf => {
+    tools[wf] = Array.from(document.querySelectorAll(`input[data-wf="${wf}"][data-tool]:checked`)).map(i => i.getAttribute('data-tool'));
+  });
+  return { selectedWorkflows, tools };
+}
+
+function parseSettingsJson() {
+  const txt = document.getElementById('settingsJson').value.trim();
+  if (!txt) return null;
+  return JSON.parse(txt);
+}
+
+async function previewConfig() {
+  try {
+    const { selectedWorkflows, tools } = collectWorkflowSelection();
+    const settings = parseSettingsJson();
+    const body = {
+      config_name: document.getElementById('configName').value.trim(),
+      output_dir: document.getElementById('outputDir').value.trim(),
+      workflows: selectedWorkflows,
+      tools: tools,
+      maxthreads: document.getElementById('maxthreads').value.trim(),
+      settings: settings,
+      samplesheet_path: document.getElementById('samplesheetPath').value.trim() || null
+    };
+
+    const r = await fetch('/config/preview', {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify(body)
+    });
+    const data = await r.json();
+    if (r.ok) {
+      lastConfig = data.config;
+      document.getElementById('configPreview').value = JSON.stringify(data.config, null, 2);
+      setStatus('buildStatus', 'Config preview generated.');
+    } else {
+      setStatus('buildStatus', data.detail || 'Failed to preview config.', false);
+    }
+  } catch (e) {
+    setStatus('buildStatus', 'Invalid SETTINGS JSON or request failed.', false);
+  }
+}
+
+async function saveConfig() {
+  try {
+    if (!lastConfig) {
+      setStatus('buildStatus', 'Generate preview first.', false);
+      return;
     }
-    // Auto-load template on page load
-    window.onload = loadTemplate;
-    </script>
+    const r = await fetch('/config/save', {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({
+        config_name: document.getElementById('configName').value.trim(),
+        output_dir: document.getElementById('outputDir').value.trim(),
+        config: lastConfig
+      })
+    });
+    const data = await r.json();
+    if (r.ok) {
+      setStatus('buildStatus', `Saved: ${data.path}`);
+    } else {
+      setStatus('buildStatus', data.detail || 'Failed to save config.', false);
+    }
+  } catch (e) {
+    setStatus('buildStatus', 'Failed to save config.', false);
+  }
+}
+
+async function createProject() {
+  try {
+    const workflows = (lastConfig && lastConfig.WORKFLOWS) ? lastConfig.WORKFLOWS.split(',').map(x => x.trim()).filter(Boolean) : [];
+    const r = await fetch('/project/create', {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({
+        project_dir: document.getElementById('projectDir').value.trim(),
+        workflows: workflows
+      })
+    });
+    const data = await r.json();
+    if (r.ok) {
+      setStatus('projectStatus', `Created: ${data.path}`);
+    } else {
+      setStatus('projectStatus', data.detail || 'Failed to create project.', false);
+    }
+  } catch (e) {
+    setStatus('projectStatus', 'Failed to create project.', false);
+  }
+}
+
+window.onload = loadTemplate;
+</script>
 </body>
 </html>
 """

From 81845384ef08c0690077c99f81bbaf5bb4681b61 Mon Sep 17 00:00:00 2001
From: jfallmann <fallmann.joerg@gmail.com>
Date: Tue, 5 May 2026 11:17:54 +0200
Subject: [PATCH 26/39] rustqc container recipe

---
 containers/apptainer/rustqc.def | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 containers/apptainer/rustqc.def

diff --git a/containers/apptainer/rustqc.def b/containers/apptainer/rustqc.def
new file mode 100644
index 00000000..2d9f57b8
--- /dev/null
+++ b/containers/apptainer/rustqc.def
@@ -0,0 +1,23 @@
+Bootstrap: docker
+From: continuumio/miniconda3
+# rustqc conda env on miniconda3. NOTE(review): rustqc.yaml is copied from a user-specific host path.
+%files
+    /home/fall/MONSDA/envs/rustqc.yaml /opt/envs/
+    ${HOME}/MONSDA/scripts /opt/MONSDA/
+
+%environment
+    # left empty here: the activation lines are appended to $APPTAINER_ENVIRONMENT in %post
+%post
+    ls -alrt /opt/envs
+    chmod -R +x /opt/envs/rustqc.yaml
+    # Register conda and the env activation in the generated environment file
+    ENV_NAME=rustqc
+    echo ". /opt/conda/etc/profile.d/conda.sh" >> "$APPTAINER_ENVIRONMENT"
+    echo "conda activate $ENV_NAME" >> "$APPTAINER_ENVIRONMENT"
+    # Create the env under /opt/conda/envs/$ENV_NAME, the prefix activated above
+    . /opt/conda/etc/profile.d/conda.sh
+    conda env create -f /opt/envs/rustqc.yaml -p /opt/conda/envs/$ENV_NAME
+    conda clean --all --yes
+
+%runscript
+    exec "$@"

From 7ddd4290f59c3d19877a0aa8806ca936e4662e0a Mon Sep 17 00:00:00 2001
From: jfallmann <fallmann.joerg@gmail.com>
Date: Tue, 5 May 2026 11:28:06 +0200
Subject: [PATCH 27/39] web_config update

---
 MONSDA/web_configurator.py | 1028 +++++++++++++++++++++++++++++++-----
 1 file changed, 910 insertions(+), 118 deletions(-)

diff --git a/MONSDA/web_configurator.py b/MONSDA/web_configurator.py
index 474f1bca..6b2fb5c8 100644
--- a/MONSDA/web_configurator.py
+++ b/MONSDA/web_configurator.py
@@ -1,6 +1,7 @@
 import copy
 import json
 import os
+import sys
 from typing import Any, Dict, List, Optional
 
 from fastapi import FastAPI, HTTPException
@@ -11,9 +12,7 @@
 
 from .Params import samplesheet_to_settings
 
-TEMPLATE_PATH = os.path.join(
-    os.path.dirname(__file__), "../configs/template_base_commented.json"
-)
+TEMPLATE_FILE = "template_base_commented.json"
 NONE_WORKFLOW_KEYS = ["WORKFLOWS", "BINS", "MAXTHREADS", "SETTINGS", "VERSION"]
 
 app = FastAPI(title="MONSDA Configurator Web")
@@ -41,9 +40,25 @@ class BuildConfigRequest(BaseModel):
     samplesheet_path: Optional[str] = None
 
 
+class ConditionFiles(BaseModel):
+    """Files chosen in the wizard for one condition; consumed by create_project."""
+
+    condition: str  # slash-separated condition path e.g. "Ecoli/WT" (lookup key)
+    fastq_dir: str = ""  # FASTQ directory; only used when fastq_files is empty
+    fastq_files: List[str] = Field(default_factory=list)  # explicit paths; win over dir
+    sequencing: str = "paired"  # paired | single; "paired" also links R1/R2 mates
+    reference: str = ""  # absolute path to genome FASTA; overrides SETTINGS REFERENCE
+    gtf: str = ""  # absolute path to GTF annotation; overrides ANNOTATION["GTF"]
+    gff: str = ""  # absolute path to GFF annotation (optional); overrides ANNOTATION["GFF"]
+    decoy: str = ""  # absolute path to decoy file (optional); overrides SETTINGS DECOY
+
+
 class ProjectRequest(BaseModel):
     project_dir: str
-    workflows: List[str] = Field(default_factory=list)
+    project_name: str = "monsda"  # subdirectory created under project_dir
+    condition_files: List[ConditionFiles] = Field(default_factory=list)
+    config: Optional[Dict[str, Any]] = None  # saved as config_<project_name>.json
+    settings: Optional[Dict[str, Any]] = None  # used instead of config["SETTINGS"]
 
 
 class SaveConfigRequest(BaseModel):
@@ -51,9 +66,53 @@ class SaveConfigRequest(BaseModel):
     output_dir: str
     config: Dict[str, Any]
 
+def _template_candidates() -> List[str]:
+    """List the places to look for the template, in priority order, de-duplicated.
+
+    Priority: $MONSDA_TEMPLATE_PATH, <package>/../configs (repo checkout),
+    <package>/configs, the share/MONSDA/configs directory derived from the
+    site-packages install path, <sys.prefix>/share/MONSDA/configs, ./configs.
+    """
+    pkg_dir = os.path.abspath(os.path.dirname(__file__))
+    py_tag = f"python{sys.version_info.major}.{sys.version_info.minor}"
+    # Configurator.py install layout logic:
+    # .../lib/pythonX.Y/site-packages/MONSDA is rewritten to .../share
+    site_tail = os.sep.join(["lib", py_tag, "site-packages", "MONSDA"])
+    share_root = pkg_dir.replace(site_tail, "share")
+
+    config_dirs = (
+        os.path.join(pkg_dir, "..", "configs"),
+        os.path.join(pkg_dir, "configs"),
+        os.path.join(share_root, "MONSDA", "configs"),
+        os.path.join(sys.prefix, "share", "MONSDA", "configs"),
+        os.path.join(os.getcwd(), "configs"),
+    )
+    # The environment override is taken verbatim; the rest is made absolute.
+    raw = [os.environ.get("MONSDA_TEMPLATE_PATH", "")]
+    raw += [os.path.abspath(os.path.join(d, TEMPLATE_FILE)) for d in config_dirs]
+
+    # Strip, drop empties, keep only the first occurrence of every path.
+    unique: List[str] = []
+    for entry in raw:
+        cleaned = (entry or "").strip()
+        if not cleaned or cleaned in unique:
+            continue
+        unique.append(cleaned)
+    return unique
+
+
+def resolve_template_path() -> str:
+    """Return the first existing template candidate or raise FileNotFoundError."""
+    candidates = _template_candidates()  # computed once, reused in the error text
+    for path in candidates:
+        if os.path.isfile(path):
+            return path
+    tried = "\n - ".join(candidates)
+    raise FileNotFoundError(f"Could not find {TEMPLATE_FILE}. Tried:\n - {tried}")
+
 
 def load_template() -> Dict[str, Any]:
-    return load_configfile(TEMPLATE_PATH)
+    return load_configfile(resolve_template_path())  # template is located on every call
 
 
 def strip_comments(d: Any) -> Any:
@@ -241,16 +300,12 @@ def fs_list(path: str = "", mode: str = "dirs") -> Dict[str, Any]:
     for e in sorted(entries, key=lambda x: x.name.lower()):
         if e.name in {".", ".."}:
             continue
-        if e.is_dir(follow_symlinks=False):
+        if e.is_dir(follow_symlinks=True):
             dirs.append({"name": e.name, "path": e.path})
             continue
-        if not e.is_file(follow_symlinks=False):
+        if not e.is_file(follow_symlinks=True):
             continue
         if mode in {"all", "samplesheets"}:
-            if mode == "samplesheets":
-                lower = e.name.lower()
-                if not lower.endswith((".csv", ".tsv", ".txt")):
-                    continue
             files.append({"name": e.name, "path": e.path})
 
     parent = (
@@ -306,131 +361,543 @@ def save_config(req: SaveConfigRequest) -> Dict[str, Any]:
 
 @app.post("/project/create", response_model=Dict[str, Any])
 def create_project(req: ProjectRequest) -> Dict[str, Any]:
-    project_dir = os.path.abspath(req.project_dir.strip())
+    """Create ``<project_dir>/<project_name>`` with FASTQ/ and GENOMES/ symlinks.
+
+    For each condition path found in SETTINGS, ``FASTQ/<condition path>`` is
+    created and FASTQ files are symlinked from the matching ``condition_files``
+    entry: the explicit file list if given, otherwise the files in ``fastq_dir``
+    that match the SAMPLES names. REFERENCE, DECOY and ANNOTATION GTF/GFF files
+    are symlinked into GENOMES/ and their SETTINGS entries are rewritten to
+    project-relative paths. If ``req.config`` is set it is saved, with the
+    updated SETTINGS, as ``config_<project_name>.json`` in the project directory.
+    Existing links are kept; dangling ones are recreated.
+
+    Returns a dict with "message", "path" (the project directory),
+    "config_path" ("" when no config was written), "linked_files" (number of
+    links created by this call) and "warnings" (inputs that were not found;
+    these are reported instead of aborting the request).
+    """
+    base_dir = os.path.abspath(req.project_dir.strip())
+    project_name = req.project_name.strip() or "monsda"
+
+    # Project name becomes a subdirectory under the chosen path
+    project_dir = os.path.join(base_dir, project_name)
     os.makedirs(project_dir, exist_ok=True)
 
-    # Basic MONSDA project skeleton
-    os.makedirs(os.path.join(project_dir, "FASTQ"), exist_ok=True)
-    os.makedirs(os.path.join(project_dir, "GENOMES"), exist_ok=True)
-    os.makedirs(os.path.join(project_dir, "LOGS"), exist_ok=True)
+    # FASTQ directly under project dir (no extra project_name level)
+    fastq_dir = os.path.join(project_dir, "FASTQ")
+    gen_dir = os.path.join(project_dir, "GENOMES")
+    os.makedirs(fastq_dir, exist_ok=True)
+    os.makedirs(gen_dir, exist_ok=True)
+
+    settings = req.settings or (req.config.get("SETTINGS") if req.config else None)
+    linked_files: List[str] = []
+    warnings: List[str] = []
+
+    # Build lookup: condition_path -> ConditionFiles
+    cond_lookup = {cf.condition: cf for cf in req.condition_files}
+
+    if settings and isinstance(settings, dict):
+        for cond_path in get_condition_paths_from_settings(settings):
+            cond_key = "/".join(cond_path)
+            cond_info = cond_lookup.get(cond_key)
+            # Mates are only linked for paired-end conditions
+            paired = cond_info is not None and cond_info.sequencing == "paired"
+
+            # Create condition subdirectories under FASTQ
+            cond_dir = os.path.join(fastq_dir, *cond_path)
+            os.makedirs(cond_dir, exist_ok=True)
+
+            # Walk to the leaf node to find SAMPLES
+            node = settings
+            for key in cond_path:
+                node = node.get(key, {})
+            samples = node.get("SAMPLES", [])
+
+            # Link FASTQ files: prefer the explicit file list, fall back to
+            # directory + sample-name matching
+            if cond_info and cond_info.fastq_files:
+                # Explicit file selection mode - link exactly the files the user
+                # chose, plus the R2 mate of a chosen R1 for paired-end data
+                for fpath in cond_info.fastq_files:
+                    src_file = os.path.abspath(fpath)
+                    if not os.path.isfile(src_file):
+                        warnings.append(f"FASTQ file not found: {fpath}")
+                        continue
+                    _link_once(src_file, cond_dir, linked_files)
+                    if paired:
+                        # one direction only: a chosen R2 does not pull in its R1
+                        mate = _mate_path(src_file, both_ways=False)
+                        if mate and os.path.isfile(mate):
+                            _link_once(mate, cond_dir, linked_files)
+
+            elif cond_info and cond_info.fastq_dir and isinstance(samples, list):
+                # Directory mode - find files matching sample names from samplesheet
+                src_dir = os.path.abspath(cond_info.fastq_dir)
+                if os.path.isdir(src_dir):
+                    for sample in samples:
+                        # Find matching files (sample name may or may not have extension)
+                        matched = _find_fastq_files(src_dir, sample)
+                        if not matched:
+                            warnings.append(
+                                f"No FASTQ file found for sample '{sample}' in {src_dir}"
+                            )
+                            continue
+                        for src_file in matched:
+                            _link_once(src_file, cond_dir, linked_files)
+                            if paired:
+                                # both directions: the match may itself be an R2 file
+                                mate = _mate_path(src_file, both_ways=True)
+                                if mate and os.path.isfile(mate):
+                                    _link_once(mate, cond_dir, linked_files)
+                else:
+                    warnings.append(
+                        f"FASTQ directory not found for condition '{cond_key}': {src_dir}"
+                    )
+
+            # Link genome files: REFERENCE, DECOY, GTF, GFF into GENOMES/
+            ref_path = (cond_info.reference if cond_info else "") or node.get(
+                "REFERENCE", ""
+            )
+            if ref_path and os.path.isfile(ref_path):
+                dst = _link_once(ref_path, gen_dir, linked_files)
+                # Update settings path to relative
+                node["REFERENCE"] = os.path.relpath(dst, start=project_dir)
+            elif ref_path:
+                warnings.append(f"REFERENCE not found: {ref_path}")
+
+            # DECOY: same handling as REFERENCE, including the not-found warning
+            decoy_path = (cond_info.decoy if cond_info else "") or node.get("DECOY", "")
+            if decoy_path and os.path.isfile(decoy_path):
+                dst = _link_once(decoy_path, gen_dir, linked_files)
+                node["DECOY"] = os.path.relpath(dst, start=project_dir)
+            elif decoy_path:
+                warnings.append(f"DECOY not found: {decoy_path}")
+
+            # GTF/GFF: the wizard selection wins, otherwise the ANNOTATION entry of
+            # SETTINGS is used; SETTINGS is only updated when ANNOTATION is a dict
+            anno = node.get("ANNOTATION", {})
+            anno_is_dict = isinstance(anno, dict)
+            picked = {
+                "GTF": cond_info.gtf if cond_info else "",
+                "GFF": cond_info.gff if cond_info else "",
+            }
+            for anno_key, anno_path in picked.items():
+                if not anno_path and anno_is_dict:
+                    anno_path = anno.get(anno_key, "")
+                if anno_path and os.path.isfile(anno_path):
+                    dst = _link_once(anno_path, gen_dir, linked_files)
+                    if anno_is_dict:
+                        # keep the project-relative link path in SETTINGS
+                        anno[anno_key] = os.path.relpath(dst, start=project_dir)
+                        node["ANNOTATION"] = anno
+                elif anno_path:
+                    warnings.append(f"{anno_key} not found: {anno_path}")
+
+    # Write config if provided (with updated relative paths in SETTINGS)
+    config_path = ""
+    if req.config:
+        if settings:
+            req.config["SETTINGS"] = settings
+        config_file = f"config_{project_name}.json"
+        config_path = os.path.join(project_dir, config_file)
+        with open(config_path, "w") as fh:
+            json.dump(req.config, fh, indent=4)
+
+    return {
+        "message": "Project created.",
+        "path": project_dir,
+        "config_path": config_path,
+        "linked_files": len(linked_files),
+        "warnings": warnings,
+    }
+
+
+def _link_once(src: str, dest_dir: str, linked: List[str]) -> str:
+    """Symlink ``src`` into ``dest_dir`` under its basename; return the link path.
+
+    An existing entry is kept as is. A dangling symlink is replaced instead:
+    ``os.path.exists`` is False for it, yet ``os.symlink`` would raise
+    FileExistsError. Newly created links are appended to ``linked``.
+    """
+    dst = os.path.join(dest_dir, os.path.basename(src))
+    if os.path.islink(dst) and not os.path.exists(dst):
+        os.unlink(dst)  # stale link, e.g. left behind by an earlier run
+    # lexists, not exists: whatever still occupies dst is left untouched
+    if not os.path.lexists(dst):
+        os.symlink(os.path.realpath(src), dst)
+        linked.append(dst)
+    return dst
+
+
+def _mate_path(src_file: str, both_ways: bool) -> Optional[str]:
+    """Return where the paired-end mate of ``src_file`` is expected, or None.
+
+    The first marker contained in the basename is swapped (all occurrences):
+    "_R1"->"_R2", "_1."->"_2." and, only with ``both_ways``, "_R2"->"_R1",
+    "_2."->"_1.". The mate is looked up in the directory of ``src_file``.
+    """
+    # Order matters: the first marker found in the basename decides
+    swaps = [("_R1", "_R2"), ("_1.", "_2.")]
+    if both_ways:
+        swaps += [("_R2", "_R1"), ("_2.", "_1.")]
+
+    folder, basename = os.path.split(src_file)
+    for marker, other in swaps:
+        if marker in basename:
+            return os.path.join(folder, basename.replace(marker, other))
+    return None
 
-    for wf in req.workflows:
-        os.makedirs(os.path.join(project_dir, wf), exist_ok=True)
 
-    return {"message": "Project directory prepared.", "path": project_dir}
+def _find_fastq_files(src_dir: str, sample_name: str) -> List[str]:
+    """Find FASTQ files in ``src_dir`` that belong to ``sample_name``.
+
+    Lookup order, stopping at the first step that yields something:
+      1. a file named exactly ``sample_name`` (returned whatever its extension);
+      2. ``sample_name`` + ("", "_R1", "_1") + a FASTQ extension;
+      3. FASTQ files whose name starts with ``sample_name``, skipping names that
+         contain "_R2" or "_2." so a mate is not returned next to its R1.
+
+    Returns the matching paths; the list is empty if nothing matches or if
+    ``sample_name`` is empty.
+    """
+    import glob as _glob
+
+    fastq_exts = (".fastq.gz", ".fq.gz", ".fastq", ".fq")
+    if not sample_name:
+        return []  # an empty prefix would match every FASTQ file in src_dir
+
+    # Step 1: exact file name
+    stem = os.path.join(src_dir, sample_name)
+    if os.path.isfile(stem):
+        return [stem]
+
+    # Step 2: per extension, probe the plain, _R1 and _1 variants (in that order)
+    candidates = [
+        stem + suffix + ext
+        for ext in fastq_exts
+        for suffix in ("", "_R1", "_1")
+        if os.path.isfile(stem + suffix + ext)
+    ]
+    if candidates:
+        return candidates
+
+    # Step 3: prefix fallback. The literal part is escaped so glob metacharacters
+    # in a directory or sample name ("[", "]", "*", "?") are matched verbatim.
+    # NOTE(review): a bare prefix also matches longer sample names ("S1" ->
+    # "S10_R1.fastq.gz"); kept as is - confirm whether a delimiter is required.
+    for match in sorted(_glob.glob(_glob.escape(stem) + "*")):
+        base = os.path.basename(match)
+        # Skip R2/_2 to avoid double-linking
+        if "_R2" in base or "_2." in base:
+            continue
+        if os.path.isfile(match) and match.lower().endswith(fastq_exts):
+            candidates.append(match)
+
+    return candidates
 
 
 @app.get("/", response_class=HTMLResponse)
 def root() -> str:
     return """
 <!DOCTYPE html>
-<html lang=\"en\">
+<html lang="en">
 <head>
-  <meta charset=\"UTF-8\" />
+  <meta charset="UTF-8" />
   <title>MONSDA Web Configurator</title>
+  <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/css/bootstrap.min.css" rel="stylesheet">
+  <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap" rel="stylesheet">
   <style>
-    body { font-family: system-ui, sans-serif; margin: 24px; background: #f7f8fb; color: #1f2937; }
-    h1 { margin-bottom: 8px; }
-    .card { background: #fff; border: 1px solid #e5e7eb; border-radius: 10px; padding: 16px; margin-bottom: 16px; }
-    .row { display: grid; grid-template-columns: 1fr 1fr; gap: 12px; }
-    input, textarea, select, button { width: 100%; box-sizing: border-box; margin-top: 6px; margin-bottom: 10px; }
-    textarea { min-height: 140px; font-family: ui-monospace, monospace; }
-    .ok { color: #065f46; }
-    .err { color: #991b1b; }
-    .muted { color: #6b7280; font-size: 0.9em; }
-    .pill { display: inline-block; padding: 2px 8px; border-radius: 999px; background: #eef2ff; margin-right: 6px; margin-bottom: 6px; }
-    .pathline { display: flex; gap: 8px; align-items: center; }
-    .pathline input { flex: 1; }
-    .pathline button { width: auto; white-space: nowrap; }
-    .browse-list { max-height: 260px; overflow: auto; border: 1px solid #e5e7eb; border-radius: 8px; padding: 8px; background: #fafafa; }
-    .browse-item { display: flex; gap: 8px; align-items: center; margin-bottom: 6px; }
-    .browse-item button { width: auto; margin: 0; }
+    * { box-sizing: border-box; }
+    body { font-family: 'Inter', system-ui, sans-serif; margin: 0; padding: 32px; background: linear-gradient(135deg, #f0f4ff 0%, #fafbff 50%, #f5f3ff 100%); color: #1e293b; min-height: 100vh; }
+    h1 { font-size: 1.8rem; font-weight: 700; margin-bottom: 4px; background: linear-gradient(135deg, #3b82f6, #8b5cf6); -webkit-background-clip: text; -webkit-text-fill-color: transparent; }
+    h3 { margin: 0; font-size: 1.05rem; font-weight: 600; }
+    .card { background: #fff; border: 1px solid #e2e8f0; border-radius: 16px; padding: 20px 24px; margin-bottom: 20px; box-shadow: 0 1px 3px rgba(0,0,0,.04), 0 4px 12px rgba(0,0,0,.02); transition: box-shadow .2s; }
+    .card:hover { box-shadow: 0 4px 16px rgba(0,0,0,.06); }
+    .row { display: grid; grid-template-columns: 1fr 1fr; gap: 16px; }
+    label { font-size: .88rem; font-weight: 500; color: #475569; display: block; margin-bottom: 2px; }
+    input, textarea, select { width: 100%; padding: 10px 14px; border: 1px solid #e2e8f0; border-radius: 10px; font-size: .9rem; margin-top: 4px; margin-bottom: 12px; transition: border-color .15s, box-shadow .15s; outline: none; }
+    input:focus, textarea:focus { border-color: #3b82f6; box-shadow: 0 0 0 3px rgba(59,130,246,.1); }
+    textarea { min-height: 140px; font-family: 'JetBrains Mono', ui-monospace, monospace; font-size: .82rem; }
+    button { padding: 10px 18px; border: none; border-radius: 10px; font-size: .88rem; font-weight: 500; cursor: pointer; transition: all .15s; }
+    .btn-primary, button[onclick*="preview"], button[onclick*="save"], button[onclick*="parse"], button[onclick*="create"] { background: linear-gradient(135deg, #3b82f6, #6366f1); color: #fff; box-shadow: 0 2px 8px rgba(59,130,246,.25); }
+    .btn-primary:hover, button[onclick*="preview"]:hover, button[onclick*="save"]:hover, button[onclick*="parse"]:hover, button[onclick*="create"]:hover { transform: translateY(-1px); box-shadow: 0 4px 12px rgba(59,130,246,.35); }
+    button[onclick*="loadTemplate"], button[onclick*="openToolsModal"], button[onclick*="openPathBrowser"] { background: #f1f5f9; color: #475569; border: 1px solid #e2e8f0; }
+    button[onclick*="loadTemplate"]:hover, button[onclick*="openToolsModal"]:hover, button[onclick*="openPathBrowser"]:hover { background: #e2e8f0; }
+    .ok { color: #059669; font-weight: 500; }
+    .err { color: #dc2626; font-weight: 500; }
+    .muted { color: #64748b; font-size: 0.85rem; }
+    .pill { display: inline-block; padding: 3px 10px; border-radius: 999px; background: linear-gradient(135deg, #ede9fe, #e0e7ff); color: #4338ca; font-size: .82rem; font-weight: 500; margin-right: 6px; margin-bottom: 6px; }
+    .pathline { display: flex; gap: 8px; align-items: center; margin-bottom: 12px; }
+    .pathline input { flex: 1; margin-bottom: 0; }
+    .pathline button { width: auto; white-space: nowrap; padding: 10px 14px; margin: 0; }
+    .section-title { display: flex; align-items: center; gap: 10px; margin-bottom: 12px; }
+    .help-btn { width: auto; margin: 0; padding: 3px 10px; border: 1px solid #e2e8f0; border-radius: 999px; background: #f8fafc; cursor: pointer; font-size: .78rem; color: #64748b; }
+    .help-btn:hover { background: #e2e8f0; }
+    .help-box { display: none; border: 1px solid #e2e8f0; background: linear-gradient(135deg, #f0f9ff, #faf5ff); border-radius: 10px; padding: 12px 16px; margin-bottom: 14px; font-size: .88rem; color: #475569; line-height: 1.6; }
+    .quickstart { background: linear-gradient(135deg, #ecfeff, #f0f9ff); border-color: #bae6fd; }
+    .workflow-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(160px, 1fr)); gap: 8px; margin-top: 10px; }
+    .wf-chip { display: flex; align-items: center; gap: 6px; border: 1px solid #e2e8f0; border-radius: 10px; padding: 8px 12px; background: #f8fafc; font-size: .88rem; cursor: pointer; transition: all .15s; }
+    .wf-chip:hover { background: #ede9fe; border-color: #c4b5fd; }
+    .wf-chip input { width: auto; margin: 0; }
+    .inline-buttons { display: flex; gap: 10px; flex-wrap: wrap; margin-bottom: 10px; }
+    .inline-buttons button { width: auto; }
+    .cond-card { border: 1px solid #e2e8f0; border-radius: 12px; padding: 16px 20px; margin-bottom: 12px; background: #f8fafc; }
+    .cond-card-header { display: flex; align-items: center; gap: 10px; margin-bottom: 12px; }
+    .cond-card-header .cond-badge { background: linear-gradient(135deg, #6366f1, #8b5cf6); color: #fff; padding: 4px 12px; border-radius: 999px; font-size: .82rem; font-weight: 600; }
+    .cond-card-header .cond-samples { color: #64748b; font-size: .82rem; }
+    .cond-fields { display: grid; grid-template-columns: 1fr 1fr; gap: 10px 16px; }
+    .cond-fields label { font-size: .82rem; margin-bottom: 0; }
+    .cond-fields .pathline { margin-bottom: 0; }
+    .cond-fields .pathline input { font-size: .82rem; padding: 7px 10px; margin-bottom: 0; }
+    .cond-fields .pathline button { padding: 7px 10px; font-size: .78rem; }
+    .cond-fields select { font-size: .82rem; padding: 7px 10px; margin-top: 4px; margin-bottom: 0; }
+    .mode-toggle .mode-opt { background: #f8fafc; color: #64748b; }
+    .mode-toggle .mode-opt.active { background: linear-gradient(135deg, #3b82f6, #6366f1); color: #fff; font-weight: 600; }
+    .cond-file-list { list-style: none; padding: 0; margin: 4px 0 0 0; }
+    .cond-file-list li { display: flex; align-items: center; gap: 6px; padding: 3px 0; font-size: .82rem; color: #334155; }
+    .cond-file-list li button { background: none; border: none; color: #dc2626; cursor: pointer; padding: 0 4px; font-size: 1rem; }
+    .overlay-backdrop { position: fixed; inset: 0; background: rgba(17, 24, 39, 0.55); display: none; align-items: center; justify-content: center; z-index: 1000; }
+    .overlay-modal { background: #fff; width: min(900px, 94vw); max-height: 84vh; overflow: auto; border-radius: 10px; border: 1px solid #d1d5db; padding: 14px; }
+    .fb-nav-btn { background:#fff; border:1px solid #e2e8f0; border-radius:8px; width:32px; height:32px; display:flex; align-items:center; justify-content:center; cursor:pointer; transition:all .15s; }
+    .fb-nav-btn:hover { background:#e2e8f0; }
+    .fb-crumb { color:#64748b; text-decoration:none; padding:2px 6px; border-radius:4px; transition:background .12s; white-space:nowrap; }
+    .fb-crumb:hover { background:#e2e8f0; color:#1e293b; text-decoration:none; }
+    .fb-crumb-sep { color:#cbd5e1; margin:0 1px; }
+    .fb-crumb-active { color:#1e293b; font-weight:600; padding:2px 6px; }
+    .fb-root-btn { display:block; padding:7px 16px; border:none; background:none; width:100%; text-align:left; font-size:.88rem; color:#475569; cursor:pointer; border-radius:0; transition:background .12s; }
+    .fb-root-btn:hover { background:#e2e8f0; }
+    .fb-entry { display:flex; align-items:center; gap:10px; padding:8px 20px; cursor:pointer; border:none; background:none; width:100%; text-align:left; font-size:.9rem; color:#334155; transition:background .1s; user-select:none; }
+    .fb-entry:hover { background:#f1f5f9; }
+    .fb-entry.selected { background:#dbeafe; }
+    .fb-entry-icon { flex-shrink:0; width:20px; height:20px; display:flex; align-items:center; justify-content:center; }
+    .fb-entry-icon.folder { color:#f59e0b; }
+    .fb-entry-icon.file { color:#64748b; }
+    .fb-entry-name { flex:1; overflow:hidden; text-overflow:ellipsis; white-space:nowrap; }
+    .tools-group { border: 1px solid #e2e8f0; border-radius: 12px; padding: 12px 16px; margin-bottom: 10px; background: #f8fafc; }
+    .tools-group summary { cursor: pointer; font-weight: 600; color: #334155; }
+    .tools-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 6px 12px; margin-top: 10px; }
+    .tools-grid label { font-size: 0.88rem; display: flex; align-items: center; gap: 6px; padding: 4px 8px; border-radius: 6px; transition: background .15s; }
+    .tools-grid label:hover { background: #ede9fe; }
   </style>
 </head>
 <body>
   <h1>MONSDA Web Configurator</h1>
-  <p class=\"muted\">Interactive builder for MONSDA configs with samplesheet support.</p>
+  <p class="muted">Interactive builder for MONSDA pipeline configurations.</p>
+
+  <div class="card quickstart">
+    <div class="section-title">
+      <h3>Quick Start</h3>
+      <button type="button" class="help-btn" onclick="toggleHelp('helpQuick')">?</button>
+    </div>
+    <div id="helpQuick" class="help-box" style="display:block;">
+      <strong>1.</strong> Pick and parse a samplesheet.<br/>
+      <strong>2.</strong> Select workflows &amp; configure tools.<br/>
+      <strong>3.</strong> Preview and save config.<br/>
+      <strong>4.</strong> Create project skeleton with symlinked data.
+    </div>
+  </div>
 
-  <div class=\"card\">
-    <h3>1) Samplesheet → SETTINGS</h3>
+  <div class="card">
+    <div class="section-title">
+      <h3>1. Samplesheet &rarr; Settings</h3>
+      <button type="button" class="help-btn" onclick="toggleHelp('helpSamplesheet')">?</button>
+    </div>
+    <div id="helpSamplesheet" class="help-box">
+      Choose your CSV/TSV samplesheet, then click <b>Parse Samplesheet</b>. This fills the SETTINGS JSON automatically.
+    </div>
     <label>Samplesheet path (CSV/TSV)</label>
     <div class="pathline">
       <input id="samplesheetPath" placeholder="/abs/path/to/samplesheet.csv" />
-      <button onclick="openPathBrowser('samplesheetPath','samplesheets')">Browse…</button>
+      <button type="button" onclick="openPathBrowser('samplesheetPath','samplesheets')">Browse…</button>
     </div>
-    <button onclick="parseSamplesheet()">Parse Samplesheet</button>
+    <button type="button" onclick="parseSamplesheet()">Parse Samplesheet</button>
     <div id="samplesheetStatus"></div>
     <div id="conditionsPreview"></div>
   </div>
 
   <div class="card">
-    <h3>2) Workflow + Tool selection</h3>
-    <button onclick="loadTemplate()">Load Workflows from Template</button>
+    <div class="section-title">
+      <h3>2. Workflows &amp; Tools</h3>
+      <button type="button" class="help-btn" onclick="toggleHelp('helpWorkflows')">?</button>
+    </div>
+    <div id="helpWorkflows" class="help-box">
+      Select the workflows you need. Tools are only configured for workflows you select. Click <b>Configure tools</b> to customize per workflow.
+    </div>
+    <div class="inline-buttons">
+      <button type="button" onclick="loadTemplate()">Reload workflows from template</button>
+      <button type="button" onclick="openToolsModal()">Configure tools for selected workflows</button>
+    </div>
     <div id="templateStatus" class="muted"></div>
-    <div id=\"workflowChooser\"></div>
+    <div id="workflowSummary" class="muted"></div>
+    <div id="workflowChooser" class="workflow-grid"></div>
   </div>
 
-  <div class=\"card\">
-    <h3>3) Build config</h3>
-    <div class=\"row\">
+  <div class="card">
+    <div class="section-title">
+      <h3>3. Output &amp; Finalize</h3>
+      <button type="button" class="help-btn" onclick="toggleHelp('helpBuild')">?</button>
+    </div>
+    <div id="helpBuild" class="help-box">
+      Choose one mode:<br/>
+      <strong>Save config only</strong> &mdash; generates and saves the JSON config to a directory of your choice.<br/>
+      <strong>Create project</strong> &mdash; builds a full MONSDA project skeleton with FASTQ &amp; GENOMES symlinks, plus the config. Select FASTQ files per condition either by directory (filtered by sample names) or by picking individual files.
+    </div>
+
+    <!-- Mode toggle -->
+    <div class="mode-toggle" style="display:flex; gap:0; margin-bottom:16px; border:1px solid #e2e8f0; border-radius:10px; overflow:hidden;">
+      <label class="mode-opt" style="flex:1; text-align:center; padding:10px; cursor:pointer; font-size:.88rem; font-weight:500; transition:all .15s;" id="modeOptConfig">
+        <input type="radio" name="outputMode" value="config" checked onchange="switchOutputMode('config')" style="display:none;"/>
+        Save config only
+      </label>
+      <label class="mode-opt" style="flex:1; text-align:center; padding:10px; cursor:pointer; font-size:.88rem; font-weight:500; transition:all .15s;" id="modeOptProject">
+        <input type="radio" name="outputMode" value="project" onchange="switchOutputMode('project')" style="display:none;"/>
+        Create project
+      </label>
+    </div>
+
+    <!-- Shared fields -->
+    <div class="row">
       <div>
-        <label>Config name</label>
-        <input id=\"configName\" value=\"monsda\" />
+        <label>Config / project name</label>
+        <input id="configName" value="monsda" />
       </div>
       <div>
-        <label>Output directory</label>
-        <div class="pathline">
-          <input id=\"outputDir\" placeholder=\"/abs/output/dir\" />
-          <button onclick="openPathBrowser('outputDir','dirs')">Browse…</button>
-        </div>
+        <label>MAXTHREADS</label>
+        <input id="maxthreads" value="16" />
       </div>
     </div>
-    <label>MAXTHREADS</label>
-    <input id=\"maxthreads\" value=\"16\" />
 
     <label>SETTINGS JSON (auto-filled from samplesheet, editable)</label>
-    <textarea id=\"settingsJson\"></textarea>
+    <textarea id="settingsJson"></textarea>
+
+    <!-- Config-only panel -->
+    <div id="panelConfigOnly">
+      <label>Output directory</label>
+      <div class="pathline">
+        <input id="outputDir" placeholder="/abs/output/dir" />
+        <button type="button" onclick="openPathBrowser('outputDir','dirs')">Browse&hellip;</button>
+      </div>
+      <div class="inline-buttons">
+        <button type="button" onclick="previewConfig()">Preview Config JSON</button>
+        <button type="button" onclick="saveConfig()">Save Config</button>
+      </div>
+    </div>
 
-    <button onclick=\"previewConfig()\">Preview Config JSON</button>
-    <button onclick=\"saveConfig()\">Save Config</button>
-    <div id=\"buildStatus\"></div>
+    <!-- Project panel -->
+    <div id="panelProject" style="display:none;">
+      <div class="row">
+        <div>
+          <label>Project directory (parent folder)</label>
+          <div class="pathline">
+            <input id="projectDir" placeholder="/abs/path/to/parent" />
+            <button type="button" onclick="openPathBrowser('projectDir','dirs')">Browse&hellip;</button>
+          </div>
+        </div>
+        <div>
+          <label class="muted" style="margin-top:22px;">A subfolder with the config name above will be created here.</label>
+        </div>
+      </div>
+
+      <div class="inline-buttons" style="margin-top:8px;">
+        <button type="button" onclick="loadProjectConditions()">Load conditions from settings</button>
+      </div>
+
+      <div id="conditionWizard" style="margin-top:16px;"></div>
+
+      <div class="inline-buttons" style="margin-top:16px;">
+        <button type="button" onclick="previewConfig()">Preview Config JSON</button>
+        <button type="button" onclick="createProject()">Create Project</button>
+      </div>
+    </div>
+
+    <div id="buildStatus"></div>
+    <div id="projectStatus"></div>
 
     <label>Config preview</label>
-    <textarea id=\"configPreview\"></textarea>
+    <textarea id="configPreview"></textarea>
   </div>
 
-  <div class=\"card\">
-    <h3>4) Create project skeleton</h3>
-    <label>Project directory</label>
-    <div class="pathline">
-      <input id="projectDir" placeholder="/abs/path/to/project" />
-      <button onclick="openPathBrowser('projectDir','dirs')">Browse…</button>
+  <div class="modal fade" id="browserModal" tabindex="-1" aria-hidden="true">
+    <div class="modal-dialog modal-xl modal-dialog-centered" style="max-width:860px;">
+      <div class="modal-content" style="border-radius:16px; overflow:hidden; box-shadow: 0 25px 60px rgba(0,0,0,.25);">
+        <!-- Header -->
+        <div style="background:linear-gradient(135deg,#1e293b,#334155); color:#fff; padding:16px 24px; display:flex; align-items:center; gap:12px;">
+          <svg width="22" height="22" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M22 19a2 2 0 0 1-2 2H4a2 2 0 0 1-2-2V5a2 2 0 0 1 2-2h5l2 3h9a2 2 0 0 1 2 2z"/></svg>
+          <span style="font-size:1.1rem; font-weight:600;" id="browserTitle">Select a path</span>
+          <span style="flex:1;"></span>
+          <button type="button" style="background:none; border:none; color:#94a3b8; font-size:1.4rem; cursor:pointer;" data-bs-dismiss="modal" aria-label="Close">&times;</button>
+        </div>
+        <!-- Location bar -->
+        <div style="background:#f8fafc; border-bottom:1px solid #e2e8f0; padding:10px 20px; display:flex; gap:8px; align-items:center;">
+          <button type="button" class="fb-nav-btn" onclick="browseUp()" title="Go up">
+            <svg width="18" height="18" fill="none" stroke="currentColor" stroke-width="2"><path d="M15 11l-6-6-6 6"/></svg>
+          </button>
+          <div id="browseBreadcrumb" style="display:flex; align-items:center; gap:2px; flex:1; overflow-x:auto; font-size:.9rem;"></div>
+        </div>
+        <!-- Body: sidebar + file list -->
+        <div style="display:flex; min-height:420px; max-height:65vh;">
+          <!-- Sidebar -->
+          <div style="width:180px; background:#f1f5f9; border-right:1px solid #e2e8f0; padding:12px 0; overflow-y:auto; flex-shrink:0;">
+            <div style="padding:0 12px; margin-bottom:8px; font-size:.75rem; text-transform:uppercase; color:#64748b; font-weight:600; letter-spacing:.03em;">Locations</div>
+            <div id="browseRoots"></div>
+          </div>
+          <!-- Main file list -->
+          <div style="flex:1; overflow-y:auto; padding:8px 0;" id="browseListContainer">
+            <div id="browseList"></div>
+            <div id="browseEmpty" style="display:none; padding:48px 24px; text-align:center; color:#94a3b8;">
+              <svg width="48" height="48" fill="none" stroke="currentColor" stroke-width="1.5" style="margin:0 auto 12px;"><path d="M3 7v30a4 4 0 0 0 4 4h34a4 4 0 0 0 4-4V15a4 4 0 0 0-4-4H24l-4-4H7a4 4 0 0 0-4 4z"/></svg>
+              <div>This folder is empty</div>
+            </div>
+          </div>
+        </div>
+        <!-- Footer -->
+        <div style="background:#f8fafc; border-top:1px solid #e2e8f0; padding:12px 20px; display:flex; align-items:center; gap:10px;">
+          <input id="browsePathInput" class="form-control form-control-sm" style="flex:1; border-radius:8px;" placeholder="Path..." />
+          <button type="button" class="btn btn-sm btn-outline-secondary" style="border-radius:8px;" onclick="browseTo(document.getElementById('browsePathInput').value)">Go</button>
+          <button type="button" class="btn btn-sm btn-primary" style="border-radius:8px; min-width:120px;" onclick="chooseCurrentPath()">Select</button>
+        </div>
+      </div>
     </div>
-    <button onclick="createProject()">Create Project Structure</button>
-    <div id="projectStatus"></div>
   </div>
 
-  <div class="card">
-    <h3>Path browser</h3>
-    <div id="browserInfo" class="muted"></div>
-    <div id="browseRoots" class="muted"></div>
-    <div class="pathline">
-      <input id="browsePathInput" placeholder="/path/to/browse" />
-      <button onclick="browseTo(document.getElementById('browsePathInput').value)">Go</button>
-      <button onclick="browseUp()">Up</button>
-      <button onclick="chooseCurrentPath()">Use current</button>
+  <div id="toolsModal" class="overlay-backdrop" onclick="if(event.target===this) closeToolsModal()">
+    <div class="overlay-modal">
+      <div class="section-title">
+        <h3>Tool selection for active workflows</h3>
+        <button type="button" class="help-btn" onclick="closeToolsModal()">Close</button>
+      </div>
+      <div class="muted">By default all tools are selected when you enable a workflow. Uncheck tools you do not want.</div>
+      <div id="toolsModalBody"></div>
     </div>
-    <div id="browseStatus" class="muted"></div>
-    <div id="browseList" class="browse-list"></div>
   </div>
 
 <script>
 let templateFields = null;
 let lastConfig = null;
 let browserState = { target: '', mode: 'dirs', current: '' };
+let selectedToolsByWorkflow = {};
+let browserModalRef = null;
+let selectedBrowsePath = '';
+let currentOutputMode = 'config';
+
+function switchOutputMode(mode) {
+  currentOutputMode = mode;
+  document.getElementById('panelConfigOnly').style.display = mode === 'config' ? '' : 'none';
+  document.getElementById('panelProject').style.display = mode === 'project' ? '' : 'none';
+  document.getElementById('modeOptConfig').classList.toggle('active', mode === 'config');
+  document.getElementById('modeOptProject').classList.toggle('active', mode === 'project');
+}
+
+function toggleHelp(id) {
+  const el = document.getElementById(id);
+  if (!el) return;
+  el.style.display = (el.style.display === 'block') ? 'none' : 'block';
+}
 
 function setStatus(id, text, ok=true) {
   const el = document.getElementById(id);
+  if (!el) return;
   el.textContent = text;
   el.className = ok ? 'ok' : 'err';
 }
@@ -444,9 +911,23 @@ def root() -> str:
     .replaceAll("'", '&#039;');
 }
 
+function getToolsForWorkflow(wf) {
+  if (!templateFields || !templateFields[wf] || !templateFields[wf].TOOLS) return [];
+  return Object.keys(templateFields[wf].TOOLS);
+}
+
+function onWorkflowToggle(wf, checked) {
+  // First enable: pre-select every tool so the tools modal really shows all tools ticked by default.
+  if (checked && !selectedToolsByWorkflow[wf]) selectedToolsByWorkflow[wf] = getToolsForWorkflow(wf);
+  // Refresh the summary text from the workflow checkboxes that are currently ticked.
+  const selected = Array.from(document.querySelectorAll('input[data-workflow]:checked')).map(i => i.getAttribute('data-workflow'));
+  document.getElementById('workflowSummary').textContent = selected.length ? `Selected workflows: ${selected.join(', ')}` : 'No workflows selected yet.';
+}
+
 async function loadTemplate() {
   const holder = document.getElementById('workflowChooser');
   const status = document.getElementById('templateStatus');
+  const summary = document.getElementById('workflowSummary');
   try {
     status.textContent = 'Loading template...';
     const r = await fetch('/template/fields');
@@ -463,31 +944,107 @@ def root() -> str:
       .sort();
 
     workflows.forEach(wf => {
-      const tools = (data[wf] && data[wf].TOOLS) ? Object.keys(data[wf].TOOLS) : [];
-      const toolChecks = tools.map(t => `<label><input type=\"checkbox\" data-wf=\"${wf}\" data-tool=\"${t}\" checked /> ${escHtml(t)}</label>`).join('<br>');
-      holder.innerHTML += `
-        <div style=\"border:1px solid #e5e7eb; border-radius:8px; padding:10px; margin-bottom:8px;\">
-          <label><input type=\"checkbox\" data-workflow=\"${wf}\" /> <b>${escHtml(wf)}</b></label>
-          <div style=\"margin-left:20px; margin-top:6px;\">${toolChecks || '<span class=\"muted\">No tool list</span>'}</div>
-        </div>
-      `;
+      holder.innerHTML += `<label class="wf-chip"><input type="checkbox" data-workflow="${wf}" onchange="onWorkflowToggle('${wf}', this.checked)" />${escHtml(wf)}</label>`;
     });
 
     status.textContent = `Loaded ${workflows.length} workflows from template.`;
     status.className = 'ok';
+    summary.textContent = 'No workflows selected yet.';
   } catch (e) {
     holder.innerHTML = '';
+    summary.textContent = '';
     status.textContent = `Failed to load template fields: ${e.message || e}`;
     status.className = 'err';
   }
 }
 
+function openToolsModal() {
+  const modal = document.getElementById('toolsModal');
+  const body = document.getElementById('toolsModalBody');
+  const selected = Array.from(document.querySelectorAll('input[data-workflow]:checked')).map(i => i.getAttribute('data-workflow'));
+  if (!selected.length) {
+    body.innerHTML = '<div class="muted">Select at least one workflow first.</div>';
+    modal.style.display = 'flex';
+    return;
+  }
+  // Build one tool group per ticked workflow; names go through escaped data-* attributes, not into inline handler source.
+  body.innerHTML = selected.map(wf => {
+    const tools = getToolsForWorkflow(wf);
+    const active = new Set(selectedToolsByWorkflow[wf] || []);
+    const toolHtml = tools.length
+      ? tools.map(t => `<label><input type="checkbox" data-tool-wf="${escHtml(wf)}" data-tool-name="${escHtml(t)}" ${active.has(t) ? 'checked' : ''} onchange="onToolToggle(this.dataset.toolWf,this.dataset.toolName,this.checked)"/> ${escHtml(t)}</label>`).join('')
+      : '<div class="muted">No tool list available.</div>';
+    return `<details class="tools-group" open><summary>${escHtml(wf)}</summary><div class="tools-grid">${toolHtml}</div></details>`;
+  }).join('');
+  modal.style.display = 'flex';
+}
+
+function closeToolsModal() {
+  document.getElementById('toolsModal').style.display = 'none';
+}
+
+function onToolToggle(wf, tool, checked) {
+  const current = new Set(selectedToolsByWorkflow[wf] || []);
+  if (checked) current.add(tool);
+  else current.delete(tool);
+  selectedToolsByWorkflow[wf] = Array.from(current);
+}
+
 async function loadBrowseRoots() {
   const r = await fetch('/fs/roots');
   const data = await r.json();
   if (!r.ok) throw new Error(data.detail || 'Failed to load roots');
-  const rootHtml = (data.roots || []).map(p => `<button onclick=\"browseTo(${JSON.stringify(p)})\">${escHtml(p)}</button>`).join(' ');
-  document.getElementById('browseRoots').innerHTML = rootHtml;
+  const el = document.getElementById('browseRoots');
+  el.innerHTML = '';
+  (data.roots || []).forEach(p => {
+    const label = p === '/' ? 'Root (/)' : p.split('/').filter(Boolean).pop() || p;
+    const btn = document.createElement('button');
+    btn.type = 'button';
+    btn.className = 'fb-root-btn';
+    btn.innerHTML = `<svg width="14" height="14" fill="none" stroke="currentColor" stroke-width="2" style="margin-right:6px; vertical-align:-2px;"><path d="M3 3h4l1.5 2H13a1 1 0 0 1 1 1v6a1 1 0 0 1-1 1H3a1 1 0 0 1-1-1V4a1 1 0 0 1 1-1z"/></svg>${escHtml(label)}`;
+    btn.addEventListener('click', () => browseTo(p));
+    el.appendChild(btn);
+  });
+}
+
+function renderBreadcrumb(path) {
+  const crumb = document.getElementById('browseBreadcrumb');
+  if (!crumb) return;
+  crumb.innerHTML = '';
+  const parts = (path || '/').split('/').filter(Boolean);
+  let acc = '/';
+
+  const rootLink = document.createElement('a');
+  rootLink.href = '#';
+  rootLink.className = 'fb-crumb';
+  rootLink.textContent = '/';
+  rootLink.addEventListener('click', (e) => { e.preventDefault(); browseTo('/'); });
+  crumb.appendChild(rootLink);
+
+  for (let i = 0; i < parts.length; i++) {
+    const p = parts[i];
+    acc = acc === '/' ? `/${p}` : `${acc}/${p}`;
+
+    const sep = document.createElement('span');
+    sep.className = 'fb-crumb-sep';
+    sep.textContent = '/';
+    crumb.appendChild(sep);
+
+    if (i === parts.length - 1) {
+      const span = document.createElement('span');
+      span.className = 'fb-crumb-active';
+      span.textContent = p;
+      crumb.appendChild(span);
+    } else {
+      const link = document.createElement('a');
+      link.href = '#';
+      link.className = 'fb-crumb';
+      link.textContent = p;
+      const target = acc;
+      link.addEventListener('click', (e) => { e.preventDefault(); browseTo(target); });
+      crumb.appendChild(link);
+    }
+  }
 }
 
 async function browseTo(path) {
@@ -500,49 +1057,96 @@ def root() -> str:
     if (!r.ok) throw new Error(data.detail || 'Browse failed');
 
     browserState.current = data.current;
+    selectedBrowsePath = data.current;
     document.getElementById('browsePathInput').value = data.current;
-    document.getElementById('browserInfo').textContent = `Selecting for: ${browserState.target} (${browserState.mode})`;
-    setStatus('browseStatus', `Showing: ${data.current}`, true);
-
-    const dirs = (data.dirs || []).map(d => `
-      <div class=\"browse-item\">
-        <button onclick=\"browseTo(${JSON.stringify(d.path)})\">📁 ${escHtml(d.name)}</button>
-        <button onclick=\"choosePath(${JSON.stringify(d.path)})\">Use</button>
-      </div>`).join('');
+    renderBreadcrumb(data.current);
+
+    const modeLabel = browserState.mode === 'dirs' ? 'folder' : 'file';
+    document.getElementById('browserTitle').textContent = `Select a ${modeLabel}`;
+
+    const folderSvg = `<svg width="20" height="20" fill="none" stroke="currentColor" stroke-width="1.5"><path d="M2 5v11a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V9a2 2 0 0 0-2-2h-6l-2-2H4a2 2 0 0 0-2 2z"/></svg>`;
+    const fileSvg = `<svg width="20" height="20" fill="none" stroke="currentColor" stroke-width="1.5"><path d="M14 2H6a2 2 0 0 0-2 2v12a2 2 0 0 0 2 2h8a2 2 0 0 0 2-2V6l-4-4z"/><polyline points="14 2 14 6 18 6"/></svg>`;
+
+    const list = document.getElementById('browseList');
+    const empty = document.getElementById('browseEmpty');
+    list.innerHTML = '';
+
+    (data.dirs || []).forEach(d => {
+      const btn = document.createElement('button');
+      btn.type = 'button';
+      btn.className = 'fb-entry';
+      btn.innerHTML = `<span class="fb-entry-icon folder">${folderSvg}</span><span class="fb-entry-name">${escHtml(d.name)}</span>`;
+      btn.addEventListener('click', () => markBrowseSelection(d.path, btn));
+      btn.addEventListener('dblclick', () => browseTo(d.path));
+      list.appendChild(btn);
+    });
 
-    const files = (data.files || []).map(f => `
-      <div class=\"browse-item\">
-        <button onclick=\"choosePath(${JSON.stringify(f.path)})\">📄 ${escHtml(f.name)}</button>
-      </div>`).join('');
+    (data.files || []).forEach(f => {
+      const btn = document.createElement('button');
+      btn.type = 'button';
+      btn.className = 'fb-entry';
+      btn.innerHTML = `<span class="fb-entry-icon file">${fileSvg}</span><span class="fb-entry-name">${escHtml(f.name)}</span>`;
+      btn.addEventListener('click', () => { markBrowseSelection(f.path, btn); choosePath(f.path); });
+      list.appendChild(btn);
+    });
 
-    document.getElementById('browseList').innerHTML = (dirs + files) || '<div class=\"muted\">No entries.</div>';
+    empty.style.display = list.children.length ? 'none' : 'block';
   } catch (e) {
     setStatus('browseStatus', e.message || String(e), false);
   }
 }
 
+function markBrowseSelection(path, el) {
+  selectedBrowsePath = path;
+  document.getElementById('browsePathInput').value = path;
+  const list = document.getElementById('browseList');
+  if (!list) return;
+  list.querySelectorAll('.fb-entry').forEach(x => x.classList.remove('selected'));
+  if (el) el.classList.add('selected');
+}
+
 function openPathBrowser(target, mode='dirs') {
   browserState.target = target;
   browserState.mode = mode;
-  const start = document.getElementById(target)?.value?.trim() || '';
-  loadBrowseRoots().then(() => browseTo(start));
+  if (!browserModalRef) {
+    const el = document.getElementById('browserModal');
+    browserModalRef = new bootstrap.Modal(el, { backdrop: true, keyboard: true });
+  }
+  browserModalRef.show();
+  const start = (document.getElementById(target)?.value || '').trim();
+  loadBrowseRoots().then(() => browseTo(start || '')).catch((e) => {
+    setStatus('browseStatus', e.message || String(e), false);
+  });
+}
+
+function closePathBrowser() {
+  if (browserModalRef) browserModalRef.hide();
 }
 
 function choosePath(path) {
   if (!browserState.target) return;
   document.getElementById(browserState.target).value = path;
   setStatus('browseStatus', `Selected: ${path}`, true);
+  closePathBrowser();
 }
 
 function chooseCurrentPath() {
-  if (!browserState.current) return;
-  choosePath(browserState.current);
+  const picked = selectedBrowsePath || browserState.current;
+  if (!picked) return;
+  choosePath(picked);
 }
 
 function browseUp() {
   if (!browserState.current) return;
-  const parent = browserState.current === '/' ? '/' : browserState.current.replace(/\/+$/, '').replace(/\/[^\/]*$/, '') || '/';
-  browseTo(parent);
+  const cur = browserState.current;
+  if (cur === '/') {
+    browseTo('/');
+    return;
+  }
+  const parts = cur.split('/').filter(Boolean);
+  parts.pop();
+  const parent = '/' + parts.join('/');
+  browseTo(parent || '/');
 }
 
 async function parseSamplesheet() {
@@ -560,7 +1164,7 @@ def root() -> str:
     const data = await r.json();
     if (r.ok) {
       document.getElementById('settingsJson').value = JSON.stringify(data.settings, null, 2);
-      document.getElementById('conditionsPreview').innerHTML = data.conditions.map(c => `<span class=\"pill\">${c}</span>`).join('');
+      document.getElementById('conditionsPreview').innerHTML = data.conditions.map(c => `<span class="pill">${escHtml(c)}</span>`).join(''); // condition names originate from the samplesheet: escape before innerHTML
       setStatus('samplesheetStatus', `Parsed ${data.conditions.length} condition(s) from samplesheet.`);
     } else {
       setStatus('samplesheetStatus', data.detail || 'Failed to parse samplesheet.', false);
@@ -574,7 +1178,9 @@ def root() -> str:
   const selectedWorkflows = Array.from(document.querySelectorAll('input[data-workflow]:checked')).map(i => i.getAttribute('data-workflow'));
   const tools = {};
   selectedWorkflows.forEach(wf => {
-    tools[wf] = Array.from(document.querySelectorAll(`input[data-wf="${wf}"][data-tool]:checked`)).map(i => i.getAttribute('data-tool'));
+    const allTools = getToolsForWorkflow(wf);
+    const picked = selectedToolsByWorkflow[wf] || allTools;
+    tools[wf] = picked.length ? picked : allTools;
   });
   return { selectedWorkflows, tools };
 }
@@ -645,28 +1251,214 @@ def root() -> str:
 
 async function createProject() {
   try {
-    const workflows = (lastConfig && lastConfig.WORKFLOWS) ? lastConfig.WORKFLOWS.split(',').map(x => x.trim()).filter(Boolean) : [];
+    const projectDir = document.getElementById('projectDir').value.trim();
+    const projectName = document.getElementById('configName').value.trim() || 'monsda';
+    if (!projectDir) {
+      setStatus('projectStatus', 'Please set a project directory.', false);
+      return;
+    }
+
+    // Collect per-condition file selections from wizard
+    const conditionFiles = [];
+    document.querySelectorAll('.cond-card').forEach(card => {
+      const cond = card.dataset.condition;
+      // Determine FASTQ mode
+      const modeRadio = card.querySelector('input[type="radio"][value="files"]:checked');
+      const useFiles = !!modeRadio;
+      let fastqFiles = [];
+      if (useFiles) {
+        card.querySelectorAll('.cond-file-list li').forEach(li => {
+          if (li.dataset.path) fastqFiles.push(li.dataset.path);
+        });
+      }
+      conditionFiles.push({
+        condition: cond,
+        fastq_dir: useFiles ? '' : (card.querySelector('.cond-fastq-dir')?.value?.trim() || ''),
+        fastq_files: fastqFiles,
+        sequencing: card.querySelector('.cond-sequencing')?.value || 'paired',
+        reference: card.querySelector('.cond-reference')?.value?.trim() || '',
+        gtf: card.querySelector('.cond-gtf')?.value?.trim() || '',
+        gff: card.querySelector('.cond-gff')?.value?.trim() || '',
+        decoy: card.querySelector('.cond-decoy')?.value?.trim() || ''
+      });
+    });
+
     const r = await fetch('/project/create', {
       method: 'POST',
       headers: { 'Content-Type': 'application/json' },
       body: JSON.stringify({
-        project_dir: document.getElementById('projectDir').value.trim(),
-        workflows: workflows
+        project_dir: projectDir,
+        project_name: projectName,
+        condition_files: conditionFiles,
+        config: lastConfig || null,
+        settings: parseSettingsJson()
       })
     });
     const data = await r.json();
     if (r.ok) {
-      setStatus('projectStatus', `Created: ${data.path}`);
+      let msg = `Project created at <strong>${escHtml(data.path)}</strong>. ${data.linked_files} file(s) symlinked.`;
+      if (data.config_path) msg += `<br/>Config written: ${escHtml(data.config_path)}`;
+      if (data.warnings && data.warnings.length) msg += `<br/><span class="err">Warnings:</span> ${data.warnings.map(escHtml).join('<br/>')}`;
+      document.getElementById('projectStatus').innerHTML = `<span class="ok">${msg}</span>`;
     } else {
       setStatus('projectStatus', data.detail || 'Failed to create project.', false);
     }
   } catch (e) {
-    setStatus('projectStatus', 'Failed to create project.', false);
+    setStatus('projectStatus', 'Failed to create project: ' + e.message, false);
+  }
+}
+
+function loadProjectConditions() {
+  const settings = parseSettingsJson();
+  if (!settings) {
+    setStatus('projectStatus', 'Parse a samplesheet first or edit SETTINGS JSON manually, then load conditions.', false);
+    return;
+  }
+
+  // Extract condition leaves from settings (those with SAMPLES key)
+  const conditions = [];
+  function walkSettings(node, path) {
+    if (!node || typeof node !== 'object') return;
+    if (Array.isArray(node.SAMPLES)) {
+      conditions.push({ path: path.join('/'), samples: node.SAMPLES, sequencing: node.SEQUENCING || 'paired', reference: node.REFERENCE || '', gtf: (node.ANNOTATION && node.ANNOTATION.GTF) || '', gff: (node.ANNOTATION && node.ANNOTATION.GFF) || '', decoy: node.DECOY || '' });
+      return;
+    }
+    for (const [k, v] of Object.entries(node)) {
+      if (typeof v === 'object' && !Array.isArray(v)) walkSettings(v, [...path, k]);
+    }
+  }
+  walkSettings(settings, []);
+
+  if (!conditions.length) {
+    setStatus('projectStatus', 'No conditions with SAMPLES found in settings.', false);
+    return;
+  }
+
+  const wizard = document.getElementById('conditionWizard');
+  wizard.innerHTML = `<div class="muted" style="margin-bottom:10px;">${conditions.length} condition(s) found. For each, select FASTQ files by directory (auto-matched by sample name) or pick individual files.</div>`;
+
+  conditions.forEach((c, idx) => {
+    const id = `cond_${idx}`;
+    const hasSamples = c.samples && c.samples.length > 0;
+    const samplesStr = hasSamples
+      ? c.samples.slice(0, 5).join(', ') + (c.samples.length > 5 ? ` (+${c.samples.length - 5} more)` : '')
+      : '(no samples defined)';
+    const card = document.createElement('div');
+    card.className = 'cond-card';
+    card.dataset.condition = c.path;
+    card.innerHTML = `
+      <div class="cond-card-header">
+        <span class="cond-badge">${escHtml(c.path)}</span>
+        <span class="cond-samples">${escHtml(samplesStr)}</span>
+      </div>
+      <div class="cond-fields">
+        <div style="grid-column:1/-1;">
+          <label>FASTQ source mode</label>
+          <div style="display:flex; gap:12px; align-items:center; margin-top:4px;">
+            <label style="font-weight:400; display:inline-flex; align-items:center; gap:4px; margin:0;">
+              <input type="radio" name="${id}_fqmode" value="dir" checked onchange="toggleFqMode('${id}','dir')" style="width:auto; margin:0;"/> Directory (filter by sample names)
+            </label>
+            <label style="font-weight:400; display:inline-flex; align-items:center; gap:4px; margin:0;">
+              <input type="radio" name="${id}_fqmode" value="files" onchange="toggleFqMode('${id}','files')" style="width:auto; margin:0;"/> Pick individual files
+            </label>
+          </div>
+        </div>
+        <div class="fq-dir-panel" id="${id}_dirpanel">
+          <label>FASTQ source directory</label>
+          <div class="pathline">
+            <input class="cond-fastq-dir" id="${id}_fastq" placeholder="/path/to/fastq/files" />
+            <button type="button" onclick="openPathBrowser('${id}_fastq','dirs')">Browse&hellip;</button>
+          </div>
+        </div>
+        <div class="fq-files-panel" id="${id}_filespanel" style="display:none;">
+          <label>Selected FASTQ files <button type="button" style="width:auto; padding:2px 8px; font-size:.78rem; margin:0 0 0 8px;" onclick="addFastqFile('${id}')">+ Add file</button></label>
+          <ul class="cond-file-list" id="${id}_filelist"></ul>
+        </div>
+        <div>
+          <label>Sequencing</label>
+          <select class="cond-sequencing">
+            <option value="paired" ${c.sequencing === 'paired' ? 'selected' : ''}>Paired-end</option>
+            <option value="single" ${c.sequencing !== 'paired' ? 'selected' : ''}>Single-end</option>
+          </select>
+        </div>
+        <div>
+          <label>Reference genome (.fa.gz)</label>
+          <div class="pathline">
+            <input class="cond-reference" id="${id}_ref" value="${escHtml(c.reference)}" placeholder="/path/to/genome.fa.gz" />
+            <button type="button" onclick="openPathBrowser('${id}_ref','all')">Browse&hellip;</button>
+          </div>
+        </div>
+        <div>
+          <label>GTF annotation (.gtf.gz)</label>
+          <div class="pathline">
+            <input class="cond-gtf" id="${id}_gtf" value="${escHtml(c.gtf)}" placeholder="/path/to/annotation.gtf.gz" />
+            <button type="button" onclick="openPathBrowser('${id}_gtf','all')">Browse&hellip;</button>
+          </div>
+        </div>
+        <div>
+          <label>GFF annotation (optional)</label>
+          <div class="pathline">
+            <input class="cond-gff" id="${id}_gff" value="${escHtml(c.gff)}" placeholder="(optional)" />
+            <button type="button" onclick="openPathBrowser('${id}_gff','all')">Browse&hellip;</button>
+          </div>
+        </div>
+        <div>
+          <label>Decoy (optional)</label>
+          <div class="pathline">
+            <input class="cond-decoy" id="${id}_decoy" value="${escHtml(c.decoy)}" placeholder="(optional)" />
+            <button type="button" onclick="openPathBrowser('${id}_decoy','all')">Browse&hellip;</button>
+          </div>
+        </div>
+      </div>
+    `;
+    wizard.appendChild(card);
+  });
+  setStatus('projectStatus', `Loaded ${conditions.length} condition(s). Fill in file paths and click Create Project.`);
+}
+
+function toggleFqMode(id, mode) {
+  document.getElementById(id + '_dirpanel').style.display = mode === 'dir' ? '' : 'none';
+  document.getElementById(id + '_filespanel').style.display = mode === 'files' ? '' : 'none';
+}
+
+function addFastqFile(id) {
+  // Open the path browser on a hidden per-condition input; the picked path is appended to that condition's file list.
+  const tempId = id + '_fqtemp';
+  let tempInput = document.getElementById(tempId);
+  if (!tempInput) {
+    tempInput = document.createElement('input');
+    tempInput.type = 'hidden';
+    tempInput.id = tempId;
+    document.body.appendChild(tempInput);
   }
+  tempInput.value = '';
+  // Temporarily reroute choosePath so the selection is added to the list instead of written to an input.
+  const origChoose = choosePath;
+  choosePath = function(path) {
+    // Restore first: a browse dismissed without a pick must not leave this override active for other fields.
+    choosePath = origChoose;
+    if (browserState.target !== tempId) return origChoose(path);
+    const list = document.getElementById(id + '_filelist');
+    const li = document.createElement('li');
+    li.innerHTML = `<span>${escHtml(path)}</span>`;
+    const rmBtn = document.createElement('button');
+    rmBtn.type = 'button';
+    rmBtn.textContent = '\u00d7';
+    rmBtn.addEventListener('click', () => li.remove());
+    li.appendChild(rmBtn);
+    li.dataset.path = path;
+    list.appendChild(li);
+    closePathBrowser();
+  };
+  openPathBrowser(tempId, 'all');
 }
 
-window.onload = loadTemplate;
+window.onload = () => {
+  loadTemplate();
+  switchOutputMode('config');
+};
 </script>
+<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/js/bootstrap.bundle.min.js"></script>
 </body>
 </html>
 """

From 3b8ee8342260a65d9631ea3db8eab66d9553950d Mon Sep 17 00:00:00 2001
From: jfallmann <fallmann.joerg@gmail.com>
Date: Tue, 5 May 2026 15:18:54 +0200
Subject: [PATCH 28/39] readme update

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index b3dfedfc..9271ea91 100644
--- a/README.md
+++ b/README.md
@@ -46,12 +46,12 @@ More information can be found in the official [documentation](https://monsda.rea
 
 ## How does it work
 
-This repository hosts the executable ```MONSDA.py``` which acts a wrapper around ```Snakemake``` and the ```config.json``` file.
+This repository hosts the executable ```MONSDA.py``` which acts as a wrapper around ```Snakemake|Nextflow``` and the ```config.json``` file.
 The ```config.json``` holds all the information that is needed to run the jobs and will be parsed by ```MONSDA.py``` and split into sub-configs that can later be found in the directory ```SubSnakes``` or ```SubFlows``` respectively.
 
 To successfully run an analysis pipeline, a few steps have to be followed:
   * Directory structure: The structure for the directories is dictated by the condition-tree in the config file
-  * Config file: This is the central part of the analysis. Depending on this file ```MONSDA.py``` will determine processing steps and generate according config and ```Snakemake/Nextflow``` workflow files to run each subworkflow until all processing steps are done.
+  * Config file: This is the central part of the analysis. Depending on this file ```MONSDA.py``` will determine processing steps and generate the corresponding config and ```Snakemake|Nextflow``` workflow files to run each subworkflow until all processing steps are done.
 
 ## Run the pipeline
 

From c040c49b8c4f1c94841a8ecf703ceece654f10b0 Mon Sep 17 00:00:00 2001
From: jfallmann <fallmann.joerg@gmail.com>
Date: Tue, 5 May 2026 15:34:47 +0200
Subject: [PATCH 29/39] webconfig

---
 MONSDA/web_configurator.py | 120 +++++++++++++++++++++++++------------
 1 file changed, 83 insertions(+), 37 deletions(-)

diff --git a/MONSDA/web_configurator.py b/MONSDA/web_configurator.py
index 6b2fb5c8..ce964980 100644
--- a/MONSDA/web_configurator.py
+++ b/MONSDA/web_configurator.py
@@ -10,8 +10,11 @@
 from pydantic import BaseModel, Field
 from snakemake.common.configfile import load_configfile
 
+from . import _version
 from .Params import samplesheet_to_settings
 
+__version__ = _version.get_versions()["version"]
+
 TEMPLATE_FILE = "template_base_commented.json"
 NONE_WORKFLOW_KEYS = ["WORKFLOWS", "BINS", "MAXTHREADS", "SETTINGS", "VERSION"]
 
@@ -227,7 +230,7 @@ def build_config(req: BuildConfigRequest) -> Dict[str, Any]:
         "WORKFLOWS": ", ".join(workflows),
         "BINS": template.get("BINS", ""),
         "MAXTHREADS": str(req.maxthreads),
-        "VERSION": template.get("VERSION", ""),
+        "VERSION": __version__,
         "SETTINGS": settings,
     }
 
@@ -855,6 +858,7 @@ def root() -> str:
         <div style="background:#f8fafc; border-top:1px solid #e2e8f0; padding:12px 20px; display:flex; align-items:center; gap:10px;">
           <input id="browsePathInput" class="form-control form-control-sm" style="flex:1; border-radius:8px;" placeholder="Path..." />
           <button type="button" class="btn btn-sm btn-outline-secondary" style="border-radius:8px;" onclick="browseTo(document.getElementById('browsePathInput').value)">Go</button>
+          <button type="button" id="browseAddSelectedBtn" class="btn btn-sm btn-success" style="border-radius:8px; display:none;" onclick="addMultiselectedFiles()">Add selected</button>
           <button type="button" class="btn btn-sm btn-primary" style="border-radius:8px; min-width:120px;" onclick="chooseCurrentPath()">Select</button>
         </div>
       </div>
@@ -875,11 +879,13 @@ def root() -> str:
 <script>
 let templateFields = null;
 let lastConfig = null;
-let browserState = { target: '', mode: 'dirs', current: '' };
+let browserState = { target: '', mode: 'dirs', current: '', multiselect: false };
 let selectedToolsByWorkflow = {};
 let browserModalRef = null;
 let selectedBrowsePath = '';
 let currentOutputMode = 'config';
+let browserCallback = null;  // optional: called with path[] instead of filling input
+let browserMultiSelected = new Set(); // tracks checked paths in multiselect mode
 
 function switchOutputMode(mode) {
   currentOutputMode = mode;
@@ -1085,8 +1091,24 @@ def root() -> str:
       const btn = document.createElement('button');
       btn.type = 'button';
       btn.className = 'fb-entry';
-      btn.innerHTML = `<span class="fb-entry-icon file">${fileSvg}</span><span class="fb-entry-name">${escHtml(f.name)}</span>`;
-      btn.addEventListener('click', () => { markBrowseSelection(f.path, btn); choosePath(f.path); });
+      if (browserState.multiselect) {
+        const chk = document.createElement('input');
+        chk.type = 'checkbox';
+        chk.style.cssText = 'width:auto;margin:0 2px 0 0;cursor:pointer;flex-shrink:0;';
+        chk.checked = browserMultiSelected.has(f.path); // restore state when a directory is re-rendered
+        chk.addEventListener('change', () => {
+          if (chk.checked) browserMultiSelected.add(f.path);
+          else browserMultiSelected.delete(f.path);
+          btn.classList.toggle('selected', chk.checked);
+          _updateMultiselectBtn();
+        });
+        btn.prepend(chk);
+        btn.insertAdjacentHTML('beforeend', `<span class="fb-entry-icon file">${fileSvg}</span><span class="fb-entry-name">${escHtml(f.name)}</span>`); // not innerHTML +=: that re-parses the button and swaps chk for a clone without listener/checked state
+        btn.addEventListener('click', (e) => { if (e.target !== chk) { chk.checked = !chk.checked; chk.dispatchEvent(new Event('change')); } }); // row click toggles the box; clicks on the box itself are handled natively
+      } else {
+        btn.innerHTML = `<span class="fb-entry-icon file">${fileSvg}</span><span class="fb-entry-name">${escHtml(f.name)}</span>`;
+        btn.addEventListener('click', () => { markBrowseSelection(f.path, btn); choosePath(f.path); });
+      }
       list.appendChild(btn);
     });
 
@@ -1105,12 +1127,18 @@ def root() -> str:
   if (el) el.classList.add('selected');
 }
 
-function openPathBrowser(target, mode='dirs') {
+function openPathBrowser(target, mode='dirs', callback=null) {
   browserState.target = target;
-  browserState.mode = mode;
+  browserState.multiselect = mode === 'multifiles';
+  browserState.mode = browserState.multiselect ? 'all' : mode;
+  browserCallback = callback;
+  browserMultiSelected = new Set();
+  _updateMultiselectBtn();
   if (!browserModalRef) {
     const el = document.getElementById('browserModal');
     browserModalRef = new bootstrap.Modal(el, { backdrop: true, keyboard: true });
+    // Always clear callback if modal is dismissed without selection
+    el.addEventListener('hidden.bs.modal', () => { browserCallback = null; browserMultiSelected = new Set(); _updateMultiselectBtn(); });
   }
   browserModalRef.show();
   const start = (document.getElementById(target)?.value || '').trim();
@@ -1119,15 +1147,41 @@ def root() -> str:
   });
 }
 
+function _updateMultiselectBtn() {
+  const btn = document.getElementById('browseAddSelectedBtn');
+  if (!btn) return;
+  if (browserState.multiselect) {
+    btn.style.display = '';
+    btn.textContent = browserMultiSelected.size > 0 ? `Add selected (${browserMultiSelected.size})` : 'Add selected';
+  } else {
+    btn.style.display = 'none';
+  }
+}
+
 function closePathBrowser() {
   if (browserModalRef) browserModalRef.hide();
 }
 
 function choosePath(path) {
-  if (!browserState.target) return;
-  document.getElementById(browserState.target).value = path;
+  if (browserCallback) {
+    const cb = browserCallback;
+    browserCallback = null;
+    cb([path]);
+  } else if (browserState.target) {
+    document.getElementById(browserState.target).value = path;
+  }
   setStatus('browseStatus', `Selected: ${path}`, true);
-  closePathBrowser();
+  if (!browserState.multiselect) closePathBrowser();
+}
+
+function addMultiselectedFiles() {
+  if (browserCallback && browserMultiSelected.size > 0) {
+    const cb = browserCallback;
+    browserCallback = null;
+    cb([...browserMultiSelected]);
+    browserMultiSelected = new Set();
+    closePathBrowser();
+  }
 }
 
 function chooseCurrentPath() {
@@ -1422,35 +1476,27 @@ def root() -> str:
 }
 
 function addFastqFile(id) {
-  // Use the file browser targeting a hidden input, then on selection add to the list
-  const tempId = id + '_fqtemp';
-  let tempInput = document.getElementById(tempId);
-  if (!tempInput) {
-    tempInput = document.createElement('input');
-    tempInput.type = 'hidden';
-    tempInput.id = tempId;
-    document.body.appendChild(tempInput);
-  }
-  tempInput.value = '';
-  // Override choosePath temporarily to add to file list instead
-  const origChoose = choosePath;
-  choosePath = function(path) {
-    if (!browserState.target) return;
-    // Add to file list
+  openPathBrowser('', 'multifiles', (paths) => {
     const list = document.getElementById(id + '_filelist');
-    const li = document.createElement('li');
-    li.innerHTML = `<span>${escHtml(path)}</span>`;
-    const rmBtn = document.createElement('button');
-    rmBtn.type = 'button';
-    rmBtn.textContent = '\u00d7';
-    rmBtn.addEventListener('click', () => li.remove());
-    li.appendChild(rmBtn);
-    li.dataset.path = path;
-    list.appendChild(li);
-    closePathBrowser();
-    choosePath = origChoose;
-  };
-  openPathBrowser(tempId, 'all');
+    paths.forEach(path => {
+      // Avoid duplicates
+      let exists = false;
+      list.querySelectorAll('li').forEach(li => { if (li.dataset.path === path) exists = true; });
+      if (exists) return;
+      const li = document.createElement('li');
+      const span = document.createElement('span');
+      span.textContent = path;
+      const rmBtn = document.createElement('button');
+      rmBtn.type = 'button';
+      rmBtn.textContent = '\u00d7';
+      rmBtn.title = 'Remove';
+      rmBtn.addEventListener('click', () => li.remove());
+      li.appendChild(span);
+      li.appendChild(rmBtn);
+      li.dataset.path = path;
+      list.appendChild(li);
+    });
+  });
 }
 
 window.onload = () => {

From 9716a840f42976a7b3e31585b22a109f476c7d99 Mon Sep 17 00:00:00 2001
From: jfallmann <fallmann.joerg@gmail.com>
Date: Tue, 5 May 2026 16:11:44 +0200
Subject: [PATCH 30/39] oras registry cli update

---
 MONSDA/RunMONSDA.py | 11 ++++++++++
 MONSDA/Workflows.py | 50 ++++++++++++++++++++++++++++++++-------------
 2 files changed, 47 insertions(+), 14 deletions(-)

diff --git a/MONSDA/RunMONSDA.py b/MONSDA/RunMONSDA.py
index 9635a1f7..20249adb 100755
--- a/MONSDA/RunMONSDA.py
+++ b/MONSDA/RunMONSDA.py
@@ -138,6 +138,14 @@ def parseargs():
              "Used when the config JSON lacks a SETTINGS block. "
              "On the first run an augmented config (<config>_with_settings.json) is written for reuse.",
     )
+    parser.add_argument(
+        "--oras-registry",
+        type=str,
+        default="docker.io",
+        metavar="HOST",
+        help="Container registry hostname for ORAS image pulls (default: docker.io). "
+             "Use e.g. ghcr.io for GitHub Container Registry.",
+    )
 
     if len(sys.argv) == 1:
         parser.print_help(sys.stderr)
@@ -939,6 +947,9 @@ def main():
                 os.path.abspath(knownargs.samplesheet),
             )
 
+        # --- set ORAS registry for container image pulls ---
+        mw.set_oras_registry(knownargs.oras_registry)
+
         log.debug(
             f"{logid} ARGS: {args} {type(args)} KNOWNARGS: {knownargs} {type(knownargs)} OPTIONALARGS: {optionalargs} {type(optionalargs)}"
         )
diff --git a/MONSDA/Workflows.py b/MONSDA/Workflows.py
index af51f7e3..57b00630 100755
--- a/MONSDA/Workflows.py
+++ b/MONSDA/Workflows.py
@@ -94,6 +94,27 @@
 binpath = os.path.join(installpath, "MONSDA", "scripts")
 condapath = re.compile(r'conda:\s+"')
 logfix = re.compile(r'loglevel="INFO"')
+# Matches oras:// NOT already followed by a registry host (i.e. followed directly by a
+# Docker Hub namespace like "jfallmann/" without dots before the first slash)
+oraspath = re.compile(r'oras://(?![\w.-]+\.[\w.-]+/)')
+oras_registry = "docker.io"
+
+
+def set_oras_registry(registry: str):
+    """Set the ORAS registry used for container image URIs."""
+    global oras_registry
+    oras_registry = registry.strip().rstrip("/")
+
+
+def _fix_oras(line: str) -> str:
+    """Insert oras_registry into unqualified oras:// URIs; URIs already carrying it are kept (idempotent)."""
+    prefix = "oras://" + oras_registry + "/"
+    return oraspath.sub(lambda m: m[0] if line.startswith(prefix, m.start()) else prefix, line)
+
+
+def _write_workflow(filepath, content):
+    """Write workflow file with ORAS registry fix applied (included lines may already have been fixed once)."""
+    write_if_different(filepath, _fix_oras(content))
 
 try:
     scriptname = (
@@ -642,7 +663,7 @@ def make_pre(
                         ),
                     )
                 )
-                write_if_different(smko, "".join(add) + "".join(subjobs))
+                _write_workflow(smko, "".join(add) + "".join(subjobs))
 
                 confo = os.path.abspath(
                     os.path.join(
@@ -783,7 +804,7 @@ def make_pre(
                     )
                 )
 
-                write_if_different(smko, str.join("", add) + str.join("", subjobs))
+                _write_workflow(smko, str.join("", add) + str.join("", subjobs))
 
                 confo = os.path.abspath(
                     os.path.join(
@@ -1036,7 +1057,7 @@ def make_sub(
                     )
                 )
 
-                write_if_different(smko, "".join(add) + "".join(subjobs))
+                _write_workflow(smko, "".join(add) + "".join(subjobs))
 
                 confo = os.path.abspath(
                     os.path.join(
@@ -1223,7 +1244,7 @@ def make_sub(
                 os.path.join(subdir, "_".join(["_".join(condition), "subsnake.smk"]))
             )
 
-            write_if_different(smko, str.join("", add) + str.join("", subjobs))
+            _write_workflow(smko, str.join("", add) + str.join("", subjobs))
             confo = os.path.abspath(
                 os.path.join(subdir, "_".join(["_".join(condition), "subconfig.json"]))
             )
@@ -1436,7 +1457,7 @@ def make_post(
                     )
                 )
 
-                write_if_different(smko, "".join(add) + "".join(subjobs))
+                _write_workflow(smko, "".join(add) + "".join(subjobs))
 
                 confo = os.path.abspath(
                     os.path.join(
@@ -1569,7 +1590,7 @@ def make_post(
                 )
             )
 
-            write_if_different(smko, "".join(add) + "".join(subjobs))
+            _write_workflow(smko, "".join(add) + "".join(subjobs))
 
             confo = os.path.abspath(
                 os.path.join(
@@ -1661,7 +1682,7 @@ def make_summary(config, subdir, loglevel, combinations=None):
         subjobs.append("\n\n")
 
     smko = os.path.abspath(os.path.join(subdir, "summary_subsnake.smk"))
-    write_if_different(smko, "".join(subjobs))
+    _write_workflow(smko, "".join(subjobs))
 
     subconf = mu.NestedDefaultDict()
     for key in ["BINS", "MAXTHREADS", "SETTINGS"]:
@@ -1769,6 +1790,7 @@ def fixinclude(
             for line in incl.readlines():
                 line = re.sub(logfix, "loglevel='" + loglevel + "'", line)
                 line = re.sub(condapath, condaline + envpath, line)
+                line = _fix_oras(line)
                 if includeline in line:
                     line = fixinclude(
                         line, loglevel, condapath, envpath, workflowpath, logfix
@@ -2657,7 +2679,7 @@ def nf_make_pre(
                             )
                         )
                         if writeout:
-                            write_if_different(nfo, "".join(subjobs))
+                            _write_workflow(nfo, "".join(subjobs))
 
                         confo = os.path.abspath(
                             os.path.join(
@@ -2862,7 +2884,7 @@ def nf_make_pre(
                     )
                 )
                 if writeout:
-                    write_if_different(nfo, "".join(subjobs))
+                    _write_workflow(nfo, "".join(subjobs))
 
                 confo = os.path.abspath(
                     os.path.join(
@@ -3324,7 +3346,7 @@ def nf_make_sub(
                     )
                 )
                 if writeout:
-                    write_if_different(nfo, "".join(add) + "".join(subjobs))
+                    _write_workflow(nfo, "".join(add) + "".join(subjobs))
 
                 confo = os.path.abspath(
                     os.path.join(
@@ -3702,7 +3724,7 @@ def nf_make_sub(
                 os.path.join(subdir, "_".join(["_".join(condition), "subflow.nf"]))
             )
             if writeout:
-                write_if_different(nfo, "".join(subjobs))
+                _write_workflow(nfo, "".join(subjobs))
 
             confo = os.path.abspath(
                 os.path.join(subdir, "_".join(["_".join(condition), "subconfig.json"]))
@@ -3902,7 +3924,7 @@ def nf_make_post(
                     )
                 )
                 if writeout:
-                    write_if_different(nfo, "".join(add) + "".join(subjobs))
+                    _write_workflow(nfo, "".join(add) + "".join(subjobs))
 
                 confo = os.path.abspath(
                     os.path.join(
@@ -4048,7 +4070,7 @@ def nf_make_post(
                 )
             )
             if writeout:
-                write_if_different(nfo, "".join(add) + "".join(subjobs))
+                _write_workflow(nfo, "".join(add) + "".join(subjobs))
 
             confo = os.path.abspath(
                 os.path.join(
@@ -4178,7 +4200,7 @@ def nf_make_summary(config, subdir, loglevel, combinations=None):
         subjobs.append("\n\n")
 
     nfo = os.path.abspath(os.path.join(subdir, "summary_subflow.nf"))
-    write_if_different(nfo, "".join(subjobs))
+    _write_workflow(nfo, "".join(subjobs))
 
     subconf = mu.NestedDefaultDict()
     for key in ["BINS", "MAXTHREADS", "SETTINGS"]:

From 767d84200e584d367b0915a89ba12bae9d0255f3 Mon Sep 17 00:00:00 2001
From: jfallmann <fallmann.joerg@gmail.com>
Date: Tue, 5 May 2026 16:15:03 +0200
Subject: [PATCH 31/39] conda env updates for snakemake/nextflow

---
 envs/monsda.yaml     | 26 +++++++++++++-------------
 envs/monsda_min.yaml | 24 ++++++++++++------------
 2 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/envs/monsda.yaml b/envs/monsda.yaml
index 39538a1d..697dc095 100644
--- a/envs/monsda.yaml
+++ b/envs/monsda.yaml
@@ -4,29 +4,29 @@ channels:
   - bioconda
   - nodefaults
 dependencies:
-  - biopython =1.86
-  - fastapi =0.128.5
+  - biopython =1.87
+  - fastapi =0.136.1
   - grep >=3.4
-  - isort =7.0.0  
+  - isort =8.0.1  
   - monsda =1.4.0
   - natsort =8.4.0
-  - nextflow =25.10.3
-  - numpy =2.4.2
+  - nextflow =26.04.0
+  - numpy =2.4.3
   - pandas =2.3.3
   - perl =5.32.1
   - pip >=25.3
   - python =3.12.2
   - pyyaml =6.0.3
-  - scipy =1.17.0
-  - snakemake =9.16.3
-  - snakemake-executor-plugin-slurm =2.1.0
+  - scipy =1.17.1
+  - snakemake =9.20.0
+  - snakemake-executor-plugin-slurm =2.6.1
   - snakemake-executor-plugin-cluster-generic =1.0.9
-  - snakemake-interface-common =1.22.0
-  - snakemake-interface-executor-plugins =9.3.9
+  - snakemake-interface-common =1.23.0
+  - snakemake-interface-executor-plugins =9.4.0
   - snakemake-interface-report-plugins =1.3.0
-  - snakemake-interface-storage-plugins =4.3.2
+  - snakemake-interface-storage-plugins =4.4.1
   - snakemake-storage-plugin-s3 =0.3.6
   - snakemake-storage-plugin-ftp =0.1.3  
-  - snakemake-storage-plugin-http =0.3.0
+  - snakemake-storage-plugin-http =0.3.1
   - snakemake-storage-plugin-zenodo =0.1.5
-  - uvicorn =0.40.0
\ No newline at end of file
+  - uvicorn =0.46.0
\ No newline at end of file
diff --git a/envs/monsda_min.yaml b/envs/monsda_min.yaml
index 5b0f3cdb..b2939fa2 100644
--- a/envs/monsda_min.yaml
+++ b/envs/monsda_min.yaml
@@ -4,28 +4,28 @@ channels:
   - bioconda
   - nodefaults
 dependencies:
-  - biopython =1.86
-  - fastapi =0.128.5
+  - biopython =1.87
+  - fastapi =0.136.1
   - grep >=3.4
-  - isort =7.0.0  
+  - isort =8.0.1  
   - natsort =8.4.0
-  - nextflow =25.10.3
-  - numpy =2.4.2
+  - nextflow =26.04.0
+  - numpy =2.4.3
   - pandas =2.3.3
   - perl =5.32.1
   - pip >=25.3
   - python =3.12.2
   - pyyaml =6.0.3
-  - scipy =1.17.0
-  - snakemake =9.16.3
-  - snakemake-executor-plugin-slurm =2.1.0
+  - scipy =1.17.1
+  - snakemake =9.20.0
+  - snakemake-executor-plugin-slurm =2.6.1
   - snakemake-executor-plugin-cluster-generic =1.0.9
-  - snakemake-interface-common =1.22.0
-  - snakemake-interface-executor-plugins =9.3.9
+  - snakemake-interface-common =1.23.0
+  - snakemake-interface-executor-plugins =9.4.0
   - snakemake-interface-report-plugins =1.3.0
-  - snakemake-interface-storage-plugins =4.3.2
+  - snakemake-interface-storage-plugins =4.4.1
   - snakemake-storage-plugin-s3 =0.3.6
   - snakemake-storage-plugin-ftp =0.1.3  
-  - snakemake-storage-plugin-http =0.3.0
+  - snakemake-storage-plugin-http =0.3.1
   - snakemake-storage-plugin-zenodo =0.1.5
   - uvicorn =0.40.0

From 8b29d8828627e35f0c2d2d06e36e0c6983c483ad Mon Sep 17 00:00:00 2001
From: jfallmann <fallmann.joerg@gmail.com>
Date: Wed, 6 May 2026 15:42:29 +0200
Subject: [PATCH 32/39] Lane split aware fastq handling, monsda now auto-merges
 samples across lanes before processing, no more preprocessing needed

---
 MONSDA/Params.py    | 175 ++++++++++++++++++++++++++++++++++++++++----
 MONSDA/RunMONSDA.py |  12 +++
 2 files changed, 173 insertions(+), 14 deletions(-)

diff --git a/MONSDA/Params.py b/MONSDA/Params.py
index c592146d..cfa00e8a 100644
--- a/MONSDA/Params.py
+++ b/MONSDA/Params.py
@@ -73,6 +73,7 @@
 import sys
 import traceback as tb
 from collections import OrderedDict, defaultdict
+from typing import Optional
 
 from natsort import natsorted
 from snakemake.common.configfile import load_configfile as _load_configfile
@@ -104,6 +105,94 @@
     print("".join(tbe.format()), file=sys.stderr)
 
 
+def _is_lane_split_file(filename: str, sample: Optional[str] = None) -> bool:
+    """Return True if filename carries a lane token (_L###) next to its read tag.
+
+    With ``sample`` the name must start with exactly that sample; without it any
+    prefix is accepted. Canonical SAMPLE_R1.fastq.gz is never lane-split. Accepted:
+      Sample_L001_R1.fastq.gz
+      Sample_R1_L001.fastq.gz / Sample_R1_L001_001.fastq.gz
+    """
+
+    if sample:
+        # Lane token is mandatory: SAMPLE_L###_R[12][_###] or SAMPLE_R[12]_L###[_###]
+        pattern = rf"^{re.escape(sample)}(?:_L\d{{1,3}}_[Rr][12]|_[Rr][12]_L\d{{1,3}})(?:_\d+)?\.fastq\.gz$"
+    else:
+        pattern = r"^.+_[Rr][12]_L\d{1,3}(?:_\d+)?\.fastq\.gz$|^.+_L\d{1,3}_[Rr][12](?:_\d+)?\.fastq\.gz$"
+    return bool(re.match(pattern, filename))
+
+
+def _matches_sample_fastq(filename: str, sample: str) -> bool:
+    """Match configured sample against accepted FASTQ naming schemes.
+
+    Handles both lane orderings:
+      - SAMPLE_L###_R[12] (standard Illumina)
+      - SAMPLE_R[12]_L### (alternative)
+    """
+    return (
+        bool(
+            re.match(
+                rf"^{re.escape(sample)}(?:_L\d{{1,3}})?_[Rr][12](?:_L\d{{1,3}})?(?:_\d+)?\.fastq\.gz$",
+                filename,
+            )
+        )
+        or filename == sample + ".fastq.gz"
+    )
+
+
+def _strip_fastq_sample_name(filename: str) -> str:
+    """Strip read/lane suffix from FASTQ filename and return sample basename.
+
+    Handles both orderings:
+      - SAMPLE_L###_R[12][_###]
+      - SAMPLE_R[12]_L###[_###]
+    """
+    base = os.path.basename(filename)
+    return re.sub(
+        r"(?:_L\d{1,3})?_[Rr][12](?:_L\d{1,3})?(?:_\d+)?\.fastq\.gz$|\.fastq\.gz$",
+        "",
+        base,
+    )
+
+
+def _filter_lane_files_when_merged_exists(files: list) -> list:
+    """Drop lane-split FASTQs if canonical merged FASTQ exists for same sample/read.
+
+    Only a canonical SAMPLE_R#.fastq.gz in the lane file's own directory counts; input order is kept.
+    Handles both lane orderings: SAMPLE_L###_R# and SAMPLE_R#_L###.
+    """
+    if not files:
+        return files
+
+    # Match both orderings: _L###_R# or _R#_L###
+    lane_patterns = [
+        re.compile(r"^(?P<sample>.+)_L\d{1,3}_[Rr](?P<read>[12])(?:_\d+)?\.fastq\.gz$"),
+        re.compile(r"^(?P<sample>.+)_[Rr](?P<read>[12])_L\d{1,3}(?:_\d+)?\.fastq\.gz$"),
+    ]
+    known_paths = {os.path.normpath(f) for f in files}  # full paths: same basename elsewhere is another sample
+    keep = []
+
+    for fp in files:
+        bn = os.path.basename(fp)
+        m = None
+        for pat in lane_patterns:
+            m = pat.match(bn)
+            if m:
+                break
+
+        if not m:
+            keep.append(fp)
+            continue
+
+        canonical_bn = f"{m.group('sample')}_R{m.group('read')}.fastq.gz"
+        canonical_fp = os.path.join(os.path.dirname(fp), canonical_bn)
+        if os.path.normpath(canonical_fp) in known_paths or os.path.exists(canonical_fp):
+            continue
+        keep.append(fp)
+
+    return keep
+
+
 def samplesheet_to_settings(samplesheet_path: str) -> dict:
     """Read a CSV/TSV samplesheet and return a SETTINGS dict compatible with MONSDA config.
 
@@ -293,6 +382,70 @@ def inject_samplesheet_settings(configfile: str, samplesheet_path: str) -> str:
     return out_path
 
 
+@check_run
+def prepare_lane_split_fastqs(config: dict) -> int:
+    """Concatenate lane-split FASTQs into canonical _R1/_R2 files when needed.
+
+    Additive: an existing canonical file is never overwritten. Returns the number of files created.
+    """
+    logid = scriptname + ".Params_prepare_lane_split_fastqs: "
+    merged_files = 0
+
+    samples = [os.path.join(x) for x in sampleslong(config, nocheck="1")]
+    log.debug(logid + "Checking lane split files for samples: " + str(samples))
+
+    for sample in samples:
+        paired = checkpaired([sample], config)
+        if not paired or not any(x in paired for x in ["paired", "singlecell"]):
+            continue
+
+        sample_dir = os.path.join("FASTQ", os.path.dirname(sample))
+        sample_name = os.path.basename(sample).replace(".fastq.gz", "")
+        if not os.path.isdir(sample_dir):
+            continue
+
+        for read in ["1", "2"]:
+            # Find lane files in either format: SAMPLE_L###_R# or SAMPLE_R#_L###
+            lane_candidates = sorted(
+                set(
+                    f
+                    for pattern_glob in [
+                        os.path.join(sample_dir, f"{sample_name}_L*_R{read}*.fastq.gz"),
+                        os.path.join(sample_dir, f"{sample_name}_R{read}_L*.fastq.gz"),
+                    ]
+                    for f in glob.glob(pattern_glob)
+                    if _is_lane_split_file(os.path.basename(f), sample_name)
+                )
+            )
+
+            if len(lane_candidates) < 1:
+                continue
+
+            target = os.path.join(sample_dir, f"{sample_name}_R{read}.fastq.gz")
+            if os.path.exists(target):
+                log.info(
+                    logid
+                    + f"Found lane-split files for {sample_name} R{read}, but target already exists: {target} (keeping existing file)"
+                )
+                continue
+
+            log.info(logid + f"Concatenating {len(lane_candidates)} lane files into {target}")
+            # Write under a temp name, rename on success: an aborted run must not leave a truncated target
+            partial = target + ".partial"
+            with open(partial, "wb") as outfh:
+                for lane_file in lane_candidates:
+                    with open(lane_file, "rb") as infh:
+                        shutil.copyfileobj(infh, outfh)
+            os.replace(partial, target)
+            merged_files += 1
+
+    if merged_files > 0:
+        log.info(logid + f"Created {merged_files} concatenated lane-merged FASTQ files")
+    else:
+        log.debug(logid + "No lane-split FASTQ files required concatenation")
+    return merged_files
+
+
 @check_run
 def get_samples(config: dict) -> list():
     """Check and return samples according to sample list on config.json
@@ -321,6 +474,7 @@ def get_samples(config: dict) -> list():
         paired = checkpaired([SAMPLES[i]], config)
         log.debug(logid + "PAIRED: " + str(paired))
         f = glob.glob(s)
+        f = _filter_lane_files_when_merged_exists(f)
         log.debug(logid + "SAMPLECHECK: " + str(f))
         if f:
             f = list(set([str.join(os.sep, s.split(os.sep)[1:]) for s in f]))
@@ -393,6 +547,7 @@ def get_samples_postprocess(config: dict, subwork: str) -> list:
         paired = checkpaired([SAMPLES[i]], config)
         log.debug(logid + "PAIRED: " + str(paired))
         f = glob.glob(s)
+        f = _filter_lane_files_when_merged_exists(f)
         log.debug(logid + "SAMPLECHECK: " + str(f))
         if f:
             f = sorted(list(set([str.join(os.sep, s.split(os.sep)[1:]) for s in f])))
@@ -587,6 +742,7 @@ def get_samples_from_dir(search: str, config: dict, nocheck: str = None) -> list
         pat = os.sep.join(["FASTQ", os.sep.join(search[0:x]), "*.fastq.gz"])
         log.debug(logid + "REGEX: " + str(pat) + "\t" + "SAMPLES: " + str(samples))
         check = natsorted(glob.glob(pat), key=lambda y: y.lower())
+        check = _filter_lane_files_when_merged_exists(check)
         log.debug(logid + "check: " + str(check))
         if len(check) > 0:
             ret = list()
@@ -601,15 +757,12 @@ def get_samples_from_dir(search: str, config: dict, nocheck: str = None) -> list
                 for s in samples:
                     log.debug(logid + "x: " + str(x))
                     log.debug(logid + "sample: " + str(s))
-                    if re.match(f"^{s}_R", x) or x == s + ".fastq.gz":
+                    if _matches_sample_fastq(x, s):
                         log.debug(
                             logid
                             + "FOUND: "
                             + s
-                            + "_R"
-                            + " or "
-                            + s
-                            + ".fastq.gz"
+                            + " matching accepted FASTQ naming"
                             + " in "
                             + x
                         )
@@ -640,11 +793,7 @@ def get_samples_from_dir(search: str, config: dict, nocheck: str = None) -> list
                             os.sep.join(
                                 [
                                     os.sep.join(os.path.dirname(s).split(os.sep)[1:]),
-                                    re.sub(
-                                        r"_r1.fastq.gz|_R1.fastq.gz|_r2.fastq.gz|_R2.fastq.gz|.fastq.gz",
-                                        "",
-                                        os.path.basename(s),
-                                    ),
+                                    _strip_fastq_sample_name(os.path.basename(s)),
                                 ]
                             )
                             for s in clean
@@ -660,10 +809,8 @@ def get_samples_from_dir(search: str, config: dict, nocheck: str = None) -> list
                                         os.sep.join(
                                             os.path.dirname(s).split(os.sep)[1:]
                                         ),
-                                        re.sub(
-                                            r"_r1.fastq.gz|_R1.fastq.gz|_r2.fastq.gz|_R2.fastq.gz|.fastq.gz",
-                                            "",
-                                            os.path.basename(s),
+                                        _strip_fastq_sample_name(
+                                            os.path.basename(s)
                                         ),
                                     ]
                                 )
diff --git a/MONSDA/RunMONSDA.py b/MONSDA/RunMONSDA.py
index 20249adb..72402ba5 100755
--- a/MONSDA/RunMONSDA.py
+++ b/MONSDA/RunMONSDA.py
@@ -292,6 +292,12 @@ def run_snakemake(
         ONCE FILES ARE DOWNLOADED WE CAN START OTHER PREPROCESSING STEPS
         """
         mu.makeoutdir("TMP")
+        merged_lane_fastqs = mp.prepare_lane_split_fastqs(config)
+        if merged_lane_fastqs:
+            log.info(
+                logid
+                + f"Prepared {merged_lane_fastqs} lane-merged FASTQ files before sample collection"
+            )
         SAMPLES = mp.get_samples(config)
         log.info(logid + "SAMPLES: " + str(SAMPLES))
 
@@ -634,6 +640,12 @@ def run_nextflow(
         ONCE FILES ARE DOWNLOAD WE CAN START OTHER PREPROCESSING STEPS
         """
         mu.makeoutdir("TMP")
+        merged_lane_fastqs = mp.prepare_lane_split_fastqs(config)
+        if merged_lane_fastqs:
+            log.info(
+                logid
+                + f"Prepared {merged_lane_fastqs} lane-merged FASTQ files before sample collection"
+            )
         SAMPLES = mp.get_samples(config)
         log.info(logid + "SAMPLES: " + str(SAMPLES))
         conditions = mp.get_conditions(config)

From efc65f707f8d58758118d46c2235362d22bb4afb Mon Sep 17 00:00:00 2001
From: jfallmann <fallmann.joerg@gmail.com>
Date: Wed, 6 May 2026 16:33:59 +0200
Subject: [PATCH 33/39] nf version fix

---
 MONSDA/RunMONSDA.py | 13 ++++++++++++-
 workflows/header.nf |  2 +-
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/MONSDA/RunMONSDA.py b/MONSDA/RunMONSDA.py
index 72402ba5..422c82d3 100755
--- a/MONSDA/RunMONSDA.py
+++ b/MONSDA/RunMONSDA.py
@@ -2,6 +2,7 @@
 
 import argparse
 import os
+import re
 import shlex
 import shutil
 import subprocess
@@ -848,9 +849,19 @@ def run_nextflow(
 def runjob(jobtorun):
     try:
         logid = scriptname + ".runjob: "
+        run_cmd = jobtorun
+        if (
+            re.search(r"(^|\s)nextflow(\s|$)", jobtorun)
+            and "NXF_SYNTAX_PARSER" not in jobtorun
+        ):
+            run_cmd = "export NXF_SYNTAX_PARSER=v1; " + jobtorun  # export so the setting reaches nextflow even when it is not the first command of the shell string
+            log.warning(
+                logid
+                + "Nextflow command without NXF_SYNTAX_PARSER; forcing legacy parser (NXF_SYNTAX_PARSER=v1) for compatibility of generated workflows with the Nextflow 26.04+ strict parser."
+            )
         # return subprocess.run(jobtorun, shell=True, universal_newlines=True, capture_output=True)  # python >= 3.7
         job = subprocess.Popen(
-            jobtorun,
+            run_cmd,
             shell=True,
             universal_newlines=True,
             stdout=subprocess.PIPE,
diff --git a/workflows/header.nf b/workflows/header.nf
index 576c43d2..7aaa6158 100644
--- a/workflows/header.nf
+++ b/workflows/header.nf
@@ -7,8 +7,8 @@
 // ALWAYS COMMENT LINES WITH '//', DO NOT USE MULTI LINE COMMENTS AS THE PARSER WILL NOT IGNORE MIDDLE LINES AND THIS WILL CAUSE CHAOS
 
 //Version Check
-nextflowVersion = '>=20.01.0.5264'
 nextflow.enable.dsl=2
+//nextflowVersion = '>=20.01.0.5264'
 
 //define unset Params
 def get_always(parameter){

From b0a95c9be37777844ec3417c0eebd1c44acc79c3 Mon Sep 17 00:00:00 2001
From: jfallmann <fallmann.joerg@gmail.com>
Date: Fri, 8 May 2026 10:15:34 +0200
Subject: [PATCH 34/39] snakemake profile update

---
 profile_snakemake/cluster_config.yaml |  7 +++-
 profile_snakemake/config.v8+.yaml     | 57 ++++++++++++++++-----------
 workflows/footer.smk                  | 33 ++++++++++++++++
 3 files changed, 72 insertions(+), 25 deletions(-)
 mode change 100644 => 100755 profile_snakemake/config.v8+.yaml

diff --git a/profile_snakemake/cluster_config.yaml b/profile_snakemake/cluster_config.yaml
index 226538a4..f9f7b122 100644
--- a/profile_snakemake/cluster_config.yaml
+++ b/profile_snakemake/cluster_config.yaml
@@ -1,6 +1,6 @@
 __default__:
-    account: user # your account name
-    partition: main # the partition to use
+    account: ${USER} # your account name
+    partition: c # the partition to use
     time: 1500 # default time (minutes)
     nodes: 1
     output: "/SLURMLOG/{rule}.{wildcards}.out"
@@ -13,3 +13,6 @@ generate_index:
 
 mapping:
     mem: 200GB
+
+multiqc:
+   mem: 30GB
diff --git a/profile_snakemake/config.v8+.yaml b/profile_snakemake/config.v8+.yaml
old mode 100644
new mode 100755
index 548731c7..2b5af595
--- a/profile_snakemake/config.v8+.yaml
+++ b/profile_snakemake/config.v8+.yaml
@@ -1,41 +1,52 @@
 executor: slurm
-cluster-generic-submit-cmd:
-  mkdir -p LOGS/SLURM/{rule} &&
-  sbatch
-    --partition={resources.partition}
-    --cpus-per-task={threads}
-    --mem={resources.mem}
-    --account={resources.account}
-    --job-name=smk-{rule}-{wildcards}
-    --output=LOGS/SLURM/{rule}/{rule}-{wildcards}-%j.out
-    --parsable
+
 default-resources:
-  slurm_account: "user" # your account name
+  slurm_account: "aob" # your account name
   slurm_partition: "c" # the partition to use
   nodes: 1
-  runtime: "8h"
-  mem: "20GB" # default memory     
+  mem_mb: "20000 * (2 ** (attempt - 1))"  
+  runtime: "480 * (2 ** (attempt - 1))"   
 set-resources:
   generate_index:
-    mem: "160GB"
+    mem_mb: "80000 * (2 ** (attempt - 1))"
   mapping:
-    mem: "160GB"
-  sortsam:
-    mem: "30GB"                                                                                 
-  count_mappers:
-    mem: "40GB"
+    mem_mb: "80000 * (2 ** (attempt - 1))"
   multiqc:
-    mem: "100GB"
+    mem_mb: "40000 * (2 ** (attempt - 1))"
   run_deseq2:
-    mem: "40GB"
-    slurm_extra: "'--qos=medium'"
+    mem_mb: "40000 * (2 ** (attempt - 1))"
+    slurm_qos: "medium"
+    runtime: "18h"
+  run_DTU:
+    mem_mb: "40000 * (2 ** (attempt - 1))"
+    slurm_qos: "long"
+    runtime: "120h"
+  run_edger:
+    mem_mb: "40000 * (2 ** (attempt - 1))"
+    slurm_qos: "medium"
     runtime: "18h"
+  salmon_index:
+    mem_mb: "40000 * (2 ** (attempt - 1))"
+  sortsam:
+    mem_mb: "16000 * (2 ** (attempt - 1))"
+    slurm_qos: "rapid"
+    runtime: "1h"
+  atacorrect:
+    mem_mb: "40000 * (2 ** (attempt - 1))"
+  bindetect:
+    mem_mb: "60000 * (2 ** (attempt - 1))"
+  gridss_call:
+    mem_mb: "40000 * (2 ** (attempt - 1))"
+    threads: 8
+  gridss_setupreference:
+    mem_mb: "40000 * (2 ** (attempt - 1))"
+    threads: 8
 jobs: 1000
 keep-going: True
 rerun-incomplete: True
 scheduler: greedy
 max-jobs-per-second: 10
 max-status-checks-per-second: 10
-restart-times: 3
+restart-times: 1
 local-cores: 1
 latency-wait: 600
diff --git a/workflows/footer.smk b/workflows/footer.smk
index 04b84b4b..17081eeb 100644
--- a/workflows/footer.smk
+++ b/workflows/footer.smk
@@ -1,3 +1,36 @@
+## Queue-agnostic fallback resources for all rules.
+## These defaults apply when a rule has no explicit resource set.
+## They keep local/non-Slurm runs from lacking baseline resource values.
+_fallback_rule_resources = {
+	"mem_mb": 20000,
+	"runtime": 480,
+	"nodes": 1,
+}
+
+_user_rule_defaults = config.get("DEFAULT_RULE_RESOURCES", {})
+if isinstance(_user_rule_defaults, dict):
+	_fallback_rule_resources.update(_user_rule_defaults)
+
+
+def _has_resource(rule_obj, resource_name):  # True when rule_obj.resources already carries resource_name
+	try:
+		return resource_name in rule_obj.resources.keys()  # preferred path: resources container exposes keys()
+	except Exception:
+		return hasattr(rule_obj.resources, resource_name)  # fallback when the keys() lookup raises: attribute check
+
+
+def _set_resource(rule_obj, resource_name, value):  # store value under resource_name on rule_obj.resources
+	try:
+		rule_obj.resources[resource_name] = value  # preferred path: item assignment
+	except Exception:
+		setattr(rule_obj.resources, resource_name, value)  # fallback when item assignment raises: attribute assignment
+
+
+for _rule in workflow.rules:
+	for _res_name, _res_value in _fallback_rule_resources.items():
+		if not _has_resource(_rule, _res_name):
+			_set_resource(_rule, _res_name, _res_value)
+
 onsuccess:
     print("Workflow finished, no error")
 onerror:

From 1d85ff5f5e6640951c4f11d1a3ab553d68b7a1c4 Mon Sep 17 00:00:00 2001
From: jfallmann <fallmann.joerg@gmail.com>
Date: Fri, 8 May 2026 10:15:51 +0200
Subject: [PATCH 35/39] nextflow profile update

---
 profile_nextflow/nextflow.config | 95 +++++++++++++++++++++++++-------
 workflows/countreads.nf          |  4 +-
 workflows/deseq2_DE.nf           |  2 +-
 workflows/dexseq_DEU.nf          |  4 +-
 workflows/diego_DAS.nf           |  2 +-
 workflows/edger_DAS.nf           |  2 +-
 workflows/edger_DE.nf            |  2 +-
 workflows/edger_DEU.nf           |  2 +-
 workflows/mapping.nf             |  8 +--
 9 files changed, 88 insertions(+), 33 deletions(-)

diff --git a/profile_nextflow/nextflow.config b/profile_nextflow/nextflow.config
index aec407b5..6d15e289 100644
--- a/profile_nextflow/nextflow.config
+++ b/profile_nextflow/nextflow.config
@@ -1,27 +1,82 @@
+// Queue-agnostic fallback defaults for ALL processes.
+// These apply regardless of which executor/profile is active.
+// Inline process directives (e.g. memory 16.GB in sortsam) take precedence.
+process {
+    cpus          = 1
+    errorStrategy = 'retry'
+    maxRetries    = 2
+    memory        = { 20.GB * (1 << ((task.attempt ?: 1) - 1)) }
+    time          = { 8.h * (1 << ((task.attempt ?: 1) - 1)) }
+}
+
 profiles {
-  slurm {
-    process.executor = 'slurm'
-    process.memory = '10 GB'
-    process.queue = 'main'
-    withName: '_idx|_map' {
-      memory = '160GB'
+    slurm {
+        // Keep ALL process settings in this one block: with the legacy config parser a 'process { }' block overwrites 'process.x = ...' dot-syntax settings made in the same profile.
+        process {
+            executor = 'slurm'
+            queue    = 'c'  // default partition – override per-rule below
+            // Non-default overrides only – rules that need more than the global defaults.
+            withName: 'bwa_idx|bwa2_idx|bwameth_idx|hisat2_idx|minimap_idx|segemehl_idx|segemehl3_idx|star_idx' {
+                memory = { 80.GB * (1 << ((task.attempt ?: 1) - 1)) }
+            }
+            withName: 'bwa_mapping|bwa2_mapping|bwameth_mapping|hisat2_mapping|minimap_mapping|segemehl_mapping|segemehl3_mapping|star_mapping' {
+                memory = { 80.GB * (1 << ((task.attempt ?: 1) - 1)) }
+            }
+            withName: 'salmon_idx|kallisto_idx' {
+                memory = { 40.GB * (1 << ((task.attempt ?: 1) - 1)) }
+            }
+            withName: 'mqc' {
+                memory = { 40.GB * (1 << ((task.attempt ?: 1) - 1)) }
+            }
+            withName: 'run_deseq2|run_edger' {
+                memory = { 40.GB * (1 << ((task.attempt ?: 1) - 1)) }
+                queue  = 'medium'
+                time   = { 18.h * (1 << ((task.attempt ?: 1) - 1)) }
+            }
+            withName: 'run_drimseq|run_dexseq' {
+                memory = { 40.GB * (1 << ((task.attempt ?: 1) - 1)) }
+                queue  = 'long'
+                time   = { 120.h * (1 << ((task.attempt ?: 1) - 1)) }
+            }
+            withName: 'atacorrect' {
+                memory = { 40.GB * (1 << ((task.attempt ?: 1) - 1)) }
+            }
+            withName: 'bindetect' {
+                memory = { 60.GB * (1 << ((task.attempt ?: 1) - 1)) }
+            }
+            withName: 'gridss_call|gridss_setupreference' {
+                memory = { 40.GB * (1 << ((task.attempt ?: 1) - 1)) }
+                cpus   = 8
+            }
+        }
+    }
-  }
 
-  local {
-    process.executor = 'local'
-  }
+    local {
+        process.executor = 'local'
+    }
 
-  apptainer {
-    conda.enabled           = true 
-    apptainer.enabled       = true
-    apptainer.autoMounts    = true
-    docker.enabled          = false
-    process {
-        withName: '_idx|_map' {
-            memory = '160GB'
+    apptainer {
+        conda.enabled           = true
+        apptainer.enabled       = true
+        apptainer.autoMounts    = true
+        docker.enabled          = false
+        process {
+                beforeScript = '''
+            if [[ -n "${CONDA_PREFIX:-}" && -f "${CONDA_PREFIX}/etc/profile.d/conda.sh" ]]; then
+              source "${CONDA_PREFIX}/etc/profile.d/conda.sh"
+            elif [[ -n "${CONDA_EXE:-}" ]]; then
+              _conda_base="$(dirname "$(dirname "${CONDA_EXE}")")"
+              source "${_conda_base}/etc/profile.d/conda.sh"
+            elif [[ -f "${HOME}/miniconda3/etc/profile.d/conda.sh" ]]; then
+              source "${HOME}/miniconda3/etc/profile.d/conda.sh"
+            elif [[ -f "${HOME}/anaconda3/etc/profile.d/conda.sh" ]]; then
+              source "${HOME}/anaconda3/etc/profile.d/conda.sh"
+            else
+              echo "Could not locate conda.sh" >&2
+              exit 1
+            fi
+            conda activate apptainer
+            '''
         }
-        beforeScript = 'source ~/anaconda3/etc/profile.d/conda.sh; conda activate apptainer; export PATH=${PATH}:${CONDA_PREFIX}/bin'
     }
-  }
 }
\ No newline at end of file
diff --git a/workflows/countreads.nf b/workflows/countreads.nf
index b54cf7b2..377410ba 100644
--- a/workflows/countreads.nf
+++ b/workflows/countreads.nf
@@ -147,7 +147,7 @@ process count_mappers{
     conda "samtools.yaml"
     container "oras://jfallmann/monsda:"+"samtools"
     cpus THREADS
-    memory 16.GB
+    memory { 16.GB * (1 << ((task.attempt ?: 1) - 1)) }
 	cache 'lenient'
     //validExitStatus 0,1
 
@@ -178,7 +178,7 @@ process featurecount{
     conda "$COUNTENV"+".yaml"
     container "oras://jfallmann/monsda:"+"$COUNTENV"
     cpus THREADS
-    memory 16.GB
+    memory { 16.GB * (1 << ((task.attempt ?: 1) - 1)) }
 	cache 'lenient'
     //validExitStatus 0,1
 
diff --git a/workflows/deseq2_DE.nf b/workflows/deseq2_DE.nf
index d555c913..0c446a12 100644
--- a/workflows/deseq2_DE.nf
+++ b/workflows/deseq2_DE.nf
@@ -22,7 +22,7 @@ process featurecount_deseq{
     conda "$COUNTENV"+".yaml"
     container "oras://jfallmann/monsda:"+"$COUNTENV"
     cpus THREADS
-    memory 16.GB
+    memory { 16.GB * (1 << ((task.attempt ?: 1) - 1)) }
 	cache 'lenient'
     //validExitStatus 0,1
 
diff --git a/workflows/dexseq_DEU.nf b/workflows/dexseq_DEU.nf
index 8d938a17..0fb02c75 100644
--- a/workflows/dexseq_DEU.nf
+++ b/workflows/dexseq_DEU.nf
@@ -21,7 +21,7 @@ process prepare_deu_annotation{
     conda "$COUNTENV"+".yaml"
     container "oras://jfallmann/monsda:"+"$COUNTENV"
     cpus THREADS
-    memory 16.GB
+    memory { 16.GB * (1 << ((task.attempt ?: 1) - 1)) }
 	cache 'lenient'
     //validExitStatus 0,1
 
@@ -61,7 +61,7 @@ process featurecount_dexseq{
     conda "$COUNTENV"+".yaml"
     container "oras://jfallmann/monsda:"+"$COUNTENV"
     cpus THREADS
-    memory 16.GB
+    memory { 16.GB * (1 << ((task.attempt ?: 1) - 1)) }
 	cache 'lenient'
     //validExitStatus 0,1
 
diff --git a/workflows/diego_DAS.nf b/workflows/diego_DAS.nf
index a2a43a73..b0a40373 100644
--- a/workflows/diego_DAS.nf
+++ b/workflows/diego_DAS.nf
@@ -24,7 +24,7 @@ process featurecount_diego{
     conda "$COUNTENV"+".yaml"
     container "oras://jfallmann/monsda:"+"$COUNTENV"
     cpus THREADS
-    memory 16.GB
+    memory { 16.GB * (1 << ((task.attempt ?: 1) - 1)) }
 	cache 'lenient'
     //validExitStatus 0,1
 
diff --git a/workflows/edger_DAS.nf b/workflows/edger_DAS.nf
index 22806a0a..cedb3a93 100644
--- a/workflows/edger_DAS.nf
+++ b/workflows/edger_DAS.nf
@@ -22,7 +22,7 @@ process featurecount_edger{
     conda "$COUNTENV"+".yaml"
     container "oras://jfallmann/monsda:"+"$COUNTENV"
     cpus THREADS
-    memory 16.GB
+    memory { 16.GB * (1 << ((task.attempt ?: 1) - 1)) }
 	cache 'lenient'
     //validExitStatus 0,1
 
diff --git a/workflows/edger_DE.nf b/workflows/edger_DE.nf
index a4683376..5a55648a 100644
--- a/workflows/edger_DE.nf
+++ b/workflows/edger_DE.nf
@@ -22,7 +22,7 @@ process featurecount_edger{
     conda "$COUNTENV"+".yaml"
     container "oras://jfallmann/monsda:"+"$COUNTENV"
     cpus THREADS
-    memory 16.GB
+    memory { 16.GB * (1 << ((task.attempt ?: 1) - 1)) }
 	cache 'lenient'
     //validExitStatus 0,1
 
diff --git a/workflows/edger_DEU.nf b/workflows/edger_DEU.nf
index aa1e5146..a0a27db1 100644
--- a/workflows/edger_DEU.nf
+++ b/workflows/edger_DEU.nf
@@ -22,7 +22,7 @@ process featurecount_edger{
     conda "$COUNTENV"+".yaml"
     container "oras://jfallmann/monsda:"+"$COUNTENV"
     cpus THREADS
-    memory 16.GB
+    memory { 16.GB * (1 << ((task.attempt ?: 1) - 1)) }
 	cache 'lenient'
     //validExitStatus 0,1
 
diff --git a/workflows/mapping.nf b/workflows/mapping.nf
index 62b06ffa..b455eba4 100644
--- a/workflows/mapping.nf
+++ b/workflows/mapping.nf
@@ -4,7 +4,7 @@ process sortsam{
     conda "samtools.yaml"
     container "oras://jfallmann/monsda:"+"samtools"
     cpus THREADS
-    memory 16.GB
+    memory { 16.GB * (1 << ((task.attempt ?: 1) - 1)) }
 	cache 'lenient'
     //validExitStatus 0,1
 
@@ -34,7 +34,7 @@ process sam2bam{
     conda "samtools.yaml"
     container "oras://jfallmann/monsda:"+"samtools"
     cpus THREADS
-    memory 16.GB
+    memory { 16.GB * (1 << ((task.attempt ?: 1) - 1)) }
 	cache 'lenient'
     //validExitStatus 0,1
 
@@ -66,7 +66,7 @@ process uniqsam{
     conda "samtools.yaml"
     container "oras://jfallmann/monsda:"+"samtools"
     cpus THREADS
-    memory 16.GB
+    memory { 16.GB * (1 << ((task.attempt ?: 1) - 1)) }
 	cache 'lenient'
     //validExitStatus 0,1
 
@@ -101,7 +101,7 @@ process sam2bamuniq{
     conda "samtools.yaml"
     container "oras://jfallmann/monsda:"+"samtools"
     cpus THREADS
-    memory 16.GB
+    memory { 16.GB * (1 << ((task.attempt ?: 1) - 1)) }
 	cache 'lenient'
     //validExitStatus 0,1
 

From 633a5e2e7019b971b170ea98ce400de0a62821c9 Mon Sep 17 00:00:00 2001
From: jfallmann <fallmann.joerg@gmail.com>
Date: Fri, 8 May 2026 11:38:09 +0200
Subject: [PATCH 36/39] added umicollapse as drop-in replacement for umi_tools
 dedup

---
 MONSDA/Workflows.py               |  14 +--
 configs/template_umicollapse.json | 118 ++++++++++++++++++++++++
 envs/umicollapse.yaml             |  10 ++
 profile_snakemake/config.yaml     |  12 +++
 workflows/umicollapse.nf          | 135 +++++++++++++++++++++++++++
 workflows/umicollapse.smk         | 146 ++++++++++++++++++++++++++++++
 workflows/umicollapse_dedup.nf    |  64 +++++++++++++
 7 files changed, 492 insertions(+), 7 deletions(-)
 create mode 100644 configs/template_umicollapse.json
 create mode 100644 envs/umicollapse.yaml
 create mode 100644 profile_snakemake/config.yaml
 create mode 100644 workflows/umicollapse.nf
 create mode 100644 workflows/umicollapse.smk
 create mode 100644 workflows/umicollapse_dedup.nf

diff --git a/MONSDA/Workflows.py b/MONSDA/Workflows.py
index 57b00630..81aab2e1 100755
--- a/MONSDA/Workflows.py
+++ b/MONSDA/Workflows.py
@@ -957,7 +957,7 @@ def make_sub(
                             and toolenv != "rustqc"
                         ):
                             if "DEDUP" in works and any(
-                                x in envs for x in ["umitools", "fgumi"]
+                                x in envs for x in ["umitools", "fgumi", "umicollapse"]
                             ):
                                 subname = toolenv + "_dedup_trim.smk"
                             else:
@@ -970,7 +970,7 @@ def make_sub(
                             and toolenv != "rustqc"
                         ):
                             if "DEDUP" in subworkflows and any(
-                                x in envs for x in ["umitools", "fgumi"]
+                                x in envs for x in ["umitools", "fgumi", "umicollapse"]
                             ):
                                 subname = toolenv + "_dedup.smk"
                             else:
@@ -980,7 +980,7 @@ def make_sub(
                         if works[j] == "DEDUP" and toolenv == "picard":
                             subname = toolenv + "_dedup.smk"
                             subconf.pop("PREDEDUP", None)
-                        elif works[j] == "DEDUP" and toolenv in ["umitools", "fgumi"]:
+                        elif works[j] == "DEDUP" and toolenv in ["umitools", "fgumi", "umicollapse"]:
                             subconf["PREDEDUP"] = "enabled"
 
                         smkf = os.path.abspath(os.path.join(workflowpath, subname))
@@ -1160,7 +1160,7 @@ def make_sub(
                     if subwork == "DEDUP" and toolenv == "picard":
                         subname = toolenv + "_dedup.smk"
                         subconf.pop("PREDEDUP", None)
-                    elif subwork == "DEDUP" and toolenv in ["umitools", "fgumi"]:
+                    elif subwork == "DEDUP" and toolenv in ["umitools", "fgumi", "umicollapse"]:
                         subconf["PREDEDUP"] = "enabled"
                     # Add rulethemall based on chosen workflows
                     add.append(
@@ -3040,7 +3040,7 @@ def nf_make_sub(
                             else:
                                 if "DEDUP" in subworkflows:
                                     flowlist.append("QC_RAW")
-                                    if toolenv in ["umitools", "fgumi"]:
+                                    if toolenv in ["umitools", "fgumi", "umicollapse"]:
                                         flowlist.append("DEDUPEXTRACT")
                                     if "MAPPING" in works:
                                         flowlist.append("QC_MAPPING")
@@ -3056,7 +3056,7 @@ def nf_make_sub(
 
                         if works[j] == "DEDUP":
                             deduptool = toolenv
-                            if toolenv in ["umitools", "fgumi"]:
+                            if toolenv in ["umitools", "fgumi", "umicollapse"]:
                                 flowlist.append("PREDEDUP")
                                 subconf["PREDEDUP"] = "enabled"
                                 if "QC" in flowlist:
@@ -3463,7 +3463,7 @@ def nf_make_sub(
                     # Dedup tools can be extended here
                     if subwork == "DEDUP" and toolenv == "picard":
                         subname = toolenv + "_dedup.nf"
-                    elif subwork == "DEDUP" and toolenv in ["umitools", "fgumi"]:
+                    elif subwork == "DEDUP" and toolenv in ["umitools", "fgumi", "umicollapse"]:
                         deduptool = toolenv
                         flowlist.append("PREDEDUP")
                         subconf["PREDEDUP"] = "enabled"
diff --git a/configs/template_umicollapse.json b/configs/template_umicollapse.json
new file mode 100644
index 00000000..cff24128
--- /dev/null
+++ b/configs/template_umicollapse.json
@@ -0,0 +1,118 @@
+{
+  "WORKFLOWS": "QC,MAPPING,DEDUP,COUNTING",
+  "BINS": "",
+  "MAXTHREADS": "20",
+  "VERSION": "1.4.0",
+  "SETTINGS": {
+    "id": {
+      "condition": {
+        "SAMPLES": [
+          "rep_1",
+          "rep_2"
+        ],
+        "GROUPS": [
+          "WT",
+          "WT"
+        ],
+        "TYPES": [
+          "standard",
+          "standard"
+        ],
+        "BATCHES": [
+          "1",
+          "1"
+        ],
+        "SEQUENCING": "paired",
+        "REFERENCE": "GENOMES/Dm6/dm6.fa.gz",
+        "INDEX": "GENOMES/Dm6/INDICES/star",
+        "PREFIX": "idx",
+        "ANNOTATION": {
+          "GTF": "GENOMES/Dm6/dm6.gtf.gz",
+          "GFF": "GENOMES/Dm6/dm6.gff3.gz"
+        }
+      }
+    }
+  },
+  "QC": {
+    "TOOLS": {
+      "fastqc": "fastqc"
+    },
+    "id": {
+      "condition": {
+        "fastqc": {
+          "OPTIONS": {
+            "QC": "",
+            "MULTI": ""
+          }
+        }
+      }
+    }
+  },
+  "MAP": {
+    "TOOLS": {
+      "star": "STAR"
+    },
+    "id": {
+      "condition": {
+        "star": {
+          "OPTIONS": {
+            "INDEX": "--sjdbGTFfeatureExon exon --sjdbGTFtagExonParentTranscript Parent --genomeSAindexNbases 13",
+            "MAP": "--sjdbGTFfeatureExon exon --sjdbGTFtagExonParentTranscript Parent --outSAMprimaryFlag AllBestScore --outSAMattributes NH HI NM MD AS nM jM jI XS",
+            "EXTENSION": ""
+          }
+        }
+      }
+    }
+  },
+  "DEDUP": {
+    "TOOLS": {
+      "umicollapse": "umicollapse",
+      "umitools": "umi_tools",
+      "fgumi": "fgumi",
+      "picard": "picard"
+    },
+    "id": {
+      "condition": {
+        "umicollapse": {
+          "OPTIONS": {
+            "DEDUP": "--algo dir --umi-sep _"
+          }
+        },
+        "umitools": {
+          "OPTIONS": {
+            "WHITELIST": "--extract-method string --bc-pattern 'XNNNNX'",
+            "EXTRACT": "--extract-umi-method read_id",
+            "DEDUP": ""
+          }
+        },
+        "fgumi": {
+          "OPTIONS": {
+            "EXTRACT": "",
+            "DEDUP": ""
+          }
+        },
+        "picard": {
+          "OPTIONS": {
+            "JAVA": "",
+            "DEDUP": ""
+          }
+        }
+      }
+    }
+  },
+  "COUNTING": {
+    "TOOLS": {
+      "salmon": "salmon"
+    },
+    "id": {
+      "condition": {
+        "salmon": {
+          "OPTIONS": {
+            "INDEX": "-k 31",
+            "COUNTING": "-l U --validateMappings"
+          }
+        }
+      }
+    }
+  }
+}
diff --git a/envs/umicollapse.yaml b/envs/umicollapse.yaml
new file mode 100644
index 00000000..06018058
--- /dev/null
+++ b/envs/umicollapse.yaml
@@ -0,0 +1,10 @@
+name: umicollapse
+channels:
+  - conda-forge
+  - bioconda
+  - nodefaults
+dependencies:
+  - samtools =1.21
+  - umicollapse
+  - umi_tools =1.1.2
+  - dateutils =0.6.12
diff --git a/profile_snakemake/config.yaml b/profile_snakemake/config.yaml
new file mode 100644
index 00000000..a4e4edb5
--- /dev/null
+++ b/profile_snakemake/config.yaml
@@ -0,0 +1,12 @@
+restart-times: 3
+jobscript: "slurm-jobscript.sh"
+cluster-generic-submit-cmd: "slurm-submit.py"
+#cluster-status: "slurm-status.py"
+max-jobs-per-second: 1
+max-status-checks-per-second: 3
+local-cores: 1
+latency-wait: 600
+#use-conda: True
+keep-going: True
+rerun-incomplete: True
+#printshellcmds: True
diff --git a/workflows/umicollapse.nf b/workflows/umicollapse.nf
new file mode 100644
index 00000000..2968c5b2
--- /dev/null
+++ b/workflows/umicollapse.nf
@@ -0,0 +1,135 @@
+DEDUPENV=get_always('DEDUPENV')
+DEDUPBIN=get_always('DEDUPBIN')
+
+WHITELISTPARAMS = get_always('umicollapse_params_WHITELIST') ?: ''
+EXTRACTPARAMS = get_always('umicollapse_params_EXTRACT') ?: ''
+
+// UMI extraction uses umi_tools extract (bundled in umicollapse env);
+// BAM deduplication is handled by umicollapse_dedup.nf.
+
+process whitelist{
+    conda "$DEDUPENV"+".yaml"
+    container "oras://jfallmann/monsda:"+"$DEDUPENV"
+    cpus THREADS
+	cache 'lenient'
+    // Emits one '*_whitelist' file per input via 'umi_tools whitelist'; when WHITELISTPARAMS is empty only a '*_dummy_whitelist' placeholder is touched.
+
+    publishDir "${workflow.workDir}/../" , mode: 'link',
+    saveAs: {filename ->
+        if (filename.indexOf("_whitelist") > 0)         "DEDUP_FASTQ/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}_whitelist"
+        else if (filename.indexOf("log") > 0)           "LOGS/${COMBO}/${CONDITION}/DEDUP/dedup_whitelist.log"
+        else null
+    }
+
+    input:
+    path samples
+        
+    output:
+    path "*_whitelist", emit: wl
+
+    script:    
+    if (WHITELISTPARAMS == ''){    
+        outf = samples[0].getSimpleName().replace("_R1","")+"_dummy_whitelist"
+        """
+        touch $outf
+        """
+    } else {
+        if (PAIRED == 'paired'){
+            r1 = samples[0]
+            r2 = samples[1]
+            outf = samples[0].getSimpleName().replace("_R1","")+"_whitelist"
+            """
+                mkdir tmp && umi_tools whitelist $WHITELISTPARAMS --temp-dir tmp --log=wl.log --stdin=$r1 --read2-in=$r2 --stdout=$outf
+            """
+        }
+        else{
+            outf = samples.getSimpleName()+"_whitelist"
+            """
+                mkdir tmp && umi_tools whitelist $WHITELISTPARAMS --temp-dir tmp --log=wl.log --stdin=$samples --stdout=$outf
+            """
+        }
+    }
+}
+
+process extract_fq{
+    conda "$DEDUPENV"+".yaml"
+    container "oras://jfallmann/monsda:"+"$DEDUPENV"
+    cpus THREADS
+	cache 'lenient'
+    // Runs 'umi_tools extract' on the reads and emits '*_dedup.fastq.gz' plus ex.log; --whitelist is passed only when the whitelist name does not contain 'dummy_whitelist'.
+
+    publishDir "${workflow.workDir}/../" , mode: 'link',
+    saveAs: {filename ->
+        if (filename.indexOf("_dedup.fastq.gz") > 0)      "DEDUP_FASTQ/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.fastq.gz"
+        else if (filename.indexOf("log") > 0)             "LOGS/${COMBO}/${CONDITION}/DEDUP/dedup_extract.log"
+        else null
+    }
+
+    input:
+    path wl
+    path samples
+        
+    output:
+    path "*_dedup.fastq.gz", emit: extract
+    path "ex.log", emit: logs
+
+    script:
+    if (PAIRED == 'paired'){
+        r1 = samples[0]
+        r2 = samples[1]
+        outf = samples[0].getSimpleName()+"_dedup.fastq.gz"
+        outf2 = samples[1].getSimpleName()+"_dedup.fastq.gz"
+        if (!!(wl =~ /dummy_whitelist/)){
+            """
+                mkdir tmp && umi_tools extract $EXTRACTPARAMS --temp-dir tmp --log=ex.log --stdin=$r1 --read2-in=$r2 --stdout=$outf --read2-out=$outf2
+            """
+        }
+        else{
+            """
+                mkdir tmp && umi_tools extract $EXTRACTPARAMS --whitelist=$wl --temp-dir tmp --log=ex.log --stdin=$r1 --read2-in=$r2 --stdout=$outf --read2-out=$outf2
+            """
+        }
+    }
+    else{
+        outf = samples.getSimpleName()+"_dedup.fastq.gz"
+        if (!!(wl =~ /dummy_whitelist/)){
+            """
+                mkdir tmp && umi_tools extract $EXTRACTPARAMS --temp-dir tmp --log=ex.log --stdin=$samples --stdout=$outf
+            """
+        }
+        else{        
+            """
+                mkdir tmp && umi_tools extract $EXTRACTPARAMS --whitelist=$wl --temp-dir tmp --log=ex.log --stdin=$samples --stdout=$outf
+            """
+        }
+    }
+}
+
+workflow DEDUPEXTRACT{
+    take: 
+    collection
+
+    main:
+    // NOTE(review): with PREDEDUP == 'enabled' the global samples_ch is used and 'collection' is ignored; whitelist.out.wl and the collated reads are separate channels paired by emission order – confirm each whitelist meets its own sample.
+    if ( PREDEDUP == 'enabled' ){ 
+        if (PAIRED == 'paired'){                
+            whitelist(samples_ch.collate( 2 ))
+            extract_fq(whitelist.out.wl, samples_ch.collate( 2 ))
+        } else{                
+            whitelist(samples_ch.collate( 1 ))
+            extract_fq(whitelist.out.wl, samples_ch.collate( 1 ))
+        }
+    }else{
+        if (PAIRED == 'paired'){
+            whitelist(collection.collate(2))
+            extract_fq(whitelist.out.wl, collection.collate( 2 ))
+        } else{
+            whitelist(collection.collate( 1 ))
+            extract_fq(whitelist.out.wl, collection.collate( 1 ))
+        }
+    }
+
+    emit:    
+    extract = extract_fq.out.extract
+    logs = extract_fq.out.logs
+}
diff --git a/workflows/umicollapse.smk b/workflows/umicollapse.smk
new file mode 100644
index 00000000..e7a83c9e
--- /dev/null
+++ b/workflows/umicollapse.smk
@@ -0,0 +1,146 @@
+DEDUPBIN, DEDUPENV = env_bin_from_config(config, 'DEDUP')
+# WHITELIST options of the first sample; the whitelist rules below are only defined when this is truthy.
+wlparams = tool_params(SAMPLES[0], None, config, "DEDUP", DEDUPENV)['OPTIONS'].get('WHITELIST')
+# Restrict {type} so that dedupbam only matches *_mapped_sorted.bam and *_mapped_sorted_unique.bam.
+wildcard_constraints:
+    type = "sorted|sorted_unique"
+
+# UMI extraction uses umi_tools extract (bundled in umicollapse env);
+# BAM deduplication uses umicollapse bam for significantly faster dedup.
+
+if paired == 'paired':  # paired-end layout: rules read FASTQ/<sample>_R1.fastq.gz and FASTQ/<sample>_R2.fastq.gz
+    if wlparams:  # WHITELIST options are set: build a whitelist first and hand it to extract
+        rule whitelist:
+            input:  r1 = lambda wildcards: "FASTQ/{rawfile}_R1.fastq.gz".format(rawfile=[x for x in SAMPLES if x.split(os.sep)[-1] in wildcards.file][0]),
+                    r2 = lambda wildcards: "FASTQ/{rawfile}_R2.fastq.gz".format(rawfile=[x for x in SAMPLES if x.split(os.sep)[-1] in wildcards.file][0])
+            output: wl = "DEDUP_FASTQ/{combo}/{file}_whitelist",
+                    td = temp(directory("TMP/UMIWL/{combo}/{file}"))
+            log:   "LOGS/{combo}/{file}_dedup_whitelist.log"
+            conda: ""+DEDUPENV+".yaml"
+            container: "oras://jfallmann/monsda:"+DEDUPENV+""
+            threads: 1
+            params: dpara = lambda wildcards: tool_params(wildcards.file, None, config, "DEDUP", DEDUPENV)['OPTIONS'].get('WHITELIST', ""),
+                    dedup = "umi_tools"
+            resources:
+                mem_mb = lambda wildcards, attempt: 20000 * (2 ** (attempt - 1)),
+                runtime = lambda wildcards, attempt: 480 * (2 ** (attempt - 1))
+            shell:  "mkdir -p {output.td} && {params.dedup} whitelist {params.dpara} --temp-dir {output.td} --log={log} --stdin={input.r1} --read2-in={input.r2} --stdout={output.wl}"
+        # NOTE(review): the rawfile lookups take the first SAMPLES entry whose basename is contained in wildcards.file (presumably a substring test) - confirm that sample names cannot be substrings of one another
+        rule extract:
+            input:  r1 = lambda wildcards: "FASTQ/{rawfile}_R1.fastq.gz".format(rawfile=[x for x in SAMPLES if x.split(os.sep)[-1] in wildcards.file][0]),
+                    r2 = lambda wildcards: "FASTQ/{rawfile}_R2.fastq.gz".format(rawfile=[x for x in SAMPLES if x.split(os.sep)[-1] in wildcards.file][0]),
+                    wl = rules.whitelist.output.wl
+            output: o1 = "DEDUP_FASTQ/{combo}/{file}_R1_dedup.fastq.gz",
+                    o2 = "DEDUP_FASTQ/{combo}/{file}_R2_dedup.fastq.gz",
+                    td = temp(directory("TMP/UMIEX/{combo}/{file}"))
+            log:   "LOGS/{combo}/{file}_dedup_extract.log"
+            conda: ""+DEDUPENV+".yaml"
+            container: "oras://jfallmann/monsda:"+DEDUPENV+""
+            threads: 1
+            params: dpara = lambda wildcards: tool_params(wildcards.file, None, config, "DEDUP", DEDUPENV)['OPTIONS'].get('EXTRACT', ""),
+                    dedup = "umi_tools"
+            resources:
+                mem_mb = lambda wildcards, attempt: 20000 * (2 ** (attempt - 1)),
+                runtime = lambda wildcards, attempt: 480 * (2 ** (attempt - 1))
+            shell:  "mkdir -p {output.td} && {params.dedup} extract {params.dpara} --temp-dir {output.td} --log={log} --error-correct-cell --whitelist={input.wl} --stdin={input.r1} --read2-in={input.r2} --stdout={output.o1} --read2-out={output.o2}"
+    else:  # no WHITELIST options: extract runs on the FASTQ inputs without --whitelist/--error-correct-cell
+        rule extract:
+            input:  r1 = lambda wildcards: "FASTQ/{rawfile}_R1.fastq.gz".format(rawfile=[x for x in SAMPLES if x.split(os.sep)[-1] in wildcards.file][0]),
+                    r2 = lambda wildcards: "FASTQ/{rawfile}_R2.fastq.gz".format(rawfile=[x for x in SAMPLES if x.split(os.sep)[-1] in wildcards.file][0])
+            output: o1 = "DEDUP_FASTQ/{combo}/{file}_R1_dedup.fastq.gz",
+                    o2 = "DEDUP_FASTQ/{combo}/{file}_R2_dedup.fastq.gz",
+                    td = temp(directory("TMP/UMIEX/{combo}/{file}"))
+            log:   "LOGS/{combo}/{file}_dedup_extract.log"
+            conda: ""+DEDUPENV+".yaml"
+            container: "oras://jfallmann/monsda:"+DEDUPENV+""
+            threads: 1
+            params: dpara = lambda wildcards: tool_params(wildcards.file, None, config, "DEDUP", DEDUPENV)['OPTIONS'].get('EXTRACT', ""),
+                    dedup = "umi_tools"
+            resources:
+                mem_mb = lambda wildcards, attempt: 20000 * (2 ** (attempt - 1)),
+                runtime = lambda wildcards, attempt: 480 * (2 ** (attempt - 1))
+            shell:  "mkdir -p {output.td} && {params.dedup} extract {params.dpara} --temp-dir {output.td} --log={log} --stdin={input.r1} --read2-in={input.r2} --stdout={output.o1} --read2-out={output.o2}"
+
+else:  # any layout other than 'paired': rules read a single FASTQ/<sample>.fastq.gz
+    if wlparams:  # WHITELIST options are set: build a whitelist first and hand it to extract
+        rule whitelist:
+            input:  r1 = lambda wildcards: "FASTQ/{rawfile}.fastq.gz".format(rawfile=[x for x in SAMPLES if x.split(os.sep)[-1] in wildcards.file][0])
+            output: wl = "DEDUP_FASTQ/{combo}/{file}_whitelist",
+                    td = temp(directory("TMP/UMIWL/{combo}/{file}"))
+            log:   "LOGS/{combo}/{file}_dedup_whitelist.log"
+            conda: ""+DEDUPENV+".yaml"
+            container: "oras://jfallmann/monsda:"+DEDUPENV+""
+            threads: 1
+            params: dpara = lambda wildcards: tool_params(wildcards.file, None, config, "DEDUP", DEDUPENV)['OPTIONS'].get('WHITELIST', ""),
+                    dedup = "umi_tools"
+            resources:
+                mem_mb = lambda wildcards, attempt: 20000 * (2 ** (attempt - 1)),
+                runtime = lambda wildcards, attempt: 480 * (2 ** (attempt - 1))
+            shell:  "mkdir -p {output.td} && {params.dedup} whitelist {params.dpara} --temp-dir {output.td} --log={log} --stdin={input.r1} --stdout={output.wl}"
+        # NOTE(review): the rawfile lookups take the first SAMPLES entry whose basename is contained in wildcards.file (presumably a substring test) - confirm that sample names cannot be substrings of one another
+        rule extract:
+            input:  r1 = lambda wildcards: "FASTQ/{rawfile}.fastq.gz".format(rawfile=[x for x in SAMPLES if x.split(os.sep)[-1] in wildcards.file][0]),
+                    wl = rules.whitelist.output.wl
+            output: o1 = "DEDUP_FASTQ/{combo}/{file}_dedup.fastq.gz",
+                    td = temp(directory("TMP/UMIEX/{combo}/{file}"))
+            log:   "LOGS/{combo}/{file}_dedup_extract.log"
+            conda: ""+DEDUPENV+".yaml"
+            container: "oras://jfallmann/monsda:"+DEDUPENV+""
+            threads: 1
+            params: dpara = lambda wildcards: tool_params(wildcards.file, None, config, "DEDUP", DEDUPENV)['OPTIONS'].get('EXTRACT', ""),
+                    dedup = "umi_tools"
+            resources:
+                mem_mb = lambda wildcards, attempt: 20000 * (2 ** (attempt - 1)),
+                runtime = lambda wildcards, attempt: 480 * (2 ** (attempt - 1))
+            shell:  "mkdir -p {output.td} && {params.dedup} extract {params.dpara} --temp-dir {output.td} --log={log} --error-correct-cell --whitelist={input.wl} --stdin={input.r1} --stdout={output.o1}"
+
+    else:  # no WHITELIST options: extract runs on the FASTQ input without --whitelist/--error-correct-cell
+        rule extract:
+            input:  r1 = lambda wildcards: "FASTQ/{rawfile}.fastq.gz".format(rawfile=[x for x in SAMPLES if x.split(os.sep)[-1] in wildcards.file][0])
+            output: o1 = "DEDUP_FASTQ/{combo}/{file}_dedup.fastq.gz",
+                    td = temp(directory("TMP/UMIEX/{combo}/{file}"))
+            log:   "LOGS/{combo}/{file}_dedup_extract.log"
+            conda: ""+DEDUPENV+".yaml"
+            container: "oras://jfallmann/monsda:"+DEDUPENV+""
+            threads: 1
+            params: dpara = lambda wildcards: tool_params(wildcards.file, None, config, "DEDUP", DEDUPENV)['OPTIONS'].get('EXTRACT', ""),
+                    dedup = "umi_tools"
+            resources:
+                mem_mb = lambda wildcards, attempt: 20000 * (2 ** (attempt - 1)),
+                runtime = lambda wildcards, attempt: 480 * (2 ** (attempt - 1))
+            shell:  "mkdir -p {output.td} && {params.dedup} extract {params.dpara} --temp-dir {output.td} --log={log} --stdin={input.r1} --stdout={output.o1}"
+
+# dedupbam: deduplicate a mapped BAM via "<DEDUPBIN> bam", then sort and index it.
+#
+# A single rule covers both library layouts: they differ only in the
+# "--paired" flag, which is supplied through params.pflag and is empty for
+# anything other than paired == 'paired'.
+#
+# Shell steps (stdout/stderr of each step go to {log}):
+#   1. "{params.dedup} bam" reads {input.bam} and writes dedup.unsorted.bam
+#      into the temporary directory {output.td}
+#   2. samtools sort writes the sorted result to {output.bam}
+#   3. samtools index is run on {output.bam}; {output.bai} is the expected index
+#
+# {type} is limited to sorted|sorted_unique by the wildcard_constraints at the
+# top of this file. mem_mb and runtime double with every retry attempt.
+# params.dpara is the OPTIONS['DEDUP'] entry that tool_params returns for the
+# file ("" when the entry is missing).
+
+rule dedupbam:
+    input:  bam = "MAPPED/{combo}/{file}_mapped_{type}.bam"
+    output: bam = report("MAPPED/{combo}/{file}_mapped_{type}_dedup.bam", category="DEDUP"),
+            bai = report("MAPPED/{combo}/{file}_mapped_{type}_dedup.bam.bai", category="DEDUP"),
+            td = temp(directory("TMP/UMIDD/{combo}/{file}_{type}"))
+    log:    "LOGS/{combo}/{file}_{type}/dedupbam.log"
+    conda:  ""+DEDUPENV+".yaml"
+    container: "oras://jfallmann/monsda:"+DEDUPENV+""
+    threads: 1
+    priority: 0
+    params: dpara = lambda wildcards: tool_params(wildcards.file, None, config, "DEDUP", DEDUPENV)['OPTIONS'].get('DEDUP', ""),
+            dedup = DEDUPBIN,
+            pflag = "--paired" if paired == 'paired' else ""
+    resources:
+        mem_mb = lambda wildcards, attempt: 20000 * (2 ** (attempt - 1)),
+        runtime = lambda wildcards, attempt: 480 * (2 ** (attempt - 1))
+    shell: "mkdir -p {output.td} && {params.dedup} bam -i {input.bam} -o {output.td}/dedup.unsorted.bam {params.pflag} {params.dpara} > {log} 2>&1 && samtools sort -@ {threads} -o {output.bam} {output.td}/dedup.unsorted.bam >> {log} 2>&1 && samtools index {output.bam} >> {log} 2>&1"
diff --git a/workflows/umicollapse_dedup.nf b/workflows/umicollapse_dedup.nf
new file mode 100644
index 00000000..8a77bc60
--- /dev/null
+++ b/workflows/umicollapse_dedup.nf
@@ -0,0 +1,64 @@
+DEDUPENV=get_always('DEDUPENV')
+DEDUPBIN=get_always('DEDUPBIN')
+// Extra options appended to the dedup command; the Elvis operator falls back to '' when get_always returns a falsy value.
+DEDUPPARAMS = get_always('umicollapse_params_DEDUP') ?: ''
+
+process dedup_bam{
+    // Deduplicates one BAM with "$DEDUPBIN bam", then sorts and indexes the
+    // result with samtools. Published files: *_dedup.bam, *_dedup.bam.bai and *_dedup.log.
+    conda "$DEDUPENV"+".yaml"
+    container "oras://jfallmann/monsda:"+"$DEDUPENV"
+    cpus 1
+    cache 'lenient'
+    //validExitStatus 0,1
+
+    publishDir "${workflow.workDir}/../" , mode: 'link',
+    saveAs: {filename ->
+        if (filename.endsWith("_dedup.bam"))          "MAPPED/${COMBO}/${CONDITION}/${file(filename).getName()}"
+        else if (filename.indexOf("_dedup.bam.bai") > 0) "MAPPED/${COMBO}/${CONDITION}/${file(filename).getName()}"
+        else if (filename.indexOf("dedup.log") > 0)           "LOGS/${COMBO}/${CONDITION}/DEDUP/${file(filename).getName()}"
+        else null
+    }
+
+    input:
+    path todedup
+    path bami
+
+    output:
+    path "*_dedup.bam", emit: bam
+    path "*_dedup.bam.bai", emit: bai
+    path "*_dedup.log", emit: logs
+
+    // Memory and time limits double with every retry attempt.
+    memory { 20.GB * (1 << ((task.attempt ?: 1) - 1)) }
+    time { 8.h * (1 << ((task.attempt ?: 1) - 1)) }
+
+    script:
+    // NOTE(review): bami is not referenced in the command; presumably it is declared
+    // only so the index gets staged next to the BAM - confirm.
+    bams = todedup[0]
+    outf = bams.getSimpleName()+"_dedup.bam"
+    outl = bams.getSimpleName()+"_dedup.log"
+    // The paired and single-end commands differ only in this flag.
+    pflag = (PAIRED == 'paired') ? '--paired' : ''
+    // Dedup into TMP/, sort into $outf, index $outf; all output is logged to $outl.
+    """
+        mkdir -p TMP && $DEDUPBIN bam -i $bams -o TMP/dedup.unsorted.bam $pflag $DEDUPPARAMS > $outl 2>&1 && samtools sort -@ ${task.cpus} -o $outf TMP/dedup.unsorted.bam >> $outl 2>&1 && samtools index $outf >> $outl 2>&1
+    """
+}
+
+workflow DEDUPBAM{
+    take: 
+    map
+    mapi
+    mapu
+    mapui
+    // map followed by mapu feeds dedup_bam's first input (todedup); mapi followed by mapui feeds its second (bami).
+    main:
+    dedup_bam(map.concat(mapu), mapi.concat(mapui))
+    // NOTE(review): the two inputs are paired positionally; presumably both channels emit in matching order - confirm.
+    emit:
+    dedup = dedup_bam.out.bam
+    dedupbai = dedup_bam.out.bai
+    deduplog = dedup_bam.out.logs
+}

From 90864af0d6ecb7e8e1c433971d21788995e85285 Mon Sep 17 00:00:00 2001
From: jfallmann <fallmann.joerg@gmail.com>
Date: Fri, 8 May 2026 13:40:07 +0200
Subject: [PATCH 37/39] rustqc dedup after remove fix

---
 MONSDA/Workflows.py          |  34 +++------
 workflows/multiqc_rustqc.smk | 136 ++++++++---------------------------
 workflows/rustqc.smk         |  30 --------
 3 files changed, 39 insertions(+), 161 deletions(-)

diff --git a/MONSDA/Workflows.py b/MONSDA/Workflows.py
index 81aab2e1..88819823 100755
--- a/MONSDA/Workflows.py
+++ b/MONSDA/Workflows.py
@@ -3268,18 +3268,11 @@ def nf_make_sub(
                                     + "(POSTMAPPING.out.postmap.concat(POSTMAPPING.out.postmapuni))\n"
                                 )
                         elif w == "RUSTQC_MAPPING":
-                            if "DEDUPBAM" in flowlist:
-                                subjobs.append(
-                                    " " * 4
-                                    + w
-                                    + "(POSTMAPPING.out.postmap.concat(POSTMAPPING.out.postmapuni.concat(DEDUPBAM.out.dedup)))\n"
-                                )
-                            else:
-                                subjobs.append(
-                                    " " * 4
-                                    + w
-                                    + "(POSTMAPPING.out.postmap.concat(POSTMAPPING.out.postmapuni))\n"
-                                )
+                            subjobs.append(
+                                " " * 4
+                                + w
+                                + "(POSTMAPPING.out.postmap.concat(POSTMAPPING.out.postmapuni))\n"
+                            )
                         elif w == "MULTIQC":
                             if "QC_RAW" not in flowlist and (
                                 "QC_MAPPING" in flowlist
@@ -3656,18 +3649,11 @@ def nf_make_sub(
                                 + "(POSTMAPPING.out.postmap.concat(POSTMAPPING.out.postmapuni))\n"
                             )
                     elif w == "RUSTQC_MAPPING":
-                        if "DEDUPBAM" in flowlist:
-                            subjobs.append(
-                                " " * 4
-                                + w
-                                + "(POSTMAPPING.out.postmap.concat(POSTMAPPING.out.postmapuni.concat(DEDUPBAM.out.dedup)))\n"
-                            )
-                        else:
-                            subjobs.append(
-                                " " * 4
-                                + w
-                                + "(POSTMAPPING.out.postmap.concat(POSTMAPPING.out.postmapuni))\n"
-                            )
+                        subjobs.append(
+                            " " * 4
+                            + w
+                            + "(POSTMAPPING.out.postmap.concat(POSTMAPPING.out.postmapuni))\n"
+                        )
                     elif w == "MULTIQC":
                         if "QC_RAW" not in flowlist and (
                             "QC_MAPPING" in flowlist or "RUSTQC_MAPPING" in flowlist
diff --git a/workflows/multiqc_rustqc.smk b/workflows/multiqc_rustqc.smk
index 1ecd2b1f..17022c50 100644
--- a/workflows/multiqc_rustqc.smk
+++ b/workflows/multiqc_rustqc.smk
@@ -1,109 +1,31 @@
-if rundedup:
-    if paired == 'paired':
-        if prededup:
-            rule multiqc:
-                input:  expand(rules.rustqc_mapped.output.js, file=samplecond(SAMPLES, config), combo=combo),
-                        expand(rules.rustqc_uniquemapped.output.js, file=samplecond(SAMPLES, config), combo=combo),
-                        expand(rules.rustqc_dedupmapped.output.js, file=samplecond(SAMPLES, config), combo=combo),
-                        expand(rules.rustqc_uniquededup.output.js, file=samplecond(SAMPLES, config), combo=combo),
-                        expand(rules.sam2bam.output.bam, file=samplecond(SAMPLES, config), combo=combo),
-                        expand(rules.sam2bamuniq.output.uniqbam, file=samplecond(SAMPLES, config), combo=combo),
-                        expand(rules.dedupbam.output.bam, file=samplecond(SAMPLES, config), combo=combo, type=["sorted", "sorted_unique"])
-                output: html = report("QC/Multi/{combo}/{condition}/multiqc_report.html", category="QC"),
-                        tmp = temp("QC/Multi/{combo}/{condition}/tmp"),
-                        lst = "QC/Multi/{combo}/{condition}/qclist.txt"
-                log:    "LOGS/{combo}/{condition}_multiqc.log"
-                conda:  "rustqc.yaml"
-                container: "oras://jfallmann/monsda:rustqc"
-                threads: 1
-                params: qpara = lambda wildcards: tool_params(SAMPLES[0], None, config, 'QC', QCENV)['OPTIONS'].get('MULTI', "")
-                shell:  "OUT=$(dirname {output.html}); for i in {input}; do echo $(dirname \"${{i}}\") >> {output.tmp}; done; FQ_COMBO=$(echo {wildcards.combo} | sed 's/rustqc/fastqc/g'); FQ_DIR=QC/${{FQ_COMBO}}/{wildcards.condition}; if [ -d \"${{FQ_DIR}}\" ]; then echo ${{FQ_DIR}} >> {output.tmp}; fi; cat {output.tmp} | sort -u > {output.lst}; export LC_ALL=C.UTF-8; multiqc -f {params.qpara} --exclude picard --exclude gatk -k json -z -s -o ${{OUT}} -l {output.lst} 2> {log}"
-        else:
-            rule multiqc:
-                input:  expand(rules.rustqc_mapped.output.js, file=samplecond(SAMPLES, config), combo=combo),
-                        expand(rules.rustqc_uniquemapped.output.js, file=samplecond(SAMPLES, config), combo=combo),
-                        expand(rules.rustqc_dedupmapped.output.js, file=samplecond(SAMPLES, config), combo=combo),
-                        expand(rules.rustqc_uniquededup.output.js, file=samplecond(SAMPLES, config), combo=combo),
-                        expand(rules.sam2bam.output.bam, file=samplecond(SAMPLES, config), combo=combo),
-                        expand(rules.sam2bamuniq.output.uniqbam, file=samplecond(SAMPLES, config), combo=combo),
-                        expand(rules.dedupbam.output.bam, file=samplecond(SAMPLES, config), combo=combo, type=["sorted", "sorted_unique"])
-                output: html = report("QC/Multi/{combo}/{condition}/multiqc_report.html", category="QC"),
-                        tmp = temp("QC/Multi/{combo}/{condition}/tmp"),
-                        lst = "QC/Multi/{combo}/{condition}/qclist.txt"
-                log:    "LOGS/{combo}/{condition}_multiqc.log"
-                conda:  "rustqc.yaml"
-                container: "oras://jfallmann/monsda:rustqc"
-                threads: 1
-                params: qpara = lambda wildcards: tool_params(SAMPLES[0], None, config, 'QC', QCENV)['OPTIONS'].get('MULTI', "")
-                shell:  "OUT=$(dirname {output.html}); for i in {input}; do echo $(dirname \"${{i}}\") >> {output.tmp}; done; FQ_COMBO=$(echo {wildcards.combo} | sed 's/rustqc/fastqc/g'); FQ_DIR=QC/${{FQ_COMBO}}/{wildcards.condition}; if [ -d \"${{FQ_DIR}}\" ]; then echo ${{FQ_DIR}} >> {output.tmp}; fi; cat {output.tmp} | sort -u > {output.lst}; export LC_ALL=C.UTF-8; multiqc -f {params.qpara} --exclude picard --exclude gatk -k json -z -s -o ${{OUT}} -l {output.lst} 2> {log}"
-
-    else:
-        if prededup:
-            rule multiqc:
-                input:  expand(rules.rustqc_mapped.output.js, file=samplecond(SAMPLES, config), combo=combo),
-                        expand(rules.rustqc_uniquemapped.output.js, file=samplecond(SAMPLES, config), combo=combo),
-                        expand(rules.rustqc_dedupmapped.output.js, file=samplecond(SAMPLES, config), combo=combo),
-                        expand(rules.rustqc_uniquededup.output.js, file=samplecond(SAMPLES, config), combo=combo),
-                        expand(rules.sam2bam.output.bam, file=samplecond(SAMPLES, config), combo=combo),
-                        expand(rules.sam2bamuniq.output.uniqbam, file=samplecond(SAMPLES, config), combo=combo),
-                        expand(rules.dedupbam.output.bam, file=samplecond(SAMPLES, config), combo=combo, type=["sorted", "sorted_unique"])
-                output: html = report("QC/Multi/{combo}/{condition}/multiqc_report.html", category="QC"),
-                        tmp = temp("QC/Multi/{combo}/{condition}/tmp"),
-                        lst = "QC/Multi/{combo}/{condition}/qclist.txt"
-                log:    "LOGS/{combo}/{condition}_multiqc.log"
-                conda:  "rustqc.yaml"
-                container: "oras://jfallmann/monsda:rustqc"
-                threads: 1
-                params: qpara = lambda wildcards: tool_params(SAMPLES[0], None, config, 'QC', QCENV)['OPTIONS'].get('MULTI', "")
-                shell:  "OUT=$(dirname {output.html}); for i in {input}; do echo $(dirname \"${{i}}\") >> {output.tmp}; done; FQ_COMBO=$(echo {wildcards.combo} | sed 's/rustqc/fastqc/g'); FQ_DIR=QC/${{FQ_COMBO}}/{wildcards.condition}; if [ -d \"${{FQ_DIR}}\" ]; then echo ${{FQ_DIR}} >> {output.tmp}; fi; cat {output.tmp} | sort -u > {output.lst}; export LC_ALL=C.UTF-8; multiqc -f {params.qpara} --exclude picard --exclude gatk -k json -z -s -o ${{OUT}} -l {output.lst} 2> {log}"
-        else:
-            rule multiqc:
-                input:  expand(rules.rustqc_mapped.output.js, file=samplecond(SAMPLES, config), combo=combo),
-                        expand(rules.rustqc_uniquemapped.output.js, file=samplecond(SAMPLES, config), combo=combo),
-                        expand(rules.rustqc_dedupmapped.output.js, file=samplecond(SAMPLES, config), combo=combo),
-                        expand(rules.rustqc_uniquededup.output.js, file=samplecond(SAMPLES, config), combo=combo),
-                        expand(rules.sam2bam.output.bam, file=samplecond(SAMPLES, config), combo=combo),
-                        expand(rules.sam2bamuniq.output.uniqbam, file=samplecond(SAMPLES, config), combo=combo),
-                        expand(rules.dedupbam.output.bam, file=samplecond(SAMPLES, config), combo=combo, type=["sorted", "sorted_unique"])
-                output: html = report("QC/Multi/{combo}/{condition}/multiqc_report.html", category="QC"),
-                        tmp = temp("QC/Multi/{combo}/{condition}/tmp"),
-                        lst = "QC/Multi/{combo}/{condition}/qclist.txt"
-                log:    "LOGS/{combo}/{condition}_multiqc.log"
-                conda:  "rustqc.yaml"
-                container: "oras://jfallmann/monsda:rustqc"
-                threads: 1
-                params: qpara = lambda wildcards: tool_params(SAMPLES[0], None, config, 'QC', QCENV)['OPTIONS'].get('MULTI', "")
-                shell:  "OUT=$(dirname {output.html}); for i in {input}; do echo $(dirname \"${{i}}\") >> {output.tmp}; done; FQ_COMBO=$(echo {wildcards.combo} | sed 's/rustqc/fastqc/g'); FQ_DIR=QC/${{FQ_COMBO}}/{wildcards.condition}; if [ -d \"${{FQ_DIR}}\" ]; then echo ${{FQ_DIR}} >> {output.tmp}; fi; cat {output.tmp} | sort -u > {output.lst}; export LC_ALL=C.UTF-8; multiqc -f {params.qpara} --exclude picard --exclude gatk -k json -z -s -o ${{OUT}} -l {output.lst} 2> {log}"
+if paired == 'paired':  # NOTE(review): this branch and the else branch define an identical rule multiqc; the paired check looks redundant here
+    rule multiqc:
+        input:  expand(rules.rustqc_mapped.output.js, file=samplecond(SAMPLES, config), combo=combo),
+                expand(rules.rustqc_uniquemapped.output.js, file=samplecond(SAMPLES, config), combo=combo),
+                expand(rules.sam2bam.output.bam, file=samplecond(SAMPLES, config), combo=combo),
+                expand(rules.sam2bamuniq.output.uniqbam, file=samplecond(SAMPLES, config), combo=combo)
+        output: html = report("QC/Multi/{combo}/{condition}/multiqc_report.html", category="QC"),
+                tmp = temp("QC/Multi/{combo}/{condition}/tmp"),
+                lst = "QC/Multi/{combo}/{condition}/qclist.txt"
+        log:    "LOGS/{combo}/{condition}_multiqc.log"
+        conda:  "rustqc.yaml"
+        container: "oras://jfallmann/monsda:rustqc"
+        threads: 1
+        params: qpara = lambda wildcards: tool_params(SAMPLES[0], None, config, 'QC', QCENV)['OPTIONS'].get('MULTI', "")
+        shell:  "OUT=$(dirname {output.html}); for i in {input}; do echo $(dirname \"${{i}}\") >> {output.tmp}; done; FQ_COMBO=$(echo {wildcards.combo} | sed 's/rustqc/fastqc/g'); FQ_DIR=QC/${{FQ_COMBO}}/{wildcards.condition}; if [ -d \"${{FQ_DIR}}\" ]; then echo ${{FQ_DIR}} >> {output.tmp}; fi; cat {output.tmp} | sort -u > {output.lst}; export LC_ALL=C.UTF-8; multiqc -f {params.qpara} --exclude picard --exclude gatk -k json -z -s -o ${{OUT}} -l {output.lst} 2> {log}"
 
 else:
-    if paired == 'paired':
-        rule multiqc:
-            input:  expand(rules.rustqc_mapped.output.js, file=samplecond(SAMPLES, config), combo=combo),
-                    expand(rules.rustqc_uniquemapped.output.js, file=samplecond(SAMPLES, config), combo=combo),
-                    expand(rules.sam2bam.output.bam, file=samplecond(SAMPLES, config), combo=combo),
-                    expand(rules.sam2bamuniq.output.uniqbam, file=samplecond(SAMPLES, config), combo=combo)
-            output: html = report("QC/Multi/{combo}/{condition}/multiqc_report.html", category="QC"),
-                    tmp = temp("QC/Multi/{combo}/{condition}/tmp"),
-                    lst = "QC/Multi/{combo}/{condition}/qclist.txt"
-            log:    "LOGS/{combo}/{condition}_multiqc.log"
-            conda:  "rustqc.yaml"
-            container: "oras://jfallmann/monsda:rustqc"
-            threads: 1
-            params: qpara = lambda wildcards: tool_params(SAMPLES[0], None, config, 'QC', QCENV)['OPTIONS'].get('MULTI', "")
-            shell:  "OUT=$(dirname {output.html}); for i in {input}; do echo $(dirname \"${{i}}\") >> {output.tmp}; done; FQ_COMBO=$(echo {wildcards.combo} | sed 's/rustqc/fastqc/g'); FQ_DIR=QC/${{FQ_COMBO}}/{wildcards.condition}; if [ -d \"${{FQ_DIR}}\" ]; then echo ${{FQ_DIR}} >> {output.tmp}; fi; cat {output.tmp} | sort -u > {output.lst}; export LC_ALL=C.UTF-8; multiqc -f {params.qpara} --exclude picard --exclude gatk -k json -z -s -o ${{OUT}} -l {output.lst} 2> {log}"
-
-    else:
-        rule multiqc:
-            input:  expand(rules.rustqc_mapped.output.js, file=samplecond(SAMPLES, config), combo=combo),
-                    expand(rules.rustqc_uniquemapped.output.js, file=samplecond(SAMPLES, config), combo=combo),
-                    expand(rules.sam2bam.output.bam, file=samplecond(SAMPLES, config), combo=combo),
-                    expand(rules.sam2bamuniq.output.uniqbam, file=samplecond(SAMPLES, config), combo=combo)
-            output: html = report("QC/Multi/{combo}/{condition}/multiqc_report.html", category="QC"),
-                    tmp = temp("QC/Multi/{combo}/{condition}/tmp"),
-                    lst = "QC/Multi/{combo}/{condition}/qclist.txt"
-            log:    "LOGS/{combo}/{condition}_multiqc.log"
-            conda:  "rustqc.yaml"
-            container: "oras://jfallmann/monsda:rustqc"
-            threads: 1
-            params: qpara = lambda wildcards: tool_params(SAMPLES[0], None, config, 'QC', QCENV)['OPTIONS'].get('MULTI', "")
-            shell:  "OUT=$(dirname {output.html}); for i in {input}; do echo $(dirname \"${{i}}\") >> {output.tmp}; done; FQ_COMBO=$(echo {wildcards.combo} | sed 's/rustqc/fastqc/g'); FQ_DIR=QC/${{FQ_COMBO}}/{wildcards.condition}; if [ -d \"${{FQ_DIR}}\" ]; then echo ${{FQ_DIR}} >> {output.tmp}; fi; cat {output.tmp} | sort -u > {output.lst}; export LC_ALL=C.UTF-8; multiqc -f {params.qpara} --exclude picard --exclude gatk -k json -z -s -o ${{OUT}} -l {output.lst} 2> {log}"
+    rule multiqc:
+        input:  expand(rules.rustqc_mapped.output.js, file=samplecond(SAMPLES, config), combo=combo),
+                expand(rules.rustqc_uniquemapped.output.js, file=samplecond(SAMPLES, config), combo=combo),
+                expand(rules.sam2bam.output.bam, file=samplecond(SAMPLES, config), combo=combo),
+                expand(rules.sam2bamuniq.output.uniqbam, file=samplecond(SAMPLES, config), combo=combo)
+        output: html = report("QC/Multi/{combo}/{condition}/multiqc_report.html", category="QC"),
+                tmp = temp("QC/Multi/{combo}/{condition}/tmp"),  # scratch list: the shell loop appends the directory of every input here
+                lst = "QC/Multi/{combo}/{condition}/qclist.txt"  # sorted, de-duplicated copy of tmp that is passed to multiqc via -l
+        log:    "LOGS/{combo}/{condition}_multiqc.log"
+        conda:  "rustqc.yaml"
+        container: "oras://jfallmann/monsda:rustqc"
+        threads: 1
+        params: qpara = lambda wildcards: tool_params(SAMPLES[0], None, config, 'QC', QCENV)['OPTIONS'].get('MULTI', "")
+        shell:  "OUT=$(dirname {output.html}); for i in {input}; do echo $(dirname \"${{i}}\") >> {output.tmp}; done; FQ_COMBO=$(echo {wildcards.combo} | sed 's/rustqc/fastqc/g'); FQ_DIR=QC/${{FQ_COMBO}}/{wildcards.condition}; if [ -d \"${{FQ_DIR}}\" ]; then echo ${{FQ_DIR}} >> {output.tmp}; fi; cat {output.tmp} | sort -u > {output.lst}; export LC_ALL=C.UTF-8; multiqc -f {params.qpara} --exclude picard --exclude gatk -k json -z -s -o ${{OUT}} -l {output.lst} 2> {log}"
diff --git a/workflows/rustqc.smk b/workflows/rustqc.smk
index d80863e5..579715c0 100644
--- a/workflows/rustqc.smk
+++ b/workflows/rustqc.smk
@@ -40,33 +40,3 @@ rule rustqc_uniquemapped:
              paired = RUSTQC_PAIRED,
              stranded = RUSTQC_STRANDED
     shell: "rustqc rna {input.r1} --gtf {params.anno} -t {threads} {params.paired} -s {params.stranded} --skip-dup-check -j {output.js} -o {output.o1} {params.qpara} 2> {log}"
-
-rule rustqc_dedupmapped:
-    input:  r1 = "MAPPED/{combo}/{file}_mapped_sorted_dedup.bam",
-            r2 = "MAPPED/{combo}/{file}_mapped_sorted_dedup.bam.bai"
-    output: o1 = directory("QC/{combo}/{file}_mapped_sorted_dedup"),
-        js = "QC/{combo}/{file}_mapped_sorted_dedup/rustqc_summary.json"
-    log:    "LOGS/{combo}/{file}_rustqc_dedupmapped.log"
-    conda:  ""+QCENV+".yaml"
-    container: "oras://jfallmann/monsda:"+QCENV+""
-    threads: MAXTHREAD
-    params:  qpara = lambda wildcards: tool_params(SAMPLES[0], None, config, 'QC', QCENV)['OPTIONS'].get('QC', ""),
-             anno = ANNOTATION,
-             paired = RUSTQC_PAIRED,
-             stranded = RUSTQC_STRANDED
-    shell: "rustqc rna {input.r1} --gtf {params.anno} -t {threads} {params.paired} -s {params.stranded} -j {output.js} -o {output.o1} {params.qpara} 2> {log}"
-
-rule rustqc_uniquededup:
-    input:  r1 = "MAPPED/{combo}/{file}_mapped_sorted_unique_dedup.bam",
-            r2 = "MAPPED/{combo}/{file}_mapped_sorted_unique_dedup.bam.bai"
-    output: o1 = directory("QC/{combo}/{file}_mapped_sorted_unique_dedup"),
-        js = "QC/{combo}/{file}_mapped_sorted_unique_dedup/rustqc_summary.json"
-    log:    "LOGS/{combo}/{file}_rustqc_uniquededup.log"
-    conda:  ""+QCENV+".yaml"
-    container: "oras://jfallmann/monsda:"+QCENV+""
-    threads: MAXTHREAD
-    params:  qpara = lambda wildcards: tool_params(SAMPLES[0], None, config, 'QC', QCENV)['OPTIONS'].get('QC', ""),
-             anno = ANNOTATION,
-             paired = RUSTQC_PAIRED,
-             stranded = RUSTQC_STRANDED
-    shell: "rustqc rna {input.r1} --gtf {params.anno} -t {threads} {params.paired} -s {params.stranded} -j {output.js} -o {output.o1} {params.qpara} 2> {log}"

From d0bfb4facf7d8d9f18c07cad031a63b7588eb14c Mon Sep 17 00:00:00 2001
From: jfallmann <fallmann.joerg@gmail.com>
Date: Mon, 11 May 2026 13:12:57 +0200
Subject: [PATCH 38/39] parallelized lane merge, cleanup

---
 MONSDA/Logger.py                  |  44 +-----------
 MONSDA/Params.py                  | 113 ++++++++++++++++--------------
 MONSDA/Utils.py                   |  42 -----------
 MONSDA/Workflows.py               |  41 -----------
 profile_snakemake/config.v8+.yaml |   4 +-
 5 files changed, 64 insertions(+), 180 deletions(-)

diff --git a/MONSDA/Logger.py b/MONSDA/Logger.py
index b4b4bd9f..6d9fbafd 100755
--- a/MONSDA/Logger.py
+++ b/MONSDA/Logger.py
@@ -1,47 +1,5 @@
 # Logger.py ---
-#
-# Filename: Logger.py
-# Description:
-# Author: Joerg Fallmann
-# Maintainer:
-# Created: Mon Aug 12 10:26:55 2019 (+0200)
-# Version:
-# Package-Requires: ()
-# Last-Updated: Wed Apr 29 16:42:40 2020 (+0200)
-#           By: Joerg Fallmann
-#     Update #: 91
-# URL:
-# Doc URL:
-# Keywords:
-# Compatibility:
-#
-#
-
-# Commentary:
-#
-#
-#
-#
-
-# Change Log:
-#
-#
-#
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or (at
-# your option) any later version.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.
-#
-#
+
 
 # Code:
 import logging
diff --git a/MONSDA/Params.py b/MONSDA/Params.py
index cfa00e8a..6a62a918 100644
--- a/MONSDA/Params.py
+++ b/MONSDA/Params.py
@@ -1,46 +1,4 @@
 # Params.py ---
-#
-# Filename: Params.py
-# Description:
-# Author: Joerg Fallmann
-# Maintainer:
-# Created: Tue Sep 18 15:39:06 2018 (+0200)
-# Version:
-# Package-Requires: ()
-# Last-Updated: Thu Feb  4 18:01:07 2021 (+0100)
-#           By: Joerg Fallmann
-#     Update #: 2888
-# URL:
-# Doc URL:
-# Keywords:
-# Compatibility:
-#
-#
-
-# Commentary:
-#
-#
-#
-
-# Change Log:
-#
-#
-#
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or (at
-# your option) any later version.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.
-#
-#
 
 # Code:
 # import os, sys, inspect
@@ -73,6 +31,7 @@
 import sys
 import traceback as tb
 from collections import OrderedDict, defaultdict
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from typing import Optional
 
 from natsort import natsorted
@@ -383,17 +342,50 @@ def inject_samplesheet_settings(configfile: str, samplesheet_path: str) -> str:
 
 
 @check_run
-def prepare_lane_split_fastqs(config: dict) -> int:
+def _merge_lane_files(target: str, lane_candidates: list, logid: str) -> str:
+    """Concatenate *lane_candidates* gzip files into *target*.
+
+    Returns *target* on success; raises on any I/O error.
+    """
+    with open(target, "wb") as outfh:
+        for lane_file in lane_candidates:
+            with open(lane_file, "rb") as infh:
+                shutil.copyfileobj(infh, outfh)
+    return target
+
+
+def prepare_lane_split_fastqs(config: dict, max_workers: Optional[int] = None) -> int:
     """Concatenate lane-split FASTQs into canonical _R1/_R2 files when needed.
 
+    Merges are executed in parallel using a ``ThreadPoolExecutor`` so that all
+    samples/reads are processed concurrently (I/O-bound work, thread-safe).
+
+    Parameters
+    ----------
+    config : dict
+        Parsed MONSDA config.
+    max_workers : int, optional
+        Maximum number of parallel merge threads.  When ``None`` (default),
+        the value is taken from ``config["MAXTHREADS"]``; if that is also
+        absent the ``ThreadPoolExecutor`` built-in default is used.
+
     This is intentionally additive: existing canonical files are kept untouched.
     """
     logid = scriptname + ".Params_prepare_lane_split_fastqs: "
     merged_files = 0
 
+    if max_workers is None:
+        try:
+            max_workers = int(config["MAXTHREADS"])
+        except (KeyError, TypeError, ValueError):
+            max_workers = None  # fall back to ThreadPoolExecutor default
+
     samples = [os.path.join(x) for x in sampleslong(config, nocheck="1")]
     log.debug(logid + "Checking lane split files for samples: " + str(samples))
 
+    # --- collect all (target, lane_candidates) pairs first ---
+    merge_tasks: list = []  # list of (target, lane_candidates)
+
     for sample in samples:
         paired = checkpaired([sample], config)
         if not paired or not any(x in paired for x in ["paired", "singlecell"]):
@@ -433,16 +425,33 @@ def prepare_lane_split_fastqs(config: dict) -> int:
                 logid
                 + f"Concatenating {len(lane_candidates)} lane files into {target}"
             )
-            with open(target, "wb") as outfh:
-                for lane_file in lane_candidates:
-                    with open(lane_file, "rb") as infh:
-                        shutil.copyfileobj(infh, outfh)
-            merged_files += 1
-
-    if merged_files > 0:
-        log.info(logid + f"Created {merged_files} concatenated lane-merged FASTQ files")
-    else:
+            merge_tasks.append((target, lane_candidates))
+
+    if not merge_tasks:
         log.debug(logid + "No lane-split FASTQ files required concatenation")
+        return 0
+
+    # --- run all merges in parallel ---
+    with ThreadPoolExecutor(max_workers=max_workers) as pool:
+        futures = {
+            pool.submit(_merge_lane_files, target, lanes, logid): target
+            for target, lanes in merge_tasks
+        }
+        for future in as_completed(futures):
+            target = futures[future]
+            try:
+                future.result()
+                merged_files += 1
+            except Exception:
+                exc_type, exc_value, exc_tb = sys.exc_info()
+                tbe = tb.TracebackException(exc_type, exc_value, exc_tb)
+                log.error(
+                    logid
+                    + f"Failed to merge lane files into {target}: "
+                    + "".join(tbe.format())
+                )
+
+    log.info(logid + f"Created {merged_files} concatenated lane-merged FASTQ files")
     return merged_files
 
 
diff --git a/MONSDA/Utils.py b/MONSDA/Utils.py
index fd2f4d4b..8dd741f5 100644
--- a/MONSDA/Utils.py
+++ b/MONSDA/Utils.py
@@ -1,46 +1,4 @@
 # Utils.py ---
-#
-# Filename: Utils.py
-# Description:
-# Author: Joerg Fallmann
-# Maintainer:
-# Created: Tue Sep 18 15:39:06 2018 (+0200)
-# Version:
-# Package-Requires: ()
-# Last-Updated: Thu Feb  4 18:01:07 2021 (+0100)
-#           By: Joerg Fallmann
-#     Update #: 2888
-# URL:
-# Doc URL:
-# Keywords:
-# Compatibility:
-#
-#
-
-# Commentary:
-#
-#
-#
-
-# Change Log:
-#
-#
-#
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or (at
-# your option) any later version.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.
-#
-#
 
 # Code:
 # import os, sys, inspect
diff --git a/MONSDA/Workflows.py b/MONSDA/Workflows.py
index 88819823..2206e7bb 100755
--- a/MONSDA/Workflows.py
+++ b/MONSDA/Workflows.py
@@ -1,45 +1,4 @@
 # Workflows.py ---
-#
-# Filename: Workflows.py
-# Description:
-# Author: Joerg Fallmann
-# Maintainer:
-# Created: Tue Sep 18 15:39:06 2018 (+0200)
-# Version:
-# Package-Requires: ()
-# Last-Updated: Thu Feb  4 18:01:07 2021 (+0100)
-#           By: Joerg Fallmann
-#     Update #: 2888
-# URL:
-# Doc URL:
-# Keywords:
-# Compatibility:
-#
-#
-
-# Commentary:
-#
-#
-#
-
-# Change Log:
-#
-#
-#
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or (at
-# your option) any later version.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.
-#
-#
 
 # Code:
 # import os, sys, inspect
diff --git a/profile_snakemake/config.v8+.yaml b/profile_snakemake/config.v8+.yaml
index 2b5af595..a984a4d2 100755
--- a/profile_snakemake/config.v8+.yaml
+++ b/profile_snakemake/config.v8+.yaml
@@ -1,8 +1,8 @@
 executor: slurm
 
 default-resources:
-  slurm_account: "aob" # your account name
-  slurm_partition: "c" # the partition to use
+  slurm_account: "${SLURM_ACCOUNT}" # your account name
+  slurm_partition: "${SLURM_PARTITION}" # the partition to use
   nodes: 1
   mem_mb: "20000 * (2 ** (attempt - 1))"  
   runtime: "480 * (2 ** (attempt - 1))"   

From 1714486a832cae931f937a53db1ad7b1de382c0c Mon Sep 17 00:00:00 2001
From: jfallmann <fallmann.joerg@gmail.com>
Date: Tue, 19 May 2026 11:58:41 +0200
Subject: [PATCH 39/39] salmon config fixes

---
 configs/template.json                | 12 ++++++------
 configs/template_base_commented.json | 12 ++++++------
 configs/template_clean.json          | 12 ++++++------
 envs/salmon.yaml                     |  2 +-
 tests/data/config_Test.json          |  4 ++--
 tests/data/config_Test_local.json    |  4 ++--
 6 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/configs/template.json b/configs/template.json
index 374e77b3..ad52f0ed 100644
--- a/configs/template.json
+++ b/configs/template.json
@@ -241,8 +241,8 @@
                                         "salmon": {
                                             "OPTIONS":
                                             {
-                                                "INDEX": "-l A --gencode",  # salmon index options
-                                                "QUANT": "--gcBias",  # salmon quant options
+                                                "INDEX": "",  # salmon index options
+                                                "QUANT": "--validateMappings --seqBias --gcBias",  # salmon quant options
                                             }
                                         }
                                     }
@@ -462,8 +462,8 @@
                     {
                         "OPTIONS":
                         {
-                            "INDEX": "-l A --gencode",  # salmon index options
-                            "QUANT": "--gcBias",  # salmon quant options
+                            "INDEX": "",  # salmon index options
+                            "QUANT": "--validateMappings --seqBias --gcBias",  # salmon quant options
                             "DTU": ""  # Options for Analysis
                         }
                     },
@@ -471,8 +471,8 @@
                     {
                         "OPTIONS":
                         {
-                            "INDEX": "-l A --gencode",  # salmon index options
-                            "QUANT": "--gcBias",  # salmon quant options
+                            "INDEX": "",  # salmon index options
+                            "QUANT": "--validateMappings --seqBias --gcBias",  # salmon quant options
                             "DTU": ""  # Options for Analysis
                         }
                     }
diff --git a/configs/template_base_commented.json b/configs/template_base_commented.json
index 0c526f7e..93928bb8 100644
--- a/configs/template_base_commented.json
+++ b/configs/template_base_commented.json
@@ -322,8 +322,8 @@
       },
       "OPTIONS":
         {
-            "INDEX": "-l A --gencode",
-            "QUANT": "--gcBias"
+            "INDEX": "",
+            "QUANT": "--validateMappings --seqBias --gcBias"
         }
     }
   },
@@ -528,8 +528,8 @@
           "DTU": "Options for DTU Analysis, leave blank unless you know exactly what you are doing, can be used to set cutoffs for filtering, e.g. min_samps_feature_expr, min_gene_expr ..."
       },
       "OPTIONS": {
-          "INDEX": "-l A --gencode -d GENOMES/salmon_decoy",
-          "QUANT": "--gcBias",
+          "INDEX": "-d GENOMES/salmon_decoy",
+          "QUANT": "--validateMappings --seqBias --gcBias",
           "DTU": ""
       }
     },
@@ -540,8 +540,8 @@
           "DTU": "Options for DTU Analysis, leave blank unless you know exactly what you are doing, , can be used to set cutoffs for filtering, e.g. min_samps_feature_expr, min_gene_expr ..."
       },
       "OPTIONS": {
-          "INDEX": "-l A --gencode -d GENOMES/salmon_decoy",
-          "QUANT": "--gcBias",
+          "INDEX": "-d GENOMES/salmon_decoy",
+          "QUANT": "--validateMappings --seqBias --gcBias",
           "DTU": ""
       }
     }
diff --git a/configs/template_clean.json b/configs/template_clean.json
index a335c204..78ea39f1 100644
--- a/configs/template_clean.json
+++ b/configs/template_clean.json
@@ -232,8 +232,8 @@
         "salmon": {
           "OPTIONS":
             {
-                "INDEX": "-l A --gencode",
-                "QUANT": "--gcBias"
+                "INDEX": "",
+                "QUANT": "--validateMappings --seqBias --gcBias"
             }
         }
       }
@@ -436,8 +436,8 @@
         {
             "OPTIONS":
             {
-                "INDEX": "-l A --gencode",
-                "QUANT": "--gcBias",
+                "INDEX": "",
+                "QUANT": "--validateMappings --seqBias --gcBias",
                 "DTU": ""
             }
         },
@@ -445,8 +445,8 @@
         {
             "OPTIONS":
             {
-                "INDEX": "-l A --gencode",
-                "QUANT": "--gcBias",
+                "INDEX": "",
+                "QUANT": "--validateMappings --seqBias --gcBias",
                 "DTU": ""
             }
         }
diff --git a/envs/salmon.yaml b/envs/salmon.yaml
index 071b81f9..cfff7361 100644
--- a/envs/salmon.yaml
+++ b/envs/salmon.yaml
@@ -3,4 +3,4 @@ channels:
   - conda-forge
   - bioconda
 dependencies:
-  - salmon =1.10.3
\ No newline at end of file
+  - salmon =1.11.4
\ No newline at end of file
diff --git a/tests/data/config_Test.json b/tests/data/config_Test.json
index f002306c..e7eededd 100644
--- a/tests/data/config_Test.json
+++ b/tests/data/config_Test.json
@@ -254,8 +254,8 @@
         "Test": {
             "drimseq": {
                 "OPTIONS": {
-                    "INDEX": "-l A --gencode -d GENOMES/transcripts.fa.gz",
-                    "QUANT": "--gcBias",
+                    "INDEX": "-d GENOMES/transcripts.fa.gz",
+                    "QUANT": "--validateMappings --seqBias --gcBias",
                     "DTU": ""
                 }
             }
diff --git a/tests/data/config_Test_local.json b/tests/data/config_Test_local.json
index 56d924aa..19bc01a5 100644
--- a/tests/data/config_Test_local.json
+++ b/tests/data/config_Test_local.json
@@ -254,8 +254,8 @@
         "Test": {
             "drimseq": {
                 "OPTIONS": {
-                    "INDEX": "-l A --gencode -d GENOMES/transcripts.fa.gz",
-                    "QUANT": "--gcBias",
+                    "INDEX": "-d GENOMES/transcripts.fa.gz",
+                    "QUANT": "--validateMappings --seqBias --gcBias",
                     "DTU": ""
                 }
             }