Skip to content

Commit

Permalink
Merge pull request #36 from nf-core/crop_overview_multiqc
Browse files Browse the repository at this point in the history
Crop overview multiqc
  • Loading branch information
FloWuenne authored Dec 15, 2023
2 parents 68a2028 + 8bd15fd commit 3a0b2ef
Show file tree
Hide file tree
Showing 24 changed files with 285 additions and 139 deletions.
2 changes: 1 addition & 1 deletion .gitpod.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
image: nfcore/gitpod:latest
image: nfcore/gitpod:dev
tasks:
- name: Update Nextflow and setup pre-commit
command: |
Expand Down
11 changes: 11 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,17 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## v1.0.1dev - [2023.12.11]

The crop overview is now provided to MultiQC — when `create_training_subset` is run, the MultiQC and CUSTOM_DUMPSOFTWAREVERSIONS steps are also executed.

### `Added`

- removed CropSummary.txt from published outputs - it gets collected at multiqc step and published there
- moved crop_overview.png to MultiQC folder
- Gitpod container is `nfcore/gitpod:dev` instead of `latest`, to include newer versions of nf-core/tools and nf-test
- MOLKARTQCPNG process to add name to png for multiqc report, and combine if multiple samples are processed

## v1.0.1dev - [2023.12.07]

Local module revamp - all should use the same Docker image to save space.
Expand Down
137 changes: 82 additions & 55 deletions bin/collect_QC.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,31 @@

#### This script takes regionprops_table output from mcquant and the raw spot tables from Resolve bioscience as input
#### and calculates some QC metrics for masks and spot assignments
### If png files are provided, it combines them into one

import argparse
import pandas as pd
from PIL import Image, ImageDraw, ImageFont
import os


def combine_png_files(input_paths, output_path):
    """Stack the given PNG crop overviews vertically into one image.

    Each image is annotated (near its top-left corner) with its file name,
    with ".ome" stripped and ".crop" rewritten to "_crop", so individual
    samples can be identified in the combined overview. The result is
    written to ``<output_path>/crop_overview.png``.

    Args:
        input_paths: iterable of paths to the PNG files to combine.
        output_path: directory the combined image is written to.

    Raises:
        ValueError: if ``input_paths`` is empty.
    """
    if not input_paths:
        raise ValueError("No PNG files provided to combine.")

    images = []
    for file_path in input_paths:
        img = Image.open(file_path)
        image_name = os.path.basename(file_path).replace(".ome", "").replace(".crop", "_crop")
        draw = ImageDraw.Draw(img)
        font_size = 50
        # NOTE: load_default() with a size argument requires Pillow >= 10.1.
        font = ImageFont.load_default(font_size)
        draw.text((100, 50), image_name, fill="black", font=font)
        images.append(img)

    # Size the canvas to the widest image and the sum of all heights so no
    # image is clipped when crops differ in size (the previous fixed
    # first-image-sized grid cropped larger images).
    canvas_width = max(img.size[0] for img in images)
    canvas_height = sum(img.size[1] for img in images)
    combined_image = Image.new("RGB", (canvas_width, canvas_height), "white")
    y_offset = 0
    for img in images:
        combined_image.paste(img, (0, y_offset))
        y_offset += img.size[1]
    combined_image.save(os.path.join(output_path, "crop_overview.png"))


def summarize_spots(spot_table):
Expand Down Expand Up @@ -49,63 +71,68 @@ def summarize_segmasks(cellxgene_table, spots_summary):
parser.add_argument("-d", "--sample_id", help="Sample ID.")
parser.add_argument("-g", "--segmentation_method", help="Segmentation method used.")
parser.add_argument("--filterqc", required=False, help="QC from mask filter step")
parser.add_argument("--png_overview", nargs="+", help="Crop overview image paths")
parser.add_argument("--version", action="version", version="0.1.0")

args = parser.parse_args()

## Read in cellxgene_table table
cellxgene_table = pd.read_csv(args.cellxgene, sep=",")

## Read in spot table
spots = pd.read_table(args.spots, sep="\t", names=["x", "y", "z", "gene"])
duplicated = sum(spots.gene.str.contains("Duplicated"))
spots = spots[~spots.gene.str.contains("Duplicated")]

## Pass on filterqc values
filterqc = pd.read_csv(
args.filterqc,
names=["below_min_area", "below_percentage", "above_max_area", "above_percentage", "total_labels"],
header=None,
)

## Summarize spots table
summary_spots = summarize_spots(spots)
summary_segmentation = summarize_segmasks(cellxgene_table, summary_spots)

## Create pandas data frame with one row per parameter and write each value in summary_segmentation to a new row in the data frame
summary_df = pd.DataFrame(
columns=[
"sample_id",
"segmentation_method",
"total_cells",
"avg_area",
"total_spots",
"spot_assign_per_cell",
"spot_assign_total",
"spot_assign_percent",
"duplicated_total",
"labels_total",
"labels_below_thresh",
"labels_above_thresh",
if args.png_overview != None:
combine_png_files(args.png_overview, args.outdir)

else:
## Read in cellxgene_table table
cellxgene_table = pd.read_csv(args.cellxgene, sep=",")

## Read in spot table
spots = pd.read_table(args.spots, sep="\t", names=["x", "y", "z", "gene"])
duplicated = sum(spots.gene.str.contains("Duplicated"))
spots = spots[~spots.gene.str.contains("Duplicated")]

## Pass on filterqc values
filterqc = pd.read_csv(
args.filterqc,
names=["below_min_area", "below_percentage", "above_max_area", "above_percentage", "total_labels"],
header=None,
)

## Summarize spots table
summary_spots = summarize_spots(spots)
summary_segmentation = summarize_segmasks(cellxgene_table, summary_spots)

## Create pandas data frame with one row per parameter and write each value in summary_segmentation to a new row in the data frame
summary_df = pd.DataFrame(
columns=[
"sample_id",
"segmentation_method",
"total_cells",
"avg_area",
"total_spots",
"spot_assign_per_cell",
"spot_assign_total",
"spot_assign_percent",
"duplicated_total",
"labels_total",
"labels_below_thresh",
"labels_above_thresh",
]
)
summary_df.loc[0] = [
##args.sample_id,
args.sample_id + "_" + args.segmentation_method,
args.segmentation_method,
summary_segmentation[0],
summary_segmentation[1],
summary_spots[1],
summary_segmentation[2],
summary_segmentation[3],
summary_segmentation[4],
duplicated,
filterqc.total_labels[1],
filterqc.below_min_area[1],
filterqc.above_max_area[1],
]
)
summary_df.loc[0] = [
##args.sample_id,
args.sample_id + "_" + args.segmentation_method,
args.segmentation_method,
summary_segmentation[0],
summary_segmentation[1],
summary_spots[1],
summary_segmentation[2],
summary_segmentation[3],
summary_segmentation[4],
duplicated,
filterqc.total_labels[1],
filterqc.below_min_area[1],
filterqc.above_max_area[1],
]
print(args.sample_id)
# Write summary_df to a csv file
summary_df.to_csv(
f"{args.outdir}/{args.sample_id}.{args.segmentation_method}.spot_QC.csv", header=True, index=False
)
print(args.sample_id)
# Write summary_df to a csv file
summary_df.to_csv(
f"{args.outdir}/{args.sample_id}.{args.segmentation_method}.spot_QC.csv", header=True, index=False
)
23 changes: 16 additions & 7 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,16 @@ process {
]
}

withName: 'MOLKARTQCPNG' {
ext.when = { params.create_training_subset }
publishDir = [
path: { "${params.outdir}/molkartqc" },
pattern: "*.png"
]
}

withName: 'CREATE_STACK' {
ext.when = {
(params.segmentation_method.split(',').contains('cellpose') ||
params.segmentation_method.split(',').contains('ilastik') ||
params.create_training_subset) &&
(image.size() == 2 ) }
ext.when = { image.size() > 1 }
ext.prefix = { "${meta.id}_stack" }
ext.args = [ "",
params.clahe_pyramid_tile ? "--tile_size ${params.clahe_pyramid_tile}" : "",
Expand Down Expand Up @@ -107,6 +111,7 @@ process {
}

withName: "ILASTIK_PIXELCLASSIFICATION" {
ext.when = { params.segmentation_method.split(',').contains('ilastik') }
publishDir = [
path: { "${params.outdir}/segmentation/ilastik" },
pattern: "*.{h5}",
Expand All @@ -115,6 +120,7 @@ process {
}

withName: "ILASTIK_MULTICUT" {
ext.when = { params.segmentation_method.split(',').contains('ilastik') }
publishDir = [
path: { "${params.outdir}/segmentation/ilastik" },
pattern: "*.tiff",
Expand All @@ -123,10 +129,11 @@ process {
}

withName: "CROPHDF5" {
ext.when = { params.create_training_subset }
publishDir = [
path: "${params.outdir}/training_subset/hdf5",
mode: params.publish_dir_mode,
pattern: "*{C,c}rop*.{hdf5,h5,txt}"
pattern: "*{C,c}rop*.{hdf5,h5}"
]
ext.args = [ "",
"--crop",
Expand All @@ -138,14 +145,16 @@ process {
}

withName: "CROPTIFF" {
ext.when = { params.create_training_subset }
publishDir = [
path: "${params.outdir}/training_subset/tiff",
mode: params.publish_dir_mode,
pattern: "*.{tiff,tif,png}"
pattern: "*.{tiff,tif}"
]
}

withName: "TIFFH5CONVERT" {
ext.when = { params.segmentation_method.split(',').contains('ilastik') }
publishDir = [
path: "${params.outdir}/converted_hdf5",
pattern: "*.{hdf5,h5}"
Expand Down
3 changes: 1 addition & 2 deletions docs/output.md
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ MolkartQC is a local module used for gathering useful quality-control metrics fo

- `multiqc/`
- `final_QC.all_samples.csv`: all molkartqc outputs concatenated to one `csv` file.
- `*.crop_overview.png`: Crop overview for visual assessment of crop placement on the whole sample.
- `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser.
- `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline.
- `multiqc_plots/`: directory containing static images from the report in various formats.
Expand Down Expand Up @@ -141,10 +142,8 @@ Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQ
- `training_subset/`
- `hdf5/`
- `*_crop[0-9]+.hdf5`: `hdf5` crops for training Pixel classification and Multicut models with ilastik for segmentation.
- `*CropSummary.txt`: Summary of the created crops - used by tiff crops and for overview creation.
- `tiff/`
- `*_crop[0-9]+.tiff`: `tiff` crops for training Cellpose to create a custom segmentation model.
- `*.crop_overview.png`: Crop overview for visual assessment of crop placement on the whole sample.

</details>

Expand Down
10 changes: 5 additions & 5 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
},
"custom/dumpsoftwareversions": {
"branch": "master",
"git_sha": "fc6caedab1c71113aa440900e5989dd1c03ec58f",
"git_sha": "bba7e362e4afead70653f84d8700588ea28d0f9e",
"installed_by": ["modules"]
},
"deepcell/mesmer": {
Expand All @@ -22,12 +22,12 @@
},
"ilastik/multicut": {
"branch": "master",
"git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc",
"git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
"installed_by": ["modules"]
},
"ilastik/pixelclassification": {
"branch": "master",
"git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc",
"git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
"installed_by": ["modules"]
},
"mindagap/duplicatefinder": {
Expand All @@ -37,12 +37,12 @@
},
"mindagap/mindagap": {
"branch": "master",
"git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a",
"git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
"installed_by": ["modules"]
},
"multiqc": {
"branch": "master",
"git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a",
"git_sha": "4ab13872435962dadc239979554d13709e20bf29",
"installed_by": ["modules"]
}
}
Expand Down
File renamed without changes.
File renamed without changes.
43 changes: 43 additions & 0 deletions modules/local/molkartqcpng.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Combine per-sample crop-overview PNGs into a single labelled image for the
// MultiQC report (delegates to collect_QC.py --png_overview).
process MOLKARTQCPNG {
    label 'process_single'

    container 'ghcr.io/schapirolabor/molkart-local:v0.0.1'

    input:
    path(png)

    output:
    path("*.png")      , emit: png_overview
    path "versions.yml", emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''

    """
    collect_QC.py \\
        --png_overview $png \\
        --outdir . \\
        $args
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        molkartqc: \$(collect_QC.py --version)
    END_VERSIONS
    """

    stub:
    // This process has no `meta` input (only `path(png)`), so the previous
    // fallback `"${meta.id}"` raised "No such variable: meta" under -stub.
    // Default to the fixed name collect_QC.py produces in the real run.
    def prefix = task.ext.prefix ?: "crop_overview"

    """
    touch ${prefix}.png
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        molkartqc: \$(collect_QC.py --version)
    END_VERSIONS
    """
}
31 changes: 0 additions & 31 deletions modules/local/samplesheet_check.nf

This file was deleted.

1 change: 1 addition & 0 deletions modules/local/tiffh5convert.nf
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ process TIFFH5CONVERT {

script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"

"""
crop_hdf5.py \\
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 3a0b2ef

Please sign in to comment.