Skip to content

Commit

Permalink
Merge pull request #36 from nf-core/crop_overview_multiqc
Browse files Browse the repository at this point in the history
Crop overview multiqc
  • Loading branch information
FloWuenne authored Dec 15, 2023
2 parents 68a2028 + 8bd15fd commit 3a0b2ef
Show file tree
Hide file tree
Showing 24 changed files with 285 additions and 139 deletions.
2 changes: 1 addition & 1 deletion .gitpod.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
image: nfcore/gitpod:latest
image: nfcore/gitpod:dev
tasks:
- name: Update Nextflow and setup pre-commit
command: |
Expand Down
11 changes: 11 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,17 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## v1.0.1dev - [2023.12.11]

The crop overview is now provided to MultiQC — when `create_training_subset` is run, the MultiQC and CUSTOM_DUMPSOFTWAREVERSIONS steps are also executed.

### `Added`

- removed CropSummary.txt from published outputs - it gets collected at multiqc step and published there
- moved crop_overview.png to MultiQC folder
- Gitpod container is `nfcore/gitpod:dev` instead of `latest`, to include newer versions of nf-core/tools and nf-test
- MOLKARTQCPNG process to add name to png for multiqc report, and combine if multiple samples are processed

## v1.0.1dev - [2023.12.07]

Local module revamp - all should use the same Docker image to save space.
Expand Down
137 changes: 82 additions & 55 deletions bin/collect_QC.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,31 @@

#### This script takes regionprops_table output from mcquant and the raw spot tables from Resolve bioscience as input
#### and calculates some QC metrics for masks and spot assignments
### If png files are provided, it combines them into one

import argparse
import pandas as pd
from PIL import Image, ImageDraw, ImageFont
import os


def combine_png_files(input_paths, output_path):
    """Stack the given PNG crop overviews vertically into one image.

    Each image is annotated (near its top-left corner) with its file name,
    with ".ome" stripped and ".crop" rewritten to "_crop", so individual
    samples can be identified in the combined overview. The result is
    written to ``<output_path>/crop_overview.png``.

    Args:
        input_paths: iterable of paths to the PNG files to combine.
        output_path: directory the combined image is written to.

    Raises:
        ValueError: if ``input_paths`` is empty.
    """
    if not input_paths:
        raise ValueError("No PNG files provided to combine.")

    images = []
    for file_path in input_paths:
        img = Image.open(file_path)
        image_name = os.path.basename(file_path).replace(".ome", "").replace(".crop", "_crop")
        draw = ImageDraw.Draw(img)
        font_size = 50
        # NOTE: load_default() with a size argument requires Pillow >= 10.1.
        font = ImageFont.load_default(font_size)
        draw.text((100, 50), image_name, fill="black", font=font)
        images.append(img)

    # Size the canvas to the widest image and the sum of all heights so no
    # image is clipped when crops differ in size (the previous fixed
    # first-image-sized grid cropped larger images).
    canvas_width = max(img.size[0] for img in images)
    canvas_height = sum(img.size[1] for img in images)
    combined_image = Image.new("RGB", (canvas_width, canvas_height), "white")
    y_offset = 0
    for img in images:
        combined_image.paste(img, (0, y_offset))
        y_offset += img.size[1]
    combined_image.save(os.path.join(output_path, "crop_overview.png"))


def summarize_spots(spot_table):
Expand Down Expand Up @@ -49,63 +71,68 @@ def summarize_segmasks(cellxgene_table, spots_summary):
parser.add_argument("-d", "--sample_id", help="Sample ID.")
parser.add_argument("-g", "--segmentation_method", help="Segmentation method used.")
parser.add_argument("--filterqc", required=False, help="QC from mask filter step")
parser.add_argument("--png_overview", nargs="+", help="Crop overview image paths")
parser.add_argument("--version", action="version", version="0.1.0")

args = parser.parse_args()

## Read in cellxgene_table table
cellxgene_table = pd.read_csv(args.cellxgene, sep=",")

## Read in spot table
spots = pd.read_table(args.spots, sep="\t", names=["x", "y", "z", "gene"])
duplicated = sum(spots.gene.str.contains("Duplicated"))
spots = spots[~spots.gene.str.contains("Duplicated")]

## Pass on filterqc values
filterqc = pd.read_csv(
args.filterqc,
names=["below_min_area", "below_percentage", "above_max_area", "above_percentage", "total_labels"],
header=None,
)

## Summarize spots table
summary_spots = summarize_spots(spots)
summary_segmentation = summarize_segmasks(cellxgene_table, summary_spots)

## Create pandas data frame with one row per parameter and write each value in summary_segmentation to a new row in the data frame
summary_df = pd.DataFrame(
columns=[
"sample_id",
"segmentation_method",
"total_cells",
"avg_area",
"total_spots",
"spot_assign_per_cell",
"spot_assign_total",
"spot_assign_percent",
"duplicated_total",
"labels_total",
"labels_below_thresh",
"labels_above_thresh",
if args.png_overview != None:
combine_png_files(args.png_overview, args.outdir)

else:
## Read in cellxgene_table table
cellxgene_table = pd.read_csv(args.cellxgene, sep=",")

## Read in spot table
spots = pd.read_table(args.spots, sep="\t", names=["x", "y", "z", "gene"])
duplicated = sum(spots.gene.str.contains("Duplicated"))
spots = spots[~spots.gene.str.contains("Duplicated")]

## Pass on filterqc values
filterqc = pd.read_csv(
args.filterqc,
names=["below_min_area", "below_percentage", "above_max_area", "above_percentage", "total_labels"],
header=None,
)

## Summarize spots table
summary_spots = summarize_spots(spots)
summary_segmentation = summarize_segmasks(cellxgene_table, summary_spots)

## Create pandas data frame with one row per parameter and write each value in summary_segmentation to a new row in the data frame
summary_df = pd.DataFrame(
columns=[
"sample_id",
"segmentation_method",
"total_cells",
"avg_area",
"total_spots",
"spot_assign_per_cell",
"spot_assign_total",
"spot_assign_percent",
"duplicated_total",
"labels_total",
"labels_below_thresh",
"labels_above_thresh",
]
)
summary_df.loc[0] = [
##args.sample_id,
args.sample_id + "_" + args.segmentation_method,
args.segmentation_method,
summary_segmentation[0],
summary_segmentation[1],
summary_spots[1],
summary_segmentation[2],
summary_segmentation[3],
summary_segmentation[4],
duplicated,
filterqc.total_labels[1],
filterqc.below_min_area[1],
filterqc.above_max_area[1],
]
)
summary_df.loc[0] = [
##args.sample_id,
args.sample_id + "_" + args.segmentation_method,
args.segmentation_method,
summary_segmentation[0],
summary_segmentation[1],
summary_spots[1],
summary_segmentation[2],
summary_segmentation[3],
summary_segmentation[4],
duplicated,
filterqc.total_labels[1],
filterqc.below_min_area[1],
filterqc.above_max_area[1],
]
print(args.sample_id)
# Write summary_df to a csv file
summary_df.to_csv(
f"{args.outdir}/{args.sample_id}.{args.segmentation_method}.spot_QC.csv", header=True, index=False
)
print(args.sample_id)
# Write summary_df to a csv file
summary_df.to_csv(
f"{args.outdir}/{args.sample_id}.{args.segmentation_method}.spot_QC.csv", header=True, index=False
)
23 changes: 16 additions & 7 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,16 @@ process {
]
}

withName: 'MOLKARTQCPNG' {
ext.when = { params.create_training_subset }
publishDir = [
path: { "${params.outdir}/molkartqc" },
pattern: "*.png"
]
}

withName: 'CREATE_STACK' {
ext.when = {
(params.segmentation_method.split(',').contains('cellpose') ||
params.segmentation_method.split(',').contains('ilastik') ||
params.create_training_subset) &&
(image.size() == 2 ) }
ext.when = { image.size() > 1 }
ext.prefix = { "${meta.id}_stack" }
ext.args = [ "",
params.clahe_pyramid_tile ? "--tile_size ${params.clahe_pyramid_tile}" : "",
Expand Down Expand Up @@ -107,6 +111,7 @@ process {
}

withName: "ILASTIK_PIXELCLASSIFICATION" {
ext.when = { params.segmentation_method.split(',').contains('ilastik') }
publishDir = [
path: { "${params.outdir}/segmentation/ilastik" },
pattern: "*.{h5}",
Expand All @@ -115,6 +120,7 @@ process {
}

withName: "ILASTIK_MULTICUT" {
ext.when = { params.segmentation_method.split(',').contains('ilastik') }
publishDir = [
path: { "${params.outdir}/segmentation/ilastik" },
pattern: "*.tiff",
Expand All @@ -123,10 +129,11 @@ process {
}

withName: "CROPHDF5" {
ext.when = { params.create_training_subset }
publishDir = [
path: "${params.outdir}/training_subset/hdf5",
mode: params.publish_dir_mode,
pattern: "*{C,c}rop*.{hdf5,h5,txt}"
pattern: "*{C,c}rop*.{hdf5,h5}"
]
ext.args = [ "",
"--crop",
Expand All @@ -138,14 +145,16 @@ process {
}

withName: "CROPTIFF" {
ext.when = { params.create_training_subset }
publishDir = [
path: "${params.outdir}/training_subset/tiff",
mode: params.publish_dir_mode,
pattern: "*.{tiff,tif,png}"
pattern: "*.{tiff,tif}"
]
}

withName: "TIFFH5CONVERT" {
ext.when = { params.segmentation_method.split(',').contains('ilastik') }
publishDir = [
path: "${params.outdir}/converted_hdf5",
pattern: "*.{hdf5,h5}"
Expand Down
3 changes: 1 addition & 2 deletions docs/output.md
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ MolkartQC is a local module used for gathering useful quality-control metrics fo

- `multiqc/`
- `final_QC.all_samples.csv`: all molkartqc outputs concatenated to one `csv` file.
- `*.crop_overview.png`: Crop overview for visual assessment of crop placement on the whole sample.
- `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser.
- `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline.
- `multiqc_plots/`: directory containing static images from the report in various formats.
Expand Down Expand Up @@ -141,10 +142,8 @@ Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQ
- `training_subset/`
- `hdf5/`
- `*_crop[0-9]+.hdf5`: `hdf5` crops for training Pixel classification and Multicut models with ilastik for segmentation.
- `*CropSummary.txt`: Summary of the created crops - used by tiff crops and for overview creation.
- `tiff/`
- `*_crop[0-9]+.tiff`: `tiff` crops for training Cellpose to create a custom segmentation model.
- `*.crop_overview.png`: Crop overview for visual assessment of crop placement on the whole sample.

</details>

Expand Down
10 changes: 5 additions & 5 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
},
"custom/dumpsoftwareversions": {
"branch": "master",
"git_sha": "fc6caedab1c71113aa440900e5989dd1c03ec58f",
"git_sha": "bba7e362e4afead70653f84d8700588ea28d0f9e",
"installed_by": ["modules"]
},
"deepcell/mesmer": {
Expand All @@ -22,12 +22,12 @@
},
"ilastik/multicut": {
"branch": "master",
"git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc",
"git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
"installed_by": ["modules"]
},
"ilastik/pixelclassification": {
"branch": "master",
"git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc",
"git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
"installed_by": ["modules"]
},
"mindagap/duplicatefinder": {
Expand All @@ -37,12 +37,12 @@
},
"mindagap/mindagap": {
"branch": "master",
"git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a",
"git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
"installed_by": ["modules"]
},
"multiqc": {
"branch": "master",
"git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a",
"git_sha": "4ab13872435962dadc239979554d13709e20bf29",
"installed_by": ["modules"]
}
}
Expand Down
File renamed without changes.
File renamed without changes.
43 changes: 43 additions & 0 deletions modules/local/molkartqcpng.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Combine per-sample crop-overview PNGs into a single labelled image for the
// MultiQC report (delegates to collect_QC.py --png_overview).
process MOLKARTQCPNG {
    label 'process_single'

    container 'ghcr.io/schapirolabor/molkart-local:v0.0.1'

    input:
    path(png)

    output:
    path("*.png")      , emit: png_overview
    path "versions.yml", emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''

    """
    collect_QC.py \\
        --png_overview $png \\
        --outdir . \\
        $args
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        molkartqc: \$(collect_QC.py --version)
    END_VERSIONS
    """

    stub:
    // This process has no `meta` input (only `path(png)`), so the previous
    // fallback `"${meta.id}"` raised "No such variable: meta" under -stub.
    // Default to the fixed name collect_QC.py produces in the real run.
    def prefix = task.ext.prefix ?: "crop_overview"

    """
    touch ${prefix}.png
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        molkartqc: \$(collect_QC.py --version)
    END_VERSIONS
    """
}
31 changes: 0 additions & 31 deletions modules/local/samplesheet_check.nf

This file was deleted.

1 change: 1 addition & 0 deletions modules/local/tiffh5convert.nf
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ process TIFFH5CONVERT {

script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"

"""
crop_hdf5.py \\
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 3a0b2ef

Please sign in to comment.