Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,13 @@
Change Log
==========

3.10.0
======
* Added functionality to rerun a MetaWorkflowRun with imported steps from a previous run
* Added functionality to replace multiple QC items present for a file with an existing one and release it if necessary
* Some refactoring and code formatting


3.9.0
=====
* Added functionality to run sample identity checks
Expand Down
71 changes: 53 additions & 18 deletions magma_smaht/commands/create_meta_workflow_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
mwfr_long_read_bamqc,
mwfr_short_read_fastqc,
mwfr_custom_qc,
mwfr_sample_identity_check
mwfr_sample_identity_check,
)
from magma_smaht.utils import get_auth_key

Expand All @@ -30,7 +30,12 @@ def cli():
@cli.command()
@click.help_option("--help", "-h")
@click.option(
"-f", "--fileset-accession", required=True, type=str, help="Fileset accession"
"-f",
"--fileset-accessions",
required=True,
multiple=True,
type=str,
help="Fileset accessions",
)
@click.option(
"-l",
Expand All @@ -46,16 +51,23 @@ def cli():
type=str,
help="Name of environment in smaht-keys file",
)
def align_illumina(fileset_accession, length_required, auth_env):
def align_illumina(fileset_accessions, length_required, auth_env):
"""Alignment MWFR for Illumina data"""
smaht_key = get_auth_key(auth_env)
mwfr_illumina_alignment(fileset_accession, length_required, smaht_key)
for fileset_accession in fileset_accessions:
print(f"Working on Fileset {fileset_accession}")
mwfr_illumina_alignment(fileset_accession, length_required, smaht_key)


@cli.command()
@click.help_option("--help", "-h")
@click.option(
"-f", "--fileset-accession", required=True, type=str, help="Fileset accession"
"-f",
"--fileset-accessions",
required=True,
multiple=True,
type=str,
help="Fileset accessions",
)
@click.option(
"-l",
Expand All @@ -71,16 +83,23 @@ def align_illumina(fileset_accession, length_required, auth_env):
type=str,
help="Name of environment in smaht-keys file",
)
def align_rnaseq(fileset_accession, sequence_length, auth_env):
def align_rnaseq(fileset_accessions, sequence_length, auth_env):
"""Alignment MWFR for RNA-Seq data"""
smaht_key = get_auth_key(auth_env)
mwfr_rnaseq_alignment(fileset_accession, sequence_length, smaht_key)
for fileset_accession in fileset_accessions:
print(f"Working on Fileset {fileset_accession}")
mwfr_rnaseq_alignment(fileset_accession, sequence_length, smaht_key)


@cli.command()
@click.help_option("--help", "-h")
@click.option(
"-f", "--fileset-accession", required=True, type=str, help="Fileset accession"
"-f",
"--fileset-accessions",
required=True,
multiple=True,
type=str,
help="Fileset accessions",
)
@click.option(
"-e",
Expand All @@ -89,16 +108,23 @@ def align_rnaseq(fileset_accession, sequence_length, auth_env):
type=str,
help="Name of environment in smaht-keys file",
)
def align_pacbio(fileset_accession, auth_env):
def align_pacbio(fileset_accessions, auth_env):
"""Alignment MWFR for PacBio data"""
smaht_key = get_auth_key(auth_env)
mwfr_pacbio_alignment(fileset_accession, smaht_key)
for fileset_accession in fileset_accessions:
print(f"Working on Fileset {fileset_accession}")
mwfr_pacbio_alignment(fileset_accession, smaht_key)


@cli.command()
@click.help_option("--help", "-h")
@click.option(
"-f", "--fileset-accession", required=True, type=str, help="Fileset accession"
"-f",
"--fileset-accessions",
required=True,
multiple=True,
type=str,
help="Fileset accessions",
)
@click.option(
"-e",
Expand All @@ -107,16 +133,23 @@ def align_pacbio(fileset_accession, auth_env):
type=str,
help="Name of environment in smaht-keys file",
)
def align_hic(fileset_accession, auth_env):
def align_hic(fileset_accessions, auth_env):
"""Alignment MWFR for HIC data"""
smaht_key = get_auth_key(auth_env)
mwfr_hic_alignment(fileset_accession, smaht_key)
for fileset_accession in fileset_accessions:
print(f"Working on Fileset {fileset_accession}")
mwfr_hic_alignment(fileset_accession, smaht_key)


@cli.command()
@click.help_option("--help", "-h")
@click.option(
"-f", "--fileset-accession", required=True, type=str, help="Fileset accession"
"-f",
"--fileset-accessions",
required=True,
multiple=True,
type=str,
help="Fileset accessions",
)
@click.option(
"-e",
Expand All @@ -125,10 +158,12 @@ def align_hic(fileset_accession, auth_env):
type=str,
help="Name of environment in smaht-keys file",
)
def align_ont(fileset_accession, auth_env):
def align_ont(fileset_accessions, auth_env):
"""Alignment MWFR for ONT data"""
smaht_key = get_auth_key(auth_env)
mwfr_ont_alignment(fileset_accession, smaht_key)
for fileset_accession in fileset_accessions:
print(f"Working on Fileset {fileset_accession}")
mwfr_ont_alignment(fileset_accession, smaht_key)


@cli.command()
Expand All @@ -139,7 +174,7 @@ def align_ont(fileset_accession, auth_env):
required=True,
type=str,
multiple=True,
help="Fileset accession",
help="Fileset accessions",
)
@click.option(
"-c",
Expand Down Expand Up @@ -196,7 +231,7 @@ def qc_short_read_fastq(file_accession, auth_env):
required=True,
type=str,
multiple=True,
help="Fileset accession",
help="Fileset accessions",
)
@click.option(
"-r",
Expand Down
115 changes: 109 additions & 6 deletions magma_smaht/commands/wrangler_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,68 @@ def reset_all_failed_mwfrs(auth_env):
wrangler_utils.reset_all_failed_mwfrs(smaht_key)


@cli.command()
@click.help_option("--help", "-h")
@click.option(
    "-r",
    "--mwfr-uuid",
    required=True,
    type=str,
    help="MetaWorkflowRun UUID or accession used as the basis for the new MWFR",
)
@click.option(
    "-m",
    "--mwf-uuid",
    required=True,
    type=str,
    help="MetaWorkflow UUID or accession used as the basis for the new MWFR",
)
@click.option(
    "-i",
    "--input-arg",
    required=True,
    type=str,
    help="argument_name of the input argument to use to calculate input structure, e.g. `input_files_bam`",
)
@click.option(
    "-s",
    "--steps-to-import",
    required=True,
    type=str,
    help="Comma-separated list of workflow run names to import from the old MWFR",
)
@click.option(
    "--remove-file-qc",
    required=False,
    type=str,
    help="Removes the QC item of the specified file",
)
@click.option(
    "-e",
    "--auth-env",
    required=True,
    type=str,
    help="Name of environment in smaht-keys file",
)
def rerun_mwfr(
    mwfr_uuid: str,
    mwf_uuid: str,
    input_arg: str,
    steps_to_import: str,
    remove_file_qc: str,
    auth_env: str,
):
    """Creates a new MetaWorkflowRun based on the MWF and MWFR specified. All workflow runs
    specified in steps_to_import will be imported from the given MWFR to the new MWFR. All
    input variables and other properties will be copied over. The old MWFR will be deleted.
    """
    smaht_key = get_auth_key(auth_env)
    # The option arrives as one comma-separated string. Trim each name and drop
    # empty entries so that inputs such as "step_a, step_b" or "step_a," do not
    # produce names with stray whitespace or empty strings.
    steps_to_import_list = [
        step.strip() for step in steps_to_import.split(",") if step.strip()
    ]
    # remove_file_qc is forwarded unchanged; the option is not required, so it
    # may be unset when the flag is omitted on the command line.
    wrangler_utils.rerun_mwfr(
        mwfr_uuid, mwf_uuid, input_arg, steps_to_import_list, remove_file_qc, smaht_key
    )


@cli.command()
@click.help_option("--help", "-h")
@click.option(
Expand All @@ -107,7 +169,7 @@ def reset_all_failed_mwfrs(auth_env):
"-m",
"--mode",
required=True,
type=click.Choice(['keep_oldest', 'keep_newest']),
type=click.Choice(["keep_oldest", "keep_newest"]),
help="Merge mode",
)
@click.option(
Expand All @@ -130,6 +192,48 @@ def merge_qc_items(file_accessions, mode, auth_env):
wrangler_utils.merge_qc_items(f, mode, smaht_key)


@cli.command()
@click.help_option("--help", "-h")
@click.option(
    "-f", "--file-accession", required=True, type=str, help="File accession"
)
@click.option(
    "-k",
    "--keep-index",
    required=True,
    type=int,
    help="Index of the QC item to keep",
)
@click.option(
    "-r",
    "--release",
    is_flag=True,
    default=False,
    show_default=True,
    help="Release the remaining QC item",
)
@click.option(
    "-e",
    "--auth-env",
    required=True,
    type=str,
    help="Name of environment in smaht-keys file",
)
def replace_qc_item(file_accession, keep_index, release, auth_env):
    """
    Keep a single QC item on a file and discard the others.

    When a file carries several QC items, only the one at the given index is kept and
    all remaining ones are removed. This helps after QC has been rerun and the earlier
    QC item is obsolete. If the release flag is set, the QC item that is kept is
    released as well.
    """
    portal_key = get_auth_key(auth_env)
    wrangler_utils.replace_qc_item(
        file_accession,
        keep_index,
        release,
        portal_key,
    )


@cli.command()
@click.help_option("--help", "-h")
@click.option(
Expand Down Expand Up @@ -157,7 +261,7 @@ def merge_qc_items(file_accessions, mode, auth_env):
)
def archive_unaligned_reads(fileset_accessions, dry_run, auth_env):
"""
Archive (submitted) unaligned reads of a fileset.
Archive (submitted) unaligned reads of a fileset.
Every submitted unaligned read in the fileset will receive the s3_lifecycle_category=short_term_archive.
"""
smaht_key = get_auth_key(auth_env)
Expand Down Expand Up @@ -188,7 +292,6 @@ def sample_identity_check_status(num_files, auth_env):
"""
smaht_key = get_auth_key(auth_env)
wrangler_utils.sample_identity_check_status(num_files, smaht_key)



@cli.command()
Expand Down Expand Up @@ -221,10 +324,10 @@ def sample_identity_check_status(num_files, auth_env):
type=str,
help="Name of environment in smaht-keys file",
)
def set_property(identifier,property_key,property_value,auth_env):
"""Set item property to value by uuid. """
def set_property(identifier, property_key, property_value, auth_env):
"""Set item property to value by uuid."""
smaht_key = get_auth_key(auth_env)
wrangler_utils.set_property(identifier,property_key,property_value,smaht_key)
wrangler_utils.set_property(identifier, property_key, property_value, smaht_key)


if __name__ == "__main__":
Expand Down
56 changes: 56 additions & 0 deletions magma_smaht/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#!/usr/bin/env python3

# Portal Constants
#
# String literals shared across the package, collected in one module so that
# callers refer to a named constant instead of repeating the raw string.

# MetaWorkflow names are used to get the latest version.
# We assume that they don't change!
MWF_NAME_ILLUMINA = "Illumina_alignment_GRCh38"
MWF_NAME_RNASEQ = "RNA-seq_bulk_short_reads_GRCh38"
MWF_NAME_ONT = "ONT_alignment_GRCh38"
MWF_NAME_PACBIO = "PacBio_alignment_GRCh38"
MWF_NAME_HIC = "Hi-C_alignment_GRCh38"
MWF_NAME_FASTQC = "Illumina_FASTQ_quality_metrics"
MWF_NAME_FASTQ_LONG_READ = "long_reads_FASTQ_quality_metrics"
MWF_NAME_FASTQ_SHORT_READ = "short_reads_FASTQ_quality_metrics"
MWF_NAME_CRAM_TO_FASTQ_PAIRED_END = "cram_to_fastq_paired-end"
MWF_NAME_BAM_TO_FASTQ_PAIRED_END = "bam_to_fastq_paired-end"
MWF_NAME_BAMQC_SHORT_READ = "paired-end_short_reads_BAM_quality_metrics_GRCh38"
MWF_NAME_ULTRA_LONG_BAMQC = "ultra-long_reads_BAM_quality_metrics_GRCh38"
MWF_NAME_LONG_READ_BAMQC = "long_reads_BAM_quality_metrics_GRCh38"
# NOTE(review): this name lacks the MWF_NAME_ prefix used by its siblings;
# left unchanged because other modules may already import it.
MWF_SAMPLE_IDENTITY_CHECK = "sample_identity_check"

# Input argument names
INPUT_FILES_R1_FASTQ_GZ = "input_files_r1_fastq_gz"
INPUT_FILES_R2_FASTQ_GZ = "input_files_r2_fastq_gz"
INPUT_FILES_BAM = "input_files_bam"
INPUT_FILES = "input_files"
INPUT_FILES_FASTQ_GZ = "input_files_fastq_gz"
INPUT_FILES_CRAM = "input_files_cram"
GENOME_REFERENCE_FASTA = "genome_reference_fasta"
SAMPLE_NAME = "sample_name"
SAMPLE_NAMES = "sample_names"
LENGTH_REQUIRED = "length_required"
LIBRARY_ID = "library_id"
GENOME_REFERENCE_STAR = "genome_reference_star"
IS_STRANDED = "is_stranded"
STRANDEDNESS = "strandedness"

# Schema fields
# NOTE(review): some entries below (e.g. COMPLETED, UPLOADED, DELETED,
# FIRST_STRANDED, SECOND_STRANDED) look like field values rather than field
# names -- confirm against the portal schema before relying on that.
COMMON_FIELDS = "common_fields"
UUID = "uuid"
COMPLETED = "completed"
DESCRIPTION = "description"
TAGS = "tags"
SUBMISSION_CENTERS = "submission_centers"
SEQUENCING_CENTER = "sequencing_center"
CONSORTIA = "consortia"
FILE_SETS = "file_sets"
# Misspelled name ("WORFLOW") kept so existing imports continue to work.
META_WORFLOW_RUN = "MetaWorkflowRun"
# Correctly spelled alias; prefer this name in new code.
META_WORKFLOW_RUN = META_WORFLOW_RUN
ACCESSION = "accession"
ALIASES = "aliases"
UPLOADED = "uploaded"
DELETED = "deleted"
STATUS = "status"
FIRST_STRANDED = "First Stranded"
SECOND_STRANDED = "Second Stranded"
FAILED_JOBS = "failed_jobs"
Loading