def _validate_inputs(mags, loci, proteins):
    """Reject input combinations the annotation action does not support.

    The combinations that pass are: MAGs only, proteins only, proteins with
    loci, and MAGs with both proteins and loci.

    Args:
        mags: MAGs input, or a falsy value when not provided.
        loci: Loci (GFF) input, or a falsy value when not provided.
        proteins: Proteins input, or a falsy value when not provided.

    Raises:
        ValueError: If MAGs are combined with loci but without proteins, if
            MAGs are combined with proteins but without loci, or if neither
            MAGs nor proteins are provided.
    """
    if mags and loci and not proteins:
        raise ValueError(
            "Loci input can only be given in combination with proteins input."
        )
    if mags and not loci and proteins:
        raise ValueError(
            "MAGs and proteins inputs together can only "
            "be given in combination with loci input."
        )
    if not mags and not proteins:
        raise ValueError("MAGs or proteins input has to be provided.")


def _get_file_paths(file, mags, proteins, loci):
    """Derive the ID and the matching protein/GFF paths for one input file.

    Args:
        file: Path of the file currently being processed. It is a MAG file
            when ``mags`` is given, otherwise a proteins file.
        mags: MAGs input, or a falsy value when not provided.
        proteins: Proteins input directory, or a falsy value.
        loci: Loci input directory, or a falsy value.

    Returns:
        A tuple ``(mag_id, protein_path, gff_path)``. ``protein_path`` is
        ``None`` when MAGs are given without proteins and is ``file`` itself
        (unchanged) when only proteins are given. ``gff_path`` is ``None``
        when no loci are given.

    Raises:
        ValueError: If the expected ``<mag_id>_proteins.fasta`` or
            ``<mag_id>_loci.gff`` file does not exist.
    """
    # File name without directory and extension.
    stem = os.path.splitext(os.path.basename(file))[0]

    if mags:
        # The MAG file name (minus extension) is the ID.
        mag_id = stem

        if proteins:
            protein_path = os.path.join(str(proteins), f"{mag_id}_proteins.fasta")
            if not os.path.exists(protein_path):
                raise ValueError(
                    f"Proteins file for ID '{mag_id}' is missing in proteins input."
                )
        else:
            protein_path = None
    else:
        # Only proteins were given: the ID is the file name without the
        # "_proteins" suffix. Strip the suffix only when it is really there;
        # blindly cutting nine characters would mangle (or empty) the ID of a
        # file that is not named "<id>_proteins.<ext>".
        suffix = "_proteins"
        mag_id = stem[: -len(suffix)] if stem.endswith(suffix) else stem
        protein_path = file

    if loci:
        gff_path = os.path.join(str(loci), f"{mag_id}_loci.gff")
        if not os.path.exists(gff_path):
            raise ValueError(f"GFF file for ID '{mag_id}' is missing in loci input.")
    else:
        gff_path = None

    return mag_id, protein_path, gff_path
def _move_or_create_files(src_dir: str, mag_id: str, file_operations: list):
    """Collect the result files of one run into their output directories.

    Args:
        src_dir: Directory in which the result files are looked up.
        mag_id: ID that is prefixed (followed by ``_``) to every file name
            in the destination.
        file_operations: Iterable of ``(file_name, target_dir)`` pairs.

    For each pair, ``src_dir/file_name`` is moved to
    ``target_dir/<mag_id>_<file_name>`` when it exists; otherwise an empty
    placeholder file is created at that destination.
    """
    for file_name, target_dir in file_operations:
        produced = os.path.join(src_dir, file_name)
        destination = os.path.join(str(target_dir), f"{mag_id}_{file_name}")

        if not os.path.exists(produced):
            # Nothing was written for this output: leave an empty placeholder.
            open(destination, "w").close()
            continue

        shutil.move(produced, destination)
def annotate_feature_data_amrfinderplus(
    amrfinderplus_db: AMRFinderPlusDatabaseDirFmt,
    mags: MAGSequencesDirFmt = None,
    proteins: ProteinsDirectoryFormat = None,
    loci: LociDirectoryFormat = None,
    organism: str = None,
    plus: bool = False,
    report_all_equal: bool = False,
    ident_min: float = None,
    curated_ident: bool = False,
    coverage_min: float = 0.5,
    translation_table: str = "11",
    annotation_format: str = "prodigal",
    report_common: bool = False,
    gpipe_org: bool = False,
    threads: int = None,
) -> (
    AMRFinderPlusAnnotationsDirFmt,
    AMRFinderPlusAnnotationsDirFmt,
    GenesDirectoryFormat,
    ProteinsDirectoryFormat,
):
    """Run AMRFinderPlus once per input file and collect the results.

    The files iterated over are those in ``mags`` when MAGs are given,
    otherwise those in ``proteins``. For every file ``run_amrfinderplus_n``
    is called with a shared temporary working directory, and the four
    expected result files are then moved into the output directories
    (prefixed with the file's ID) or replaced by empty placeholders.

    Args:
        amrfinderplus_db: AMRFinderPlus database, forwarded unchanged.
        mags: Optional MAG sequences.
        proteins: Optional protein sequences.
        loci: Optional GFF files matching the protein sequences.
        organism, plus, report_all_equal, ident_min, curated_ident,
        coverage_min, translation_table, annotation_format, report_common,
        gpipe_org, threads: Forwarded unchanged to ``run_amrfinderplus_n``.

    Returns:
        A tuple ``(amr_annotations, amr_all_mutations, amr_genes,
        amr_proteins)`` of the populated output directory formats.

    Raises:
        ValueError: If the input combination is rejected by
            ``_validate_inputs`` or a matching proteins/GFF file is missing.
    """
    # Check for unallowed input combinations.
    _validate_inputs(mags, loci, proteins)

    # Create all output directories.
    amr_annotations = AMRFinderPlusAnnotationsDirFmt()
    amr_all_mutations = AMRFinderPlusAnnotationsDirFmt()
    amr_genes = GenesDirectoryFormat()
    amr_proteins = ProteinsDirectoryFormat()

    # Result file names looked up in the working directory, paired with the
    # output directory each one belongs to. This is the same for every file,
    # so it is built once.
    file_operations = [
        ("amr_annotations.tsv", amr_annotations),
        ("amr_all_mutations.tsv", amr_all_mutations),
        ("amr_genes.fasta", amr_genes),
        ("amr_proteins.fasta", amr_proteins),
    ]

    # MAG files drive the loop when MAGs are provided, protein files
    # otherwise. glob returns entries in arbitrary order; sorting makes the
    # processing order (and thus logs and failures) reproducible.
    source_dir = mags if mags else proteins
    files = sorted(glob.glob(os.path.join(str(source_dir), "*")))

    with tempfile.TemporaryDirectory() as tmp:
        for file in files:
            # Get paths to protein and gff files, and get mag_id.
            mag_id, protein_path, gff_path = _get_file_paths(file, mags, proteins, loci)

            run_amrfinderplus_n(
                working_dir=tmp,
                amrfinderplus_db=amrfinderplus_db,
                dna_sequences=file if mags else None,
                protein_sequences=protein_path,
                gff=gff_path,
                organism=organism,
                plus=plus,
                report_all_equal=report_all_equal,
                ident_min=ident_min,
                curated_ident=curated_ident,
                coverage_min=coverage_min,
                translation_table=translation_table,
                annotation_format=annotation_format,
                report_common=report_common,
                gpipe_org=gpipe_org,
                threads=threads,
            )

            # Move the files or create placeholder files. Moving empties the
            # working directory again, so it can be reused for the next file.
            _move_or_create_files(tmp, mag_id, file_operations)

    return amr_annotations, amr_all_mutations, amr_genes, amr_proteins
class TestValidateInputs(TestPluginBase):
    """Each rejected input combination raises its own ValueError message."""

    package = "q2_amr.amrfinderplus.tests"

    def test_loci_mags(self):
        with self.assertRaisesRegex(
            ValueError,
            "Loci input can only be given in combination with proteins input.",
        ):
            _validate_inputs(mags="mags", loci="loci", proteins=None)

    def test_no_loci_protein_mags(self):
        with self.assertRaisesRegex(
            ValueError,
            "MAGs and proteins inputs together can only be given in combination with "
            "loci input.",
        ):
            _validate_inputs(mags="mags", loci=None, proteins="proteins")

    def test_no_protein_no_mags(self):
        with self.assertRaisesRegex(
            ValueError, "MAGs or proteins input has to be provided."
        ):
            _validate_inputs(mags=None, loci="loci_directory", proteins=None)


class TestMoveOrCreateFiles(TestPluginBase):
    """Result files are moved when present, else an empty placeholder is made."""

    package = "q2_amr.amrfinderplus.tests"

    def setUp(self):
        super().setUp()

        self.tmp = self.temp_dir.name
        self.src_dir = os.path.join(self.tmp, "src_dir")
        self.target_dir = os.path.join(self.tmp, "target_dir")
        os.mkdir(self.src_dir)
        os.mkdir(self.target_dir)

    def test_move_file(self):
        # Create a dummy file in the source directory
        src_file = os.path.join(self.src_dir, "test_file.txt")
        with open(src_file, "w"):
            pass

        # Define the file operations
        file_operations = [("test_file.txt", self.target_dir)]

        # Run the function
        _move_or_create_files(
            src_dir=self.src_dir,
            mag_id="mag",
            file_operations=file_operations,
        )

        # Assert the file arrived under its prefixed name ...
        self.assertTrue(
            os.path.exists(os.path.join(self.target_dir, "mag_test_file.txt"))
        )
        # ... and was moved, not copied.
        self.assertFalse(os.path.exists(src_file))

    def test_file_missing_create_placeholder(self):
        # Define the file operations; the source directory stays empty
        file_operations = [("test_file.txt", self.target_dir)]

        # Run the function
        _move_or_create_files(
            src_dir=self.src_dir,
            mag_id="mag",
            file_operations=file_operations,
        )

        # Assert an empty placeholder was created under the prefixed name
        placeholder = os.path.join(self.target_dir, "mag_test_file.txt")
        self.assertTrue(os.path.exists(placeholder))
        self.assertEqual(os.path.getsize(placeholder), 0)

    # NOTE(review): this exercises _get_file_paths, not _move_or_create_files;
    # it is kept here so no coverage is lost, but it belongs with the other
    # _get_file_paths tests.
    def test_with_mags_and_proteins_file_missing(self):
        with self.assertRaisesRegex(
            ValueError, "Proteins file for ID 'mag_id' is missing in proteins input."
        ):
            _get_file_paths("path/mag_id.fasta", "path/mags", "path/proteins", None)
class TestGetFilePaths(TestPluginBase):
    """ID and protein/GFF path resolution for every supported input mix."""

    package = "q2_amr.amrfinderplus.tests"

    def setUp(self):
        super().setUp()

        self.test_dir = self.temp_dir
        self.test_dir_path = Path(self.test_dir.name)
        # Empty file playing the role of the MAG being processed.
        self.file_path = self.test_dir_path / "test_file.fasta"
        self.file_path.touch()

    def _touch(self, name):
        """Create an empty file called ``name`` in the test dir; return its Path."""
        created = self.test_dir_path / name
        created.touch()
        return created

    def test_with_mags_and_proteins_file_exists(self):
        expected_proteins = self._touch("test_file_proteins.fasta")

        result = _get_file_paths(
            file=self.file_path,
            mags=self.test_dir_path,
            proteins=self.test_dir_path,
            loci=None,
        )
        mag_id, protein_path, gff_path = result

        self.assertEqual(mag_id, "test_file")
        self.assertEqual(protein_path, str(expected_proteins))
        self.assertIsNone(gff_path)

    def test_with_mags_and_proteins_file_missing(self):
        expected_message = (
            "Proteins file for ID 'test_file' is missing in proteins input."
        )
        with self.assertRaisesRegex(ValueError, expected_message):
            _get_file_paths(
                file=self.file_path,
                mags=self.test_dir_path,
                proteins=self.test_dir_path,
                loci=None,
            )

    def test_with_proteins_only(self):
        proteins_file = self._touch("test_file_proteins.fasta")

        mag_id, protein_path, gff_path = _get_file_paths(
            file=proteins_file, mags=None, proteins=self.test_dir_path, loci=None
        )

        self.assertEqual(mag_id, "test_file")
        # Without MAGs the given file is handed back as the protein path.
        self.assertEqual(protein_path, proteins_file)
        self.assertIsNone(gff_path)

    def test_with_loci_file_exists(self):
        expected_gff = self._touch("test_file_loci.gff")

        mag_id, protein_path, gff_path = _get_file_paths(
            file=self.file_path,
            mags=self.test_dir_path,
            proteins=None,
            loci=self.test_dir_path,
        )

        self.assertEqual(mag_id, "test_file")
        self.assertIsNone(protein_path)
        self.assertEqual(gff_path, str(expected_gff))

    def test_with_loci_file_missing(self):
        expected_message = "GFF file for ID 'test_file' is missing in loci input."
        with self.assertRaisesRegex(ValueError, expected_message):
            _get_file_paths(
                file=self.file_path,
                mags=self.test_dir_path,
                proteins=None,
                loci=self.test_dir_path,
            )

    def test_with_mags_proteins_and_loci_all_files_exist(self):
        expected_proteins = self._touch("test_file_proteins.fasta")
        expected_gff = self._touch("test_file_loci.gff")

        mag_id, protein_path, gff_path = _get_file_paths(
            file=self.file_path,
            mags=self.test_dir_path,
            proteins=self.test_dir_path,
            loci=self.test_dir_path,
        )

        self.assertEqual(mag_id, "test_file")
        self.assertEqual(protein_path, str(expected_proteins))
        self.assertEqual(gff_path, str(expected_gff))
class TestAnnotateFeatureDataAMRFinderPlus(TestPluginBase):
    """Wiring of annotate_feature_data_amrfinderplus with all helpers mocked."""

    package = "q2_amr.amrfinderplus.tests"

    # Stacked @patch decorators inject their mocks bottom-up: the decorator
    # closest to the function supplies the first mock argument.
    @patch("q2_amr.amrfinderplus.feature_data._validate_inputs")
    @patch(
        "q2_amr.amrfinderplus.feature_data._get_file_paths",
        return_value=("mag_id", "protein_path", "gff_path"),
    )
    @patch("q2_amr.amrfinderplus.feature_data.run_amrfinderplus_n")
    @patch("q2_amr.amrfinderplus.feature_data._move_or_create_files")
    def test_annotate_feature_data_amrfinderplus_mags(
        self, mock_move, mock_run, mock_paths, mock_validate
    ):
        mags = MAGSequencesDirFmt()
        mag_fp = os.path.join(str(mags), "mag.fasta")
        with open(mag_fp, "w"):
            pass

        annotate_feature_data_amrfinderplus(amrfinderplus_db=MagicMock(), mags=mags)

        # One input file -> every helper runs exactly once.
        mock_validate.assert_called_once()
        mock_paths.assert_called_once()
        mock_run.assert_called_once()
        mock_move.assert_called_once()

        # With MAGs the MAG file itself is the DNA input; protein and GFF
        # paths come from _get_file_paths.
        run_kwargs = mock_run.call_args[1]
        self.assertEqual(run_kwargs["dna_sequences"], mag_fp)
        self.assertEqual(run_kwargs["protein_sequences"], "protein_path")
        self.assertEqual(run_kwargs["gff"], "gff_path")

    @patch("q2_amr.amrfinderplus.feature_data._validate_inputs")
    @patch(
        "q2_amr.amrfinderplus.feature_data._get_file_paths",
        return_value=("mag_id", "protein_path", "gff_path"),
    )
    @patch("q2_amr.amrfinderplus.feature_data.run_amrfinderplus_n")
    @patch("q2_amr.amrfinderplus.feature_data._move_or_create_files")
    def test_annotate_feature_data_amrfinderplus_proteins(
        self, mock_move, mock_run, mock_paths, mock_validate
    ):
        proteins = ProteinsDirectoryFormat()
        with open(os.path.join(str(proteins), "proteins.fasta"), "w"):
            pass

        annotate_feature_data_amrfinderplus(
            amrfinderplus_db=MagicMock(), proteins=proteins
        )

        mock_validate.assert_called_once()
        mock_paths.assert_called_once()
        mock_run.assert_called_once()
        mock_move.assert_called_once()

        # Without MAGs no DNA input is passed on.
        run_kwargs = mock_run.call_args[1]
        self.assertIsNone(run_kwargs["dna_sequences"])
        self.assertEqual(run_kwargs["protein_sequences"], "protein_path")
        self.assertEqual(run_kwargs["gff"], "gff_path")
def mock_run_amrfinderplus_n(
    working_dir,
    amrfinderplus_db,
    dna_sequences,
    protein_sequences,
    gff,
    organism,
    plus,
    report_all_equal,
    ident_min,
    curated_ident,
    coverage_min,
    translation_table,
    annotation_format,
    report_common,
    gpipe_org,
    threads,
):
    """Test stand-in for ``run_amrfinderplus_n`` that only creates empty files.

    ``amr_annotations.tsv`` is always created in ``working_dir``. The other
    files depend on which arguments are truthy: ``amr_all_mutations.tsv`` for
    ``organism``, ``amr_genes.fasta`` for ``dna_sequences`` and
    ``amr_proteins.fasta`` for ``protein_sequences``. All remaining
    arguments are accepted and ignored.
    """
    created = ["amr_annotations.tsv"]
    if organism:
        created.append("amr_all_mutations.tsv")
    if dna_sequences:
        created.append("amr_genes.fasta")
    if protein_sequences:
        created.append("amr_proteins.fasta")

    for file_name in created:
        open(os.path.join(working_dir, file_name), "w").close()
# NOTE(review): presumably the Pathogen Detection taxgroup names that are valid
# for `organism` when `gpipe_org` is set - confirm against the code that
# consumes this list (not visible in this chunk).
organisms_gpipe = [
    "Acinetobacter",
    "Burkholderia_cepacia_complex",
    "Burkholderia_pseudomallei",
    "Campylobacter",
    "Citrobacter_freundii",
    "Clostridioides_difficile",
    "Enterobacter_asburiae",
    "Enterobacter_cloacae",
    "Enterococcus_faecalis",
    "Enterococcus_faecium",
    "Escherichia_coli_Shigella",
    "Klebsiella_oxytoca",
    "Klebsiella",
    "Neisseria_gonorrhoeae",
    "Neisseria_meningitidis",
    "Pseudomonas_aeruginosa",
    "Salmonella",
    "Serratia",
    "Staphylococcus_aureus",
    "Staphylococcus_pseudintermedius",
    "Streptococcus_agalactiae",
    "Streptococcus_pneumoniae",
    "Streptococcus_pyogenes",
    "Vibrio_cholerae",
    "Vibrio_parahaemolyticus",
    "Vibrio_vulnificus",
]

# Parameter types shared by both AMRFinderPlus annotation actions.
parameters_sample_data = dict(
    organism=P_organism,
    plus=Bool,
    report_all_equal=Bool,
    ident_min=Float % Range(0, 1, inclusive_start=True, inclusive_end=True),
    curated_ident=Bool,
    coverage_min=Float % Range(0, 1, inclusive_start=True, inclusive_end=True),
    translation_table=Str % Choices(translation_tables),
    report_common=Bool,
    gpipe_org=P_gpipe_org,
    threads=Int % Range(0, None, inclusive_start=False),
)

# The feature-data action takes the same parameters plus the GFF flavour.
parameters_feature_data = dict(
    parameters_sample_data,
    annotation_format=Str
    % Choices(
        "bakta",
        "genbank",
        "microscope",
        "patric",
        "pgap",
        "prodigal",
        "prokka",
        "pseudomonasdb",
        "rast",
        "stand",
    ),
)
# Help texts for the parameters shared by both AMRFinderPlus annotation
# actions. Every entry is a complete sentence ending in a period.
parameter_descriptions_sample_data = {
    "organism": "Taxon used for screening known resistance causing point mutations "
    "and blacklisting of common, non-informative genes.",
    "plus": "Provide results from 'Plus' genes such as virulence factors, "
    "stress-response genes, etc.",
    "report_all_equal": "Report all equally scoring BLAST and HMM matches. This "
    "will report multiple lines for a single element if there "
    "are multiple reference proteins that have the same score. "
    "On those lines the fields Accession of closest sequence "
    "and Name of closest sequence will be different showing "
    "each of the database proteins that are equally close to "
    "the query sequence.",
    # The parameter is declared as a Float in [0, 1], so the "-1" sentinel
    # mentioned by the previous wording can never be passed; any given value
    # overrides the curated cutoffs.
    "ident_min": "Minimum identity for a blast-based hit (Methods BLAST or "
    "PARTIAL). Setting this value "
    "will override curated similarity cutoffs. We only recommend "
    "using this option if you have a specific reason.",
    "curated_ident": "Use the curated threshold for a blast-based hit, if it "
    "exists and 0.9 otherwise. This will overwrite the value specified with the "
    "'ident_min' parameter.",
    "coverage_min": "Minimum proportion of reference gene covered for a "
    "BLAST-based hit (Methods BLAST or PARTIAL).",
    "translation_table": "Translation table used for BLASTX.",
    "report_common": "Report proteins common to a taxonomy group.",
    "gpipe_org": "Use Pathogen Detection taxgroup names as arguments to the "
    "organism option.",
    "threads": "The number of threads to use for processing. AMRFinderPlus "
    "defaults to 4 on hosts with >= 4 cores. Setting this number higher"
    " than the number of cores on the running host may cause blastp to "
    "fail. Using more than 4 threads may speed up searches.",
}

# The feature-data action documents the same parameters plus the GFF flavour.
parameter_descriptions_feature_data = {
    **parameter_descriptions_sample_data,
    "annotation_format": "GFF file format.",
}
# Descriptions of the outputs that both AMRFinderPlus annotation actions share.
output_descriptions_amrfinderplus = {
    "amr_annotations": "Annotated AMR genes and mutations.",
    "amr_all_mutations": "Report of genotypes at all locations screened for point "
    "mutations. These files allow you to distinguish between called "
    "point mutations that were the sensitive variant and the point "
    "mutations that could not be called because the sequence was not "
    "found. This file will contain all detected variants from the "
    "reference sequence, so it could be used as an initial screen for "
    "novel variants. Note 'Gene symbols' for mutations not in the "
    "database (identifiable by [UNKNOWN] in the Sequence name field) "
    "have offsets that are relative to the start of the sequence "
    "indicated in the field 'Accession of closest sequence' while "
    "'Gene symbols' from known point-mutation sites have gene symbols "
    "that match the Pathogen Detection Reference Gene Catalog "
    "standardized nomenclature for point mutations.",
    "amr_genes": "Sequences that were identified by AMRFinderPlus as AMR genes. "
    "This will include the entire region that aligns to the references for "
    "point mutations.",
}

# Alias kept so that any code still using the original, misspelled name
# ("amrfinderpolus") continues to work.
output_descriptions_amrfinderpolus = output_descriptions_amrfinderplus

# Sample-data action: shared outputs plus the presence/absence table.
output_descriptions_sample_data = {
    **output_descriptions_amrfinderplus,
    "feature_table": "Presence/Absence table of ARGs in all samples.",
}

# Feature-data action: shared outputs plus the protein sequences.
output_descriptions_feature_data = {
    **output_descriptions_amrfinderplus,
    "amr_proteins": "Protein Sequences that were identified by AMRFinderPlus as "
    "AMR genes. This will include the entire region that aligns to the references "
    "for point mutations.",
}
"organism": "Taxon used for screening known resistance causing point mutations " - "and blacklisting of common, non-informative genes.", - "plus": "Provide results from 'Plus' genes such as virulence factors, " - "stress-response genes, etc.", - "report_all_equal": "Report all equally scoring BLAST and HMM matches. This " - "will report multiple lines for a single element if there " - "are multiple reference proteins that have the same score. " - "On those lines the fields Accession of closest sequence " - "and Name of closest sequence will be different showing " - "each of the database proteins that are equally close to " - "the query sequence.", - "ident_min": "Minimum identity for a blast-based hit (Methods BLAST or " - "PARTIAL). Setting this value to something other than -1 " - "will override curated similarity cutoffs. We only recommend " - "using this option if you have a specific reason.", - "curated_ident": "Use the curated threshold for a blast-based hit, if it " - "exists and 0.9 otherwise. This will overwrite the value specified with the " - "'ident_min' parameter", - "coverage_min": "Minimum proportion of reference gene covered for a " - "BLAST-based hit (Methods BLAST or PARTIAL).", - "translation_table": "Translation table used for BLASTX.", - "report_common": "Report proteins common to a taxonomy group.", - "gpipe_org": "Use Pathogen Detection taxgroup names as arguments to the " - "organism option.", - "threads": "The number of threads to use for processing. AMRFinderPlus " - "defaults to 4 on hosts with >= 4 cores. Setting this number higher" - " than the number of cores on the running host may cause blastp to " - "fail. Using more than 4 threads may speed up searches.", - }, - output_descriptions={ - "amr_annotations": "Annotated AMR genes and mutations.", - "amr_all_mutations": "Report of genotypes at all locations screened for point " - "mutations. 
# Register the feature-data variant of the AMRFinderPlus annotation action.
# Parameter types/descriptions and output descriptions come from the
# module-level dictionaries so they stay in sync with the sample-data action.
plugin.methods.register_function(
    function=annotate_feature_data_amrfinderplus,
    inputs={
        "mags": FeatureData[MAG],
        "proteins": GenomeData[Proteins],
        "loci": GenomeData[Loci],
        "amrfinderplus_db": AMRFinderPlusDatabase,
    },
    parameters=parameters_feature_data,
    # Output names must line up, in order, with the tuple returned by
    # annotate_feature_data_amrfinderplus.
    outputs=[
        ("amr_annotations", FeatureData[AMRFinderPlusAnnotations]),
        ("amr_all_mutations", FeatureData[AMRFinderPlusAnnotations]),
        ("amr_genes", GenomeData[Genes]),
        ("amr_proteins", GenomeData[Proteins]),
    ],
    input_descriptions={
        "mags": "MAGs to be annotated with AMRFinderPlus.",
        "proteins": "Protein sequences to be annotated with AMRFinderPlus.",
        "loci": "GFF files to give sequence coordinates for proteins input. Required "
        "for combined searches of protein and DNA sequences.",
        "amrfinderplus_db": "AMRFinderPlus Database.",
    },
    parameter_descriptions=parameter_descriptions_feature_data,
    output_descriptions=output_descriptions_feature_data,
    name="Annotate Sequences with AMRFinderPlus.",
    description="Annotate DNA or protein sequences with antimicrobial resistance genes "
    "with AMRFinderPlus.",
    citations=[citations["feldgarden2021amrfinderplus"]],
)

# Registrations