Skip to content

Commit

Permalink
added sampledata contigs as input
Browse files Browse the repository at this point in the history
  • Loading branch information
VinzentRisch committed Jul 10, 2024
1 parent 81f0fbb commit 7577214
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 38 deletions.
82 changes: 53 additions & 29 deletions q2_amr/amrfinderplus/mags.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import os
import shutil
import tempfile
from typing import Union

import pandas as pd
from q2_types.genome_data import GenesDirectoryFormat
from q2_types.per_sample_sequences import MultiMAGSequencesDirFmt
from q2_types.per_sample_sequences import ContigSequencesDirFmt, MultiMAGSequencesDirFmt

from q2_amr.amrfinderplus.types import (
AMRFinderPlusDatabaseDirFmt,
Expand All @@ -14,8 +15,8 @@
from q2_amr.card.utils import create_count_table, read_in_txt


def annotate_mags_amrfinderplus(
mags: MultiMAGSequencesDirFmt,
def annotate_sample_data_amrfinderplus(
sequences: Union[MultiMAGSequencesDirFmt, ContigSequencesDirFmt],
amrfinderplus_db: AMRFinderPlusDatabaseDirFmt,
organism: str = None,
plus: bool = False,
Expand All @@ -30,25 +31,43 @@ def annotate_mags_amrfinderplus(
GenesDirectoryFormat,
pd.DataFrame,
):
manifest = mags.manifest.view(pd.DataFrame)

annotations = ARMFinderPlusAnnotationsDirFmt()
mutations = ARMFinderPlusAnnotationsDirFmt()
genes = GenesDirectoryFormat()

frequency_list = []

with tempfile.TemporaryDirectory() as tmp:
for samp_mag in list(manifest.index):
input_sequence = manifest.loc[samp_mag, "filename"]
# Create list of paths to all mags or contigs
if isinstance(sequences, MultiMAGSequencesDirFmt):
manifest = sequences.manifest.view(pd.DataFrame)
files = manifest["filename"]
else:
files = [
os.path.join(str(sequences), file) for file in os.listdir(str(sequences))
]

sample_id = samp_mag[0]
mag_id = samp_mag[1]
with tempfile.TemporaryDirectory() as tmp:
# Iterate over paths of mags or contigs
for file in files:
# Set sample and mag ids and output file paths for mag or contig
if isinstance(sequences, MultiMAGSequencesDirFmt):
index_value = manifest.query("filename == @file").index[0]
sample_id = index_value[0]
mag_id = index_value[1]
annotations_path = os.path.join(tmp, f"{mag_id}_amr_annotations.tsv")
mutations_path = os.path.join(tmp, f"{mag_id}_amr_mutations.tsv")
genes_path = os.path.join(tmp, f"{mag_id}_amr_genes.fasta")
else:
sample_id = os.path.splitext(os.path.basename(file))[0][:-8]
mag_id = ""
annotations_path = os.path.join(tmp, "amr_annotations.tsv")
mutations_path = os.path.join(tmp, "amr_mutations.tsv")
genes_path = os.path.join(tmp, f"{sample_id}_amr_genes.fasta")

# Run amrfinderplus
run_amrfinderplus_n(
working_dir=tmp,
amrfinderplus_db=amrfinderplus_db,
dna_sequence=input_sequence,
dna_sequence=file,
protein_sequence=None,
gff=None,
organism=organism,
Expand All @@ -58,32 +77,37 @@ def annotate_mags_amrfinderplus(
coverage_min=coverage_min,
translation_table=translation_table,
threads=threads,
id=mag_id + "_",
mag_id=mag_id,
sample_id=sample_id,
)

# Create frequency dataframe and append it to list
frequency_df = read_in_txt(
path=os.path.join(tmp, f"{mag_id}_amr_annotations.tsv"),
path=os.path.join(tmp, annotations_path),
samp_bin_name=str(os.path.join(sample_id, mag_id)),
data_type="mags",
colname="Gene symbol",
)
frequency_list.append(frequency_df)

for dir_format, file_name in zip(
[annotations, mutations, genes],
[
f"{mag_id}_amr_annotations.tsv",
f"{mag_id}_amr_mutations.tsv",
f"{mag_id}_amr_genes.fasta",
],
):
if dir_format in [annotations, mutations]:
des_dir = os.path.join(str(dir_format), sample_id)
os.makedirs(des_dir, exist_ok=True)
else:
des_dir = str(dir_format)
shutil.move(os.path.join(tmp, file_name), des_dir)
# Move mutations file. If it is not created, create an empty mutations file
des_dir_mutations = os.path.join(str(mutations), sample_id)
os.makedirs(des_dir_mutations, exist_ok=True)
if organism:
shutil.move(mutations_path, des_dir_mutations)
else:
with open(
os.path.join(str(mutations), os.path.basename(mutations_path)), "w"
):
pass

frequency_list.append(frequency_df)
# Move annotations file
des_dir_annotations = os.path.join(str(annotations), sample_id)
os.makedirs(des_dir_annotations, exist_ok=True)
shutil.move(annotations_path, des_dir_annotations)

# Move genes file
shutil.move(genes_path, str(genes))

feature_table = create_count_table(df_list=frequency_list)
return (
Expand Down
11 changes: 6 additions & 5 deletions q2_amr/amrfinderplus/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,15 @@ def run_amrfinderplus_n(
coverage_min,
translation_table,
threads,
id,
mag_id,
sample_id,
):
cmd = [
"amrfinder",
"--database",
str(amrfinderplus_db),
"-o",
f"{working_dir}/{id}amr_annotations.tsv",
f"{working_dir}/{mag_id + '_' if mag_id else ''}amr_annotations.tsv",
"--print_node",
]
if dna_sequence:
Expand All @@ -32,7 +33,7 @@ def run_amrfinderplus_n(
"-n",
dna_sequence,
"--nucleotide_output",
f"{working_dir}/{id}amr_genes.fasta",
f"{working_dir}/{mag_id if mag_id else sample_id}_amr_genes.fasta",
]
)
if protein_sequence:
Expand All @@ -41,7 +42,7 @@ def run_amrfinderplus_n(
"-p",
protein_sequence,
"--protein_output",
f"{working_dir}/{id}amr_proteins.fasta",
f"{working_dir}/{mag_id + '_' if mag_id else ''}amr_proteins.fasta",
]
)
if gff:
Expand All @@ -54,7 +55,7 @@ def run_amrfinderplus_n(
"--organism",
organism,
"--mutation_all",
f"{working_dir}/{id}amr_mutations.tsv",
f"{working_dir}/{mag_id + '_' if mag_id else ''}amr_mutations.tsv",
]
)
if plus:
Expand Down
12 changes: 8 additions & 4 deletions q2_amr/plugin_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from q2_types.feature_table import FeatureTable, Frequency
from q2_types.genome_data import Genes, GenomeData
from q2_types.per_sample_sequences import (
Contigs,
MAGs,
PairedEndSequencesWithQuality,
SequencesWithQuality,
Expand All @@ -31,7 +32,7 @@
from qiime2.plugin import Citations, Plugin

from q2_amr import __version__
from q2_amr.amrfinderplus.mags import annotate_mags_amrfinderplus
from q2_amr.amrfinderplus.mags import annotate_sample_data_amrfinderplus
from q2_amr.amrfinderplus.types._format import (
AMRFinderPlusDatabaseDirFmt,
ARMFinderPlusAnnotationDirFmt,
Expand Down Expand Up @@ -1143,8 +1144,11 @@
]

plugin.methods.register_function(
function=annotate_mags_amrfinderplus,
inputs={"mags": SampleData[MAGs], "amrfinderplus_db": AMRFinderPlusDatabase},
function=annotate_sample_data_amrfinderplus,
inputs={
"sequences": SampleData[MAGs | Contigs],
"amrfinderplus_db": AMRFinderPlusDatabase,
},
parameters={
"organism": Str % Choices(organisms),
"plus": Bool,
Expand All @@ -1161,7 +1165,7 @@
("feature_table", FeatureTable[Frequency]),
],
input_descriptions={
"mags": "MAGs to be annotated with AMRFinderPlus.",
"sequences": "MAGs to be annotated with AMRFinderPlus.",
"amrfinderplus_db": "AMRFinderPlus Database.",
},
parameter_descriptions={
Expand Down

0 comments on commit 7577214

Please sign in to comment.