Skip to content

Commit

Permalink
added another column as index for the counttable when working with al…
Browse files Browse the repository at this point in the history
…lele mapping
  • Loading branch information
VinzentRisch committed Jan 10, 2024
1 parent 94069c6 commit 069df6e
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 21 deletions.
7 changes: 4 additions & 3 deletions q2_amr/card/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,13 +91,14 @@ def load_card_db(
)


def read_in_txt(path: str, samp_bin_name: str, data_type):
def read_in_txt(path: str, samp_bin_name: str, data_type: str, map_type=None):
# Read the tab-separated txt file into a pd.DataFrame
df = pd.read_csv(path, sep="\t")

# Process the df depending on the data type (from reads or mags)
# Process the df depending on the data type and mapping type
if data_type == "reads":
df = df[["ARO Term", "All Mapped Reads"]]
colname = "Reference Sequence" if map_type == "allele" else "ARO Term"
df = df[[colname, "All Mapped Reads"]]
df.rename(columns={"All Mapped Reads": samp_bin_name}, inplace=True)
else:
df = df[["Best_Hit_ARO"]]
Expand Down
60 changes: 46 additions & 14 deletions q2_amr/tests/card/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,23 @@ class TestAnnotateReadsCARD(TestPluginBase):
@classmethod
def setUpClass(cls):
cls.count_df_list = []
for colname, ARG, sample in zip(
["ARO Term", "ARO Term", "Best_Hit_ARO"],
["mdtF", "mdtE", "mdtF"],
["sample1", "sample2", "sample1"],
for colname, values in zip(
["Reference Sequence", "ARO Term", "Best_Hit_ARO"],
[
[
"ARO:3000796|ID:121|Name:mdtF|NCBI:U00096.1",
"ARO:3000815|ID:154|Name:mgrA|NCBI:BA000018.3",
"ARO:3000805|ID:172|Name:OprN|NCBI:AE004091.2",
"ARO:3000026|ID:377|Name:mepA|NCBI:AY661734.1",
],
["mdtF", "mgrA", "OprN", "mepA"],
["mdtF", "mgrA", "OprN", "mepA"],
],
):
df = pd.DataFrame(
{
colname: [ARG, "mgrA", "OprN", "mepA"],
sample: ["1", "1", "1", "1"],
colname: values,
"sample1": ["1", "1", "1", "1"],
}
)
cls.count_df_list.append(df)
Expand Down Expand Up @@ -125,24 +133,48 @@ def test_exception_raised(self):
def test_read_in_txt_mags(self):
# Test read_in_txt with output data from annotate_mags_card
self.read_in_txt_test_body(
"output.mags.txt", "sample1", self.count_df_list[2], "mags"
filename="output.mags.txt",
samp_bin_name="sample1",
exp=self.count_df_list[2],
data_type="mags",
)

def test_read_in_txt_reads(self):
def test_read_in_txt_reads_allele(self):
# Test read_in_txt with allele mapping output data from annotate_reads_card
self.read_in_txt_test_body(
"output.allele_mapping_data.txt", "sample1", self.count_df_list[0], "reads"
filename="output.allele_mapping_data.txt",
samp_bin_name="sample1",
exp=self.count_df_list[0],
data_type="reads",
map_type="allele",
)

def read_in_txt_test_body(self, txt_file, samp_bin_name, mapping_data, data_type):
# Create expected and observed count dataframes and compares them
exp = mapping_data
obs = read_in_txt(self.get_data_path(txt_file), samp_bin_name, data_type)
def test_read_in_txt_reads_gene(self):
# Test read_in_txt with gene mapping output data from annotate_reads_card
self.read_in_txt_test_body(
filename="output.gene_mapping_data.txt",
samp_bin_name="sample1",
exp=self.count_df_list[1],
data_type="reads",
map_type="gene",
)

def read_in_txt_test_body(
self, filename, samp_bin_name, exp, data_type, map_type=None
):
# Create expected and observed count dataframes and compare them
obs = read_in_txt(
self.get_data_path(filename), samp_bin_name, data_type, map_type
)
pd.testing.assert_frame_equal(exp, obs)

def test_create_count_table(self):
# Create list of dataframes to be used by create_count_table
df_list = [self.count_df_list[1].copy(), self.count_df_list[1].copy()]
df_list[1].iloc[0, 0] = "mdtE"
df_list[1].rename(columns={"sample1": "sample2"}, inplace=True)

# Create observed count table with create_count_table function
df_list = [self.count_df_list[0], self.count_df_list[1]]
obs = create_count_table(df_list)
obs = obs.astype(str)

Expand Down
8 changes: 4 additions & 4 deletions q2_amr/tests/data/output.gene_mapping_data.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
ARO Term ARO Accession Reference Model Type Reference DB Alleles with Mapped Reads Reference Allele(s) Identity to CARD Reference Protein (%) Resistomes & Variants: Observed in Genome(s) Resistomes & Variants: Observed in Plasmid(s) Resistomes & Variants: Observed Pathogen(s) Completely Mapped Reads Mapped Reads with Flanking Sequence All Mapped Reads Average Percent Coverage Average Length Coverage (bp) Average MAPQ (Completely Mapped Reads) Number of Mapped Baits Number of Mapped Baits with Reads Average Number of reads per Bait Number of reads per Bait Coefficient of Variation (%) Number of reads mapping to baits and mapping to complete gene Number of reads mapping to baits and mapping to complete gene (%) Mate Pair Linkage (# reads) Reference Length AMR Gene Family Drug Class Resistance Mechanism
mdtF 3000796 protein homolog model CARD 1 100.0 no data no data Escherichia coli 2.00 0.00 2.00 8.09 252.00 193.00 0 0 0 0 N/A N/A 3114 resistance-nodulation-cell division (RND) antibiotic efflux pump macrolide antibiotic; fluoroquinolone antibiotic; penam antibiotic efflux
mgrA 3000815 protein homolog model CARD 1 100.0 no data no data Staphylococcus aureus 1.00 0.00 1.00 19.59 87.00 172.00 0 0 0 0 N/A N/A 444 ATP-binding cassette (ABC) antibiotic efflux pump; major facilitator superfamily (MFS) antibiotic efflux pump fluoroquinolone antibiotic; cephalosporin; penam; tetracycline antibiotic; peptide antibiotic; disinfecting agents and antiseptics antibiotic efflux
OprN 3000805 protein homolog model CARD 1 100.0 no data no data Pseudomonas aeruginosa 2.00 0.00 2.00 17.76 252.00 193.00 0 0 0 0 N/A N/A 1419 resistance-nodulation-cell division (RND) antibiotic efflux pump fluoroquinolone antibiotic; diaminopyrimidine antibiotic; phenicol antibiotic antibiotic efflux
mepA 3000026 protein homolog model CARD 1 100.0 no data no data Staphylococcus aureus 2.00 0.00 2.00 17.70 240.00 190.50 0 0 0 0 N/A N/A 1356 multidrug and toxic compound extrusion (MATE) transporter glycylcycline; tetracycline antibiotic antibiotic efflux
mdtF 3000796 protein homolog model CARD 1 100.0 no data no data Escherichia coli 1.00 0.00 1 8.09 252.00 193.00 0 0 0 0 N/A N/A 3114 resistance-nodulation-cell division (RND) antibiotic efflux pump macrolide antibiotic; fluoroquinolone antibiotic; penam antibiotic efflux
mgrA 3000815 protein homolog model CARD 1 100.0 no data no data Staphylococcus aureus 1.00 0.00 1 19.59 87.00 172.00 0 0 0 0 N/A N/A 444 ATP-binding cassette (ABC) antibiotic efflux pump; major facilitator superfamily (MFS) antibiotic efflux pump fluoroquinolone antibiotic; cephalosporin; penam; tetracycline antibiotic; peptide antibiotic; disinfecting agents and antiseptics antibiotic efflux
OprN 3000805 protein homolog model CARD 1 100.0 no data no data Pseudomonas aeruginosa 1.00 0.00 1 17.76 252.00 193.00 0 0 0 0 N/A N/A 1419 resistance-nodulation-cell division (RND) antibiotic efflux pump fluoroquinolone antibiotic; diaminopyrimidine antibiotic; phenicol antibiotic antibiotic efflux
mepA 3000026 protein homolog model CARD 1 100.0 no data no data Staphylococcus aureus 1.00 0.00 1 17.70 240.00 190.50 0 0 0 0 N/A N/A 1356 multidrug and toxic compound extrusion (MATE) transporter glycylcycline; tetracycline antibiotic antibiotic efflux

0 comments on commit 069df6e

Please sign in to comment.