Skip to content

Commit e1da72b

Browse files
committed
WIP: create proviral landscape output, for #16
1 parent 4a382da commit e1da72b

File tree

3 files changed

+66
-1
lines changed

3 files changed

+66
-1
lines changed

Singularity

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ From: centos:7
1313
MAINTAINER BC CfE in HIV/AIDS https://github.com/cfe-lab/
1414
KIVE_INPUTS sample_info_csv contigs_csv conseqs_csv cascade_csv
1515
KIVE_OUTPUTS outcome_summary_csv conseqs_primers_csv contigs_primers_csv \
16-
table_precursor_csv hivseqinr_results_tar
16+
table_precursor_csv proviral_landscape_csv hivseqinr_results_tar
1717
KIVE_THREADS 1
1818
KIVE_MEMORY 6000
1919

gene_splicer/sample.py

+5
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,9 @@ def parse_args():
4040
parser.add_argument('table_precursor_csv',
4141
help='Sequence data ready to upload',
4242
type=FileType('w'))
43+
parser.add_argument('proviral_landscape_csv',
44+
help='Data for proviral landscape plot',
45+
type=FileType('w'))
4346
parser.add_argument('hivseqinr_results_tar',
4447
help="Archive file with HIVSeqinR's final results folder.",
4548
type=FileType('wb'))
@@ -100,12 +103,14 @@ def main():
100103
for file in fasta_files:
101104
gene_splicer.run(file, outdir=outpath)
102105
utils.generate_table_precursor(name=run_name, outpath=outpath)
106+
utils.generate_proviral_landscape_csv(name=run_name, outpath=outpath)
103107
copy_output(outpath / 'outcome_summary.csv', args.outcome_summary_csv)
104108
copy_output(outpath / (run_name + '_conseqs_primer_analysis.csv'),
105109
args.conseqs_primers_csv)
106110
copy_output(outpath / (run_name + '_contigs_primer_analysis.csv'),
107111
args.contigs_primers_csv)
108112
copy_output(outpath / 'table_precursor.csv', args.table_precursor_csv)
113+
copy_output(outpath / 'proviral_landscape.csv', args.proviral_landscape_csv)
109114

110115

111116
if __name__ == '__main__':

gene_splicer/utils.py

+60
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import csv
12
import logging
23
import os
34
import re
@@ -493,6 +494,65 @@ def generate_table_precursor_2(hivseqinr_resultsfile, filtered_file,
493494
return table_precursorfile
494495

495496

497+
def generate_proviral_landscape_csv(outpath):
498+
proviral_landscape_csv = os.path.join(outpath, 'proviral_landscape.csv')
499+
landscape_rows = []
500+
501+
table_precursor_csv = os.path.join(outpath, 'table_precursor.csv')
502+
blastn_csv = glob.glob(
503+
os.path.join(
504+
outpath,
505+
'hivseqinr*',
506+
'Results_Intermediate',
507+
'Output_Blastn_HXB2MEGA28_tabdelim.txt'
508+
)
509+
)[0]
510+
511+
blastn_columns = ['qseqid',
512+
'qlen',
513+
'sseqid',
514+
'sgi',
515+
'slen',
516+
'qstart',
517+
'qend',
518+
'sstart',
519+
'send',
520+
'evalue',
521+
'bitscore',
522+
'length',
523+
'pident',
524+
'nident',
525+
'btop',
526+
'stitle',
527+
'sstrand']
528+
with open(blastn_csv, 'r') as blastn_file:
529+
blastn_reader = DictReader(blastn_file, fieldnames=blastn_columns)
530+
for row in blastn_reader:
531+
if row['qseqid'] in ['8E5LAV', 'HXB2']:
532+
# skip the positive control rows
533+
continue
534+
# TODO: have to skip weird entries at start and end
535+
[run_name, sample_name, reference, seqtype] = row['seqid'].split('::', expand=True)
536+
landscape_entry = {'ref_start': row['sstart'],
537+
'ref_end': row['send'],
538+
'samp_name': sample_name,
539+
'samp_id': f"{run_name}::{sample_name}"}
540+
landscape_rows.append(landscape_entry)
541+
542+
with open(table_precursor_csv, 'r') as tab_prec:
543+
tab_prec_reader = DictReader(tab_prec)
544+
for row in tab_prec_reader:
545+
samp_name = row['sample']
546+
verdict = row['MyVerdict']
547+
for entry in landscape_rows:
548+
if entry['samp_name'] == samp_name:
549+
entry['defect'] = verdict
550+
551+
landscape_columns = ['samp_id', 'ref_start', 'ref_end', 'defect']
552+
with open(proviral_landscape_csv, 'w') as landscape_file:
553+
landscape_writer = csv.DictWriter(landscape_file, fieldnames=landscape_columns)
554+
555+
496556
def get_softclipped_region(query, alignment, alignment_path):
497557
try:
498558
size, op = alignment.iloc[0]['cigar'][0]

0 commit comments

Comments
 (0)