Skip to content

Commit e3efdea

Browse files
committed
Use unstitched_cascade.csv for proviral pipeline
1 parent 873613d commit e3efdea

7 files changed

+19
-5
lines changed

Singularity

+1-1
Original file line number | Diff line number | Diff line change
@@ -157,7 +157,7 @@ From: python:3.8
157157
conseq_all_csv concordance_csv concordance_seed_csv failed_align_csv \
158158
coverage_scores_csv coverage_maps_tar aligned_csv g2p_aligned_csv \
159159
genome_coverage_csv genome_coverage_svg genome_concordance_svg \
160-
unstitched_conseq_csv unstitched_contigs_csv contigs_csv \
160+
unstitched_cascade_csv unstitched_conseq_csv unstitched_contigs_csv contigs_csv \
161161
read_entropy_csv conseq_region_csv conseq_stitched_csv
162162
KIVE_THREADS 2
163163
KIVE_MEMORY 6000

docs/steps.md

+7
Original file line number | Diff line number | Diff line change
@@ -353,6 +353,13 @@ Individual files are described after the list of steps.
353353
* unstitched_conseq.csv
354354
* region - the region mapped to
355355
* sequence - the consensus sequence used
356+
* unstitched_cascade.csv - number of read pairs that flow through the pipeline steps
357+
* demultiplexed - count from the raw FASTQ
358+
* v3loop - aligned with V3LOOP
359+
* g2p - valid reads to count in G2P
360+
* prelim_map - mapped to other references on first pass
361+
* remap - mapped to other references after remapping
362+
* aligned - aligned with a reference and merged with mate
356363
* resistance.csv
357364
* region - the region code, like PR or RT
358365
* drug_class - the drug class code from the HIVdb rules, like NRTI

micall/monitor/kive_watcher.py

+4-2
Original file line number | Diff line number | Diff line change
@@ -59,6 +59,7 @@
5959
'resistance_consensus_csv',
6060
'wg_fasta',
6161
'mid_fasta',
62+
'unstitched_cascade_csv',
6263
'unstitched_conseq_csv',
6364
'unstitched_contigs_csv',
6465
'contigs_csv',
@@ -939,12 +940,13 @@ def run_proviral_pipeline(self, sample_watcher, folder_watcher, description):
939940
run_dataset['argument_name']: run_dataset['dataset']
940941
for run_dataset in main_run['datasets']
941942
if run_dataset['argument_name'] in ('sample_info_csv',
943+
'unstitched_cascade_csv',
942944
'unstitched_conseq_csv',
943-
'unstitched_contigs_csv',
944-
'cascade_csv')}
945+
'unstitched_contigs_csv')}
945946
input_datasets = {
946947
argument_name: self.kive_retry(lambda: self.session.get(url).json())
947948
for argument_name, url in input_dataset_urls.items()}
949+
input_datasets['cascade_csv'] = input_datasets.pop('unstitched_cascade_csv')
948950
input_datasets['conseqs_csv'] = input_datasets.pop('unstitched_conseq_csv')
949951
input_datasets['contigs_csv'] = input_datasets.pop('unstitched_contigs_csv')
950952
run = self.find_or_launch_run(

micall/tests/test_kive_watcher.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -1765,7 +1765,7 @@ def test_launch_proviral_run(raw_data_with_two_samples, mock_open_kive):
17651765
argument_name='unstitched_conseq_csv'),
17661766
dict(dataset='/datasets/113/',
17671767
argument_type='O',
1768-
argument_name='cascade_csv')]] # run datasets
1768+
argument_name='unstitched_cascade_csv')]] # run datasets
17691769
mock_session.get.return_value.json.side_effect = [
17701770
dict(url='/datasets/110/', id=110),
17711771
dict(url='/datasets/111/', id=111),

micall_docker.py

+2-1
Original file line number | Diff line number | Diff line change
@@ -1099,7 +1099,8 @@ def collate_samples(run_info: RunInfo):
10991099
'concordance.csv',
11001100
'concordance_seed.csv']
11011101
if run_info.is_denovo:
1102-
filenames += ['conseq_stitched.csv', 'conseq_region.csv', 'unstitched_conseq.csv']
1102+
filenames += ['conseq_stitched.csv', 'conseq_region.csv',
1103+
'unstitched_cascade.csv', 'unstitched_conseq.csv', 'unstitched_contigs.csv']
11031104
for filename in filenames:
11041105
out_path = run_info.output_path
11051106
with open(os.path.join(out_path, filename), 'w') as fout:

micall_kive.py

+3
Original file line number | Diff line number | Diff line change
@@ -80,6 +80,9 @@ def parse_args():
8080
action='store_true',
8181
help='Use de novo assembly instead of mapping to '
8282
'reference sequences.')
83+
parser.add_argument('unstitched_cascade_csv',
84+
nargs='?',
85+
help='count of reads at each step')
8386
parser.add_argument('unstitched_conseq_csv',
8487
nargs='?',
8588
help='CSV containing mapping unstitched consensus sequences')

release_test_microtest.py

+1
Original file line number | Diff line number | Diff line change
@@ -465,6 +465,7 @@ def process_sample(self, fastq_file: Path):
465465
'genome_coverage.csv',
466466
'genome_coverage.svg',
467467
'genome_concordance.svg',
468+
'unstitched_cascade.csv',
468469
'unstitched_conseq.csv',
469470
'unstitched_contigs.csv',
470471
'contigs.csv',

0 commit comments

Comments
 (0)