Skip to content

Commit d9081bf

Browse files
committed
bugfix: mito annotations incorporate variant fields
1 parent 877eb16 commit d9081bf

File tree

4 files changed

+17
-10
lines changed

4 files changed

+17
-10
lines changed

v03_pipeline/lib/tasks/exports/fields.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import hail as hl
22

33
from v03_pipeline.lib.model import DatasetType, ReferenceGenome, SampleType
4+
from v03_pipeline.lib.tasks.exports.misc import reformat_transcripts_for_export
45

56

67
def reference_independent_contig(locus: hl.LocusExpression):
@@ -323,7 +324,11 @@ def get_consequences_fields(
323324
'sortedTranscriptConsequences': ht.sortedTranscriptConsequences,
324325
},
325326
DatasetType.MITO: lambda ht: {
326-
'sortedTranscriptConsequences': ht.sortedTranscriptConsequences,
327+
# MITO transcripts are not exported to their own table,
328+
# but the structure should be preserved here.
329+
'sortedTranscriptConsequences': hl.enumerate(
330+
ht.sortedTranscriptConsequences,
331+
).starmap(reformat_transcripts_for_export),
327332
},
328333
DatasetType.SV: lambda ht: {
329334
'sortedGeneConsequences': ht.sortedGeneConsequences,

v03_pipeline/lib/tasks/exports/misc.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ def array_structexpression_fields(ht: hl.Table):
4747

4848

4949
def reformat_transcripts_for_export(i: int, s: hl.StructExpression):
50-
return (
50+
formatted_s = (
5151
s.annotate(
5252
majorConsequence=s.consequenceTerms.first(),
5353
transcriptRank=i,
@@ -62,6 +62,7 @@ def reformat_transcripts_for_export(i: int, s: hl.StructExpression):
6262
transcriptRank=i,
6363
).drop('isLofNagnag', 'lofFilters')
6464
)
65+
return sorted_hl_struct(formatted_s)
6566

6667

6768
def export_parquet_filterable_transcripts_fields(

v03_pipeline/lib/tasks/exports/write_new_transcripts_parquet.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
from v03_pipeline.lib.tasks.exports.misc import (
1414
camelcase_array_structexpression_fields,
1515
reformat_transcripts_for_export,
16-
sorted_hl_struct,
1716
unmap_formatting_annotation_enums,
1817
)
1918
from v03_pipeline.lib.tasks.files import GCSorLocalFolderTarget, GCSorLocalTarget
@@ -62,7 +61,5 @@ def create_table(self) -> None:
6261
key_=ht.key_,
6362
transcripts=hl.enumerate(
6463
ht.sortedTranscriptConsequences,
65-
)
66-
.starmap(reformat_transcripts_for_export)
67-
.map(sorted_hl_struct),
64+
).starmap(reformat_transcripts_for_export),
6865
)

v03_pipeline/lib/tasks/exports/write_new_variants_parquet_test.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -407,16 +407,20 @@ def test_mito_write_new_variants_parquet_test(
407407
'sortedTranscriptConsequences': [
408408
{
409409
'aminoAcids': None,
410+
'biotype': 'Mt_tRNA',
410411
'canonical': 1,
411412
'codons': None,
413+
'consequenceTerms': ['non_coding_transcript_exon_variant'],
412414
'geneId': 'ENSG00000210049',
413415
'hgvsc': 'ENST00000387314.1:n.2T>C',
414416
'hgvsp': None,
417+
'loftee': {
418+
'isLofNagnag': None,
419+
'lofFilters': None,
420+
},
421+
'majorConsequence': 'non_coding_transcript_exon_variant',
415422
'transcriptId': 'ENST00000387314',
416-
'isLofNagnag': None,
417-
'biotype': 'Mt_tRNA',
418-
'consequenceTerms': ['non_coding_transcript_exon_variant'],
419-
'lofFilters': None,
423+
'transcriptRank': 0,
420424
},
421425
],
422426
},

0 commit comments

Comments
 (0)