From fcb407c0072a3a58081e3098aab5d7f8d29b4ebf Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Thu, 19 Dec 2024 11:54:30 -0800 Subject: [PATCH] transform: Move annotations to _after_ accessions Allows overrides of accessions if there is something that we want to manually correct in the accession links. --- bin/transform-genbank | 3 +-- bin/transform-gisaid | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/bin/transform-genbank b/bin/transform-genbank index 95c00f2d..5af323e4 100755 --- a/bin/transform-genbank +++ b/bin/transform-genbank @@ -207,8 +207,8 @@ if __name__ == '__main__': | ParseGeographicColumnsGenbank( base / 'source-data/us-state-codes.tsv' ) | AbbreviateAuthors() | ApplyUserGeoLocationSubstitutionRules(geoRules) - | MergeUserAnnotatedMetadata(annotations, idKey = 'genbank_accession' ) | MergeUserAnnotatedMetadata(accessions, idKey = 'genbank_accession_rev' ) + | MergeUserAnnotatedMetadata(annotations, idKey = 'genbank_accession' ) | FillDefaultLocationData() | patchUKData(args.cog_uk_accessions, args.cog_uk_metadata) | GenbankProblematicFilter( args.problem_data, @@ -301,4 +301,3 @@ if __name__ == '__main__': strain_name = updated_strain_names_by_line_no[entry[LINE_NUMBER_KEY]] print( '>' , strain_name , sep='' , file= fasta_OUT) print( entry['sequence'] , file= fasta_OUT) - diff --git a/bin/transform-gisaid b/bin/transform-gisaid index eb1bea0d..e8f95a9e 100755 --- a/bin/transform-gisaid +++ b/bin/transform-gisaid @@ -182,8 +182,8 @@ if __name__ == '__main__': pipeline = (pipeline | ApplyUserGeoLocationSubstitutionRules(geoRules) - | MergeUserAnnotatedMetadata(annotations) | MergeUserAnnotatedMetadata(accessions) + | MergeUserAnnotatedMetadata(annotations) | FillDefaultLocationData() )