Skip to content

Commit e015893

Browse files
committed
Try speeding up the report with pandas
1 parent 780891b commit e015893

File tree

1 file changed

+4
-16
lines changed

1 file changed

+4
-16
lines changed

pangolin/scripts/report_results.py

Lines changed: 4 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -35,22 +35,10 @@ def make_objects(background_data, lineages_present):
3535
lineages_to_taxa = defaultdict(list)
3636
lin_obj_dict = {}
3737

38-
with open(background_data,newline="") as f:
39-
reader = csv.DictReader(f)
40-
for row in reader:
41-
name = row["sequence_name"]
42-
lin_string = row["lineage"]
43-
date = row["sample_date"]
44-
country = row["country"]
45-
46-
tax_name = f"{name}|{country}|{date}"
47-
48-
if lin_string in lineages_present:
49-
new_taxon = classes.taxon(tax_name, lin_string)
50-
taxa.append(new_taxon)
51-
52-
lineages_to_taxa[lin_string].append(new_taxon)
53-
38+
background_df = pd.read_csv(background_data).query("lineage in @lineages_present")
39+
background_df['taxa'] = background_df.apply(lambda r: classes.taxon(f"{r['sequence_name']}|{r['country']}|{r['sample_date']}", r['lineage']), axis=1)
40+
lineages_to_taxa = background_df.groupby("lineage")["taxa"].apply(list).to_dict()
41+
taxa = list(background_df['taxa'])
5442

5543
for lin, taxa in lineages_to_taxa.items():
5644
l_o = classes.lineage(lin, taxa)

0 commit comments

Comments
 (0)