-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathorg_bag.py
executable file
·36 lines (29 loc) · 990 Bytes
/
org_bag.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#!/usr/bin/env python
import argparse, json
# Command-line interface: one or more input CSV paths, an output JSON path,
# and the clustering strategy to apply to every input.
# The text is passed as `description`; given positionally it would be taken
# as `prog` and replace the program name in the usage line.
psr = argparse.ArgumentParser(description="baseline solution")
# Required: the script ends by opening this path for writing, so leaving it
# out would otherwise only fail after every input had been processed.
psr.add_argument("-o", dest='opt', required=True, help="output")
psr.add_argument('ipt', nargs="+", help="input")
# Limited to the strategies the script implements, so a typo is rejected
# here with a usage message instead of surfacing later as a NameError.
psr.add_argument('--field', default="org", choices=["org", "uniglue"],
                 help="field input")
args = psr.parse_args()
import pandas as pd, re, os
from glob import glob
from functools import reduce
# Input file paths taken from the positional command-line arguments.
fl = args.ipt
def org_bag(fn):
    """Cluster the ids in one CSV file by organisation.

    The file's base name with its extension removed is used as the key.
    Underscores in the key stand for any run of non-word characters; only
    rows whose ``name`` column matches the key in full (case-insensitively)
    are kept. The kept rows are grouped on ``org`` and the ``id`` values of
    each group are collected into a list.

    NOTE: pandas' ``groupby`` skips rows whose group key is missing by
    default, so matching rows without an ``org`` do not appear in the result.

    Args:
        fn: Path to a CSV file with ``id``, ``name`` and ``org`` columns.

    Returns:
        A ``(key, clusters)`` tuple, where ``clusters`` is a list of id
        lists, one per distinct ``org`` among the matching rows.
    """
    au = pd.read_csv(fn)
    # splitext instead of a fixed [:-4] slice, so the key is correct for any
    # extension length (or none at all).
    k = os.path.splitext(os.path.basename(fn))[0]
    # Escape every name part so regex metacharacters in the file name are
    # matched literally; only the underscores become the \W+ separator.
    rk = r"\W+".join(re.escape(part) for part in k.split("_"))
    # na=False treats rows with a missing name as non-matches; without it the
    # mask would contain NaN, which pandas refuses to use for row selection.
    matches = au["name"].str.contains(
        "^{}$".format(rk), flags=re.IGNORECASE, na=False
    )
    sau = au[matches]
    cluster = sau.groupby('org')['id'].apply(list)
    return k, list(cluster.values)
def uniglue_bag(fn):
    """Cluster the ids in one CSV file by its ``group_result`` column.

    The file's base name with its extension removed is used as the key. All
    rows are grouped on ``group_result`` and the ``id`` values of each group
    are collected into a list.

    NOTE: pandas' ``groupby`` skips rows whose group key is missing by
    default, so rows without a ``group_result`` do not appear in the result.

    Args:
        fn: Path to a CSV file with ``id`` and ``group_result`` columns.

    Returns:
        A ``(key, clusters)`` tuple, where ``clusters`` is a list of id
        lists, one per distinct ``group_result`` value.
    """
    au = pd.read_csv(fn)
    # splitext instead of a fixed [:-4] slice, so the key is correct for any
    # extension length (or none at all).
    k = os.path.splitext(os.path.basename(fn))[0]
    cluster = au.groupby("group_result")['id'].apply(list)
    return k, list(cluster.values)
# Map each supported --field value to the function that clusters one file.
bag_builders = {"org": org_bag, "uniglue": uniglue_bag}
if args.field not in bag_builders:
    # Exit with a usage message rather than hitting a NameError on an
    # unbound `func` further down.
    psr.error("unsupported --field value: {!r}".format(args.field))
func = bag_builders[args.field]
# Every call returns a (key, clusters) pair, so the pairs build the dict
# directly without an intermediate list.
rst = dict(func(fn) for fn in fl)
# The context manager closes (and thereby flushes) the output file.
with open(args.opt, 'w') as out:
    json.dump(rst, out)