|
11 | 11 | import pandas as pd |
12 | 12 | from StringIO import StringIO |
13 | 13 | import numpy as np |
14 | | -#from numpy import nan |
15 | 14 |
|
## Build a single tab-separated table of active-moiety identifiers by
## left-joining several mapping files (UNII, PT, RxCUI, DrugBank, ChEBI,
## DrOn, OMOP, EPC name/role) and writing the result to disk.

## Define namespaces
CHEBI_BASE_URI = "http://purl.obolibrary.org/obo/"
RXNORM_BASE_URI = "http://purl.bioontology.org/ontology/RXNORM/"

## Define data inputs
## NOTE(review): some file names suggest a different column order or key than
## the `names=` lists used below (e.g. "PreferredSubstanceToUNII" is read as
## unii,pt; "UNIIs-Rxcuis" is read as rxcui,unii; "UNIIToChEBI" is read as
## pt,chebi) -- confirm against the actual files.
PT_UNII = "../LinkedSPLs-update/data/FDA/FDAPreferredSubstanceToUNII.txt"
UNII_RXCUI = "../LinkedSPLs-update/data/UMLS/UNIIs-Rxcuis-from-UMLS.txt"

PT_CHEBI = "mappings/UNIIToChEBI-06102015.txt"
PT_DRUGBANK = "mappings/fda-substance-preferred-name-to-drugbank-06102015.txt"

UNII_NUI_PREFERRED_NAME_ROLE = "mappings/EPC_extraction_most_recent_06102015.txt"
DRON_CHEBI_RXCUI = "mappings/cleaned-dron-chebi-rxcui-ingredient-06222015.txt"
OMOP_RXCUI = "mappings/active-ingredient-omopid-rxcui-09042015.dsv"

## Get UNII - PT - RXCUI
## Every input is read with explicit `names=`, i.e. the files are treated as
## header-less.
unii_pt_cols = ['unii', 'pt']
unii_pt_DF = pd.read_csv(PT_UNII, sep='\t', names=unii_pt_cols)

rxcui_unii_cols = ['rxcui', 'unii']
rxcui_unii_DF = pd.read_csv(UNII_RXCUI, sep='|', names=rxcui_unii_cols)
# 'rxcui' is cast to str in every frame that is later joined on it, so the
# join keys are compared as strings.
# NOTE(review): if a file contains blank rxcui values, pandas would parse the
# column as float before this cast (giving keys such as '123.0' / 'nan') --
# confirm the inputs have no blanks.
rxcui_unii_DF['rxcui'] = rxcui_unii_DF['rxcui'].astype('str')

# Left join: every (unii, pt) row is kept; rows without an rxcui get NaN.
unii_pt_rxcui_DF = unii_pt_DF.merge(rxcui_unii_DF, on=['unii'], how='left')
# DataFrame.info() prints its summary itself and returns None, so it is called
# directly; wrapping it in print would emit an extra "None" line.
unii_pt_rxcui_DF.info()

## read mappings of pt and drugbank uri
pt_drugbank_cols = ['pt', 'db_uri1', 'db_uri2']
pt_drugbank_DF = pd.read_csv(PT_DRUGBANK, sep='\t', names=pt_drugbank_cols)

## read mappings of pt and chebi
pt_chebi_cols = ['pt', 'chebi']
pt_chebi_DF = pd.read_csv(PT_CHEBI, sep='\t', names=pt_chebi_cols)

## read mappings of dron and rxcui
## Three columns are named but only positions 0 and 2 ('dron', 'rxcui') are
## loaded; the 'chebi' column of this file is skipped.
dron_chebi_rxcui_cols = ['dron', 'chebi', 'rxcui']
dron_chebi_rxcui_DF = pd.read_csv(DRON_CHEBI_RXCUI, sep='|', names=dron_chebi_rxcui_cols, usecols=[0, 2])
dron_chebi_rxcui_DF['rxcui'] = dron_chebi_rxcui_DF['rxcui'].astype('str')

## read mappings of unii, nui and preferredNameAndRole
## The 'setid' column is read and then dropped by the column selection.
unii_nui_namerole_cols = ['setid', 'unii', 'nui', 'nameAndRole']
unii_nui_namerole_DF = pd.read_csv(UNII_NUI_PREFERRED_NAME_ROLE, sep='\t', names=unii_nui_namerole_cols)[['unii', 'nui', 'nameAndRole']]

## read mappings of omopid and rxcui
omop_rxcui_cols = ['omopid', 'rxcui']
omop_rxcui_DF = pd.read_csv(OMOP_RXCUI, sep='|', names=omop_rxcui_cols)
omop_rxcui_DF['rxcui'] = omop_rxcui_DF['rxcui'].astype('str')

## merge pt, unii, rxcui and drugbank uri
unii_pt_rxcui_db_DF = unii_pt_rxcui_DF.merge(pt_drugbank_DF, on=['pt'], how='left')

# Intermediate output; tab-separated despite the .csv extension.
unii_pt_rxcui_db_DF.to_csv('PT-RXCUI-UNII-DB.csv', sep='\t', index=False)

## merge chebi
merged_chebi_DF = unii_pt_rxcui_db_DF.merge(pt_chebi_DF, on=['pt'], how='left')

## merge dron id
merged_dron_DF = merged_chebi_DF.merge(dron_chebi_rxcui_DF, on=['rxcui'], how='left')

## merge omop id
merged_omop_DF = merged_dron_DF.merge(omop_rxcui_DF, on=['rxcui'], how='left')

## merge <nui> and <preferred name and role>
merged_epc_DF = merged_omop_DF.merge(unii_nui_namerole_DF, on=['unii'], how='left')

# Called directly for the same reason as above: info() prints and returns None.
merged_epc_DF.info()

# Final output; tab-separated despite the .csv extension.
merged_epc_DF.to_csv('mergedActiveMoiety.csv', sep='\t', index=False)
81 | 86 |
|
0 commit comments