try:
    from StringIO import StringIO  # Python 2 location
except ImportError:
    from io import StringIO  # Python 3 location of the same class

import numpy as np
import pandas as pd

## Define namespaces
# Base URIs for the ChEBI (OBO PURL) and RxNorm (BioPortal PURL) namespaces.
CHEBI_BASE_URI = "http://purl.obolibrary.org/obo/"
RXNORM_BASE_URI = "http://purl.bioontology.org/ontology/RXNORM/"

## Define data inputs
# All paths are relative to the directory the script is run from.
# These two inputs live in the ../LinkedSPLs-update tree.
PT_UNII = "../LinkedSPLs-update/data/FDA/FDAPreferredSubstanceToUNII.txt"
UNII_RXCUI = "../LinkedSPLs-update/data/UMLS/UNIIs-Rxcuis-from-UMLS.txt"

# The remaining inputs live in the local mappings/ directory.
PT_CHEBI = "mappings/UNIIToChEBI-06102015.txt"
PT_DRUGBANK = "mappings/fda-substance-preferred-name-to-drugbank-06102015.txt"
UNII_NUI_PREFERRED_NAME_ROLE = "mappings/EPC_extraction_most_recent_06102015.txt"
DRON_CHEBI_RXCUI = "mappings/cleaned-dron-chebi-rxcui-ingredient-06222015.txt"
OMOP_RXCUI = "mappings/active-ingredient-omopid-rxcui-09042015.dsv"

## Get UNII - PT - RXCUI
# PT_UNII is read as tab-separated. Column names are supplied explicitly,
# so every row of the file (including the first) is treated as data.
unii_pt_cols = ['unii', 'pt']
unii_pt_DF = pd.read_csv(PT_UNII, sep='\t', names=unii_pt_cols)

# UNII_RXCUI is read as pipe-separated (rxcui, unii) pairs.
rxcui_unii_cols = ['rxcui', 'unii']
rxcui_unii_DF = pd.read_csv(UNII_RXCUI, sep='|', names=rxcui_unii_cols)
# rxcui is cast to str here, as it is for the other frames that are later
# merged on 'rxcui'.
rxcui_unii_DF['rxcui'] = rxcui_unii_DF['rxcui'].astype('str')

# Left join: every (unii, pt) row is kept even when no rxcui matches.
unii_pt_rxcui_DF = unii_pt_DF.merge(rxcui_unii_DF, on=['unii'], how='left')
# DataFrame.info() writes its summary to stdout and returns None, so it is
# called directly; printing its return value only adds a stray "None" line.
unii_pt_rxcui_DF.info()

## read mappings of pt and drugbank uri
# Tab-separated file read into three named columns: the preferred term and
# two DrugBank URI columns. No header row is expected, because names= is
# given without header=.
pt_drugbank_cols = ['pt', 'db_uri1', 'db_uri2']
pt_drugbank_DF = pd.read_csv(PT_DRUGBANK, sep='\t', names=pt_drugbank_cols)

## read mappings of pt and chebi
# Tab-separated file read into two named columns (preferred term, ChEBI).
# NOTE(review): the input file name says "UNIIToChEBI" but the first column
# is labelled and later merged as 'pt' -- confirm the file really holds
# preferred terms.
pt_chebi_cols = ['pt', 'chebi']
pt_chebi_DF = pd.read_csv(PT_CHEBI, sep='\t', names=pt_chebi_cols)

## read mappings of dron and rxcui
# Pipe-separated file with three positional columns. usecols=[0, 2] loads
# only the first and third, which the code below uses as 'dron' and 'rxcui';
# the middle (chebi) column is skipped.
dron_chebi_rxcui_cols = ['dron', 'chebi', 'rxcui']
dron_chebi_rxcui_DF = pd.read_csv(
    DRON_CHEBI_RXCUI,
    sep='|',
    names=dron_chebi_rxcui_cols,
    usecols=[0, 2],
)
# Cast the join key to str so it is compared as text in the later merge
# on 'rxcui'.
dron_chebi_rxcui_DF['rxcui'] = dron_chebi_rxcui_DF['rxcui'].astype('str')

## read mappings of unii, nui and preferredNameAndRole
# Tab-separated file with four named columns. 'setid' is read but dropped
# straight away; only unii, nui and nameAndRole are kept.
unii_nui_namerole_cols = ['setid', 'unii', 'nui', 'nameAndRole']
unii_nui_namerole_DF = pd.read_csv(
    UNII_NUI_PREFERRED_NAME_ROLE,
    sep='\t',
    names=unii_nui_namerole_cols,
)[['unii', 'nui', 'nameAndRole']]

## read mappings of omopid and rxcui
# Pipe-separated (omopid, rxcui) pairs; no header row is expected, because
# names= is given without header=.
omop_rxcui_cols = ['omopid', 'rxcui']
omop_rxcui_DF = pd.read_csv(OMOP_RXCUI, sep='|', names=omop_rxcui_cols)
# Cast the join key to str so it is compared as text in the later merge
# on 'rxcui'.
omop_rxcui_DF['rxcui'] = omop_rxcui_DF['rxcui'].astype('str')

## merge pt, unii, rxcui and drugbank uri
# Left join on the preferred term: rows without a DrugBank mapping are kept.
unii_pt_rxcui_db_DF = unii_pt_rxcui_DF.merge(pt_drugbank_DF, on=['pt'], how='left')

# Intermediate output. The file is tab-separated despite its .csv extension,
# and the DataFrame index is not written.
unii_pt_rxcui_db_DF.to_csv('PT-RXCUI-UNII-DB.csv', sep='\t', index=False)

## merge chebi
# Each step below is a left join, so rows from the running result are never
# dropped when the mapping being added has no match.
merged_chebi_DF = unii_pt_rxcui_db_DF.merge(pt_chebi_DF, on=['pt'], how='left')

## merge dron id
merged_dron_DF = merged_chebi_DF.merge(dron_chebi_rxcui_DF, on=['rxcui'], how='left')

## merge omop id
merged_omop_DF = merged_dron_DF.merge(omop_rxcui_DF, on=['rxcui'], how='left')

## merge <nui> and <preferred name and role>
# Last left join, keyed on unii; this produces the final table.
merged_epc_DF = merged_omop_DF.merge(unii_nui_namerole_DF, on=['unii'], how='left')

# DataFrame.info() writes its summary to stdout and returns None, so it is
# called directly; printing its return value only adds a stray "None" line.
merged_epc_DF.info()

# Final output. The file is tab-separated despite its .csv extension, and
# the DataFrame index is not written.
merged_epc_DF.to_csv('mergedActiveMoiety.csv', sep='\t', index=False)