Skip to content

Commit 869d879

Browse files
author
Richard Boyce
committed
add omop id to active moiety entity graph
1 parent 5f035f4 commit 869d879

File tree

3 files changed

+78149
-77503
lines changed

3 files changed

+78149
-77503
lines changed

linkedSPLs/LinkedSPLs-activeMoiety/README

+29-20
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,19 @@ This folder will hold a graph that maps the active moiety components of an SPL t
66

77
Inputs are the original mappings listed below:
88

9-
(1) UNII to RxNORM
9+
(1) PT to UNII
1010

11-
(2) UNII to ChEBI
11+
(2) UNII to RxNORM
1212

13-
(3) RxNORM to Drugbank
13+
(3) PT to Drugbank
1414

15-
(5) RxNORM to NDFRT (EPC)
15+
(4) PT to ChEBI
1616

17-
(6) RxNORM to OMOP
17+
(5) RxNORM to OMOP
1818

19-
(6) PreferredTerm to drug class
19+
(6) RxNORM to DrOn
20+
21+
(7) RxNORM to NDFRT (EPC)
2022

2123
Output is an RDF/XML graph that represents all active moieties with their linked resources.
2224

@@ -28,12 +30,24 @@ Procedures to get active moieties RDF graph
2830
<STEP 1>: prepare mappings in folder mappings/
2931

3032
----------------------------------------------
31-
mappings/dron#to#chebi#and#rxnorm.txt
33+
mappings of PT to UNII (FDA)
34+
----------------------------------------------
35+
36+
FROM "../../LinkedSPLs-update/data/FDA/FDAPreferredSubstanceToUNII.txt"
37+
38+
----------------------------------------------
39+
mappings of UNII to RXCUI (UMLS)
40+
----------------------------------------------
41+
42+
FROM "../../LinkedSPLs-update/data/UMLS/UNIIs-Rxcuis-from-UMLS.txt"
43+
44+
----------------------------------------------
45+
mappings/dron-to-chebi-and-rxnorm.txt
3246
----------------------------------------------
33-
FROM "../../LinkedSPLs#update/mappings/DrOn#to#RxNorm/dron#to#chebi#and#rxnorm.txt"
47+
FROM "../../LinkedSPLs-update/mappings/DrOn-to-RxNorm/dron-to-chebi-and-rxnorm.txt"
3448

3549
----------------------------------------------
36-
mappings/imeds_drugids_to_rxcuis.csv
50+
mappings/active-ingredient-omopid-rxcui-09042015.dsv
3751
----------------------------------------------
3852

3953
query OMOP CDM V5 (GeriOMOP) by SQL query below:
@@ -47,24 +61,19 @@ cpt.CONCEPT_CLASS = 'Ingredient';
4761
right click result table and export to csv ('|' delimited)
4862

4963
----------------------------------------------
50-
mappings/UNIIToChEBI#<DATE>.txt
51-
----------------------------------------------
52-
FROM "../../LinkedSPLs#update/mappings/PT#UNII#ChEBI#mapping/UNIIToChEBI#<DATE>.txt"
53-
54-
----------------------------------------------
55-
mappings/pt_drugbank#<DATE>.txt
64+
mappings/UNIIToChEBI-<DATE>.txt
5665
----------------------------------------------
57-
FROM "../../LinkedSPLs#update/mappings/ChEBI#DrugBank#bio2rdf#mapping/fda#substance#preferred#name#to#drugbank#<DATE>.txt"
66+
FROM "../../LinkedSPLs-update/mappings/PT-UNII-ChEBI-mapping/UNIIToChEBI-<DATE>.txt"
5867

5968
----------------------------------------------
60-
mappings/PreferredTerm#UNII#Rxcui#mapping.txt
69+
mappings/pt_drugbank-<DATE>.txt
6170
----------------------------------------------
62-
FROM "../../LinkedSPLs#update/mappings/RxNORM#mapping/PreferredTerm#UNII#Rxcui#mapping.txt"
71+
FROM "../../LinkedSPLs-update/mappings/ChEBI-DrugBank-bio2rdf-mapping/fda-substance-preferred-name-to-drugbank-<DATE>.txt"
6372

6473
----------------------------------------------
6574
mappings/EPC_extraction_most_recent_<DATE>.txt
6675
----------------------------------------------
67-
FROM "../../LinkedSPLs#update/mappings/pharmacologic_class_indexing/EPC_extraction_most_recent_<DATE>.txt"
76+
FROM "../../LinkedSPLs-update/mappings/pharmacologic_class_indexing/EPC_extraction_most_recent_<DATE>.txt"
6877

6978

7079
<STEP 2>: run python script to merge those mappings together
@@ -94,7 +103,7 @@ from "http://biopython.org/wiki/Download"
94103
(2) rdflib
95104

96105
(3) rdflib-jsonld
97-
from "https://github.com/RDFLib/rdflib#jsonld"
106+
from "https://github.com/RDFLib/rdflib-jsonld"
98107

99108
(4) pandas
100109

linkedSPLs/LinkedSPLs-activeMoiety/mergeToActiveMoiety.py

+25-20
Original file line numberDiff line numberDiff line change
@@ -11,71 +11,76 @@
1111
import pandas as pd
1212
from StringIO import StringIO
1313
import numpy as np
14-
#from numpy import nan
1514

15+
## Define namespaces
1616
CHEBI_BASE_URI = "http://purl.obolibrary.org/obo/"
1717
RXNORM_BASE_URI = "http://purl.bioontology.org/ontology/RXNORM/"
18-
## Define data inputs
1918

20-
#UNIIS = "../linkedSPLs-update/data/UMLS/UNIIs-from-UMLS.txt"
21-
#PT_UNII = "mappings/FDAPreferredTermToUNII.tsv"
22-
#PT_RXCUI = "mappings/fda-active-moiety-string-name-rxnorm-mapping.csv"
19+
## Define data inputs
20+
PT_UNII = "../LinkedSPLs-update/data/FDA/FDAPreferredSubstanceToUNII.txt"
21+
UNII_RXCUI = "../LinkedSPLs-update/data/UMLS/UNIIs-Rxcuis-from-UMLS.txt"
2322

2423
PT_CHEBI = "mappings/UNIIToChEBI-06102015.txt"
2524
PT_DRUGBANK = "mappings/fda-substance-preferred-name-to-drugbank-06102015.txt"
26-
UNII_PT_RXCUI = "mappings/PreferredTerm-UNII-Rxcui-mapping.txt"
25+
2726
UNII_NUI_PREFERRED_NAME_ROLE = "mappings/EPC_extraction_most_recent_06102015.txt"
2827
DRON_CHEBI_RXCUI = "mappings/cleaned-dron-chebi-rxcui-ingredient-06222015.txt"
29-
#OMOP_RXCUI = "mappings/imeds_drugids_to_rxcuis.csv"
28+
OMOP_RXCUI = "mappings/active-ingredient-omopid-rxcui-09042015.dsv"
3029

30+
## Get UNII - PT - RXCUI
31+
unii_pt_cols = ['unii','pt']
32+
unii_pt_DF = pd.read_csv(PT_UNII, sep='\t', names=unii_pt_cols)
3133

32-
## read mappings of pt, unii and rxcui
33-
34-
unii_pt_rxcui_cols = ['unii','pt','rxcui']
35-
unii_pt_rxcui_DF = pd.read_csv(UNII_PT_RXCUI, sep='\t', names=unii_pt_rxcui_cols, skiprows=[0])
34+
rxcui_unii_cols = ['rxcui','unii']
35+
rxcui_unii_DF = pd.read_csv(UNII_RXCUI, sep='|', names=rxcui_unii_cols)
36+
rxcui_unii_DF['rxcui'] = rxcui_unii_DF['rxcui'].astype('str')
3637

38+
unii_pt_rxcui_DF = unii_pt_DF.merge(rxcui_unii_DF, on=['unii'], how='left')
39+
print unii_pt_rxcui_DF.info()
3740

3841
## read mappings of pt and drugbank uri
3942

4043
pt_drugbank_cols = ['pt','db_uri1','db_uri2']
4144
pt_drugbank_DF = pd.read_csv(PT_DRUGBANK, sep='\t', names=pt_drugbank_cols)
4245

43-
4446
## read mappings of pt and chebi
4547

4648
pt_chebi_cols = ['pt','chebi']
4749
pt_chebi_DF = pd.read_csv(PT_CHEBI, sep='\t', names=pt_chebi_cols)
48-
#print pt_chebi_DF.info()
4950

5051
## read mappings of dron and rxcui
5152
dron_chebi_rxcui_cols = ['dron','chebi','rxcui']
5253
dron_chebi_rxcui_DF = pd.read_csv(DRON_CHEBI_RXCUI, sep='|', names=dron_chebi_rxcui_cols, usecols=[0,2])
54+
dron_chebi_rxcui_DF['rxcui'] = dron_chebi_rxcui_DF['rxcui'].astype('str')
5355

5456
## read mappings of unii, nui and preferredNameAndRole
5557
unii_nui_namerole_cols = ['setid', 'unii','nui','nameAndRole']
5658
unii_nui_namerole_DF = pd.read_csv(UNII_NUI_PREFERRED_NAME_ROLE, sep='\t', names=unii_nui_namerole_cols)[['unii','nui','nameAndRole']]
57-
#print unii_nui_namerole_DF.info()
5859

5960

61+
## read mappings of omopid and rxcui
62+
omop_rxcui_cols = ['omopid','rxcui']
63+
omop_rxcui_DF = pd.read_csv(OMOP_RXCUI, sep='|', names=omop_rxcui_cols)
64+
omop_rxcui_DF['rxcui'] = omop_rxcui_DF['rxcui'].astype('str')
65+
6066
## merge pt, unii, rxcui and drugbank uri
6167
unii_pt_rxcui_db_DF = unii_pt_rxcui_DF.merge(pt_drugbank_DF, on=['pt'], how='left')
62-
#print unii_pt_rxcui_db_DF.info()
63-
unii_pt_rxcui_db_DF.to_csv('PT-RXCUI-UNII-DB.csv', sep='\t', index=False)
6468

69+
unii_pt_rxcui_db_DF.to_csv('PT-RXCUI-UNII-DB.csv', sep='\t', index=False)
6570

6671
## merge chebi
67-
6872
merged_chebi_DF = unii_pt_rxcui_db_DF.merge(pt_chebi_DF, on=['pt'], how='left')
69-
#print merged_chebi_DF.info()
7073

7174
## merge dron id
7275
merged_dron_DF = merged_chebi_DF.merge(dron_chebi_rxcui_DF, on=['rxcui'], how = 'left')
7376

77+
## merge omop id
78+
merged_omop_DF = merged_dron_DF.merge(omop_rxcui_DF, on=['rxcui'], how = 'left')
79+
7480
## merge <nui> and <preferred name and role>
75-
merged_epc_DF = merged_dron_DF.merge(unii_nui_namerole_DF, on=['unii'], how='left')
81+
merged_epc_DF = merged_omop_DF.merge(unii_nui_namerole_DF, on=['unii'], how='left')
7682

7783
print merged_epc_DF.info()
7884

79-
8085
merged_epc_DF.to_csv('mergedActiveMoiety.csv', sep='\t', index=False)
8186

0 commit comments

Comments
 (0)