Skip to content

Commit fc7d520

Browse files
resolved merge conflict
2 parents c5155ab + 4d6c512 commit fc7d520

File tree

59 files changed

+21965
-98182
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

59 files changed

+21965
-98182
lines changed

.gitignore

+15-1
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,18 @@
22
.DS_Store
33
*.lock
44
*.log
5-
linkedSPLs/LinkedSPLs-activeMoiety/activeMoietySub-in-rdf.xml
5+
6+
linkedSPLs/LinkedSPLs-activeMoiety/mergedActiveMoiety.csv
7+
linkedSPLs/LinkedSPLs-clinicalDrug/mergedClinicalDrug.tsv
8+
linkedSPLs/LinkedSPLs-activeMoiety/activeMoietySub-in-rdf.xml
9+
linkedSPLs/LinkedSPLs-clinicalDrug/clinicalDrugSub-in-rdf.xml
10+
11+
linkedSPLs/LinkedSPLs-activeMoiety/mappings/
12+
linkedSPLs/LinkedSPLs-clinicalDrug/mappings/
13+
14+
linkedSPLs/LinkedSPLs-activeMoiety/PT-RXCUI-UNII-DB.csv
15+
16+
linkedSPLs/LinkedSPLs-update/data/
17+
linkedSPLs/LinkedSPLs-update/load-dailymed-spls/problematic-spls/
18+
linkedSPLs/LinkedSPLs-update/mappings/FDA-pharmacogenetic-info-mapping/cached-table-downloads/
19+
linkedSPLs/LinkedSPLs-update/mappings/PT-UNII-ChEBI-mapping/ChEBIJavaClient/bin/

MIT-LICENSE.txt

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
Copyright 2014 Bio2RDF project team and other contributors
2+
http://bio2rdf.org
3+
4+
Permission is hereby granted, free of charge, to any person obtaining
5+
a copy of this software and associated documentation files (the
6+
"Software"), to deal in the Software without restriction, including
7+
without limitation the rights to use, copy, modify, merge, publish,
8+
distribute, sublicense, and/or sell copies of the Software, and to
9+
permit persons to whom the Software is furnished to do so, subject to
10+
the following conditions:
11+
12+
The above copyright notice and this permission notice shall be
13+
included in all copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

README.md

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
Bio2RDF-scripts
2+
===============
3+
This Git repository holds all of the RDF converter scripts used to generate Bio2RDF linked data.
4+
5+
Requirements
6+
-------------
7+
See the [wiki](https://github.com/bio2rdf/bio2rdf-scripts/wiki) for details.
8+
9+
---
10+
Licensed under the [MIT License](http://en.wikipedia.org/wiki/MIT_License); see the [license page](https://github.com/bio2rdf/bio2rdf-scripts/wiki/MIT-License) for details.

bioportal/bioportal.php

+16-9
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ function __construct($argv) {
3636
parent::__construct($argv,'bioportal');
3737
parent::addParameter('files',true,null,'all','all or comma-separated list of ontology short names to process');
3838
parent::addParameter('download_url',false,null,'http://data.bioontology.org/');
39-
parent::addParameter('exclude',false,null,"AURA",'ontologies to exclude - use acronyms');
39+
parent::addParameter('exclude',false,null,"AURA,HOOM",'ontologies to exclude - use acronyms');
4040
parent::addParameter('continue_from',false,null,"",'the ontology abbreviation to restart from');
4141
parent::addParameter('ncbo_api_key',false,null,null,'BioPortal API key (please use your own)');
4242
parent::addParameter('ncbo_api_key_file',false,null,'ncbo.api.key','BioPortal API key file');
@@ -123,7 +123,6 @@ function Run()
123123
if(isset($ls['description'])) $description = $ls['description'];
124124

125125
$rfile = $ls['ontology']['links']['download'];
126-
127126
$lfile = $abbv.".".$format.".gz";
128127
if(!file_exists($idir.$lfile) or parent::getParameterValue('download') == 'true') {
129128
echo "downloading ... ";
@@ -134,7 +133,7 @@ function Run()
134133
$ret = curl_setopt($ch, CURLOPT_HEADER, 1);
135134
$ret = curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
136135
$ret = curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
137-
$ret = curl_setopt($ch, CURLOPT_TIMEOUT, 300);
136+
$ret = curl_setopt($ch, CURLOPT_TIMEOUT, 600);
138137
$ret = curl_exec($ch);
139138
if(!$ret) {echo "no content";continue;}
140139

@@ -167,12 +166,13 @@ function Run()
167166

168167
// process
169168
echo "converting ... ";
170-
set_time_limit(0);
169+
171170
// let's double check the format
172171
$fp = gzopen($idir.$lfile,"r");
173172
$l = gzgets($fp);
174173
if(strstr($l,"xml")) $format= "owl";
175174
gzclose($fp);
175+
176176
if($format == 'obo') {
177177
$this->OBO2RDF($abbv);
178178
} else if($format == 'owl') {
@@ -182,6 +182,7 @@ function Run()
182182
} else {
183183
echo "no processor for $label (format $format)".PHP_EOL;
184184
}
185+
185186
if(!file_exists($odir.$ofile)) { echo "no output".PHP_EOL;continue;}
186187
parent::getWriteFile()->close();
187188
parent::clear();
@@ -366,7 +367,7 @@ public function TriplifyMap($a, $prefix)
366367

367368
} else {
368369
parent::addRDF(
369-
parent::triplifyString($s_uri,$p_uri,$a['o'],(($a['o_datatype'] == '')?null:$a['o_datatype']),(($a['o_lang'] == '')?null:$a['o_lang']))
370+
parent::triplifyString($s_uri,$p_uri,addslashes($a['o']),(($a['o_datatype'] == '')?null:$a['o_datatype']),(($a['o_lang'] == '')?null:$a['o_lang']))
370371
);
371372
}
372373

@@ -394,7 +395,7 @@ function OBO2RDF($abbv)
394395
$graph_uri = '<'.parent::getRegistry()->getFQURI(parent::getGraphURI()).'>';
395396
$bid = 1;
396397

397-
while($l = parent::getReadFile()->read()) {
398+
while(FALSE !== ($l = parent::getReadFile()->read())) {
398399
$lt = trim($l);
399400
if(strlen($lt) == 0) continue;
400401
if($lt[0] == '!') continue;
@@ -461,6 +462,7 @@ function OBO2RDF($abbv)
461462
else {$ns = strtolower($c[0]);$id=$c[1];}
462463
$id = str_replace( array("(",")"), array("_",""), $id);
463464
$tid = $ns.":".$id;
465+
echo $tid.PHP_EOL;
464466
} else if($a[0] == "name") {
465467
$buf .= parent::describeClass($tid,addslashes(stripslashes($a[1])));
466468
} else if($a[0] == "is_a") {
@@ -483,7 +485,8 @@ function OBO2RDF($abbv)
483485
$buf .= $t;
484486
$is_deprecated = true;
485487
} else if($a[0] == "id") {
486-
parent::getRegistry()->parseQName($a[1],$ns,$id);
488+
parent::getRegistry()->parseQName($a[1],$ns,$id);
489+
if(trim($ns) == '') $ns = "unspecified";
487490
$tid = "$ns:$id";
488491
// $buf .= parent::describeClass($tid,null,"owl:Class");
489492
// $buf .= parent::triplify($tid,"rdfs:isDefinedBy",$ouri);
@@ -610,6 +613,7 @@ function OBO2RDF($abbv)
610613
} else if($a[0] == "is_a") {
611614
// do subclassing
612615
parent::getRegistry()->parseQName($a[1],$ns,$id);
616+
if(trim($ns) == '') $ns = "unspecified";
613617
$t = parent::triplify($tid,"rdfs:subClassOf","$ns:$id");
614618
$buf .= $t;
615619
$min .= $t;
@@ -657,17 +661,19 @@ function OBO2RDF($abbv)
657661
$c = explode(" ",$a[1]);
658662
if(count($c) == 1) { // just a class
659663
parent::getRegistry()->parseQName($c[0],$ns,$id);
664+
if(trim($ns) == '') $ns = "unspecified";
660665
$relationship .= parent::getRegistry()->getFQURI("$ns:$id");
661666
$buf .= parent::triplify($tid,"rdfs:subClassOf","$ns:$id");
662667

663668
} else if(count($c) == 2) { // an expression
664669
parent::getRegistry()->parseQName($c[0],$pred_ns,$pred_id);
665670
parent::getRegistry()->parseQName($c[1],$obj_ns,$obj_id);
671+
if(trim($obj_ns) == '') $obj_ns = "unspecified";
666672

667673
$relationship .= '_:b'.$bid.' <'.parent::getRegistry()->getFQURI('owl:onProperty').'> <'.parent::getRegistry()->getFQURI("obo_vocabulary:".$pred_id)."> $graph_uri .".PHP_EOL;
668674
$relationship .= '_:b'.$bid.' <'.parent::getRegistry()->getFQURI('owl:someValuesFrom').'> <'.parent::getRegistry()->getFQURI("$obj_ns:$obj_id")."> $graph_uri .".PHP_EOL;
669675

670-
$buf .= parent::triplify($tid,"obo_vocabulary:$pred_id","$obj_ns:$obj_id");
676+
$buf .= parent::triplify($tid,"obo_vocabulary:$pred_id","$obj_ns:$obj_id"); #@todo this causes problem with OGG-MM
671677
}
672678
} else {
673679
// default handler
@@ -676,7 +682,8 @@ function OBO2RDF($abbv)
676682
} else {
677683
//header
678684
//format-version: 1.0
679-
$buf .= parent::triplifyString($ouri,"obo_vocabulary:$a[0]",str_replace( array('"','\:'), array('\"',':'), isset($a[1])?$a[1]:""));
685+
$buf .= parent::triplifyString($ouri,"obo_vocabulary:$a[0]",
686+
str_replace( array('"','\:'), array('\"',':'), isset($a[1])?$a[1]:""));
680687
}
681688

682689
if($minimal || $minimalp) parent::getWriteFile()->write($min);

chembl/chembl.php

+62-1
Original file line numberDiff line numberDiff line change
@@ -1017,8 +1017,68 @@ function compounds($connection) {
10171017
}
10181018
parent::writeRDFBufferToWriteFile();
10191019
}
1020+
}
1021+
$result->free();
1022+
}
10201023

1021-
$result->free();
1024+
/*
1025+
* parse the assays tables
1026+
*/
1027+
function process_assays() {
1028+
1029+
$this->set_write_file("assays");
1030+
1031+
$allIDs = mysql_query(
1032+
"SELECT DISTINCT * FROM assays, assay_type " .
1033+
"WHERE assays.assay_type = assay_type.assay_type"
1034+
);
1035+
1036+
$num = mysql_numrows($allIDs);
1037+
1038+
while ($row = mysql_fetch_assoc($allIDs)) {
1039+
1040+
$assay = "chembl:assay_".$row['assay_id'];
1041+
$this->AddRDF($this->QQuad($assay,"rdf:type","chembl_vocabulary:Assay"));
1042+
1043+
//chembl assay id
1044+
$chembl = "chembl:". $row['chembl_id'];
1045+
$this->AddRDF($this->QQuadl($assay,"dc:identifier",$row['chembl_id']));
1046+
$this->AddRDF($this->QQuad($assay,"owl:equivalentClass",$chembl));
1047+
$this->AddRDF($this->QQuad($chembl,"owl:equivalentClass",$assay));
1048+
$this->WriteRDFBufferToWriteFile();
1049+
1050+
if ($row['description']) {
1051+
# clean up description
1052+
$description = $row['description'];
1053+
$description = str_replace("\\", "\\\\", $description);
1054+
$description = str_replace("\"", "\\\"", $description);
1055+
$this->AddRDF($this->QQuadl($assay,"chembl_vocabulary:hasDescription",$description));
1056+
}
1057+
1058+
if ($row['doc_id']){
1059+
$this->AddRDF($this->QQuad($assay,"chembl_vocabulary:citesAsDataSource","chembl:reference_".$row['doc_id']));
1060+
}
1061+
1062+
$props = mysql_query("SELECT DISTINCT * FROM assay2target WHERE assay_id = " . $row['assay_id']);
1063+
1064+
while ($prop = mysql_fetch_assoc($props)) {
1065+
if ($prop['tid']) {
1066+
$target = "chembl:target_".$prop['tid'];
1067+
$this->AddRDF($this->QQuad($assay,"chembl_vocabulary:hasTarget",$target));
1068+
1069+
if ($prop['confidence_score']) {
1070+
$targetScore = "chembl:tscore_".md5($assay.$prop['tid']);
1071+
$this->AddRDF($this->QQuad($assay,"chembl_vocabulary:hasTargetScore",$targetScore));
1072+
$this->AddRDF($this->QQuad($targetScore,"chembl_vocabulary:forTarget",$target));
1073+
$this->AddRDF($this->QQuadl($targetScore,"rdf:value",$prop['confidence_score']));
1074+
}
1075+
}
1076+
1077+
$this->WriteRDFBufferToWriteFile();
1078+
1079+
}
1080+
$this->AddRDF($this->QQuad($assay,"chembl_vocabulary:hasAssayType","chembl_vocabulary:".$row['assay_desc']));
1081+
$this->WriteRDFBufferToWriteFile();
10221082
}
10231083
}
10241084

@@ -1287,4 +1347,5 @@ function protein_families($connection){
12871347
}
12881348
}
12891349
}
1350+
12901351
?>

0 commit comments

Comments
 (0)