@@ -42,6 +42,17 @@ function __construct($argv) {
42
42
43
43
/**
 * Entry point of the parser: runs the stages that were requested
 * through the 'download' and 'process' parameters.
 *
 * Each stage runs only when its parameter is strictly boolean true.
 * The stages are independent, so either, both or neither may execute.
 */
function Run()
{
	// Stage 1: fetch the source files.
	$download_requested = (true === parent::getParameterValue('download'));
	if ($download_requested) {
		$this->download();
	}

	// Stage 2: convert the files. The flag is read only after the
	// download stage has finished, exactly as in the original flow.
	$process_requested = (true === parent::getParameterValue('process'));
	if ($process_requested) {
		$this->process();
	}
}
54
+
55
+ function download (){
45
56
46
57
// get the file list
47
58
if (parent ::getParameterValue ('files ' ) == 'all ' ) {
@@ -53,92 +64,157 @@ function Run()
53
64
54
65
//set directory values
55
66
$ ldir = parent ::getParameterValue ('indir ' );
56
- $ odir = parent ::getParameterValue ('outdir ' );
57
67
$ rdir = parent ::getParameterValue ('download_url ' );
58
68
59
69
//make sure directories end with slash
60
70
if (substr ($ ldir , -1 ) !== "/ " ){
61
71
$ ldir = $ ldir ."/ " ;
62
72
}
63
-
73
+
74
+ $ gz_suffix = ".gz " ;
75
+
76
+ foreach ($ files AS $ file ) {
77
+ if ($ file == 'chem_gene_ixn_types ' ) $ suffix = '.tsv ' ;
78
+ else if ($ file == 'exposure_ontology ' ) $ suffix = '.obo ' ;
79
+ else $ suffix = ".tsv.gz " ;
80
+ $ lfile = $ ldir .$ file .$ gz_suffix ;
81
+ $ rfile = $ rdir .'CTD_ ' .$ file .$ suffix ;
82
+ if ($ suffix == ".tsv.gz " ) {
83
+ Utils::DownloadSingle ($ rfile , $ lfile );
84
+ } else {
85
+ Utils::DownloadSingle ($ rfile , "compress.zlib:// " .$ lfile );
86
+ }
87
+ }
88
+ }
89
+
90
/**
 * Converts every requested CTD file and writes the dataset description.
 *
 * For each file named by the 'files' parameter this method:
 *   1. attempts to download the local copy if it is missing from 'indir',
 *   2. opens the output file in 'outdir' (.nt, or .nq when 'graph_uri' is
 *      set, plus .gz when 'output_format' contains "gz"),
 *   3. dispatches to the matching CTD_<file>() method of this class,
 *   4. appends a description of the source and output files to the
 *      dataset description.
 * After the loop the accumulated description is written to the release file.
 *
 * Reads the parameters: files, indir, outdir, download_url, dataset_graph,
 * graph_uri, output_format and bio2rdf_release.
 */
function process(){
	// get the file list
	if(parent::getParameterValue('files') == 'all') {
		$files = explode("|",parent::getParameterList('files'));
		// drop the first entry of the parameter list before iterating
		// (presumably the 'all' keyword itself; verify against the parameter definition)
		array_shift($files);
	} else {
		$files = explode(",",parent::getParameterValue('files'));
	}

	$dataset_description = '';

	//set directory values
	$ldir = parent::getParameterValue('indir');
	$rdir = parent::getParameterValue('download_url');
	$odir = parent::getParameterValue('outdir');

	//make sure input and output directories end with slash
	if(substr($ldir, -1) !== "/"){
		$ldir = $ldir."/";
	}

	if(substr($odir, -1) !== "/"){
		$odir = $odir."/";
	}

	// remember the current graph URI so it can be restored after the loop
	$graph_uri = parent::getGraphURI();
	if(parent::getParameterValue('dataset_graph') == true) parent::setGraphURI(parent::getDatasetURI());

	$gz_suffix = ".gz";

	foreach($files as $file){
		// remote file extension depends on the file
		if($file == 'chem_gene_ixn_types') $suffix = '.tsv';
		else if($file == 'exposure_ontology') $suffix = '.obo';
		else $suffix = ".tsv.gz";

		$lfile = $ldir.$file.$gz_suffix;
		$rfile = $rdir.'CTD_'.$file.$suffix;
		$ofile = "ctd_".$file.".nt";
		$gz = false;

		if(!file_exists($lfile)) {
			trigger_error($lfile." not found. Will attempt to download.", E_USER_NOTICE);
			if($suffix == ".tsv.gz") {
				Utils::DownloadSingle($rfile, $lfile);
			} else {
				// remote file is not gzipped: write it through the zlib
				// stream wrapper so the local copy is compressed too
				Utils::DownloadSingle($rfile, "compress.zlib://".$lfile);
			}
		}

		if($this->GetParameterValue('graph_uri')) {
			$ofile = "ctd_".$file.'.nq';
		}

		if(strstr(parent::getParameterValue('output_format'), "gz")) {
			$ofile .= '.gz';
			$gz = true;
		}

		echo "Processing ".$file." ... ";
		parent::setWriteFile($odir.$ofile, $gz);

		//set read file
		parent::setReadFile($lfile, TRUE);

		// dispatch to the file-specific parser, e.g. CTD_chemicals()
		$fnx = "CTD_".$file;
		$this->$fnx();

		//close write file
		parent::getWriteFile()->close();
		echo "done!".PHP_EOL;

		// generate the dataset release file
		echo "Generating dataset description... ";

		if($file == "chemicals"){
			$dataset = "http://identifiers.org/ctd.chemical/";
		} else if($file == "diseases"){
			$dataset = "http://identifiers.org/ctd.disease/";
		} else if($file == "genes"){
			$dataset = "http://identifiers.org/ctd.gene/";
		} else {
			$dataset = null;
		}

		// FIX: timestamps carry a literal "Z" (UTC designator), so they are
		// built with gmdate() and the zero-padded hour "H". The previous
		// "G" with date() gave local time and hours without a leading zero.
		$timestamp_format = 'Y-m-d\TH:i:s\Z';

		// dataset description
		$source_file = (new DataResource($this))
			->setURI($rfile)
			// FIX: the title used to render as "(<file>..gz" with a doubled
			// dot and no closing parenthesis
			->setTitle("Comparative Toxicogenomics Database ($file$gz_suffix)")
			->setRetrievedDate(gmdate($timestamp_format, filemtime($lfile)))
			->setFormat("text/tab-separated-value")
			->setFormat("application/gzip")
			->setPublisher("http://ctdbase.org/")
			->setHomepage("http://ctdbase.org/")
			->setRights("use")
			->setRights("by-attribution")
			->setRights("no-commercial")
			->setLicense("http://ctdbase.org/about/legal.jsp")
			->setDataset($dataset);

		$prefix = parent::getPrefix();
		$bVersion = parent::getParameterValue('bio2rdf_release');
		$date = gmdate($timestamp_format);
		$output_file = (new DataResource($this))
			// FIX: host was misspelled "download.bio2df.org"
			->setURI("http://download.bio2rdf.org/release/$bVersion/$prefix/$ofile")
			->setTitle("Bio2RDF v$bVersion RDF version of $prefix (generated at $date)")
			->setSource($source_file->getURI())
			->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/ctd/ctd.php")
			->setCreateDate($date)
			->setHomepage("http://download.bio2rdf.org/release/$bVersion/$prefix/$prefix.html")
			->setPublisher("http://bio2rdf.org")
			->setRights("use-share-modify")
			->setRights("by-attribution")
			->setRights("restricted-by-source-license")
			->setLicense("http://creativecommons.org/licenses/by/3.0/")
			->setDataset(parent::getDatasetURI());

		if($gz) $output_file->setFormat("application/gzip");
		if(strstr(parent::getParameterValue('output_format'),"nt")) $output_file->setFormat("application/n-triples");
		else $output_file->setFormat("application/n-quads");

		$dataset_description .= $source_file->toRDF().$output_file->toRDF();
	}

	// restore the graph URI that was active before processing
	parent::setGraphURI($graph_uri);
	parent::setWriteFile($odir.parent::getBio2RDFReleaseFile());
	parent::getWriteFile()->write($dataset_description);
	parent::getWriteFile()->close();
	echo "done!".PHP_EOL;

}
143
219
144
220
@@ -162,7 +238,7 @@ function CTD_chemicals()
162
238
163
239
if ($ first ) {
164
240
if (($ c = count ($ a ) != 8 )) {
165
- trigger_error ("Expecting 8 fields, found $ c! " ); return FALSE ;
241
+ trigger_error ("CTD_chemicals function expects 8 fields, found $ c! " . PHP_EOL , E_USER_WARNING ) ;
166
242
}
167
243
$ first = false ;
168
244
}
@@ -209,7 +285,7 @@ function CTD_chem_gene_ixns()
209
285
210
286
if ($ first ) {
211
287
if (($ c = count ($ a )) != 11 ) {
212
- trigger_error ("Expecting 11 fields, found $ c! " ); return FALSE ;
288
+ trigger_error ("CTD_chem_gene_ixns function expects 11 fields, found $ c! " . PHP_EOL , E_USER_WARNING ) ;
213
289
}
214
290
$ first = false ;
215
291
}
@@ -287,7 +363,7 @@ function CTD_chemicals_diseases()
287
363
288
364
if ($ first ) {
289
365
if (($ c = count ($ a )) != 10 ) {
290
- trigger_error ("Expecting 10 fields, found $ c! " ); return FALSE ;
366
+ trigger_error ("CTD_chemicals_diseases function expects 10 fields, found $ c! " . PHP_EOL , E_USER_WARNING ) ;
291
367
}
292
368
$ first = false ;
293
369
}
@@ -357,7 +433,7 @@ function CTD_chem_pathways_enriched()
357
433
$ a = explode ("\t" ,trim ($ l ));
358
434
if ($ first ) {
359
435
if (($ c = count (explode ("\t" ,$ l ))) != 11 ) {
360
- trigger_error ("Expecting 11 fields, found $ c! " );
436
+ trigger_error ("CTD_chem_pathways_enriched function expects 11 fields, found $ c! " . PHP_EOL , E_USER_WARNING );
361
437
return FALSE ;
362
438
}
363
439
$ first = false ;
@@ -400,7 +476,7 @@ function CTD_diseases()
400
476
// check number of columns
401
477
if ($ first ) {
402
478
if (($ c = count (explode ("\t" ,$ l ))) != 9 ) {
403
- trigger_error ("Expecting 9 fields, found $ c! " );
479
+ trigger_error ("CTD_diseases function expects 9 fields, found $ c! " . PHP_EOL , E_USER_WARNING );
404
480
return FALSE ;
405
481
}
406
482
$ first = false ;
@@ -436,7 +512,7 @@ function CTD_diseases_pathways()
436
512
// check number of columns
437
513
if ($ first ) {
438
514
if (($ c = count (explode ("\t" ,$ l ))) != 5 ) {
439
- trigger_error ("Expecting 5 fields, found $ c! " );
515
+ trigger_error ("CTD_diseases_pathways function expects 5 fields, found $ c! " . PHP_EOL , E_USER_WARNING );
440
516
return FALSE ;
441
517
}
442
518
$ first = false ;
@@ -478,7 +554,7 @@ function CTD_genes_diseases()
478
554
// check number of columns
479
555
if ($ first ) {
480
556
if (($ c = count (explode ("\t" ,$ l ))) != 9 ) {
481
- trigger_error ("Expecting 9 fields, found $ c! " );
557
+ trigger_error ("CTD_genes_diseases function expects 9 fields, found $ c! " . PHP_EOL , E_USER_WARNING );
482
558
return FALSE ;
483
559
}
484
560
$ first = false ;
@@ -537,7 +613,7 @@ function CTD_genes_pathways()
537
613
// check number of columns
538
614
if ($ first ) {
539
615
if (($ c = count (explode ("\t" ,$ l ))) != 4 ) {
540
- trigger_error ("Expecting 4 fields, found $ c! " );
616
+ trigger_error ("CTD_genes_pathways function expects 4 fields, found $ c! " . PHP_EOL , E_USER_WARNING );
541
617
return FALSE ;
542
618
}
543
619
$ first = false ;
@@ -573,7 +649,7 @@ function CTD_Pathways()
573
649
// check number of columns
574
650
if ($ first ) {
575
651
if (($ c = count (explode ("\t" ,$ l ))) != 2 ) {
576
- trigger_error ("Expecting 2 fields, found $ c! " );
652
+ trigger_error ("CTD_pathways function expects 2 fields, found $ c! " . PHP_EOL , E_USER_WARNING );
577
653
return FALSE ;
578
654
}
579
655
$ first = false ;
@@ -605,16 +681,16 @@ function CTD_Genes()
605
681
while ($ l = $ this ->GetReadFile ()->Read ()) {
606
682
if ($ l [0 ] == '# ' ) continue ;
607
683
$ a = explode ("\t" ,$ l );
608
-
684
+
609
685
// check number of columns
610
686
if ($ first ) {
611
687
if (($ c = count (explode ("\t" ,$ l ))) != 5 ) {
612
- trigger_error ("Expecting 5 fields, found $ c! " );
688
+ trigger_error ("CTD_genes function expects 5 fields, found $ c! " . PHP_EOL , E_USER_WARNING );
613
689
return FALSE ;
614
690
}
615
691
$ first = false ;
616
692
}
617
-
693
+
618
694
$ symbol = str_replace (array ("\\/ " ),array ('| ' ),$ a [0 ]);
619
695
$ label = str_replace ("\\+/ " ,'+ ' ,$ a [1 ]);
620
696
$ geneid = $ a [2 ];
@@ -656,7 +732,7 @@ function CTD_chem_go_enriched()
656
732
// check number of columns
657
733
if ($ first ) {
658
734
if (($ c = count (explode ("\t" ,$ l ))) != 13 ) {
659
- trigger_error ("Expecting 13 fields, found $ c! " );
735
+ trigger_error ("CTD_chem_go_enriched function expects 13 fields, found $ c! " . PHP_EOL , E_USER_WARNING );
660
736
return FALSE ;
661
737
}
662
738
$ first = false ;
@@ -693,7 +769,7 @@ function CTD_chem_gene_ixn_types()
693
769
// check number of columns
694
770
if ($ first ) {
695
771
if (($ c = count (explode ("\t" ,$ l ))) != 4 ) {
696
- trigger_error ("Expecting 4 fields, found $ c! " );
772
+ trigger_error ("CTD_chem_gene_ixn_types function expects 4 fields, found $ c! " . PHP_EOL , E_USER_WARNING );
697
773
return FALSE ;
698
774
}
699
775
$ first = false ;
@@ -717,17 +793,4 @@ function CTD_chem_gene_ixn_types()
717
793
718
794
} // end class
719
795
720
- $ start = microtime (true );
721
-
722
- set_error_handler ('error_handler ' );
723
- $ parser = new CTDParser ($ argv );
724
- $ parser ->Run ();
725
-
726
- $ end = microtime (true );
727
- $ time_taken = $ end - $ start ;
728
- print "Started: " .date ("l jS F \@ g:i:s a " , $ start )."\n" ;
729
- print "Finished: " .date ("l jS F \@ g:i:s a " , $ end )."\n" ;
730
- print "Took: " .$ time_taken ." seconds \n"
731
-
732
-
733
796
?>
0 commit comments