@@ -32,6 +32,7 @@ class AffymetrixParser extends Bio2RDFizer
32
32
/**
 * Set up the Affymetrix parser: hand the command-line arguments and the
 * "affymetrix" name to the Bio2RDFizer constructor, register the
 * parser-specific parameters, then let the parent finish initialization.
 *
 * Registered parameters (remaining addParameter() arguments are passed
 * exactly as before; NOTE(review): their positional meaning is defined by
 * Bio2RDFizer::addParameter, which is not visible here — confirm there):
 *   - files        : registered with 'all' as its fourth argument
 *   - version      : registered with '33' as its fourth argument and the
 *                    description 'to set another version to parse from'
 *   - download_url : registered with the Affymetrix annotation files page URL
 *
 * @param mixed $argv arguments forwarded unchanged to parent::__construct()
 *                    (presumably the CLI argument vector — verify against caller)
 */
function __construct($argv) {
    parent::__construct($argv, "affymetrix");

    parent::addParameter('files', true, null, 'all', '');
    parent::addParameter('version', false, null, '33', 'to set another version to parse from');
    parent::addParameter('download_url', false, null, 'http://www.affymetrix.com/support/technical/annotationfilesmain.affx', '');

    // Must run after every addParameter() call so the parent sees the full set.
    parent::initialize();
}
@@ -76,36 +77,36 @@ function Run()
76
77
}
77
78
}
78
79
if (!isset ($ myfiles )) exit ; // nothing to do
79
-
80
-
80
+ $ dataset_description = '' ;
81
+
81
82
// iterate over the files
82
-
83
- // print_r($myfiles);
84
83
foreach ($ myfiles AS $ rfile ) {
85
- // download
86
84
$ base_file = substr ($ rfile ,strrpos ($ rfile ,"/ " )+1 );
87
85
$ base_url = substr ($ rfile ,0 , strrpos ($ rfile ,"/ " ));
88
- echo "processing $ base_file, from $ base_url " .PHP_EOL ;
89
- $ csv_file = $ base_file .".csv " ;
90
- $ zip_file = $ csv_file .".zip " ;
91
-
92
- $ lfile = $ ldir .$ zip_file ;
93
- if (!file_exists ($ lfile ) || $ this ->GetParameterValue ('download ' ) == true ) {
94
- $ rfile = $ url .$ zip_file ;
95
- trigger_error ("Downloading $ zip_file from $ rfile " , E_USER_NOTICE );
96
- if (Utils::Download ($ base_url ,array ($ zip_file ),$ ldir ) === FALSE ) {
97
- trigger_error ("Unable to download $ file. skipping " , E_USER_WARNING );
98
- continue ;
99
- }
100
- }
101
86
102
- // set the dataset version
87
+ // get and set the dataset version
103
88
if (parent ::getDatasetVersion () == null ) {
104
89
preg_match ("/\.na([0-9]{2})\.annot/ " ,$ base_file ,$ m );
105
90
if (isset ($ m [1 ])) {
106
91
$ this ->setDatasetVersion ($ m [1 ]);
107
92
}
108
93
}
94
+ if (parent ::getDatasetVersion () != parent ::getParameterValue ('version ' )) {
95
+ $ base_file = str_replace (
96
+ "na " .parent ::getDatasetVersion (),
97
+ "na " .parent ::getParameterValue ('version ' ),
98
+ $ base_file );
99
+ }
100
+
101
+ $ csv_file = $ base_file .".csv " ;
102
+ $ zip_file = $ csv_file .".zip " ;
103
+
104
+ $ lfile = $ ldir .$ zip_file ;
105
+ if (!file_exists ($ lfile )) {
106
+ echo "skipping: $ lfile does not exist " .PHP_EOL ;
107
+ continue ;
108
+ }
109
+ echo "processing $ base_file, from $ base_url " .PHP_EOL ;
109
110
110
111
// open the zip file
111
112
$ zin = new ZipArchive ();
@@ -119,48 +120,64 @@ function Run()
119
120
return FALSE ;
120
121
}
121
122
122
- $ this -> SetReadFile ($ lfile );
123
- $ this -> GetReadFile ()->SetFilePointer ($ fp );
123
+ parent :: setReadFile ($ lfile );
124
+ parent :: getReadFile ()->setFilePointer ($ fp );
124
125
125
126
// set the write file
126
- $ outfile = $ base_file .'.nt ' ; $ gz =false ;
127
- if ($ this ->GetParameterValue ('graph_uri ' )) {$ outfile = $ base_file .'.nq ' ;}
128
- if ($ this ->GetParameterValue ('gzip ' )) {
129
- $ outfile .= '.gz ' ;
130
- $ gz = true ;
131
- }
132
- $ this ->setWriteFile ($ odir .$ outfile , $ gz );
127
+ $ gz = (strstr (parent ::getParameterValue ('output_format ' ),".gz " ) === FALSE )?false :true ;
128
+ $ outfile = 'affymetrix- ' .$ base_file .". " .parent ::getParameterValue ('output_format ' );
133
129
134
- // parse the file
130
+ $ this -> setWriteFile ( $ odir . $ outfile , $ gz );
135
131
$ this ->parse ();
136
-
137
132
parent ::getWriteFile ()->close ();
138
133
parent ::getReadFile ()->close ();
134
+ parent ::clear ();
139
135
140
- $ bio2rdf_download_files [] = $ this ->getBio2RDFDownloadURL ($ this ->getNamespace ()).$ outfile ;
136
+ // dataset description
137
+ $ source_file = (new DataResource ($ this ))
138
+ ->setURI ($ rfile )
139
+ ->setTitle ("Affymetrix Probeset : $ base_file " )
140
+ ->setRetrievedDate ( date ("Y-m-d\TG:i:s\Z " , filemtime ($ lfile )))
141
+ ->setFormat ("text/tab-separated-value " )
142
+ ->setFormat ("application/zip " )
143
+ ->setPublisher ("http://affymetrix.com " )
144
+ ->setHomepage ("http://www.affymetrix.com/support/technical/annotationfilesmain.affx " )
145
+ ->setRights ("use " )
146
+ ->setRights ("no-commercial " )
147
+ ->setRights ("registration-required " )
148
+ ->setLicense ("http://www.affymetrix.com/about_affymetrix/legal/index.affx " )
149
+ ->setDataset ("http://identifiers.org/affy.probeset/ " );
141
150
142
- parent ::clear ();
143
- }
144
-
151
+ $ prefix = parent ::getPrefix ();
152
+ $ bVersion = parent ::getParameterValue ('bio2rdf_release ' );
153
+ $ date = date ("Y-m-d\TG:i:s\Z " );
154
+ $ output_file = (new DataResource ($ this ))
155
+ ->setURI ("http://download.bio2df.org/release/ $ bVersion/ $ prefix/ $ outfile " )
156
+ ->setTitle ("Bio2RDF v $ bVersion RDF version of $ prefix (generated at $ date) " )
157
+ ->setSource ($ source_file ->getURI ())
158
+ ->setCreator ("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/affymetrix/affymetrix.php " )
159
+ ->setCreateDate ($ date )
160
+ ->setHomepage ("http://download.bio2rdf.org/release/ $ bVersion/ $ prefix/ $ prefix.html " )
161
+ ->setPublisher ("http://bio2rdf.org " )
162
+ ->setRights ("use-share-modify " )
163
+ ->setRights ("by-attribution " )
164
+ ->setRights ("restricted-by-source-license " )
165
+ ->setLicense ("http://creativecommons.org/licenses/by/3.0/ " )
166
+ ->setDataset (parent ::getDatasetURI ());
145
167
146
- // generate the release file
147
- $ desc = $ this ->getBio2RDFDatasetDescription (
148
- $ this ->getNamespace (),
149
- "https://github.com/bio2rdf/bio2rdf-scripts/blob/master/affymetrix/affymetrix.php " ,
150
- $ bio2rdf_download_files ,
151
- "dsfsdfs " ,
152
- "http://affymetrix.com/ " ,
153
- array ("use-share-modify " ,"no-commercial " ),
154
- null , // license
155
- parent ::getParameterValue ('download_url ' ),
156
- parent ::getDatasetVersion ()
157
- );
158
- $ this ->setWriteFile ($ odir .$ this ->getBio2RDFReleaseFile ($ this ->getNamespace ()));
159
- $ this ->getWriteFile ()->write ($ desc );
168
+ if ($ gz ) $ output_file ->setFormat ("application/gzip " );
169
+ if (strstr (parent ::getParameterValue ('output_format ' ),"nt " )) $ output_file ->setFormat ("application/n-triples " );
170
+ else $ output_file ->setFormat ("application/n-quads " );
171
+
172
+ $ dataset_description .= $ source_file ->toRDF ().$ output_file ->toRDF ();
173
+ }
174
+ // write the dataset description
175
+ $ this ->setWriteFile ($ odir .$ this ->getBio2RDFReleaseFile ());
176
+ $ this ->getWriteFile ()->write ($ dataset_description );
160
177
$ this ->getWriteFile ()->close ();
161
178
162
179
return true ;
163
- }
180
+ }
164
181
165
182
function Parse ()
166
183
{
0 commit comments