@@ -106,39 +106,6 @@ function Run(){
106
106
107
107
}//run
108
108
109
/**
 * Set up the FTP workspace and, when the 'files' parameter is exactly the
 * string 'all', synchronise every file.
 *
 * For any other value of 'files' only the workspace setup is performed.
 */
function sync_files(){
    $this->setup_ftp();
    $files = parent::getParameterValue('files');
    // Strict comparison on purpose: with == a non-string value such as
    // true (or 0 on PHP < 8) compares equal to 'all' and would start a
    // full sync by accident.
    if($files === 'all'){
        $this->sync_all_files();
    }
}
116
-
117
/**
 * Download every *.gz file found under the 'download_url' parameter
 * using ncftpget.
 *
 * The script is stopped (non-zero exit status) when the 'indir'
 * parameter is missing or empty.
 *
 * NOTE(review): 'indir' is only validated here, it is not passed to
 * ncftpget -- confirm where the downloaded files are expected to land.
 */
function sync_all_files(){
    $dir = $this->getParameterValue('indir');
    if($dir == null || strlen($dir) == 0){
        trigger_error("Could not find input directory! \n");
        // exit with a failure status so calling scripts can detect the error
        exit(1);
    }
    echo "syncing genbank files... ";
    // Quote the remote pattern: it is built from a parameter value and must
    // reach ncftpget verbatim instead of being interpreted by the shell.
    $remote_pattern = parent::getParameterValue('download_url')."*.gz";
    $output = array();
    $status = 0;
    exec("ncftpget ".escapeshellarg($remote_pattern), $output, $status);
    if($status !== 0){
        // report the failure instead of silently ignoring it
        trigger_error("ncftpget exited with status ".$status."\n", E_USER_WARNING);
    }
}
126
-
127
/**
 * Create workspace and mount genbank.
 *
 * Creates the directory named by the 'workspace' parameter and then
 * mounts the 'download_url' parameter onto it with curlftpfs.
 * The script is stopped (non-zero exit status) when the workspace cannot
 * be created.
 **/
function setup_ftp(){
    $workspace = $this->GetParameterValue('workspace');

    //create workspace if it doesn't already exist
    if($this->CreateDirectory($workspace) === TRUE){
        echo "set up workspace ".$workspace."\n";
    }else{
        echo "failed to create workspace exiting program ";
        // exit with a failure status so calling scripts can detect the error
        exit(1);
    }

    echo "Setting up FTP mount: \n";
    // Both arguments come from parameter values; quote them so spaces or
    // shell metacharacters cannot alter the command that is executed.
    $download_url = $this->getParameterValue('download_url');
    $output = array();
    $status = 0;
    exec("curlftpfs ".escapeshellarg($download_url)." ".escapeshellarg($workspace), $output, $status);
    if($status !== 0){
        // report the failure instead of silently ignoring it
        trigger_error("curlftpfs exited with status ".$status."\n", E_USER_WARNING);
    }
}
142
109
function process (){
143
110
$ gb_record_str = "" ;
144
111
while ($ aLine = $ this ->getReadFile ()->Read (4096 )) {
@@ -173,8 +140,9 @@ function process(){
173
140
$ parsed_segments_arr = $ this ->parseSegment ($ segments );
174
141
}
175
142
176
- //$features = $this->retrieveSections("FEATURES", $sectionsRaw);
177
- //$parsed_features_arr = $this->parseFeatures($features);
143
+ $ features = $ this ->retrieveSections ("FEATURES " , $ sectionsRaw );
144
+ $ parsed_features_arr = $ this ->parseFeatures ($ features );
145
+
178
146
//get the source section
179
147
$ source = $ this ->retrieveSections ("SOURCE " , $ sectionsRaw );
180
148
$ parsed_source_arr = $ this ->parseSource ($ source );
@@ -262,14 +230,19 @@ function process(){
262
230
263
231
}
264
232
/**
 * Parse the features section of genbank documents according to:
 * http://www.insdc.org/documents/feature-table
 *
 * At present every non-empty feature is whitespace-normalised and printed
 * followed by a "***" separator; nothing is collected yet, so the
 * returned array is always empty.
 *
 * @param array $feature_arr list of entries, each read through its 'value' key
 * @return array always an empty array
 */
function parseFeatures($feature_arr){
    $rm = array();
    // null / non-array input: nothing to parse (foreach would raise a warning)
    if(!is_array($feature_arr)){
        return $rm;
    }
    foreach($feature_arr as $feat){
        // an entry without a 'value' is treated like an empty one and skipped
        if(!isset($feat['value'])){
            continue;
        }
        // NOTE(review): utf8_encode() is deprecated as of PHP 8.2
        $feature_raw = utf8_encode(trim($feat['value']));
        if(strlen($feature_raw)){
            //collapse every run of whitespace (spaces, newlines) into one space
            $feature_raw = preg_replace('/\s+/', ' ', $feature_raw);
            print_r($feature_raw);
            echo "\n*** \n";
        }
    }
    return $rm;
}
@@ -280,9 +253,7 @@ function parseFeatures($feature_arr){
280
253
*/
281
254
function parseReferences ($ ref_arr ){
282
255
$ rm = array ();
283
-
284
256
$ reference_fields = array ("AUTHORS " , "TITLE " , "JOURNAL " , "MEDLINE " , "PUBMED " , "REMARK " );
285
-
286
257
foreach ($ ref_arr as $ reference ){
287
258
$ ref_raw = utf8_encode (trim ($ reference ['value ' ]));
288
259
if (strlen ($ ref_raw )){
@@ -538,6 +509,11 @@ function getStrandedness($aStr){
538
509
}
539
510
}
540
511
512
+ /**
513
+ * Get a copy of the complete feature map with definition and
514
+ * comments (when available). See http://www.insdc.org/documents/feature-table
515
+ * for reference
516
+ */
541
517
function getFeatures (){
542
518
$ features = array (
543
519
'assembly_gap ' => array (
@@ -968,6 +944,7 @@ function getFeatures(){
968
944
plasmids can contain multiple origins of transfer "
969
945
),
970
946
);
947
+ return $ features ;
971
948
}
972
949
973
950
/**
0 commit comments