@@ -117,7 +117,7 @@ function process(){
117
117
$ sectionsRaw = $ this ->parseGenbankRaw ($ gb_record_str );
118
118
/**
119
119
* SECTIONS being parsed:
120
- * locus, definition, accession, version, keywords, segment, source, reference,
120
+ * locus, definition, accession, version, keywords, segment, source, reference, features
121
121
*/
122
122
//get locus section(s)
123
123
$ locus = $ this ->retrieveSections ("LOCUS " , $ sectionsRaw );
@@ -146,14 +146,21 @@ function process(){
146
146
//get the source section
147
147
$ source = $ this ->retrieveSections ("SOURCE " , $ sectionsRaw );
148
148
$ parsed_source_arr = $ this ->parseSource ($ source );
149
+
150
+ $ contig = $ this ->retrieveSections ("CONTIG " , $ sectionsRaw );
151
+ if (!empty ($ contig )){
152
+ $ parsed_contig_arr = $ this ->parseContig ($ contig );
153
+ }
154
+
155
+
149
156
//get the reference section
150
157
$ references = $ this ->retrieveSections ("REFERENCE " , $ sectionsRaw );
151
158
$ parsed_refs_arr = $ this ->parseReferences ($ references );
152
159
$ gb_res = "gi: " .$ parsed_version_arr ['gi ' ];
153
160
$ gb_label = utf8_encode (htmlspecialchars ($ parsed_definition_arr [0 ]));
154
161
155
162
parent ::AddRDF (
156
- parent ::describeIndividual ($ gb_res , $ gb_label , $ this ->getVoc ()."genbank- record " ).
163
+ parent ::describeIndividual ($ gb_res , $ gb_label , $ this ->getVoc ()."genbank-record " ).
157
164
parent ::triplifyString ($ gb_res , $ this ->getVoc ().'sequence-length ' , $ parsed_locus_arr [0 ]['sequence_length ' ]).
158
165
parent ::triplifyString ($ gb_res , $ this ->getVoc ().'strandedness ' , $ parsed_locus_arr [0 ]['strandedness ' ]).
159
166
parent ::triplify ($ gb_res , "rdf:type " , $ this ->getRes ().$ parsed_locus_arr [0 ]['mol_type ' ]).
@@ -165,7 +172,54 @@ function process(){
165
172
);
166
173
167
174
foreach ($ parsed_features_arr as $ aFeature ) {
168
- print_r ($ aFeature );
175
+ //getFeatures
176
+ $ type = $ aFeature ['type ' ];
177
+ $ feat_desc = $ this ->getFeatures ($ type );
178
+ $ label = preg_replace ('/\s\s*/ ' , ' ' , $ feat_desc ['definition ' ]);
179
+ $ comment = null ;
180
+ $ value = $ aFeature ['value ' ];
181
+ $ value_arr = explode ("/ " , $ value );
182
+ $ location = preg_replace ('/\n/ ' , '' ,$ value_arr [0 ]);
183
+ $ class_id = parent ::getVoc ().md5 ($ type );
184
+ $ feat_res = parent ::getRes ().md5 ($ type .$ location .$ gb_res );
185
+ $ feat_label = utf8_encode ($ type ." " .$ location ." for " .$ gb_res );
186
+
187
+
188
+ if (isset ($ feat_desc ['comment ' ])){
189
+ $ comment = $ feat_desc ['comment ' ];
190
+ $ comment = preg_replace ('/\s\s*/ ' , ' ' , $ comment );
191
+ $ label .= " " .$ comment ;
192
+ }
193
+
194
+
195
+ parent ::AddRDF (
196
+ parent ::describeClass ($ class_id , $ label , parent ::getVoc ()."Feature " ).
197
+ parent ::describeIndividual ($ feat_res , $ feat_label , $ class_id ).
198
+ parent ::triplify ($ gb_res , $ this ->getVoc ()."has-feature " , $ feat_res )
199
+ );
200
+
201
+
202
+ foreach ($ value_arr as $ aL ){
203
+ //check if aL has an equals in it
204
+ $ p = "/(\S+)\=(.*)/ " ;
205
+ preg_match ($ p , $ aL , $ m );
206
+ if (count ($ m )){
207
+ if ($ m [1 ] == "db_xref " ){
208
+ parent ::AddRDF (
209
+ parent ::triplify ($ feat_res , "rdfs:seeAlso " , str_replace ("\"" , "" , $ m [2 ]))
210
+ );
211
+ }else {
212
+ parent ::AddRDF (
213
+ parent ::triplifyString ($ feat_res , $ this ->getVoc ().$ m [1 ], utf8_encode (str_replace ("\"" , "" , $ m [2 ])))
214
+ );
215
+ }
216
+ }
217
+ }
218
+
219
+
220
+
221
+
222
+
169
223
}
170
224
171
225
foreach ($ parsed_accession_arr [0 ] as $ acc ){
@@ -179,7 +233,13 @@ function process(){
179
233
parent ::triplifyString ($ gb_res , $ this ->getVoc ()."versioned-accession " , $ parsed_version_arr ['versioned_accession ' ])
180
234
);
181
235
}
182
-
236
+ if (isset ($ parsed_contig_arr )){
237
+ foreach ($ parsed_contig_arr as $ aContig ) {
238
+ parent ::AddRDF (
239
+ parent ::triplifyString ($ gb_res , $ this ->getVoc ()."contig " , parent ::safeLiteral ($ aContig ))
240
+ );
241
+ }
242
+ }
183
243
foreach ($ parsed_keyword_arr as $ akw ){
184
244
parent ::AddRDF (
185
245
parent ::triplifyString ($ gb_res , $ this ->getVoc ()."keyword " , $ akw )
@@ -228,7 +288,6 @@ function process(){
228
288
if (count ($ matches ) == 0 ){
229
289
$ gb_record_str .= $ aLine ;
230
290
}
231
- exit ;
232
291
}//while
233
292
234
293
}
@@ -240,8 +299,6 @@ function parseFeatures($feature_arr){
240
299
241
300
$ out = array ();
242
301
//get a copy of the features array
243
- $ features = $ this ->getFeatures ();
244
- $ feat_keys = array_keys ($ features );
245
302
foreach ($ feature_arr as $ feat ){
246
303
$ feature_raw = utf8_encode (trim ($ feat ['value ' ]));
247
304
@@ -355,6 +412,14 @@ function parseSource($source_arr){
355
412
return $ rm ;
356
413
}
357
414
415
/**
 * Collect the cleaned text of every CONTIG section entry.
 *
 * Walks the given list, takes the value entry of each element, trims
 * surrounding whitespace, runs it through utf8_encode() and appends the
 * result to the output list. No further parsing of the text is done here.
 *
 * NOTE(review): process() passes in the result of retrieveSections() for
 * the CONTIG section; the element layout is presumably the same one used
 * by the other section parsers - confirm against retrieveSections().
 *
 * @param array $source_arr list of section entries, each indexed by a value key
 * @return array list of trimmed, utf8_encode()d strings, one per entry
 *               (empty when the input list is empty)
 */
+ function parseContig ($ source_arr ){
416
+ $ rm = array ();
417
+ foreach ($ source_arr as $ s ){
418
// strip leading/trailing whitespace first, then re-encode the raw text
+ $ s_r = utf8_encode (trim ($ s ['value ' ]));
419
+ $ rm [] = $ s_r ;
420
+ }
421
+ return $ rm ;
422
+ }
358
423
359
424
/**
360
425
* Parse the segment section according to section 3.4.9 of
@@ -529,11 +594,11 @@ function getStrandedness($aStr){
529
594
}
530
595
531
596
/**
532
- * Get a copy of the complete feature map with definition and
533
- * comments (when available). See http://www.insdc.org/documents/feature-table
534
- * for reference
597
+ * Get a feature map with definition and
598
+ * comments (when available) for a given key . See http://www.insdc.org/documents/feature-table
599
+ * for reference
535
600
*/
536
- function getFeatures (){
601
+ function getFeatures ($ aKey ){
537
602
$ features = array (
538
603
'assembly_gap ' => array (
539
604
'definition ' => 'gap between two components of a CON record that is part of a genome assembly ' ,
@@ -963,6 +1028,16 @@ function getFeatures(){
963
1028
plasmids can contain multiple origins of transfer "
964
1029
),
965
1030
);
1031
// Resolve a single feature description from the $features map built above.
//  - non-empty key present in the map: return that entry
//  - non-empty key missing from the map: raise a notice; control then
//    continues with the statements that follow this block
//  - empty key: raise a user error and stop the script
if(strlen($aKey)){
	if(array_key_exists($aKey, $features)){
		return $features[$aKey];
	}else{
		trigger_error("Could not find key: ".$aKey."\n", E_USER_NOTICE);
	}
}else{
	// Report the parameter that was actually received. The previous code
	// interpolated $key, which is not defined in this function and only
	// produced an extra "undefined variable" notice.
	trigger_error("Invalid key: ".$aKey."\n", E_USER_ERROR);
	exit;
}
966
1041
return $ features ;
967
1042
}
968
1043
0 commit comments