@@ -117,7 +117,7 @@ function process(){
117117 $ sectionsRaw = $ this ->parseGenbankRaw ($ gb_record_str );
118118 /**
119119 * SECTIONS being parsed:
120- * locus, definition, accession, version, keywords, segment, source, reference,
120+ * locus, definition, accession, version, keywords, segment, source, reference, features
121121 */
122122 //get locus section(s)
123123 $ locus = $ this ->retrieveSections ("LOCUS " , $ sectionsRaw );
@@ -146,14 +146,21 @@ function process(){
146146 //get the source section
147147 $ source = $ this ->retrieveSections ("SOURCE " , $ sectionsRaw );
148148 $ parsed_source_arr = $ this ->parseSource ($ source );
149+
150+ $ contig = $ this ->retrieveSections ("CONTIG " , $ sectionsRaw );
151+ if (!empty ($ contig )){
152+ $ parsed_contig_arr = $ this ->parseContig ($ contig );
153+ }
154+
155+
149156 //get the reference section
150157 $ references = $ this ->retrieveSections ("REFERENCE " , $ sectionsRaw );
151158 $ parsed_refs_arr = $ this ->parseReferences ($ references );
152159 $ gb_res = "gi: " .$ parsed_version_arr ['gi ' ];
153160 $ gb_label = utf8_encode (htmlspecialchars ($ parsed_definition_arr [0 ]));
154161
155162 parent ::AddRDF (
156- parent ::describeIndividual ($ gb_res , $ gb_label , $ this ->getVoc ()."genbank- record " ).
163+ parent ::describeIndividual ($ gb_res , $ gb_label , $ this ->getVoc ()."genbank-record " ).
157164 parent ::triplifyString ($ gb_res , $ this ->getVoc ().'sequence-length ' , $ parsed_locus_arr [0 ]['sequence_length ' ]).
158165 parent ::triplifyString ($ gb_res , $ this ->getVoc ().'strandedness ' , $ parsed_locus_arr [0 ]['strandedness ' ]).
159166 parent ::triplify ($ gb_res , "rdf:type " , $ this ->getRes ().$ parsed_locus_arr [0 ]['mol_type ' ]).
@@ -165,7 +172,54 @@ function process(){
165172 );
166173
167174 foreach ($ parsed_features_arr as $ aFeature ) {
168- print_r ($ aFeature );
175+ //getFeatures
176+ $ type = $ aFeature ['type ' ];
177+ $ feat_desc = $ this ->getFeatures ($ type );
178+ $ label = preg_replace ('/\s\s*/ ' , ' ' , $ feat_desc ['definition ' ]);
179+ $ comment = null ;
180+ $ value = $ aFeature ['value ' ];
181+ $ value_arr = explode ("/ " , $ value );
182+ $ location = preg_replace ('/\n/ ' , '' ,$ value_arr [0 ]);
183+ $ class_id = parent ::getVoc ().md5 ($ type );
184+ $ feat_res = parent ::getRes ().md5 ($ type .$ location .$ gb_res );
185+ $ feat_label = utf8_encode ($ type ." " .$ location ." for " .$ gb_res );
186+
187+
188+ if (isset ($ feat_desc ['comment ' ])){
189+ $ comment = $ feat_desc ['comment ' ];
190+ $ comment = preg_replace ('/\s\s*/ ' , ' ' , $ comment );
191+ $ label .= " " .$ comment ;
192+ }
193+
194+
195+ parent ::AddRDF (
196+ parent ::describeClass ($ class_id , $ label , parent ::getVoc ()."Feature " ).
197+ parent ::describeIndividual ($ feat_res , $ feat_label , $ class_id ).
198+ parent ::triplify ($ gb_res , $ this ->getVoc ()."has-feature " , $ feat_res )
199+ );
200+
201+
202+ foreach ($ value_arr as $ aL ){
203+ //check if aL has an equals in it
204+ $ p = "/(\S+)\=(.*)/ " ;
205+ preg_match ($ p , $ aL , $ m );
206+ if (count ($ m )){
207+ if ($ m [1 ] == "db_xref " ){
208+ parent ::AddRDF (
209+ parent ::triplify ($ feat_res , "rdfs:seeAlso " , str_replace ("\"" , "" , $ m [2 ]))
210+ );
211+ }else {
212+ parent ::AddRDF (
213+ parent ::triplifyString ($ feat_res , $ this ->getVoc ().$ m [1 ], utf8_encode (str_replace ("\"" , "" , $ m [2 ])))
214+ );
215+ }
216+ }
217+ }
218+
219+
220+
221+
222+
169223 }
170224
171225 foreach ($ parsed_accession_arr [0 ] as $ acc ){
@@ -179,7 +233,13 @@ function process(){
179233 parent ::triplifyString ($ gb_res , $ this ->getVoc ()."versioned-accession " , $ parsed_version_arr ['versioned_accession ' ])
180234 );
181235 }
182-
236+ if (isset ($ parsed_contig_arr )){
237+ foreach ($ parsed_contig_arr as $ aContig ) {
238+ parent ::AddRDF (
239+ parent ::triplifyString ($ gb_res , $ this ->getVoc ()."contig " , parent ::safeLiteral ($ aContig ))
240+ );
241+ }
242+ }
183243 foreach ($ parsed_keyword_arr as $ akw ){
184244 parent ::AddRDF (
185245 parent ::triplifyString ($ gb_res , $ this ->getVoc ()."keyword " , $ akw )
@@ -228,7 +288,6 @@ function process(){
228288 if (count ($ matches ) == 0 ){
229289 $ gb_record_str .= $ aLine ;
230290 }
231- exit ;
232291 }//while
233292
234293 }
@@ -240,8 +299,6 @@ function parseFeatures($feature_arr){
240299
241300 $ out = array ();
242301 //get a copy of the features array
243- $ features = $ this ->getFeatures ();
244- $ feat_keys = array_keys ($ features );
245302 foreach ($ feature_arr as $ feat ){
246303 $ feature_raw = utf8_encode (trim ($ feat ['value ' ]));
247304
@@ -355,6 +412,14 @@ function parseSource($source_arr){
355412 return $ rm ;
356413 }
357414
/**
 * Collect the cleaned text of every CONTIG section entry.
 *
 * Each entry is read through its 'value' element, stripped of surrounding
 * whitespace and then passed through utf8_encode().
 *
 * @param array $source_arr section entries, each holding a 'value' string
 * @return array the cleaned values, indexed from 0 in input order
 */
function parseContig($source_arr){
	$contigs = array();
	foreach($source_arr as $section){
		$trimmed = trim($section['value']);
		array_push($contigs, utf8_encode($trimmed));
	}
	return $contigs;
}
358423
359424 /**
360425 * Parse the segment section according to section 3.4.9 of
@@ -529,11 +594,11 @@ function getStrandedness($aStr){
529594 }
530595
531596 /**
532- * Get a copy of the complete feature map with definition and
533- * comments (when available). See http://www.insdc.org/documents/feature-table
534- * for reference
597+ * Get the feature description (definition and, when available,
598+ * comment) for a given feature key. See http://www.insdc.org/documents/feature-table
599+ * for reference.
535600 */
536- function getFeatures (){
601+ function getFeatures ($ aKey ){
537602 $ features = array (
538603 'assembly_gap ' => array (
539604 'definition ' => 'gap between two components of a CON record that is part of a genome assembly ' ,
@@ -963,6 +1028,16 @@ function getFeatures(){
9631028 plasmids can contain multiple origins of transfer "
9641029 ),
9651030 );
1031+ if (strlen ($ aKey )){
1032+ if (array_key_exists ($ aKey , $ features )){
1033+ return $ features [$ aKey ];
1034+ }else {
1035+ trigger_error ("Could not find key: " .$ aKey ."\n" , E_USER_NOTICE );
1036+ }
1037+ }else {
1038+ trigger_error ("Invalid key: " .$ key ."\n" , E_USER_ERROR );
1039+ exit ;
1040+ }
9661041 return $ features ;
9671042 }
9681043
0 commit comments