Skip to content

Commit 1b58c8f

Browse files
committed
finished parsing features and added contig parser
1 parent 29be68c commit 1b58c8f

File tree

1 file changed

+86
-11
lines changed

1 file changed

+86
-11
lines changed

genbank/genbank.php

+86-11
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ function process(){
117117
$sectionsRaw = $this->parseGenbankRaw($gb_record_str);
118118
/**
119119
* SECTIONS being parsed:
120-
* locus, definition, accession, version, keywords, segment, source, reference,
120+
* locus, definition, accession, version, keywords, segment, source, contig, reference, features
121121
*/
122122
//get locus section(s)
123123
$locus = $this->retrieveSections("LOCUS", $sectionsRaw);
@@ -146,14 +146,21 @@ function process(){
146146
//get the source section
147147
$source = $this->retrieveSections("SOURCE", $sectionsRaw);
148148
$parsed_source_arr = $this->parseSource($source);
149+
150+
$contig = $this->retrieveSections("CONTIG", $sectionsRaw);
151+
if(!empty($contig)){
152+
$parsed_contig_arr = $this->parseContig($contig);
153+
}
154+
155+
149156
//get the reference section
150157
$references = $this->retrieveSections("REFERENCE", $sectionsRaw);
151158
$parsed_refs_arr = $this->parseReferences($references);
152159
$gb_res = "gi:".$parsed_version_arr['gi'];
153160
$gb_label = utf8_encode(htmlspecialchars($parsed_definition_arr[0]));
154161

155162
parent::AddRDF(
156-
parent::describeIndividual($gb_res, $gb_label, $this->getVoc()."genbank- record").
163+
parent::describeIndividual($gb_res, $gb_label, $this->getVoc()."genbank-record").
157164
parent::triplifyString($gb_res, $this->getVoc().'sequence-length', $parsed_locus_arr[0]['sequence_length']).
158165
parent::triplifyString($gb_res, $this->getVoc().'strandedness', $parsed_locus_arr[0]['strandedness']).
159166
parent::triplify($gb_res, "rdf:type", $this->getRes().$parsed_locus_arr[0]['mol_type']).
@@ -165,7 +172,54 @@ function process(){
165172
);
166173

167174
foreach ($parsed_features_arr as $aFeature) {
168-
print_r($aFeature);
175+
//look up the description (definition and optional comment) for this feature type
176+
$type = $aFeature['type'];
177+
$feat_desc = $this->getFeatures($type);
178+
$label = preg_replace('/\s\s*/', ' ', $feat_desc['definition']);
179+
$comment = null;
180+
$value = $aFeature['value'];
181+
$value_arr = explode("/", $value);
182+
$location = preg_replace('/\n/', '',$value_arr[0]);
183+
$class_id = parent::getVoc().md5($type);
184+
$feat_res = parent::getRes().md5($type.$location.$gb_res);
185+
$feat_label = utf8_encode($type." ".$location." for ".$gb_res);
186+
187+
188+
if(isset($feat_desc['comment'])){
189+
$comment = $feat_desc['comment'];
190+
$comment = preg_replace('/\s\s*/', ' ', $comment);
191+
$label .= " ".$comment;
192+
}
193+
194+
195+
parent::AddRDF(
196+
parent::describeClass($class_id, $label, parent::getVoc()."Feature").
197+
parent::describeIndividual($feat_res, $feat_label, $class_id).
198+
parent::triplify($gb_res, $this->getVoc()."has-feature", $feat_res)
199+
);
200+
201+
202+
foreach($value_arr as $aL){
203+
//check if aL has an equals in it
204+
$p = "/(\S+)\=(.*)/";
205+
preg_match($p, $aL, $m);
206+
if(count($m)){
207+
if($m[1] == "db_xref"){
208+
parent::AddRDF(
209+
parent::triplify($feat_res, "rdfs:seeAlso", str_replace("\"", "", $m[2]))
210+
);
211+
}else{
212+
parent::AddRDF(
213+
parent::triplifyString($feat_res, $this->getVoc().$m[1], utf8_encode(str_replace("\"", "", $m[2])))
214+
);
215+
}
216+
}
217+
}
218+
219+
220+
221+
222+
169223
}
170224

171225
foreach($parsed_accession_arr[0] as $acc ){
@@ -179,7 +233,13 @@ function process(){
179233
parent::triplifyString($gb_res, $this->getVoc()."versioned-accession", $parsed_version_arr['versioned_accession'])
180234
);
181235
}
182-
236+
if(isset($parsed_contig_arr)){
237+
foreach ($parsed_contig_arr as $aContig) {
238+
parent::AddRDF(
239+
parent::triplifyString($gb_res, $this->getVoc()."contig", parent::safeLiteral($aContig))
240+
);
241+
}
242+
}
183243
foreach($parsed_keyword_arr as $akw){
184244
parent::AddRDF(
185245
parent::triplifyString($gb_res, $this->getVoc()."keyword", $akw)
@@ -228,7 +288,6 @@ function process(){
228288
if(count($matches) == 0){
229289
$gb_record_str .= $aLine;
230290
}
231-
exit;
232291
}//while
233292

234293
}
@@ -240,8 +299,6 @@ function parseFeatures($feature_arr){
240299

241300
$out = array();
242301
//iterate over the raw feature entries
243-
$features = $this->getFeatures();
244-
$feat_keys = array_keys($features);
245302
foreach($feature_arr as $feat){
246303
$feature_raw = utf8_encode(trim($feat['value']));
247304

@@ -355,6 +412,14 @@ function parseSource($source_arr){
355412
return $rm;
356413
}
357414

415+
/**
 * Parse the CONTIG section(s) of a GenBank record.
 *
 * Each entry's 'value' is trimmed and passed through utf8_encode().
 * Entries without a usable 'value' (not an array, key missing, non-scalar,
 * or empty after trimming) are skipped, so no empty contig string is
 * returned and no undefined-index notice is raised.
 *
 * @param array $source_arr section entries; each one is expected to hold the
 *                          raw section text under the 'value' key
 * @return array list of cleaned contig strings, in input order
 */
function parseContig($source_arr){
	$contigs = array();
	foreach($source_arr as $section){
		// tolerate malformed entries instead of emitting a notice and an empty value
		if(!is_array($section) || !isset($section['value']) || !is_scalar($section['value'])){
			continue;
		}
		$contig = utf8_encode(trim((string) $section['value']));
		if($contig !== ''){
			$contigs[] = $contig;
		}
	}
	return $contigs;
}
358423

359424
/**
360425
* Parse the segment section according to section 3.4.9 of
@@ -529,11 +594,11 @@ function getStrandedness($aStr){
529594
}
530595

531596
/**
532-
* Get a copy of the complete feature map with definition and
533-
* comments (when available). See http://www.insdc.org/documents/feature-table
534-
* for reference
597+
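* Get the description (definition and, when available, comment) of a single
598+
* feature key. An unknown key raises a notice and the complete feature map is returned instead; an empty key raises an error and exits. See http://www.insdc.org/documents/feature-table
599+
* for reference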
535600
*/
536-
function getFeatures(){
601+
function getFeatures($aKey){
537602
$features = array(
538603
'assembly_gap' => array(
539604
'definition' => 'gap between two components of a CON record that is part of a genome assembly',
@@ -963,6 +1028,16 @@ function getFeatures(){
9631028
plasmids can contain multiple origins of transfer"
9641029
),
9651030
);
1031+
if(strlen($aKey)){
1032+
if(array_key_exists($aKey, $features)){
1033+
return $features[$aKey];
1034+
}else{
1035+
trigger_error("Could not find key: ".$aKey."\n", E_USER_NOTICE);
1036+
}
1037+
}else{
1038+
trigger_error("Invalid key: ".$key."\n", E_USER_ERROR);
1039+
exit;
1040+
}
9661041
return $features;
9671042
}
9681043

0 commit comments

Comments
 (0)