@@ -39,28 +39,17 @@ function __construct($argv) {
3939		parent ::__construct ($ argv"sider " );
4040
4141		// set and print application parameters 
42- 		parent ::addParameter ('files ' ,true ,'all|label_mapping|adverse_effects_raw|indications_raw|meddra_freq_parsed  ' ,'all ' ,'all or comma-separated list of ontology short names to process ' );
42+ 		parent ::addParameter ('files ' ,true ,'all|indications|se|freq  ' ,'all ' ,'all or comma-separated list of ontology short names to process ' );
4343		parent ::addParameter ('download_url ' ,false ,null ,'http://sideeffects.embl.de/media/download/ ' );
4444
4545		parent ::initialize ();
4646	}
4747
4848	function  run () {
49- 
50- 		if (parent ::getParameterValue ('download ' ) === true ) 
51- 		{
52- 			$ this download ();
53- 		}
54- 		if (parent ::getParameterValue ('process ' ) === true ) 
55- 		{
56- 			$ this process ();
57- 		}
58- 		
59- 	}
60- 		
61- 	function  download (){
6249		$ idirparent ::getParameterValue ('indir ' );
50+ 		$ odirparent ::getParameterValue ('outdir ' );
6351		$ filesparent ::getParameterValue ('files ' );
52+ 		$ dataset_description'' ;
6453
6554		if ($ files'all ' ) {
6655			$ filesexplode ('| ' , parent ::getParameterList ('files ' ));
@@ -70,8 +59,11 @@ function download(){
7059		}
7160
7261		foreach ($ filesAS  $ file
73- 			$ lfile$ idir$ file'.tsv.gz ' ;
74- 			$ rfileparent ::getParameterValue ('download_url ' ).$ file'.tsv.gz ' ;
62+ 			$ f$ file
63+ 			if ($ file"freq " ) $ f"all_ " .$ file
64+ 			$ f"meddra_ " .$ f".tsv.gz " ;
65+ 			$ lfile$ idir$ f
66+ 			$ rfileparent ::getParameterValue ('download_url ' ).$ f
7567			if (!file_exists ($ lfileparent ::getParameterValue ('download ' ) == 'true ' ) {
7668				echo  "downloading  $ file...  " ;
7769				$ retfile_get_contents ($ rfile
@@ -86,43 +78,15 @@ function download(){
8678				}		
8779				echo  "done! " .PHP_EOL ;
8880			}
89- 		}//foreach 
90- 	}
91- 
92- 	function  process (){
93- 
94- 		$ idirparent ::getParameterValue ('indir ' );
95- 		$ odirparent ::getParameterValue ('outdir ' );
96- 		$ filesparent ::getParameterValue ('files ' );
97- 		
98- 		if ($ files'all ' ) {
99- 			$ filesexplode ('| ' , parent ::getParameterList ('files ' ));
100- 			array_shift ($ files
101- 		} else  {
102- 			$ filesexplode (', ' , parent ::getParameterValue ('files ' ));
103- 		}
104- 		
105- 		parent ::setCheckpoint ('dataset ' );
106- 
107- 		$ dataset_description'' ;
108- 
109- 		$ graph_uriparent ::getGraphURI ();
110- 		if (parent ::getParameterValue ('dataset_graph ' ) == true ) parent ::setGraphURI (parent ::getDatasetURI ());
11181
112- 		foreach ($ filesAS  $ file
113- 			$ lfile$ idir$ file'.tsv.gz ' ;
114- 			$ rfileparent ::getParameterValue ('download_url ' ).$ file'.tsv.gz ' ;
115- 
116- 			echo  "Processing  $ file...  " ;
82+ 			echo  "Processing  $ f...  " ;
11783			parent ::setReadFile ($ lfiletrue );	
11884
11985			$ suffixparent ::getParameterValue ('output_format ' );
12086			$ ofile"sider- " .$ file'. ' .$ suffix
12187			$ gzfalse ;
12288
123- 			if (strstr (parent ::getParameterValue ('output_format ' ), "gz " )) {
124- 				$ gztrue ;
125- 			}
89+ 			if (strstr (parent ::getParameterValue ('output_format ' ), "gz " )) $ gztrue ;
12690
12791			parent ::setWriteFile ($ odir$ ofile$ gz
12892			$ this $ file
@@ -320,43 +284,83 @@ function GetPCFromStereo($id)
320284	Format: label identifier, concept id, name of side effect (as found on the label) 
321285	*/ 
322286
323- 	function  adverse_effects_raw ()
287+ 	function  se ()
324288	{
325289		$ declarednull ;
326290
327291		parent ::setCheckpoint ('file ' );
328- 		while ($ l$ this GetReadFile ()->Read ()) {
292+ 		while ($ l$ this getReadFile ()->Read ()) {
329293			$ aexplode ("\t" ,$ l
330- 			$ id"sider: " .urlencode ($ a0 ]);
331- 			$ cui"umls: " .$ a1 ];
332- 			$ cui_labelstrtolower (trim ($ a2 ]));
294+ 			if (count ($ a6 ) {
295+ 				trigger_error ("Expecting 6 columns, found  " .count ($ a" instead. " , E_USER_ERROR );
296+ 				exit ;
297+ 			}
298+ 			$ stitch_flat"stitch: " .$ a0 ];
299+ 			$ stitch_stereo"stitch: " .$ a1 ];
300+ 			$ cui"umls: " .$ a2 ];
301+ 			$ term_type$ a3 ];
302+ 			$ term_type_cui$ a4 ];
303+ 			$ term_type_label$ a5 ];
304+ 
305+ 			if ($ term_type'LLT ' ) continue ;
306+ 
307+ 			$ id"sider: " .md5 ("se " .$ stitch_flat$ cui
308+ 
309+ 			$ cui_labelstrtolower (trim ($ term_type_label
310+ 			if (!isset ($ declared$ cui
311+ 				parent ::addRDF (
312+ 					parent ::describeClass ($ cui$ cui_label
313+ 				);
314+ 				$ declared$ cui'' ;
315+ 			}
316+ 
333317			parent ::addRDF (
334- 				parent ::describeClass ($ cui$ cui_label
335- 				parent ::triplify ($ idparent ::getVoc ()."side-effect " , $ cui
318+ 				parent ::describeIndividual ($ id"$ stitch_flat  $ cui_label side effect " , parent ::getVoc ()."Drug-Side-Effect " ).
319+ 				parent ::triplify ($ idparent ::getVoc ()."side-effect " , $ cui
320+ 				parent ::triplify ($ idparent ::getVoc ()."stitch-flat " , $ stitch_flat
321+ 				parent ::triplify ($ idparent ::getVoc ()."stitch-stereo " , $ stitch_stereo
336322			);
337323			parent ::setCheckpoint ('record ' );
338324		}
325+ 
339326		parent ::setCheckpoint ('file ' );
340327	}
341328
342- 	function  indications_raw ()
329+ 	function  indications ()
343330	{
344331		$ declarednull ;
345- 
332+ 		 $ list  =  null ; 
346333		parent ::setCheckpoint ('file ' );
347- 		while ($ l$ this GetReadFile ()->Read ()) {
334+ 		while ($ l$ this getReadFile ()->Read ()) {
348335			parent ::setCheckpoint ('record ' );
349336
350337			$ aexplode ("\t" ,$ l
351- 			$ id"sider: " .urlencode ($ a0 ]);
352- 			$ cui"umls: " .$ a1 ];
353- 			$ cui_labelstrtolower (trim ($ a2 ]));
338+ 			list ($ stitch_flat$ cui$ provenance$ cui_label$ term_type$ term_cui$ term_cui_label$ a
339+ 			$ id"sider: " .md5 ("i " .$ stitch_flat$ cui
340+ 
341+ 			if ($ term_type"LLT "  or  isset ($ list$ idcontinue ;
342+ 			if (!isset ($ list$ id
343+ 				$ list$ id'' ;
344+ 			}
345+ 
346+ 
347+ 			$ stitch_id"stitch: $ stitch_flat ;
348+ 			$ meddra_id"meddra: $ cui ;
349+ 
350+ 			if (!isset ($ declared$ cui
351+ 				parent ::addRDF (
352+ 					parent ::describeClass ($ meddra_id$ cui_label
353+ 				);
354+ 				$ declared$ cui'' ;
355+ 			}
354356
355357			parent ::addRDF (
356- 				parent ::describeClass ($ cui$ cui_label
357- 				parent ::triplify ($ idparent ::getVoc ()."indication " , $ cui
358+ 				parent ::describeIndividual ($ id$ stitch_id" -  " .$ meddra_id" indication  " , parent ::getVoc ()."Drug-Indication-Association " ).
359+ 				parent ::describeClass (parent ::getVoc ()."Drug-Indication-Association " ,"Drug-Disease Association " ).
360+ 				parent ::triplify ($ idparent ::getVoc ()."drug " , $ stitch_id
361+ 				parent ::triplify ($ idparent ::getVoc ()."indication " , $ meddra_id
362+ 				parent ::triplifyString ($ idparent ::getVoc ()."provenance " , $ provenance
358363			);
359- 			parent ::setCheckpoint ('record ' );
360364
361365		}
362366		parent ::setCheckpoint ('file ' );
@@ -384,30 +388,26 @@ function indications_raw()
384388matches the upper bound. Due to the nature of the data, there can be more than one frequency for the same label, 
385389e.g. from different clinical trials or for different levels of severeness. 
386390*/ 
387- 	function  meddra_freq_parsed ()
391+ 	function  freq ()
388392	{
389- 		$ cols12 ;
393+ 		$ cols10 ;
390394		$ i1 ;
391395		parent ::setCheckpoint ('file ' );
392396		while ($ lparent ::getReadFile ()->read ()) {
393397			parent ::setCheckpoint ('record ' );
394- 
395398			$ aexplode ("\t" ,str_replace ("% " ,"" ,$ l
396399			if (count ($ a$ cols
397- 				trigger_error ("Expecting  $ cols, but found  " .count ($ a" instead... skipping file! " );
400+ 				trigger_error ("Expecting  $ cols, but found  " .count ($ a" instead... skipping file! " ,  E_USER_ERROR );
398401				return  false ; 
399402			}
400- 			$ label$ a2 ];
401- 			$ label_idparent ::getNamespace ().urlencode ($ label
402- 			$ effect_id"umls: " .$ a3 ];
403- 			
404- 			$ idparent ::getRes ().md5 ($ a2 ].$ a3 ].$ a6 ]);
405- 			$ label"$ a4 ] in  $ label  $ a2 ]" ;
403+ 			list ($ stitch_flat$ stitch_stereo$ cui$ placebo$ freq$ freq_lower$ freq_upper$ concept_type$ meddra_concept_id$ meddra_concept_label
404+ 			$ id = "stitch_resource: " .md5 ("se_freq " .$ l
405+ 			$ label"side effect frequency of  $ meddra_concept_label for  $ stitch_id ;
406406			parent ::addRDF (
407- 				parent ::describeIndividual ($ id$ labelparent ::getVoc ()."Drug-Effect " ).
408- 				parent ::describeClass (parent ::getVoc ()."Drug-Effect " ,"SIDER Drug-Effect " ).
409- 				parent ::triplify ($ idparent ::getVoc ()."drug " , $ label_id 
410- 				parent ::triplify ($ idparent ::getVoc ()."effect " , $ effect_id 
407+ 				parent ::describeIndividual ($ id$ labelparent ::getVoc ()."Drug-Effect-Frequency  " ).
408+ 				parent ::describeClass (parent ::getVoc ()."Drug-Effect-Frequency  " ,"SIDER Drug-Effect and Frequency  " ).
409+ 				parent ::triplify ($ idparent ::getVoc ()."drug " , $ stitch_flat 
410+ 				parent ::triplify ($ idparent ::getVoc ()."effect " , " meddra: " . $ meddra_concept_id 
411411			);
412412
413413			if ($ a5 ]){
@@ -416,25 +416,25 @@ function meddra_freq_parsed()
416416				);
417417			}
418418
419- 				$ fid$ idmd5 ($ a5 ].$ a6 ].$ a7 ].$ a8 ]);
420- //				$fid = $id.($i++); 
421- 				$ flabel$ a6 ];
422- 				$ ftypeparent ::getVoc ().ucfirst ($ a6 ])."-Frequency " ;
423419				$ numberfalse ;
424- 				if (is_numeric ($ a [ 6 ] )) {
425- 					$ flabel$ a [ 6 ] ."% " ;
426- 					$ ftype_label"Specified -Frequency ;
420+ 				if (is_numeric ($ freq 
421+ 					$ flabel$ freq "% " ;
422+ 					$ ftype_label"Exact -Frequency ;
427423					$ ftypeparent ::getVoc ().$ ftype_label
428424					$ numbertrue ;
425+ 				} else  {
426+ 					$ flabel$ freq
427+ 					$ ftype_label"Qualitative-Frequency " ;
428+ 					$ ftypeparent ::getVoc ()."$ ftype_label;  
429429				} 
430- 				if ($ a [ 7 ]  != $ a [ 8 ] ) {
431- 					$ flabel"( $ a [ 7 ] - $ a [ 8 ] ) " ;
430+ 				if( $ freq_lower  !=  $ freq_upper ) { 
431+ 					 $ flabel .=  "$ freq_lower - $ freq_upper 
432432					$ ftype_label"Range-Frequency " ;
433433					$ ftypeparent ::getVoc ().$ ftype_label
434- 				}
434+ 				}  
435435
436436				parent ::addRDF (
437- 					parent ::triplify ($ idparent ::getVoc ()."reported-frequency " ,$ fid
437+ 					parent ::triplify ($ idparent ::getVoc ()."AQualitative-Frequency " ,$ fid
438438					parent ::describeIndividual ($ fid$ flabel$ ftype
439439					parent ::describeClass ($ ftype$ ftype_label
440440				);
0 commit comments