@@ -26,7 +26,7 @@ def get_stmts_from_source(source_protein, target_proteins=None):
2626
2727 Parameters
2828 ----------
29- target_protein : string
29+ source_protein : string
3030 The protein of interest in relation to protein list user enters
3131
3232 protein_list: list
@@ -49,12 +49,27 @@ def get_stmts_from_source(source_protein, target_proteins=None):
4949 # TODO: get the same values from this result as what you got from the old
5050 # query
5151
52- # cypher to get dataframe with all proteins that have INDRA relationship with target protein
53- # query = f"""MATCH p=(n:BioEntity)-[r:indra_rel]->(m:BioEntity)
54- # WHERE n.name = '{source_protein}'
55- # RETURN m.name, r.stmt_json, m.type, m.id, r.stmt_type"""
52+ #cypher to get dataframe with all proteins that have INDRA relationship with source protein
53+ #query = f"""MATCH p=(n:BioEntity)-[r:indra_rel]->(m:BioEntity) WHERE n.name = '{source_protein}' RETURN m.name, r.stmt_json, m.type, m.id, r.stmt_type"""
5654 #res = client.query_tx(query)
57-
55+
56+
57+ jsons = []
58+ types = []
59+ ids = []
60+ stmt_types = []
61+ for i in range (len (res )):
62+ target_name = res [i ].data
63+ jsons .append (res [i ].data ["stmt_json" ])
64+ types .append (res [i ].target_ns )
65+ ids .append (res [i ].target_id )
66+ stmt_types .append (res [i ].data ["stmt_type" ])
67+ protein_dict = {"stmt_json" : jsons , "target_type" : types , "target_id" :ids , "stmt_type" : stmt_types }
68+ stmts_by_protein_df = pd .DataFrame (protein_dict )
69+
70+ print (stmts_by_protein_df )
71+ print (res [0 ].__dict__ )
72+
5873 stmts_by_protein_df = pd .DataFrame (res , columns = ["name" , "stmt_json" , "type" , "id" , "indra_type" ])
5974 if target_proteins :
6075 # TODO: since the target proteins are now HGNC ids, you need to change this filter
@@ -66,6 +81,7 @@ def get_stmts_from_source(source_protein, target_proteins=None):
6681 return stmts_by_protein_df , stmts_by_protein_filtered_df
6782
6883
84+
6985def graph_barchart (filtered_df ):
7086 """Visualize frequency of interaction types among proteins that have direct
7187 INDRA relationship to target
@@ -119,7 +135,7 @@ def download_indra_htmls(filtered_df):
119135 ha .save_model ('%s_statements.html' % (name + str (index )))
120136
121137
122- def get_gene_id (protein_name ):
138+ def get_gene_id (source_protein ):
123139 """Return HGNC id for protein of interest
124140
125141 Parameters
@@ -133,15 +149,15 @@ def get_gene_id(protein_name):
133149 The HGNC id for the protein of interest
134150
135151 """
136- hgnc_id = hgnc_client .get_hgnc_id (protein_name )
137- if not hgnc_id :
138- hgnc_id = hgnc_client .get_current_hgnc_id (protein_name )
139- if not hgnc_id :
140- print ("%s is not a valid gene name" % protein_name )
152+ source_hgnc_id = hgnc_client .get_hgnc_id (source_protein )
153+ if not source_hgnc_id :
154+ source_hgnc_id = hgnc_client .get_current_hgnc_id (source_protein )
155+ if not source_hgnc_id :
156+ print ("%s is not a valid gene name" % source_protein )
141157 return None
142- return hgnc_id
158+ return source_hgnc_id
143159
144- def get_gene_ids (protein_list ):
160+ def get_gene_ids (target_proteins ):
145161 """Return HGNC ids for all proteins in the list
146162
147163 Parameters
@@ -152,15 +168,15 @@ def get_gene_ids(protein_list):
152168 Returns
153169 -------
154170 """
155- hgnc_ids = []
156- for protein in protein_list :
171+ target_hgnc_ids = []
172+ for protein in target_proteins :
157173 hgnc_id = get_gene_id (protein )
158174 if hgnc_id :
159- hgnc_ids .append (hgnc_id )
160- return hgnc_ids
175+ target_hgnc_ids .append (hgnc_id )
176+ return target_hgnc_ids
161177
162178
163- def shared_pathway (id_df , target_id , target_protein ):
179+ def shared_pathway (id_df , target_id , source_protein ):
164180 """Find shared pathways between list of genes and target protein
165181
166182 Parameters
@@ -169,7 +185,7 @@ def shared_pathway(id_df, target_id, target_protein):
169185 Contains HGNC ids for protein_list protein list
170186 target_id: string
171187 The target proteins HGNC id
172- target_protein : string
188+ source_protein : string
173189 The protein of interest in relation to protein list
174190
175191 Returns
@@ -183,21 +199,21 @@ def shared_pathway(id_df, target_id, target_protein):
183199 gene_id = ids [5 :]
184200 result = get_shared_pathways_for_genes ((("HGNC" , gene_id ),("HGNC" , target_id )))
185201 if not result :
186- print ("\n There are no shared pathways for" , names , "and" , target_protein )
202+ print ("\n There are no shared pathways for" , names , "and" , source_protein )
187203 else :
188- print ("\n Here are the shared pathways for" , names , "and" , target_protein )
204+ print ("\n Here are the shared pathways for" , names , "and" , source_protein )
189205 print (result )
190206
191207
192- def child_of_target (id_df , target_id , target_protein ):
208+ def child_of_target (id_df , target_id , source_protein ):
193209 """ Determine if any gene in gene list isa/partof the target protein
194210 Parameters
195211 ----------
196212 id_df : dataframe
197213 Contains HGNC ids for protein_list
198214 target_id : string
199215 The target proteins HGNC id
200- target_protein : string
216+ source_protein : string
201217 The protein of interest in relation to protein list user enters
202218
203219 Returns
@@ -214,10 +230,10 @@ def child_of_target(id_df, target_id, target_protein):
214230 result = isa_or_partof (("HGNC" , id ),("HGNC" , target_id ))
215231
216232 if result == True :
217- print ("\n " , names , "and" , target_protein , "are a part of the same family" )
233+ print ("\n " , names , "and" , source_protein , "are a part of the same family" )
218234 print (result )
219235 else :
220- print ("\n " ,names , "and" , target_protein , "are not a part of the same family" )
236+ print ("\n " ,names , "and" , source_protein , "are not a part of the same family" )
221237
222238
223239def get_go_terms_for_target (target_id ):
@@ -400,10 +416,10 @@ def graph_boxplots(shared_complexes_df,shared_entities):
400416
401417def run_analysis (source_hgnc_id , target_hgnc_ids ):
402418 # to get dataframe with proteins that target has INDRA rel with filtered by user's gene list
403- filtered_df , protein_df = get_stmts_from_source (source_hgnc_id , target_hgnc_ids )
419+ stmts_by_protein_df , stmts_by_protein_filtered_df = get_stmts_from_source (source_hgnc_id , target_hgnc_ids )
404420 print ("\n This is a dataframe of protiens that have INDRA relationships with " ,
405- target_protein , " that have been filtered for the protein list" )
406- print (filtered_df )
421+ source_hgnc_id , " that have been filtered for the protein list" )
422+ print (stmts_by_protein_filtered_df )
407423
408424 # visualize frequency of interaction types among proteins that have direct
409425 # INDRA relationship to target
@@ -413,27 +429,27 @@ def run_analysis(source_hgnc_id, target_hgnc_ids):
413429 download_indra_htmls (filtered_df )
414430
415431 # to get gene ids for users gene list and target protein
416- id_df , target_id = get_gene_ids (protein_list , target_protein )
432+ id_df , target_id = get_gene_ids (protein_list , source_protein )
417433
418434 # to find shared pathways between users gene list and target protein
419- shared_pathway (id_df , target_id , target_protein )
435+ shared_pathway (id_df , target_id , source_protein )
420436
421437 # which proteins of interest are part of the same protein family complex
422438 # as the target
423- child_of_target (id_df , target_id , target_protein )
439+ child_of_target (id_df , target_id , source_protein )
424440
425441 # to get go term ids for target gene
426442 target_go , go_nodes = get_go_terms_for_target (target_id )
427443
428444 # finds shared upstream bioentities between the users gene list and target protein
429445 shared_proteins , shared_entities = shared_bioentities (protein_df )
430446 print ("These are the shared upstream bioentities between the gene list and" ,
431- target_protein )
447+ source_protein )
432448 print (shared_entities )
433449
434450 # finds shared bioentities between users gene list and target protein using GO terms
435451 shared_complexes_df = finding_protein_complexes (target_go )
436- print ("These are shared complexes between the gene list and" , target_protein )
452+ print ("These are shared complexes between the gene list and" , source_protein )
437453 print (shared_complexes_df )
438454
439455 # gets a list of reactome and wikipathways for shared genes
@@ -449,17 +465,19 @@ def main():
449465 'AMOT' , 'PLA2G4A' , 'RCN2' , 'TTC9' , 'FABP4' , 'GPCPD1' , 'VSNL1' ,
450466 'CRYBB1' , 'PDZD8' , 'FNDC3A' ]
451467
452- target_hgnc_ids = get_gene_ids (target_protein_names )
453468
454470 # the protein of interest in relation to protein list user enters
455470 source_protein_name = "CTNNB1"
456471
457472 source_hgnc_id = get_gene_id (source_protein_name )
458-
473+ target_hgnc_ids = get_gene_ids (target_protein_names )
474+
475+ print (source_hgnc_id ,target_hgnc_ids )
459476 if not source_hgnc_id or not target_hgnc_ids :
460477 print ("Cannot perform analysis due to invalid gene names" )
461478 return
462-
479+
480+
463481 run_analysis (source_hgnc_id , target_hgnc_ids )
464482
465483
0 commit comments