3
3
import json
4
4
import gzip
5
5
import logging
6
+ from db import get_mongo_connection
6
7
from concurrent .futures import ThreadPoolExecutor
7
8
import configparser
8
9
import urllib .parse
41
42
# Common response header
42
43
headers = {"Content-Type" : "application/json" }
43
44
44
- from db import get_mongo_connection
45
- mydb = get_mongo_connection (IS_DEBUG ,Config )
46
45
46
+ mydb = get_mongo_connection (IS_DEBUG , Config )
47
+
48
+ def get_potential_gene_symbols (query_string : str , limit_elements : int = 50 ) -> List [str ]:
47
49
48
- def get_potential_gene_symbols (query_string , limit_elements ):
49
50
"""
50
- TODO: document and add types
51
- :param query_string:
52
- :param limit_elements:
53
- :return:
51
+ Takes a string of any length and returns a list of genes that contain that search criteria.
52
+
53
+ :param query_string: The query string
54
+ :param limit_elements: The number of elements to return
55
+ :return: A list of genes that contain that search criteria
54
56
"""
55
57
er = re .compile ("^" + re .escape (query_string ), re .IGNORECASE )
56
58
collection_hgnc = mydb ["hgnc" ] # HGNC collection
@@ -83,11 +85,12 @@ def get_potential_gene_symbols(query_string, limit_elements):
83
85
return res
84
86
85
87
86
- def search_gene_group (gen ): # AGREGAR LO QUE PASA SI NO PERTENECE A NINGUN gene_group_id (EJ gen:AADACP1)
88
+ def search_gene_group (gen : str ) -> Dict [ str , Any ]:
87
89
"""
88
- TODO: document
89
- :param gen:
90
- :return:
90
+ Get the gene and locus group from HGNC DB for a specific approved gene symbol
91
+
92
+ :param gen: an approved gene symbol
93
+ :return: a dictionary with the gene and locus group information
91
94
"""
92
95
results = {'locus_group' : None , 'locus_type' : None , 'gene_group' : None , 'gene_group_id' : None }
93
96
collection_hgnc = mydb ["hgnc" ] # HGNC collection
@@ -107,17 +110,18 @@ def search_gene_group(gen): # AGREGAR LO QUE PASA SI NO PERTENECE A NINGUN gene
107
110
results ['gene_group' ] = document ['gene_group' ]
108
111
results ['gene_group_id' ] = document ['gene_group_id' ]
109
112
else :
110
- results ['gene_group' ] = [document ['gene_group' ]] # type: ignore
111
- results ['gene_group_id' ] = [document ['gene_group_id' ]] # type: ignore
113
+ results ['gene_group' ] = [document ['gene_group' ]] # type: ignore
114
+ results ['gene_group_id' ] = [document ['gene_group_id' ]] # type: ignore
112
115
113
116
return results
114
117
115
118
116
- def search_genes_in_same_group (group_id : int ):
119
+ def search_genes_in_same_group (group_id : int ) -> List [ str ] :
117
120
"""
118
- TODO: document
119
- :param group_id:
120
- :return:
121
+ From a gene group id in HGNC DB, get a list of all genes in the same group.
122
+
123
+ :param group_id: a valid gene group id from HGNC DB
124
+ :return: a list of all genes that are in the group
121
125
"""
122
126
collection_hgnc = mydb ["hgnc" ] # HGNC collection
123
127
query = {'gene_group_id' : group_id }
@@ -126,12 +130,13 @@ def search_genes_in_same_group(group_id: int):
126
130
return [doc ["symbol" ] for doc in docs ]
127
131
128
132
129
- def get_genes_of_pathway (pathway_id , pathway_source ) :
133
+ def get_genes_of_pathway (pathway_id : str , pathway_source : str ) -> List [ str ] :
130
134
"""
131
- TODO: document and add types
132
- :param pathway_id:
133
- :param pathway_source:
134
- :return:
135
+ From a pathway id and its source database, get a list of all genes in that pathway.
136
+
137
+ :param pathway_source: the pathway database
138
+ :param pathway_id: a pathway id to search in the database
139
+ :return: a list of all genes in the pathway
135
140
"""
136
141
collection_cpdb = mydb ["cpdb" ] # CPDB collection
137
142
ps = re .compile ("^" + pathway_source + "$" , re .IGNORECASE )
@@ -140,11 +145,12 @@ def get_genes_of_pathway(pathway_id, pathway_source):
140
145
return doc ["hgnc_symbol_ids" ] if doc is not None else []
141
146
142
147
143
- def get_pathways_of_gene (gene ) :
148
+ def get_pathways_of_gene (gene : str ) -> List [ str ] :
144
149
"""
145
- TODO: document and add types
146
- :param gene:
147
- :return:
150
+ Get all pathways and sources for a given gene symbol.
151
+
152
+ :param gene: approved gene symbol
153
+ :return: list of pathways
148
154
"""
149
155
collection_cpdb = mydb ["cpdb" ] # CPDB collection
150
156
query = {'hgnc_symbol_ids' : gene }
@@ -153,11 +159,12 @@ def get_pathways_of_gene(gene):
153
159
return [str (doc ) for doc in docs ]
154
160
155
161
156
- def get_information_of_genes (genes : List [str ]) -> Dict :
162
+ def get_information_of_genes (genes : List [str ]) -> Dict [ str , Dict [ str , Any ]] :
157
163
"""
158
- TODO: document
159
- :param genes:
160
- :return:
164
+ This function receives a list of gene symbols and returns information about them from different bioinformatics databases.
165
+
166
+ :param genes: list of gene symbols
167
+ :return: dictionary with information about gene symbols (each key is a gene and each value is another dictionary with its information)
161
168
"""
162
169
res = {}
163
170
collection_gene_grch37 = mydb ["gene_grch37" ]
@@ -212,12 +219,13 @@ def get_information_of_genes(genes: List[str]) -> Dict:
212
219
return res
213
220
214
221
215
- def get_expression_from_gtex (tissue : str , genes : List [str ]) -> List :
222
+ def get_expression_from_gtex (tissue : str , genes : List [str ]) -> List [ Dict [ str , float ]] :
216
223
"""
217
- Gets all the expressions for a specific tissue and a list of genes
224
+ Gets all the expressions for a specific tissue and a list of genes.
225
+
218
226
:param tissue: Tissue to filter
219
227
:param genes: List of genes to filter
220
- :return: List of expressions
228
+ :return: List of expression values. Each element of the list contains the expression values for each gene
221
229
"""
222
230
collection = mydb ["gtex_" + tissue ] # Connects to specific tissue's collection
223
231
query = {'gene' : {'$in' : genes }}
@@ -243,7 +251,7 @@ def terms_related_to_one_gene(gene: str, relation_type: Optional[List[str]] = No
243
251
if relation_type is None :
244
252
relation_type = ["enables" , "involved_in" , "part_of" , "located_in" ]
245
253
collection_go_annotations = mydb ["go_anotations" ]
246
-
254
+
247
255
annotation = list (collection_go_annotations .find ({"gene_symbol" : gene }))
248
256
related_genes = {}
249
257
if annotation :
@@ -260,6 +268,7 @@ def terms_related_to_one_gene(gene: str, relation_type: Optional[List[str]] = No
260
268
return related_genes
261
269
262
270
271
+
263
272
def is_term_on_db (term_id )-> bool :
264
273
"""
265
274
Returns whether a go term ID is in the DB
@@ -361,12 +370,12 @@ def enrich(gene_ids: List, p_value_threshold: int= 0.05, correction_method: str
361
370
return metrics , relations
362
371
363
372
364
- def populate_terms_with_data (term_list : List , ontology_type : Optional [List [str ]] = None )-> List :
373
+ def populate_terms_with_data (term_list , ontology_type : Optional [List [str ]] = None ):
365
374
"""
366
- Given a go terms list will return a list with all the terms information
367
- :param term_list: list of all the wanted terms
368
- :param ontology_type: Filters the ontology type of the terms in the response
369
- :return: list of all terms with all the information of them that's on the DB
375
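+ Given a list of GO terms, returns a list with all the information of those terms.
376
+ :param term_list: list of all the wanted terms
377
+ :param ontology_type: filters the ontology type of the terms in the response
378
+ :return: list of all terms with all the information about them that is in the DB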
370
379
"""
371
380
if ontology_type is None :
372
381
ontology_type = ["biological_process" , "molecular_function" , "cellular_component" ]
@@ -375,6 +384,7 @@ def populate_terms_with_data(term_list:List, ontology_type: Optional[List[str]]
375
384
return terms
376
385
377
386
387
+
378
388
def strip_term (term : Dict ,relations :Optional [List [str ]])-> Dict :
379
389
"""
380
390
Given a go term and wanted relations will return just a selected amount of attributes needed for representation as a graph
@@ -494,12 +504,12 @@ def cancer_drugs_related_to_gene(gene: str) -> List:
494
504
collection_pharm = mydb ["pharmgkb" ]
495
505
return list (collection_pharm .find ({"genes" :gene },{"_id" :0 }))
496
506
497
- # App
498
507
499
- def get_data_from_oncokb (genes : List [str ]) -> Dict :
508
+ def get_data_from_oncokb (genes : List [str ]) -> Dict [ str , Dict [ str , Any ]] :
500
509
"""
501
- Gets all data associated with a gene list.
502
- :param genes: List of genes to filter.
510
+ Gets all data from OncoKB database associated with a gene list.
511
+
512
+ :param genes: List of gene symbols.
503
513
:return: Dict of genes with their associated drugs and information according to OncoKB database
504
514
"""
505
515
collection_actionability_gene = mydb ["oncokb_biomarker_drug_associations" ]
@@ -528,10 +538,10 @@ def get_data_from_oncokb(genes: List[str]) -> Dict:
528
538
res [gen ]["oncokb_cancer_gene" ].append ("Oncogene" )
529
539
if doc_c ["tumor_suppressor_gene" ]:
530
540
res [gen ]["oncokb_cancer_gene" ].append ("Tumor Suppressor Gene" )
531
-
541
+
532
542
if len (res [gen ]["oncokb_cancer_gene" ]) == 0 :
533
543
res [gen ].pop ("oncokb_cancer_gene" )
534
-
544
+
535
545
sources = []
536
546
for key in doc_c :
537
547
if doc_c [key ] == 1 :
@@ -619,7 +629,7 @@ def gene_symbols():
619
629
@flask_app .route ("/gene-symbols-finder/" , methods = ['GET' ])
620
630
def gene_symbol_finder ():
621
631
"""Takes a string of any length and returns a list of genes that contain that search criteria."""
622
- query = None # To prevent MyPy warning
632
+ query = "" # To prevent MyPy warning
623
633
if "query" not in request .args :
624
634
abort (400 , "'query' parameter is mandatory" )
625
635
else :
@@ -642,7 +652,8 @@ def gene_symbol_finder():
642
652
@flask_app .route ("/information-of-genes" , methods = ['POST' ])
643
653
def information_of_genes ():
644
654
"""Receives a list of gene IDs and returns information about them."""
645
- body = request .get_json () # type: ignore
655
+ body = request .get_json () # type: ignore
656
+ response = {}
646
657
if "gene_ids" not in body :
647
658
abort (400 , "gene_ids is mandatory" )
648
659
@@ -653,7 +664,6 @@ def information_of_genes():
653
664
try :
654
665
response = get_information_of_genes (gene_ids )
655
666
except Exception as e :
656
- response = {} # To prevent mypy warnings
657
667
abort (400 , e )
658
668
return make_response (response , 200 , headers )
659
669
@@ -695,7 +705,7 @@ def genes_in_the_same_group(gene_id):
695
705
def pathway_genes (pathway_source , pathway_id ):
696
706
if pathway_source .lower () not in PATHWAYS_SOURCES :
697
707
abort (404 , f'{ pathway_source } is an invalid pathway source' )
698
- response = {"genes" : get_genes_of_pathway (pathway_id , pathway_source )}
708
+ response = {"genes" : get_genes_of_pathway (str ( pathway_id ) , pathway_source )}
699
709
return make_response (response , 200 , headers )
700
710
701
711
@flask_app .route ("/pathways-in-common" , methods = ['POST' ])
@@ -720,7 +730,7 @@ def pathways_in_common():
720
730
721
731
@flask_app .route ("/expression-of-genes" , methods = ['POST' ])
722
732
def expression_data_from_gtex ():
723
- body = request .get_json () # type: ignore
733
+ body = request .get_json () # type: ignore
724
734
725
735
if "gene_ids" not in body :
726
736
abort (400 , "gene_ids is mandatory" )
0 commit comments