33import json
44import gzip
55import logging
6+ from db import get_mongo_connection
67from concurrent .futures import ThreadPoolExecutor
78import configparser
89import urllib .parse
4142# Common response header
4243headers = {"Content-Type" : "application/json" }
4344
44- from db import get_mongo_connection
45- mydb = get_mongo_connection (IS_DEBUG ,Config )
4645
46+ mydb = get_mongo_connection (IS_DEBUG , Config )
47+
48+ def get_potential_gene_symbols (query_string : str , limit_elements : int = 50 ) -> List [str ]:
4749
48- def get_potential_gene_symbols (query_string , limit_elements ):
4950 """
50- TODO: document and add types
51- :param query_string:
52- :param limit_elements:
53- :return:
51+ Takes a string of any length and returns a list of genes that contain that search criteria.
52+
53+ :param query_string: The query string
54+ :param limit_elements: The number of elements to return
55+ :return: A list of genes that contain that search criteria
5456 """
5557 er = re .compile ("^" + re .escape (query_string ), re .IGNORECASE )
5658 collection_hgnc = mydb ["hgnc" ] # HGNC collection
@@ -83,11 +85,12 @@ def get_potential_gene_symbols(query_string, limit_elements):
8385 return res
8486
8587
86- def search_gene_group (gen ): # AGREGAR LO QUE PASA SI NO PERTENECE A NINGUN gene_group_id (EJ gen:AADACP1)
88+ def search_gene_group (gen : str ) -> Dict [ str , Any ]:
8789 """
88- TODO: document
89- :param gen:
90- :return:
90+ Get the gene and locus group from HGNC DB for a specific approved gene symbol
91+
92+ :param gen: an approved gene symbol
93+ :return: a dictionary with the gene and locus group information
9194 """
9295 results = {'locus_group' : None , 'locus_type' : None , 'gene_group' : None , 'gene_group_id' : None }
9396 collection_hgnc = mydb ["hgnc" ] # HGNC collection
@@ -107,17 +110,18 @@ def search_gene_group(gen): # AGREGAR LO QUE PASA SI NO PERTENECE A NINGUN gene
107110 results ['gene_group' ] = document ['gene_group' ]
108111 results ['gene_group_id' ] = document ['gene_group_id' ]
109112 else :
110- results ['gene_group' ] = [document ['gene_group' ]] # type: ignore
111- results ['gene_group_id' ] = [document ['gene_group_id' ]] # type: ignore
113+ results ['gene_group' ] = [document ['gene_group' ]] # type: ignore
114+ results ['gene_group_id' ] = [document ['gene_group_id' ]] # type: ignore
112115
113116 return results
114117
115118
116- def search_genes_in_same_group (group_id : int ):
119+ def search_genes_in_same_group (group_id : int ) -> List [ str ] :
117120 """
118- TODO: document
119- :param group_id:
120- :return:
121+ From a gene group id in HGNC DB, get a list of all genes in the same group.
122+
123+ :param group_id: a valid gene group id from HGNC DB
124+ :return: a list of all genes that are in the group
121125 """
122126 collection_hgnc = mydb ["hgnc" ] # HGNC collection
123127 query = {'gene_group_id' : group_id }
@@ -126,12 +130,13 @@ def search_genes_in_same_group(group_id: int):
126130 return [doc ["symbol" ] for doc in docs ]
127131
128132
129- def get_genes_of_pathway (pathway_id , pathway_source ) :
133+ def get_genes_of_pathway (pathway_id : str , pathway_source : str ) -> List [ str ] :
130134 """
131- TODO: document and add types
132- :param pathway_id:
133- :param pathway_source:
134- :return:
135+ From a pathway id and its source database, get a list of all genes in the pathway.
136+
137+ :param pathway_source: the pathway database
138+ :param pathway_id: a pathway id to search in the database
139+ :return: a list of all genes in the pathway
135140 """
136141 collection_cpdb = mydb ["cpdb" ] # CPDB collection
137142 ps = re .compile ("^" + pathway_source + "$" , re .IGNORECASE )
@@ -140,11 +145,12 @@ def get_genes_of_pathway(pathway_id, pathway_source):
140145 return doc ["hgnc_symbol_ids" ] if doc is not None else []
141146
142147
143- def get_pathways_of_gene (gene ) :
148+ def get_pathways_of_gene (gene : str ) -> List [ str ] :
144149 """
145- TODO: document and add types
146- :param gene:
147- :return:
150+ Get all pathways and sources for a given gene symbol.
151+
153+ :param gene: approved gene symbol
153+ :return: list of pathways
148154 """
149155 collection_cpdb = mydb ["cpdb" ] # CPDB collection
150156 query = {'hgnc_symbol_ids' : gene }
@@ -153,11 +159,12 @@ def get_pathways_of_gene(gene):
153159 return [str (doc ) for doc in docs ]
154160
155161
156- def get_information_of_genes (genes : List [str ]) -> Dict :
162+ def get_information_of_genes (genes : List [str ]) -> Dict [ str , Dict [ str , Any ]] :
157163 """
158- TODO: document
159- :param genes:
160- :return:
164+ This function receives a list of gene symbols and returns information about them from different bioinformatics databases.
165+
166+ :param genes: list of gene symbols
167+ :return: dictionary with information about gene symbols (each key is a gene and each value is another dictionary with its information)
161168 """
162169 res = {}
163170 collection_gene_grch37 = mydb ["gene_grch37" ]
@@ -212,12 +219,13 @@ def get_information_of_genes(genes: List[str]) -> Dict:
212219 return res
213220
214221
215- def get_expression_from_gtex (tissue : str , genes : List [str ]) -> List :
222+ def get_expression_from_gtex (tissue : str , genes : List [str ]) -> List [ Dict [ str , float ]] :
216223 """
217- Gets all the expressions for a specific tissue and a list of genes
224+ Gets all the expressions for a specific tissue and a list of genes.
225+
218226 :param tissue: Tissue to filter
219227 :param genes: List of genes to filter
220- :return: List of expressions
228+ :return: List of expression values. Each element of the list contains the expression values for each gene
221229 """
222230 collection = mydb ["gtex_" + tissue ] # Connects to specific tissue's collection
223231 query = {'gene' : {'$in' : genes }}
@@ -243,7 +251,7 @@ def terms_related_to_one_gene(gene: str, relation_type: Optional[List[str]] = No
243251 if relation_type is None :
244252 relation_type = ["enables" , "involved_in" , "part_of" , "located_in" ]
245253 collection_go_annotations = mydb ["go_anotations" ]
246-
254+
247255 annotation = list (collection_go_annotations .find ({"gene_symbol" : gene }))
248256 related_genes = {}
249257 if annotation :
@@ -260,6 +268,7 @@ def terms_related_to_one_gene(gene: str, relation_type: Optional[List[str]] = No
260268 return related_genes
261269
262270
271+
263272def is_term_on_db (term_id )-> bool :
264273 """
265274 Returns whether a go term ID is in the DB
@@ -361,12 +370,12 @@ def enrich(gene_ids: List, p_value_threshold: int= 0.05, correction_method: str
361370 return metrics , relations
362371
363372
364- def populate_terms_with_data (term_list : List , ontology_type : Optional [List [str ]] = None )-> List :
373+ def populate_terms_with_data (term_list , ontology_type : Optional [List [str ]] = None ):
365374 """
366- Given a go terms list will return a list with all the terms information
367- :param term_list: list of all the wanted terms
368- :param ontology_type: Filters the ontology type of the terms in the response
369- :return: list of all terms with all the information of them that's on the DB
375+ TODO: document
376+ :param term_list:
377+ :param ontology_type:
378+ :return:
370379 """
371380 if ontology_type is None :
372381 ontology_type = ["biological_process" , "molecular_function" , "cellular_component" ]
@@ -375,6 +384,7 @@ def populate_terms_with_data(term_list:List, ontology_type: Optional[List[str]]
375384 return terms
376385
377386
387+
378388def strip_term (term : Dict ,relations :Optional [List [str ]])-> Dict :
379389 """
380390 Given a go term and wanted relations will return just a selected amount of attributes needed for representation as a graph
@@ -494,12 +504,12 @@ def cancer_drugs_related_to_gene(gene: str) -> List:
494504 collection_pharm = mydb ["pharmgkb" ]
495505 return list (collection_pharm .find ({"genes" :gene },{"_id" :0 }))
496506
497- # App
498507
499- def get_data_from_oncokb (genes : List [str ]) -> Dict :
508+ def get_data_from_oncokb (genes : List [str ]) -> Dict [ str , Dict [ str , Any ]] :
500509 """
501- Gets all data associated with a gene list.
502- :param genes: List of genes to filter.
510+ Gets all data from OncoKB database associated with a gene list.
511+
512+ :param genes: List of gene symbols.
503513 :return: Dict of genes with their associated drugs and information according to OncoKB database
504514 """
505515 collection_actionability_gene = mydb ["oncokb_biomarker_drug_associations" ]
@@ -528,10 +538,10 @@ def get_data_from_oncokb(genes: List[str]) -> Dict:
528538 res [gen ]["oncokb_cancer_gene" ].append ("Oncogene" )
529539 if doc_c ["tumor_suppressor_gene" ]:
530540 res [gen ]["oncokb_cancer_gene" ].append ("Tumor Suppressor Gene" )
531-
541+
532542 if len (res [gen ]["oncokb_cancer_gene" ]) == 0 :
533543 res [gen ].pop ("oncokb_cancer_gene" )
534-
544+
535545 sources = []
536546 for key in doc_c :
537547 if doc_c [key ] == 1 :
@@ -619,7 +629,7 @@ def gene_symbols():
619629 @flask_app .route ("/gene-symbols-finder/" , methods = ['GET' ])
620630 def gene_symbol_finder ():
621631 """Takes a string of any length and returns a list of genes that contain that search criteria."""
622- query = None # To prevent MyPy warning
632+ query = "" # To prevent MyPy warning
623633 if "query" not in request .args :
624634 abort (400 , "'query' parameter is mandatory" )
625635 else :
@@ -642,7 +652,8 @@ def gene_symbol_finder():
642652 @flask_app .route ("/information-of-genes" , methods = ['POST' ])
643653 def information_of_genes ():
644654 """Receives a list of gene IDs and returns information about them."""
645- body = request .get_json () # type: ignore
655+ body = request .get_json () # type: ignore
656+ response = {}
646657 if "gene_ids" not in body :
647658 abort (400 , "gene_ids is mandatory" )
648659
@@ -653,7 +664,6 @@ def information_of_genes():
653664 try :
654665 response = get_information_of_genes (gene_ids )
655666 except Exception as e :
656- response = {} # To prevent mypy warnings
657667 abort (400 , e )
658668 return make_response (response , 200 , headers )
659669
@@ -695,7 +705,7 @@ def genes_in_the_same_group(gene_id):
695705 def pathway_genes (pathway_source , pathway_id ):
696706 if pathway_source .lower () not in PATHWAYS_SOURCES :
697707 abort (404 , f'{ pathway_source } is an invalid pathway source' )
698- response = {"genes" : get_genes_of_pathway (pathway_id , pathway_source )}
708+ response = {"genes" : get_genes_of_pathway (str ( pathway_id ) , pathway_source )}
699709 return make_response (response , 200 , headers )
700710
701711 @flask_app .route ("/pathways-in-common" , methods = ['POST' ])
@@ -720,7 +730,7 @@ def pathways_in_common():
720730
721731 @flask_app .route ("/expression-of-genes" , methods = ['POST' ])
722732 def expression_data_from_gtex ():
723- body = request .get_json () # type: ignore
733+ body = request .get_json () # type: ignore
724734
725735 if "gene_ids" not in body :
726736 abort (400 , "gene_ids is mandatory" )
0 commit comments