Skip to content

Commit 679a46a

Browse files
authored
Merge branch 'oncokb' into string
2 parents 36544d9 + 0a1c7ec commit 679a46a

File tree

3 files changed

+66
-51
lines changed

3 files changed

+66
-51
lines changed

DEPLOYING.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ BioAPI uses three genomic databases for its operation. These databases must be l
3737
3838
To import all databases in MongoDB:
3939
40-
1. Download the "bioapi_db.gz" from **[here](https://drive.google.com/file/d/1lI3A98N-GhnffkSOWjB_gx_ieq3pEjFP/view?usp=sharing)**
40+
1. Download the "bioapi_db.gz" from **[here](https://drive.google.com/file/d/1oBdhC-XoJn-VNEIEpfMWB2Gna--WZ1Wa/view?usp=sharing)**
4141
2. Shut down all the services by running `docker compose down`
4242
3. Edit the `docker-compose.yml` file to include the downloaded file inside the container:
4343
```yml

bio-api/bioapi.py

Lines changed: 60 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import json
44
import gzip
55
import logging
6+
from db import get_mongo_connection
67
from concurrent.futures import ThreadPoolExecutor
78
import configparser
89
import urllib.parse
@@ -41,16 +42,17 @@
4142
# Common response header
4243
headers = {"Content-Type": "application/json"}
4344

44-
from db import get_mongo_connection
45-
mydb = get_mongo_connection(IS_DEBUG,Config)
4645

46+
mydb = get_mongo_connection(IS_DEBUG, Config)
47+
48+
def get_potential_gene_symbols(query_string: str, limit_elements: int = 50) -> List[str]:
4749

48-
def get_potential_gene_symbols(query_string, limit_elements):
4950
"""
50-
TODO: document and add types
51-
:param query_string:
52-
:param limit_elements:
53-
:return:
51+
Takes a string of any length and returns a list of genes that contain that search criteria.
52+
53+
:param query_string: The query string
54+
:param limit_elements: The number of elements to return
55+
:return: A list of genes that contain that search criteria
5456
"""
5557
er = re.compile("^" + re.escape(query_string), re.IGNORECASE)
5658
collection_hgnc = mydb["hgnc"] # HGNC collection
@@ -83,11 +85,12 @@ def get_potential_gene_symbols(query_string, limit_elements):
8385
return res
8486

8587

86-
def search_gene_group(gen): # AGREGAR LO QUE PASA SI NO PERTENECE A NINGUN gene_group_id (EJ gen:AADACP1)
88+
def search_gene_group(gen: str) -> Dict[str, Any]:
8789
"""
88-
TODO: document
89-
:param gen:
90-
:return:
90+
Get the gene and locus group from HGNC DB for a specific approved gene symbol
91+
92+
:param gen: an approved gene symbol
93+
:return: a dictionary with the gene and locus group information
9194
"""
9295
results = {'locus_group': None, 'locus_type': None, 'gene_group': None, 'gene_group_id': None}
9396
collection_hgnc = mydb["hgnc"] # HGNC collection
@@ -107,17 +110,18 @@ def search_gene_group(gen): # AGREGAR LO QUE PASA SI NO PERTENECE A NINGUN gene
107110
results['gene_group'] = document['gene_group']
108111
results['gene_group_id'] = document['gene_group_id']
109112
else:
110-
results['gene_group'] = [document['gene_group']] # type: ignore
111-
results['gene_group_id'] = [document['gene_group_id']] # type: ignore
113+
results['gene_group'] = [document['gene_group']] # type: ignore
114+
results['gene_group_id'] = [document['gene_group_id']] # type: ignore
112115

113116
return results
114117

115118

116-
def search_genes_in_same_group(group_id: int):
119+
def search_genes_in_same_group(group_id: int) -> List[str]:
117120
"""
118-
TODO: document
119-
:param group_id:
120-
:return:
121+
From a gene group id in HGNC DB, get a list of all genes in the same group.
122+
123+
:param group_id: a valid gene group id from HGNC DB
124+
:return: a list of all genes that are in the group
121125
"""
122126
collection_hgnc = mydb["hgnc"] # HGNC collection
123127
query = {'gene_group_id': group_id}
@@ -126,12 +130,13 @@ def search_genes_in_same_group(group_id: int):
126130
return [doc["symbol"] for doc in docs]
127131

128132

129-
def get_genes_of_pathway(pathway_id, pathway_source):
133+
def get_genes_of_pathway(pathway_id: str, pathway_source: str) -> List[str]:
130134
"""
131-
TODO: document and add types
132-
:param pathway_id:
133-
:param pathway_source:
134-
:return:
135+
From a pathway id and its source database, get a list of all genes in that pathway.
136+
137+
:param pathway_source: the pathway database
138+
:param pathway_id: a pathway id to search in the database
139+
:return: a list of all genes in the pathway
135140
"""
136141
collection_cpdb = mydb["cpdb"] # CPDB collection
137142
ps = re.compile("^" + pathway_source + "$", re.IGNORECASE)
@@ -140,11 +145,12 @@ def get_genes_of_pathway(pathway_id, pathway_source):
140145
return doc["hgnc_symbol_ids"] if doc is not None else []
141146

142147

143-
def get_pathways_of_gene(gene):
148+
def get_pathways_of_gene(gene: str) -> List[str]:
144149
"""
145-
TODO: document and add types
146-
:param gene:
147-
:return:
150+
Get all pathways and sources for a given gene symbol.
151+
152+
:param gene: approved gene symbol
153+
:return: list of pathways
148154
"""
149155
collection_cpdb = mydb["cpdb"] # CPDB collection
150156
query = {'hgnc_symbol_ids': gene}
@@ -153,11 +159,12 @@ def get_pathways_of_gene(gene):
153159
return [str(doc) for doc in docs]
154160

155161

156-
def get_information_of_genes(genes: List[str]) -> Dict:
162+
def get_information_of_genes(genes: List[str]) -> Dict[str, Dict[str, Any]]:
157163
"""
158-
TODO: document
159-
:param genes:
160-
:return:
164+
This function receives a list of gene symbols and returns information about them from different bioinformatics databases.
165+
166+
:param genes: list of gene symbols
167+
:return: dictionary with information about gene symbols (each key is a gene and each value is another dictionary with its information)
161168
"""
162169
res = {}
163170
collection_gene_grch37 = mydb["gene_grch37"]
@@ -212,12 +219,13 @@ def get_information_of_genes(genes: List[str]) -> Dict:
212219
return res
213220

214221

215-
def get_expression_from_gtex(tissue: str, genes: List[str]) -> List:
222+
def get_expression_from_gtex(tissue: str, genes: List[str]) -> List[Dict[str, float]]:
216223
"""
217-
Gets all the expressions for a specific tissue and a list of genes
224+
Gets all the expressions for a specific tissue and a list of genes.
225+
218226
:param tissue: Tissue to filter
219227
:param genes: List of genes to filter
220-
:return: List of expressions
228+
:return: List of expression values. Each element of the list contains the expression values for each gene
221229
"""
222230
collection = mydb["gtex_" + tissue] # Connects to specific tissue's collection
223231
query = {'gene': {'$in': genes}}
@@ -243,7 +251,7 @@ def terms_related_to_one_gene(gene: str, relation_type: Optional[List[str]] = No
243251
if relation_type is None:
244252
relation_type = ["enables", "involved_in", "part_of", "located_in"]
245253
collection_go_annotations = mydb["go_anotations"]
246-
254+
247255
annotation = list(collection_go_annotations.find({"gene_symbol": gene}))
248256
related_genes = {}
249257
if annotation:
@@ -260,6 +268,7 @@ def terms_related_to_one_gene(gene: str, relation_type: Optional[List[str]] = No
260268
return related_genes
261269

262270

271+
263272
def is_term_on_db(term_id)-> bool:
264273
"""
265274
Returns whether a go term ID is in the DB
@@ -361,12 +370,12 @@ def enrich(gene_ids: List, p_value_threshold: int= 0.05, correction_method: str
361370
return metrics, relations
362371

363372

364-
def populate_terms_with_data(term_list:List, ontology_type: Optional[List[str]] = None)-> List:
373+
def populate_terms_with_data(term_list, ontology_type: Optional[List[str]] = None):
365374
"""
366-
Given a go terms list will return a list with all the terms information
367-
:param term_list: list of all the wanted terms
368-
:param ontology_type: Filters the ontology type of the terms in the response
369-
:return: list of all terms with all the information of them that's on the DB
375+
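Given a list of GO terms, returns a list with all the information of those terms.
376+
:param term_list: list of all the wanted terms
377+
:param ontology_type: filters the ontology type of the terms in the response
378+
:return: list of all terms with all the information about them that is in the DB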
370379
"""
371380
if ontology_type is None:
372381
ontology_type = ["biological_process", "molecular_function", "cellular_component"]
@@ -375,6 +384,7 @@ def populate_terms_with_data(term_list:List, ontology_type: Optional[List[str]]
375384
return terms
376385

377386

387+
378388
def strip_term(term: Dict,relations:Optional[List[str]])-> Dict:
379389
"""
380390
Given a go term and wanted relations will return just a selected amount of attributes needed for representation as a graph
@@ -494,12 +504,12 @@ def cancer_drugs_related_to_gene(gene: str) -> List:
494504
collection_pharm = mydb["pharmgkb"]
495505
return list(collection_pharm.find({"genes":gene},{"_id":0}))
496506

497-
# App
498507

499-
def get_data_from_oncokb(genes: List[str]) -> Dict:
508+
def get_data_from_oncokb(genes: List[str]) -> Dict[str, Dict[str, Any]]:
500509
"""
501-
Gets all data associated with a gene list.
502-
:param genes: List of genes to filter.
510+
Gets all data from OncoKB database associated with a gene list.
511+
512+
:param genes: List of gene symbols.
503513
:return: Dict of genes with their associated drugs and information according to OncoKB database
504514
"""
505515
collection_actionability_gene = mydb["oncokb_biomarker_drug_associations"]
@@ -528,10 +538,10 @@ def get_data_from_oncokb(genes: List[str]) -> Dict:
528538
res[gen]["oncokb_cancer_gene"].append("Oncogene")
529539
if doc_c["tumor_suppressor_gene"]:
530540
res[gen]["oncokb_cancer_gene"].append("Tumor Suppressor Gene")
531-
541+
532542
if len(res[gen]["oncokb_cancer_gene"]) == 0:
533543
res[gen].pop("oncokb_cancer_gene")
534-
544+
535545
sources = []
536546
for key in doc_c:
537547
if doc_c[key] == 1:
@@ -619,7 +629,7 @@ def gene_symbols():
619629
@flask_app.route("/gene-symbols-finder/", methods=['GET'])
620630
def gene_symbol_finder():
621631
"""Takes a string of any length and returns a list of genes that contain that search criteria."""
622-
query = None # To prevent MyPy warning
632+
query = "" # To prevent MyPy warning
623633
if "query" not in request.args:
624634
abort(400, "'query' parameter is mandatory")
625635
else:
@@ -642,7 +652,8 @@ def gene_symbol_finder():
642652
@flask_app.route("/information-of-genes", methods=['POST'])
643653
def information_of_genes():
644654
"""Receives a list of gene IDs and returns information about them."""
645-
body = request.get_json() # type: ignore
655+
body = request.get_json() # type: ignore
656+
response = {}
646657
if "gene_ids" not in body:
647658
abort(400, "gene_ids is mandatory")
648659

@@ -653,7 +664,6 @@ def information_of_genes():
653664
try:
654665
response = get_information_of_genes(gene_ids)
655666
except Exception as e:
656-
response = {} # To prevent mypy warnings
657667
abort(400, e)
658668
return make_response(response, 200, headers)
659669

@@ -695,7 +705,7 @@ def genes_in_the_same_group(gene_id):
695705
def pathway_genes(pathway_source, pathway_id):
696706
if pathway_source.lower() not in PATHWAYS_SOURCES:
697707
abort(404, f'{pathway_source} is an invalid pathway source')
698-
response = {"genes": get_genes_of_pathway(pathway_id, pathway_source)}
708+
response = {"genes": get_genes_of_pathway(str(pathway_id), pathway_source)}
699709
return make_response(response, 200, headers)
700710

701711
@flask_app.route("/pathways-in-common", methods=['POST'])
@@ -720,7 +730,7 @@ def pathways_in_common():
720730

721731
@flask_app.route("/expression-of-genes", methods=['POST'])
722732
def expression_data_from_gtex():
723-
body = request.get_json() # type: ignore
733+
body = request.get_json() # type: ignore
724734

725735
if "gene_ids" not in body:
726736
abort(400, "gene_ids is mandatory")

config/nginx/conf.d/default.conf

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,18 @@ upstream web {
99

1010
server {
1111
client_max_body_size 0;
12+
13+
# Prevents 504 errors with long-running requests (3600 s = 1 hour)
1214
proxy_read_timeout 3600;
1315
proxy_connect_timeout 3600;
1416
proxy_send_timeout 3600;
1517

1618
# Main proxy
1719
location / {
1820
proxy_pass http://web/;
21+
22+
# Prevents 504 error with long running requests
23+
uwsgi_read_timeout 3600; # 1 hour
1924
}
2025

2126
# Listening port and server name

0 commit comments

Comments
 (0)