Skip to content

Commit

Permalink
adding the data
Browse files Browse the repository at this point in the history
  • Loading branch information
someshfengde committed Mar 27, 2024
1 parent 80adb66 commit 4850390
Show file tree
Hide file tree
Showing 5 changed files with 95 additions and 33 deletions.
2 changes: 1 addition & 1 deletion backend/app/api/endpoints/search_endpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
@version(1, 0)
async def get_search_results(
query: str = "",
routecategory: RouteCategory =RouteCategory.CT# RouteCategory.NS
routecategory: RouteCategory =RouteCategory.DRUG# RouteCategory.NS
) -> JSONResponse:
if trace_transaction := sentry_sdk.Hub.current.scope.transaction:
trace_transaction.set_tag("title", 'api_get_search_results')
Expand Down
11 changes: 11 additions & 0 deletions backend/app/chebml_eval_data.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
Study Title,Question,Answer,Link,Source
Aspirin,What is the alogP value for Aspirin?,The alogP value for Aspirin is 1.33.,CHEMBL6328,chembl
Aspirin,Has Aspirin received a black box warning from the FDA?,"No, Aspirin has not received a black box warning from the FDA.",CHEMBL267864,chembl
Aspirin,How many aromatic rings are present in the Aspirin molecule?,The Aspirin molecule contains 3 aromatic rings.,CHEMBL6329,chembl
Aspirin,How many aromatic rings are present in the structure of Aspirin?,Aspirin has 3 aromatic rings.,CHEMBL267864,chembl
Aspirin,How many aromatic rings are present in the Aspirin molecule?,The Aspirin molecule has 3 aromatic rings.,CHEMBL6362,chembl
Paracetamol (Acetaminophen),What is the molecular type of Paracetamol (Acetaminophen)?,The molecular type of Paracetamol (Acetaminophen) is a 'Small molecule'.,CHEMBL6362,chembl
Paracetamol (Acetaminophen),What is the chirality of Paracetamol (Acetaminophen)?,The chirality of Paracetamol (Acetaminophen) is -1.,CHEMBL265667,chembl
Paracetamol (Acetaminophen),What is the alogp value for Paracetamol (Acetaminophen)?,The alogp value for Paracetamol (Acetaminophen) is 1.33.,CHEMBL6328,chembl
Paracetamol (Acetaminophen),How many aromatic rings are present in the structure of Paracetamol (Acetaminophen)?,Paracetamol (Acetaminophen) has three aromatic rings in its structure.,CHEMBL265667,chembl
Paracetamol (Acetaminophen),What is the value of alogP for Paracetamol (Acetaminophen)?,The value of alogP for Paracetamol (Acetaminophen) is 2.11.,CHEMBL6329,chembl
41 changes: 41 additions & 0 deletions backend/app/make_it_rain.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# %%
"""Replay evaluation questions against the local search API.

Reads the evaluation CSV, samples questions for one source, sends each
question to the ``/search`` endpoint and writes the question, the expected
answer, the study title and the API result to a CSV for manual inspection.

Set the ``SEARCH_API_TOKEN`` environment variable to the bearer token the
API expects; when it is unset no ``Authorization`` header is sent.
"""
import json
import os

import pandas as pd
import requests
from tqdm import tqdm

SEARCH_URL = "http://127.0.0.1:8000/search"
EVAL_CSV = "chebml_eval_data.csv"
OUTPUT_CSV = "chembl_data_debug_source.csv"

# Every row of the evaluation CSV has Source == "chembl". The previous
# filter value ("Clicnical Trials") matched nothing, so sampling 10 rows
# from the resulting empty frame raised ValueError.
SOURCE_FILTER = "chembl"
SAMPLE_SIZE = 10

# Number of attempts per question (the original loop allowed exactly one).
MAX_TRIES = 1
REQUEST_TIMEOUT_SECONDS = 60

# Credentials must not live in source control: the token is taken from the
# environment instead of being hard-coded here.
API_TOKEN = os.environ.get("SEARCH_API_TOKEN", "")


def _parse_response_text(text):
    """Decode a response body into a Python object.

    Strict JSON is tried first. If the body is not a JSON object, fall back
    to the original quote-swapping approach, which copes with a body that is
    a quoted, single-quoted dict repr. Raises ``ValueError`` when neither
    form can be decoded.
    """
    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        parsed = None
    if isinstance(parsed, dict):
        return parsed
    return json.loads(text.replace("'", '"').strip('"'))


def _fetch_result(question, headers):
    """Return the API's ``result`` value for *question*, or ``None``.

    ``None`` is returned when every attempt fails, when the body cannot be
    decoded, or when the decoded result is missing or empty.
    """
    for _ in range(MAX_TRIES):
        try:
            # ``params`` lets requests URL-encode the question; interpolating
            # it into the URL breaks on characters such as "&" or "#".
            response = requests.get(
                SEARCH_URL,
                params={"query": question},
                headers=headers,
                timeout=REQUEST_TIMEOUT_SECONDS,
            )
            payload = _parse_response_text(response.text)
        except (requests.RequestException, ValueError) as err:
            # tqdm.write keeps the progress bar intact while reporting.
            tqdm.write(f"search request failed for {question!r}: {err}")
            continue

        if not isinstance(payload, dict):
            return None
        # NOTE(review): the orchestrator returns its answer under "result";
        # the original code tested "results" but stored "result", so the
        # KeyError was swallowed and nothing was ever recorded. Confirm the
        # endpoint's response shape.
        result = payload.get("result")
        # A successful but empty answer is final: do not retry (the original
        # loop spun forever in this case because it never advanced `tries`).
        return result if result else None
    return None


# %%
df = pd.read_csv(EVAL_CSV)

# %%
df_source = df[df["Source"] == SOURCE_FILTER]
# Never ask for more rows than exist, otherwise DataFrame.sample raises.
df_filter = df_source.sample(n=min(SAMPLE_SIZE, len(df_source)))

headers = {"Authorization": f"Bearer {API_TOKEN}"} if API_TOKEN else {}

# Collect plain dicts and build the frame once; concatenating a DataFrame
# per iteration is quadratic and both original branches did the same thing.
records = []
for _, row in tqdm(df_filter.iterrows(), total=len(df_filter)):
    result = _fetch_result(row["Question"], headers)
    records.append(
        {
            "question": row["Question"],
            "answer": row["Answer"],
            "study": row["Study Title"],
            # Empty string marks "no usable result", as before.
            "refined_results": result if result is not None else "",
        }
    )

chembl_data_debug = pd.DataFrame(
    records, columns=["question", "answer", "study", "refined_results"]
)
chembl_data_debug.to_csv(OUTPUT_CSV, index=False)
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ def __init__(self, config):
"Cypher Response: {context_str}\n"
"Response: "
)

self.qp = self.build_query_pipeline()
self.debug_chembl = {}

def execute_graph_query(self, queries):
logger.info(
Expand Down Expand Up @@ -110,7 +110,8 @@ def execute_graph_query(self, queries):

result_dict = self.graph_storage.execute_query(query)
results.append(result_dict)


self.debug_chembl['cypher_query'] = str(query_list)
logger.info(
f"execute_graph_query results: {results}"
)
Expand Down Expand Up @@ -156,9 +157,15 @@ def get_table_context_str(self, table_schema_objs: List[dict[str, str]]) -> str:
table_context += f"{key}: {value}\n"

context_strs.append(table_context)
self.debug_chembl['table_context_str'] = "\n\n".join(context_strs)

return "\n\n".join(context_strs)


def store_debug(self):
    """Append the current debug snapshot to ``debug_chembl.txt``.

    Writes ``str(self.debug_chembl)`` followed by a comma and a newline, so
    each call adds one record to the file. The path is relative and is
    therefore resolved against the process's current working directory.
    """
    # Append instead of truncating: with mode "w" every call wiped the
    # previous snapshots, leaving only the last query in the file and making
    # the trailing record separator pointless.
    with open("debug_chembl.txt", "a", encoding="utf-8") as debug_file:
        debug_file.write(str(self.debug_chembl) + ",\n")

def get_response_synthesis_prompt(
self, query_str, sql_query, context_str
) -> PromptTemplate:
Expand All @@ -181,6 +188,7 @@ def cypher_output_parser(self, response: list[dict[str, list]]) -> str:

response_str += " ## ".join(record_in_list) + "\n"

self.debug_chembl['cypher_response'] = response_str
logger.info(
f"cypher_output_parser response_str: {response_str}"
)
Expand Down Expand Up @@ -233,11 +241,13 @@ def build_query_pipeline(self):
async def call_text2cypher(self, search_text:str) -> str:
try:
logger.info(f"call_text2cypher search_text: {search_text}")

self.debug_chembl['question'] = search_text
response = self.qp.run(query=search_text)

logger.info(f"call_text2cypher response: {str(response)}")
self.debug_chembl['response'] = response

logger.info(f"call_text2cypher response: {str(response)}")
self.store_debug()
except Exception as ex:
logger.exception("call_text2cypher Exception -", exc_info = ex, stack_info=True)

Expand Down
56 changes: 28 additions & 28 deletions backend/app/router/orchestrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ def __init__(self, config):
self.router.load(ORCHESRATOR_ROUTER_PROMPT_PROGRAM)

self.clinicalTrialSearch = ClinicalTrialText2SQLEngine(config)
# self.drugChemblSearch = DrugChEMBLText2CypherEngine(config)
# self.pubmedsearch = PubmedSearchQueryEngine(config)
# self.bravesearch = BraveSearchQueryEngine(config)
self.drugChemblSearch = DrugChEMBLText2CypherEngine(config)
self.pubmedsearch = PubmedSearchQueryEngine(config)
self.bravesearch = BraveSearchQueryEngine(config)

async def query_and_get_answer(
self,
Expand Down Expand Up @@ -87,31 +87,31 @@ async def query_and_get_answer(
logger.exception("Orchestrator.query_and_get_answer.sqlResponse Exception -", exc_info = e, stack_info=True)
pass

# elif router_id == 1 or routecategory == RouteCategory.DRUG:
# # drug information call
# logger.info(
# "Orchestrator.query_and_get_answer.router_id drug_information_choice Entered."
# )
# try:
# cypherResponse = await self.drugChemblSearch.call_text2cypher(
# search_text=search_text
# )
# result = str(cypherResponse)
# sources = result
# logger.info(
# f"Orchestrator.query_and_get_answer.cypherResponse cypherResponse: {result}"
# )

# return {
# "result" : result,
# "sources": sources
# }
# except Exception as e:
# logger.exception(
# "Orchestrator.query_and_get_answer.cypherResponse Exception -",
# exc_info=e,
# stack_info=True,
# )
elif router_id == 1 or routecategory == RouteCategory.DRUG:
# drug information call
logger.info(
"Orchestrator.query_and_get_answer.router_id drug_information_choice Entered."
)
try:
cypherResponse = await self.drugChemblSearch.call_text2cypher(
search_text=search_text
)
result = str(cypherResponse)
sources = result
logger.info(
f"Orchestrator.query_and_get_answer.cypherResponse cypherResponse: {result}"
)

return {
"result" : result,
"sources": sources
}
except Exception as e:
logger.exception(
"Orchestrator.query_and_get_answer.cypherResponse Exception -",
exc_info=e,
stack_info=True,
)


# If routing fails, or the SQL and Cypher calls fail, fall back to PubMed or Brave search.
Expand Down

0 comments on commit 4850390

Please sign in to comment.