Skip to content

Commit

Permalink
uploading clinical trials
Browse files Browse the repository at this point in the history
  • Loading branch information
raahulrahl committed Mar 28, 2024
1 parent 6319f0b commit 3e403e4
Show file tree
Hide file tree
Showing 18 changed files with 156 additions and 58 deletions.
1 change: 1 addition & 0 deletions backend/.vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"clinicaltrials",
"Curieo",
"cypher",
"dspy",
"GROQ",
"llms",
"mistralai",
Expand Down
12 changes: 8 additions & 4 deletions backend/app/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,11 @@
else:
QDRANT_API_URL = config("QDRANT_API_URL", default="https://ff1f8e90-959e-4cff-9455-03914d8a7002.europe-west3-0.gcp.cloud.qdrant.io")
# Qdrant collection and retrieval settings. Each value is read through
# `config` under the key of the same name, falling back to the literal default.
QDRANT_COLLECTION_NAME: str = config("QDRANT_COLLECTION_NAME", default="pubmed_hybrid_vector_db")
QDRANT_CLINICAL_TRIAL_COLLECTION_NAME: str = config("QDRANT_CLINICAL_TRIAL_COLLECTION_NAME", default="clinical_trials_vector_db")
# cast=int keeps the top-k values integers when they are overridden from the
# environment; without it an override would arrive as a string despite the
# `int` annotation.
# NOTE(review): assumes `config` accepts a `cast` keyword (as python-decouple's
# `config` and starlette's `Config` both do) — confirm against the file's import.
QDRANT_TOP_K: int = config("QDRANT_TOP_K", cast=int, default=20)
QDRANT_SPARSE_TOP_K: int = config("QDRANT_SPARSE_TOP_K", cast=int, default=3)
QDRANT_TOP_CLINICAL_TRAIL_K: int = config("QDRANT_TOP_CLINICAL_TRAIL_K", cast=int, default=5)
QDRANT_CLINICAL_TRIAL_METADATA_FIELD_NAME: str = config("QDRANT_CLINICAL_TRIAL_METADATA_FIELD_NAME", default="title")

# LLAMA_INDEX Configuration
# cast=bool so that an environment override such as "False" or "0" is parsed
# into a real boolean instead of being kept as a (truthy) non-empty string.
# NOTE(review): assumes `config` accepts a `cast` keyword — confirm against
# the file's import.
CHAT_ENABLED: bool = config("CHAT_ENABLED", cast=bool, default=False)
Expand All @@ -135,12 +138,13 @@
# Dspy Integration Configuration
CLINICAL_TRIAL_SQL_PROGRAM: str = "app/dspy_integration/dspy_programs/clinical_trials_sql_generation.json"
CLINICAL_TRIALS_RESPONSE_REFINEMENT_PROGRAM: str = "app/dspy_integration/dspy_programs/clinical_trials_response_refinement.json"
ORCHESRATOR_ROUTER_PROMPT_PROGRAM: str = "app/dspy_integration/dspy_programs/orchestrator_router_prompt.json"
ORCHESTRATOR_ROUTER_PROMPT_PROGRAM: str = "app/dspy_integration/dspy_programs/orchestrator_router_prompt.json"

# Phoenix Configuration
PHOENIX_API_ENDPOINT: str = config("PHOENIX_API_ENDPOINT", default="http://127.0.0.1:6007/v1/traces")

#AI models
ROUTER_MODEL: str = "gpt-3.5-turbo"
SQL_GENERATION_MODEL: str = "codellama/CodeLlama-13b-Instruct-hf"
RESPONSE_SYNTHESIZER_MODEL: str = "NousResearch/Nous-Hermes-llama-2-7b"
# Model identifiers, each overridable through the setting of the same name.
ROUTER_MODEL: str = config("ROUTER_MODEL", default="gpt-3.5-turbo")
# Read from its own key: it was previously looked up under "ROUTER_MODEL", so
# overriding the router model silently changed the SQL generation model too.
SQL_GENERATION_MODEL: str = config("SQL_GENERATION_MODEL", default="codellama/CodeLlama-13b-Instruct-hf")
CLINICAL_TRAIL_RESPONSE_SYNTHESIZER_MODEL: str = config("CLINICAL_TRAIL_RESPONSE_SYNTHESIZER_MODEL", default="NousResearch/Nous-Hermes-llama-2-7b")
PUBMED_RESPONSE_SYNTHESIZER_MODEL: str = config("PUBMED_RESPONSE_SYNTHESIZER_MODEL", default="mistralai/Mixtral-8x7B-Instruct-v0.1")
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"table_name": "tbl_studies_arms_details",
"table_summary": "The table defines the protocol-specified group, subgroup, or cohort of participants in a clinical trial assigned to receive the specific intervention(s) or observations according to a protocol. The table has the below columns. 'nct_id' is the primary key of the table. It is the id of the document. 'title' is the title of the document. \n\n The column 'Arm_Details' is a JSON column and it has the below information in a nested way. The 'Title' field holds information about the design group. The 'Description' field holds the description of the result group. The 'type' field holds information about the type of the design_groups like 'Active Comparator', 'Experimental', 'No Intervention', 'Other', 'Placebo Comparator', and 'Sham Comparator'."
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"table_name": "tbl_studies_design_outcomes",
"table_summary": "The table defines the Description of planned outcome measures and observations that will describe patterns of diseases and traits/associations with exposures, risk factors, or treatment. The table has the below columns.'nct_id' is the primary key of the table. It is the id of the document. 'title' is the title of the document. \n\n The column 'Design_Outcome_Measures' is a JSON column and it has the below information in a nested way. The 'OutcomeType' field holds information about the type of the design outcomes like 'other', 'primary', and 'secondary'. The 'Measure' holds the information about the measurement of the design groups. The 'Time' holds the information on the time frame of associated design groups. The 'Description' holds information about the description of the design groups."
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"table_name": "tbl_studies_designs",
"table_summary": "The table defines the Description of how the study will be conducted, including comparison group design and strategies for masking and allocating participants. The table has the below columns.'nct_id' is the primary key of the table. It is the id of the document. 'title' is the title of the document. \n\n The column 'Design_Details' is a JSON column and it has the below information in a nested way. The 'Allocation' field holds information about the type of the allocation of design like 'Non-Randomized', or 'Randomized'. The 'Intervention_Model' holds information about the intervention like 'Crossover Assignment', 'Factorial Assignment', 'Parallel Assignment', 'Sequential Assignment', and 'Single Group Assignment'.The 'Masking' column holds masking information like 'Double', 'None (Open Label)', 'Quadruple', 'Single', and 'Triple'.The 'Primary_Purpose' holds information about the designs like 'Basic Science', 'Device Feasibility', 'Diagnostic', 'Educational/Counseling/Training', 'Health Services Research', 'Other', 'Prevention', 'Screening', 'Supportive Care', 'Treatment'"
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{
"table_name": "tbl_studies_eligibilities",
"table_summary": "The table contains Information about the criteria used to select participants; includes inclusion and exclusion criteria. The table has the below columns.'nct_id' is the primary key of the table. It is the id of the document.'title' is the title of the document.'description' is the description of the document. 'eligibility_details' has information in JSON format. The first field is 'Population' (field name is 'Population) which contains the information about the associated population type. The second field is the Sampling Method (field name is 'SamplingMethod') which contains the data about the method of sampling. The third field is Minimum Age (field name is 'MinimumAge') which contains data about the minimum age of the population. The fourth field is Maximum Age (field name is 'MaximumAge') which contains data about the maximum age of the population. Next field is 'HealthyVolunteers' that have information about the requirements of healthy volunteers. The last field is 'Criteria' which has the inclusion and exclusion criteria for the trial."
"table_summary": "The table contains Information about the criteria used to select participants; includes inclusion and exclusion criteria. The table has the below columns. 'nct_id' is the primary key of the table. It is the id of the document. 'title' is the title of the document. 'eligibility_details' has information in JSON format. The first field is 'Population' (field name is 'Population') which contains the information about the associated population type. The second field is the Sampling Method (field name is 'SamplingMethod') which contains the data about the method of sampling. The third field is Minimum Age (field name is 'MinimumAge') which contains data about the minimum age of the population. The fourth field is Maximum Age (field name is 'MaximumAge') which contains data about the maximum age of the population. The next field is 'HealthyVolunteers', which has information about the requirements of healthy volunteers. The last field is 'Criteria' which has the inclusion and exclusion criteria for the trial."
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{
"table_name": "tbl_studies_sponsors",
"table_summary": "The table contains the Name of study sponsors and collaborators. The sponsor is the entity or individual initiating the study. Collaborators are other organizations providing support, including funding, design, implementation, data analysis, and reporting. The table has the below columns.'nct_id' is the primary key of the table. It is the id of the document.' title' is the title of the document.'description' is the description of the document. 'CollaboratorDetails' has information in JSON format. The first field is 'Collaborator Type' (field name is 'CollaboratorType') which contains information about the type of collaborations. The second field is the name of the collaborator entity (field name is 'CollaboratorDetails')."
"table_summary": "The table contains the Name of study sponsors and collaborators. The sponsor is the entity or individual initiating the study. Collaborators are other organizations providing support, including funding, design, implementation, data analysis, and reporting. The table has the below columns.'nct_id' is the primary key of the table. It is the id of the document.' title' is the title of the document. 'CollaboratorDetails' has information in JSON format. The first field is 'Collaborator Type' (field name is 'CollaboratorType') which contains information about the type of collaborations. The second field is the name of the collaborator entity (field name is 'CollaboratorDetails')."
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{
"table_name": "tbl_studies_adverse_details",
"table_summary": "The table contains Summary information about reported adverse events (any untoward or unfavorable medical occurrence to participants, including abnormal physical exams, laboratory findings, symptoms, or diseases), including serious adverse events, other adverse events, and mortality. The table has the below columns.'nct_id' is the primary key of the table. It is the id of the document.' title' is the title of the document.'description' is the description of the document. 'adverse_details' has information in JSON format. The first field is 'Event Type' (field name is 'EventType') which contains information about the type of adverse events like 'deaths' or 'serious'. The second field is about the count of subjects that are affected by the trial(field name is 'SubjestAffected'). The third field is 'Classification' which tells a detailed description of the Adverse Events. The fourth field is about the count of subjects that are risked by the trial(field name is 'SubjectsRisk')."
"table_summary": "The table contains Summary information about reported adverse events (any untoward or unfavorable medical occurrence to participants, including abnormal physical exams, laboratory findings, symptoms, or diseases), including serious adverse events, other adverse events, and mortality. The table has the below columns.'nct_id' is the primary key of the table. It is the id of the document.' title' is the title of the document. 'adverse_details' has information in JSON format. The first field is 'Event Type' (field name is 'EventType') which contains information about the type of adverse events like 'deaths' or 'serious'. The second field is about the count of subjects that are affected by the trial(field name is 'SubjestAffected'). The third field is 'Classification' which tells a detailed description of the Adverse Events. The fourth field is about the count of subjects that are risked by the trial(field name is 'SubjectsRisk')."
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{
"table_name": "tbl_studies_pubmed_links",
"table_summary": "The table contains Citations to publications related to the study protocol and/or results. Includes PubMed Unique Identifier (PMID) and/or full bibliographic citation. The table has the below columns.'nct_id' is the primary key of the table. It is the id of the document.' title' is the title of the document.'description' is the description of the document. 'pubmedcitation' has information in JSON format. The first field is 'Pubmed' which contains the associated article ID at the Pubmed website. The second field is about the citation details of the article of subject(field name is 'Citation')."
"table_summary": "The table contains Citations to publications related to the study protocol and/or results. Includes PubMed Unique Identifier (PMID) and/or full bibliographic citation. The table has the below columns.'nct_id' is the primary key of the table. It is the id of the document.' title' is the title of the document. 'pubmedcitation' has information in JSON format. The first field is 'Pubmed' which contains the associated article ID at the Pubmed website. The second field is about the citation details of the article of subject(field name is 'Citation')."
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{
"table_name": "tbl_studies_conditions",
"table_summary": "The table contains the Name(s) of the disease(s) or condition(s) studied in the clinical study or the focus of the clinical study. The table has the below columns.'nct_id' is the primary key of the table. It is the id of the document.' title' is the title of the document.'description' is the description of the document. The filed 'condition_name' has the name of disease or conditions names that are mentioned in the study."
"table_summary": "The table contains the Name(s) of the disease(s) or condition(s) studied in the clinical study or the focus of the clinical study. The table has the below columns. 'nct_id' is the primary key of the table. It is the id of the document. 'title' is the title of the document. The field 'condition_name' holds the names of the diseases or conditions that are mentioned in the study."
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"table_name": "tbl_primary_outcome_measurement",
"table_summary": "The table contains all the primary outcome measurements of a clinical study. The sample size included in the analysis for each outcome for each study group; usually participants but can represent other units of measure such as eyes, lesions, etc. Summary data for primary and secondary outcome measures for each study group. Includes parameter estimates and measures of dispersion/precision. The table has the below columns. 'nct_id' is the primary key of the table. It is the id of the document. 'title' is the title of the document. \n\n The column 'Outcome_Primary_Measurement_Details' is a JSON column and it has the below information in a nested way. The 'Title' field holds the information about the definition of the measurement. The 'Description' field holds the information about the description of the measurement. The 'Time' field holds the information about the time frame duration of the measurement. The 'Population' field holds the information about the population details of the measurement. The 'Units' field holds the unit of measurement of the occurrence.\n\n Another main column 'Outcome_Primary_Measurement_Value_Details' has all the value-related information. It is also a JSON column. The 'Type' field holds the types of Measurement details like 'Count of Participants', 'Count of Units', 'Geometric Least Squares Mean', 'Geometric Mean', 'Least Squares Mean', 'Log Mean', 'Mean', 'Median', 'Number'. The 'Value' field holds the value of the measurement. The 'DispersionValue' holds the statistical or dispersion value. The 'DispersionType' holds the types of statistical analysis like 'Full Range', 'Geometric Coefficient of Variation', 'Inter-Quartile Range', 'Standard Deviation', 'Standard Error', 'Confidence Interval'."
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"table_name": "tbl_secondary_outcome_measurement",
"table_summary": "The table contains all the secondary outcome measurements of a clinical study. The sample size included in the analysis for each outcome for each study group; usually participants but can represent other units of measure such as eyes, lesions, etc. Summary data for primary and secondary outcome measures for each study group. Includes parameter estimates and measures of dispersion/precision. The table has the below columns. 'nct_id' is the primary key of the table. It is the id of the document. 'title' is the title of the document. \n\n The column 'Outcome_Secondary_Measurement_Details' is a JSON column and it has the below information in a nested way. The 'Title' field holds the information about the definition of the measurement. The 'Description' field holds the information about the description of the measurement. The 'Time' field holds the information about the time frame duration of the measurement. The 'Population' field holds the information about the population details of the measurement. The 'Units' field holds the unit of measurement of the occurrence.\n\n Another main column 'Outcome_Secondary_Measurement_Value_Details' has all the value-related information. It is also a JSON column. The 'Type' field holds the types of Measurement details like 'Count of Participants', 'Count of Units', 'Geometric Least Squares Mean', 'Geometric Mean', 'Least Squares Mean', 'Log Mean', 'Mean', 'Median', 'Number'. The 'Value' field holds the value of the measurement. The 'DispersionValue' holds the statistical or dispersion value. The 'DispersionType' holds the types of statistical analysis like 'Full Range', 'Geometric Coefficient of Variation', 'Inter-Quartile Range', 'Standard Deviation', 'Standard Error', 'Confidence Interval'."
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"table_name": "tbl_baseline_details",
"table_summary": "The table contains summaries of demographic & baseline measures collected by arm or comparison group and for the entire population of participants in the clinical study. all the secondary outcome measurements of a clinical study. The table has the below columns. 'nct_id' is the primary key of the table. It is the id of the document. 'title' is the title of the document. \n\n The column 'Baseline_Measurement_Details' is a JSON column and it has the below information in a nested way. The 'Title' field holds the information about the definition of the baseline measurement detail. The 'Type' field holds the types of baseline measurements like 'Count of Participants', 'Count of Units', 'Geometric Least Squares Mean', 'Geometric Mean', 'Least Squares Mean', 'Log Mean', 'Mean', 'Median', 'Number'. The 'Value' field holds the value of the baseline measurement. The 'category' holds different categories of data. The 'Classification' holds different classifications of data. The 'DispersionValue' holds the statistical or dispersion value. The 'DispersionType' holds the types of statistical analysis like 'Full Range', 'Inter-Quartile Range', and 'Standard Deviation'.\n\n Another main column 'Baseline_Group_Details' has all the value-related information. It is also a JSON column. The 'ResultGroupTitle' field holds the information about the result group. The 'ResultGroupDesc' holds the description of the result group. The 'BaseLineCount' holds the count of the sample. The 'BaseLineCountUnits' holds the unit types."
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"table_name": "tbl_studies_interventions",
"table_summary": "The table has the interventions or exposures (including drugs, medical devices, procedures, vaccines, and other products) of interest to the study, or associated with study arms/groups. The table has the below columns.'nct_id' is the primary key of the table. It is the id of the document. 'title' is the title of the document. \n\n The column 'Study_Intervention_Compressed_Details' is a JSON column and it has the below information in a nested way. The 'Type' field holds the information about the intervention type like 'Behavioral', 'Biological', 'Combination Product', 'Device', 'Diagnostic Test', 'Dietary Supplement', 'Drug', 'Genetic', 'Other', 'Procedure', 'Radiation'.The 'Details' field is also a JSON field that holds information about the 'name' and 'description' of the interventions."
}
Loading

0 comments on commit 3e403e4

Please sign in to comment.