Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Settings and Dev Setup #76

Merged
merged 2 commits into from
Jul 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 0 additions & 53 deletions agency/.env.template
Original file line number Diff line number Diff line change
Expand Up @@ -21,28 +21,6 @@
# SEARCH__BASE_URL=
# SEARCH__LOCALE=

# Brave Search API Configuration
# BRAVE__API_ROOT=
BRAVE__SUBSCRIPTION_KEY=
# BRAVE__RESULT_COUNT=
BRAVE__GOGGLES_ID=
# BRAVE__RESULT_FILTER=

# LLM and TogetherAI API Configuration
# TOGETHER__API_ROOT=
TOGETHER__API_KEY=
# TOGETHER__MODEL=

# BioLLM API Configuration
# BIOLLM__MODEL_NAME=
# BIOLLM__API_URL=
# BIOLLM__TEMPERATURE=
# BIOLLM__MAX_TOKENS=
# BIOLLM__PROMPT_TOKEN_LIMIT=

# OpenAI API Configuration
OPENAI__API_KEY=

# Embedding Model Configuration
# EMBEDDING__API_URL=
EMBEDDING__API_KEY=
Expand All @@ -53,29 +31,10 @@ EMBEDDING__API_KEY=
SPLADEEMBEDDING__API_KEY=
# EMBEDDING__BATCH_SIZE=

# Post Processing Settings
# POST_PROCESS__API=
# POST_PROCESS__MAX_TOKENS_PER_NODE=
# POST_PROCESS__COMPRESSED_TARGET_TOKEN=
# POST_PROCESS__TOP_N_SOURCES=

# Nebula Graph Configuration
# NEBULA_GRAPH__HOST=
# NEBULA_GRAPH__PORT=
NEBULA_GRAPH__USER=
NEBULA_GRAPH__PASSWORD=
# NEBULA_GRAPH__SPACE=

# Redis Configuration
REDIS__URL=
# REDIS__DEFAULT_EXPIRY=

# WANDB Configuration
WANDB__API_KEY=
# WANDB__PROJECT=
# WANDB__ENTITY=
# WANDB__NOTE=

# Tracing Configuration
TRACING__SENTRY_DSN=
# TRACING__ENABLE_TRACING=
Expand All @@ -85,9 +44,6 @@ TRACING__SENTRY_DSN=
# TRACING__SERVICE_NAME=
# TRACING__JAEGER_ENDPOINT=

# GROQ API Configuration
GROQ__API_KEY=

# Pubmed Parent Qdrant API Configuration
# PUBMED_PARENT_QDRANT__API_PORT=
# PUBMED_PARENT_QDRANT__API_URL=
Expand All @@ -106,15 +62,6 @@ PUBMED_CLUSTER_QDRANT__API_KEY=
# PUBMED_CLUSTER_QDRANT__SPARSE_TOP_K=
# PUBMED_CLUSTER_QDRANT__METADATA_FIELD_NAME=

# Clinical Trial Qdrant API Configuration
# CLINICAL_TRIAL_QDRANT__API_PORT=
# CLINICAL_TRIAL_QDRANT__API_URL=
CLINICAL_TRIAL_QDRANT__API_KEY=
# CLINICAL_TRIAL_QDRANT__COLLECTION_NAME=
# CLINICAL_TRIAL_QDRANT__TOP_K=
# CLINICAL_TRIAL_QDRANT__SPARSE_TOP_K=
# CLINICAL_TRIAL_QDRANT__METADATA_FIELD_NAME=

# Pubmed Retrieval Configuration
# PUBMED_RETRIEVAL__PARENT_RELEVANCE_CRITERIA=
# PUBMED_RETRIEVAL__CLUSTER_RELEVANCE_CRITERIA=
Expand Down
4 changes: 4 additions & 0 deletions agency/README.md
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍🏻

Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@ Agency is a collection of multiple RAG modules. Every module is responsible for
# configure the environment variables
cp .env.template .env

# Fill in .env with the values stored in the AWS Secrets Manager secret
# https://eu-central-1.console.aws.amazon.com/secretsmanager/secret?name=search-agency-development-env&region=eu-central-1
# and connect to the Curieo VPN

# start the server
poetry run app
```
Expand Down
109 changes: 0 additions & 109 deletions agency/app/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,49 +24,6 @@ class SearchSettings(BaseSettings):
locale: str = "en-US"


# Configuration group for the Brave web-search API.
# Fields without a default (subscription_key, goggles_id) must be supplied
# by configuration; both are wrapped in SecretStr.
class BraveSettings(BaseSettings):
# Default URL of the Brave web-search endpoint.
api_root: str = "https://api.search.brave.com/res/v1/web/search"
subscription_key: SecretStr
# Default number of results (presumably per request — TODO confirm against caller).
result_count: int = 10
goggles_id: SecretStr
# Default list of result-type names; presumably forwarded to the API as a
# filter — verify against the code that builds the request.
result_filter: list[str] = [
"discussions",
"faq",
"summarizer",
"infobox",
"news",
"query",
"web",
]


# Default generation parameters used as TogetherSettings.prompt_config.
# Every field has a default, so the class can be instantiated with no input.
class TogetherPromptConfig(BaseSettings):
max_tokens: int = 1024
temperature: float = 0.7
top_p: float = 0.7
# NOTE(review): presumably an upper bound on prompt size in tokens — confirm with caller.
prompt_token_limit: int = 4096
stream: bool = True


# Configuration group for the TogetherAI completions API.
# api_key has no default and must be supplied by configuration.
class TogetherSettings(BaseSettings):
# Default URL of the completions endpoint.
api_root: str = "https://api.together.xyz/v1/completions"
api_key: SecretStr
# Default model identifier.
model: str = "mistralai/Mistral-7B-Instruct-v0.1"
# The default instance is built once, when this class body is evaluated.
prompt_config: TogetherPromptConfig = TogetherPromptConfig()


# Configuration group for the BioLLM model service.
# Every field has a default; the default api_url points at localhost:8085.
class BioLLMSettings(BaseSettings):
model_name: str = "ivarflakstad/Llama3-OpenBioLLM-8B"
api_url: str = "http://localhost:8085"
temperature: float = 0.1
max_tokens: int = 1000
# NOTE(review): presumably an upper bound on prompt size in tokens — confirm with caller.
prompt_token_limit: int = 3000


# Configuration group holding the OpenAI credential.
class OpenAISettings(BaseSettings):
# No default: must be supplied by configuration. Wrapped in SecretStr.
api_key: SecretStr


class EmbeddingSettings(BaseSettings):
api_url: str = "http://localhost:8080"
api_key: SecretStr
Expand All @@ -79,40 +36,11 @@ class SpladeEmbeddingSettings(BaseSettings):
batch_size: int = 4


# Configuration group for the post-processing (compression) step.
# Every field has a default; the default api URL points at localhost:8000/compress.
class PostProcessingSettings(BaseSettings):
api: str = "http://localhost:8000/compress"
max_tokens_per_node: int = 512
# NOTE(review): the field name is singular ("token"). Any environment key that
# overrides it must use this exact spelling — confirm the env template agrees.
compressed_target_token: int = 300
top_n_sources: int = 10


# Default directory paths for table-info files, one per data source.
# The paths are relative (they start with "app/"); presumably resolved against
# the process working directory — TODO confirm.
class TableInfoDirSettings(BaseSettings):
clinical_trials: str = (
"app/rag/retrieval/clinical_trials/AACTTableQuestions_TableInfo"
)
drug_chembl: str = "app/rag/retrieval/drug_chembl/ChEMBLTableQuestions_TableInfo"


# Configuration group for the Nebula Graph connection.
# user and password have no default and must be supplied by configuration.
class NebulaGraphSettings(BaseSettings):
# NOTE(review): the default host carries an "http://" scheme — confirm the
# client that consumes it expects a URL rather than a bare hostname.
host: str = "http://127.0.0.1"
port: int = 9669
user: SecretStr
password: SecretStr
# Default graph space name.
space: str = "chembl"


# Configuration group for the Redis connection.
class RedisSettings(BaseSettings):
# No default: must be supplied by configuration. Wrapped in SecretStr.
url: SecretStr
# Presumably seconds (86400 s = 24 h) — TODO confirm against the cache code.
default_expiry: int = 86400


# Configuration group for Weights & Biases (wandb).
# api_key has no default; the remaining fields fall back to the values below.
class WandbSettings(BaseSettings):
api_key: SecretStr
project: str = "pe_router"
entity: str = "curieo"
note: str = "Curieo Search"


class TracingSettings(BaseSettings):
sentry_dsn: SecretStr
jaeger_endpoint: str = "http://127.0.0.1:4317"
Expand All @@ -123,10 +51,6 @@ class TracingSettings(BaseSettings):
service_name: str = "agency-service"


# Configuration group holding the Groq credential.
class GroqSettings(BaseSettings):
# No default: must be supplied by configuration. Wrapped in SecretStr.
api_key: SecretStr


class QdrantSettings(BaseSettings):
api_port: int = 6333
api_url: str = "localhost"
Expand All @@ -143,27 +67,6 @@ class PubmedRetrievalSettings(BaseSettings):
url_prefix: str = "https://pubmed.ncbi.nlm.nih.gov"


# Default paths to the DSPy program JSON files, one field per program.
# The paths are relative (they start with "app/"); presumably resolved against
# the process working directory — TODO confirm.
class DspySettings(BaseSettings):
clinical_trial_sql_program: str = (
"app/dspy_integration/dspy_programs/clinical_trials_sql_generation.json"
)
clinical_trials_response_refinement_program: str = (
"app/dspy_integration/dspy_programs/clinical_trials_response_refinement.json"
)
orchestrator_router_prompt_program: str = (
"app/dspy_integration/dspy_programs/orchestrator_router_prompt.json"
)


# Default model identifiers, one field per task. Every field has a default.
class AIModelsSettings(BaseSettings):
router: str = "gpt-3.5-turbo"
sql_generation: str = "codellama/CodeLlama-13b-Instruct-hf"
# NOTE(review): "trail" in the field name looks like a typo for "trial".
# The name is part of the settings interface (attribute access and env keys),
# so renaming it requires updating every reader at the same time.
clinical_trail_response_synthesizer_model: str = (
"NousResearch/Nous-Hermes-llama-2-7b"
)
pubmed_response_synthesizer_model: str = "mistralai/Mixtral-8x7B-Instruct-v0.1"


class PubmedDatabaseSettings(BaseSettings):
connection: SecretStr
children_text_table_name: str = "pubmed_text_details"
Expand All @@ -181,25 +84,13 @@ class Settings(BaseSettings):
pubmed_database: PubmedDatabaseSettings
project: ProjectSettings = ProjectSettings()
search: SearchSettings = SearchSettings()
brave: BraveSettings
together: TogetherSettings
openai: OpenAISettings
nebula_graph: NebulaGraphSettings
redis: RedisSettings
wandb: WandbSettings | None = None
tracing: TracingSettings
groq: GroqSettings
dspy: DspySettings = DspySettings()
embedding: EmbeddingSettings
spladeembedding: SpladeEmbeddingSettings
post_process: PostProcessingSettings = PostProcessingSettings()
biollm: BioLLMSettings = BioLLMSettings()
pubmed_parent_qdrant: QdrantSettings
pubmed_cluster_qdrant: QdrantSettings
clinical_trial_qdrant: QdrantSettings
pubmed_retrieval: PubmedRetrievalSettings = PubmedRetrievalSettings()
table_info_dir: TableInfoDirSettings = TableInfoDirSettings()
ai_models: AIModelsSettings = AIModelsSettings()


app_settings = Settings()
Loading