Skip to content

Commit

Permalink
Merge pull request #76 from curieo-org/fix-settings
Browse files Browse the repository at this point in the history
Settings and Dev Setup
  • Loading branch information
kiraparser authored Jul 3, 2024
2 parents 94dcbcc + 9518d08 commit 69d6be0
Show file tree
Hide file tree
Showing 6 changed files with 214 additions and 163 deletions.
53 changes: 0 additions & 53 deletions agency/.env.template
Original file line number Diff line number Diff line change
Expand Up @@ -21,28 +21,6 @@
# SEARCH__BASE_URL=
# SEARCH__LOCALE=

# Brave Search API Configuration
# BRAVE__API_ROOT=
BRAVE__SUBSCRIPTION_KEY=
# BRAVE__RESULT_COUNT=
BRAVE__GOGGLES_ID=
# BRAVE__RESULT_FILTER=

# LLM and TogetherAI API Configuration
# TOGETHER__API_ROOT=
TOGETHER__API_KEY=
# TOGETHER__MODEL=

# BioLLM API Configuration
# BIOLLM__MODEL_NAME=
# BIOLLM__API_URL=
# BIOLLM__TEMPERATURE=
# BIOLLM__MAX_TOKENS=
# BIOLLM__PROMPT_TOKEN_LIMIT=

# OpenAI API Configuration
OPENAI__API_KEY=

# Embedding Model Configuration
# EMBEDDING__API_URL=
EMBEDDING__API_KEY=
Expand All @@ -53,29 +31,10 @@ EMBEDDING__API_KEY=
SPLADEEMBEDDING__API_KEY=
# EMBEDDING__BATCH_SIZE=

# Post Processing Settings
# POST_PROCESS__API=
# POST_PROCESS__MAX_TOKENS_PER_NODE=
# POST_PROCESS__COMPRESSED_TARGET_TOKEN=
# POST_PROCESS__TOP_N_SOURCES=

# Nebula Graph Configuration
# NEBULA_GRAPH__HOST=
# NEBULA_GRAPH__PORT=
NEBULA_GRAPH__USER=
NEBULA_GRAPH__PASSWORD=
# NEBULA_GRAPH__SPACE=

# Redis Configuration
REDIS__URL=
# REDIS__DEFAULT_EXPIRY=

# WANDB Configuration
WANDB__API_KEY=
# WANDB__PROJECT=
# WANDB__ENTITY=
# WANDB__NOTE=

# Tracing Configuration
TRACING__SENTRY_DSN=
# TRACING__ENABLE_TRACING=
Expand All @@ -85,9 +44,6 @@ TRACING__SENTRY_DSN=
# TRACING__SERVICE_NAME=
# TRACING__JAEGER_ENDPOINT=

# GROQ API Configuration
GROQ__API_KEY=

# Pubmed Parent Qdrant API Configuration
# PUBMED_PARENT_QDRANT__API_PORT=
# PUBMED_PARENT_QDRANT__API_URL=
Expand All @@ -106,15 +62,6 @@ PUBMED_CLUSTER_QDRANT__API_KEY=
# PUBMED_CLUSTER_QDRANT__SPARSE_TOP_K=
# PUBMED_CLUSTER_QDRANT__METADATA_FIELD_NAME=

# Clinical Trial Qdrant API Configuration
# CLINICAL_TRIAL_QDRANT__API_PORT=
# CLINICAL_TRIAL_QDRANT__API_URL=
CLINICAL_TRIAL_QDRANT__API_KEY=
# CLINICAL_TRIAL_QDRANT__COLLECTION_NAME=
# CLINICAL_TRIAL_QDRANT__TOP_K=
# CLINICAL_TRIAL_QDRANT__SPARSE_TOP_K=
# CLINICAL_TRIAL_QDRANT__METADATA_FIELD_NAME=

# Pubmed Retrieval Configuration
# PUBMED_RETRIEVAL__PARENT_RELEVANCE_CRITERIA=
# PUBMED_RETRIEVAL__CLUSTER_RELEVANCE_CRITERIA=
Expand Down
4 changes: 4 additions & 0 deletions agency/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@ Agency is a collection of multiple RAG modules. Every module is responsible for
# configure the environment variables
cp .env.template .env

# Copy the environment variable values from the AWS Secrets Manager secret:
# https://eu-central-1.console.aws.amazon.com/secretsmanager/secret?name=search-agency-development-env&region=eu-central-1
# Then connect to the Curieo VPN before starting the server.

# start the server
poetry run app
```
Expand Down
109 changes: 0 additions & 109 deletions agency/app/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,49 +24,6 @@ class SearchSettings(BaseSettings):
locale: str = "en-US"


class BraveSettings(BaseSettings):
    """Configuration for the Brave Search API (the ``BRAVE__*`` variables in .env.template)."""

    # Brave web-search endpoint URL.
    api_root: str = "https://api.search.brave.com/res/v1/web/search"
    # No default, so a value must be supplied; SecretStr masks it when printed.
    subscription_key: SecretStr
    # Presumably the number of results requested per query — confirm against the caller.
    result_count: int = 10
    # No default, so a value must be supplied; kept as a secret like the key above.
    goggles_id: SecretStr
    # Result-type names; presumably forwarded as Brave's result filter — TODO confirm.
    result_filter: list[str] = [
        "discussions",
        "faq",
        "summarizer",
        "infobox",
        "news",
        "query",
        "web",
    ]


class TogetherPromptConfig(BaseSettings):
    """Default prompt/generation parameters, embedded in TogetherSettings.prompt_config."""

    # Presumably the cap on generated tokens per completion — confirm against the caller.
    max_tokens: int = 1024
    # Sampling parameters; all fields here have defaults, so none is required.
    temperature: float = 0.7
    top_p: float = 0.7
    # Presumably the maximum prompt size in tokens — TODO confirm how it is enforced.
    prompt_token_limit: int = 4096
    # Streaming is enabled by default.
    stream: bool = True


class TogetherSettings(BaseSettings):
    """Configuration for the TogetherAI API (the ``TOGETHER__*`` variables in .env.template)."""

    # Together completions endpoint URL.
    api_root: str = "https://api.together.xyz/v1/completions"
    # No default, so a value must be supplied; SecretStr masks it when printed.
    api_key: SecretStr
    # Model identifier used by default.
    model: str = "mistralai/Mistral-7B-Instruct-v0.1"
    # Nested generation defaults; built once when this class body is evaluated.
    prompt_config: TogetherPromptConfig = TogetherPromptConfig()


class BioLLMSettings(BaseSettings):
    """Configuration for the BioLLM API (the ``BIOLLM__*`` variables in .env.template).

    Every field has a default, so the section can be omitted entirely.
    """

    # Model identifier used by default.
    model_name: str = "ivarflakstad/Llama3-OpenBioLLM-8B"
    # Defaults to a service on localhost port 8085.
    api_url: str = "http://localhost:8085"
    temperature: float = 0.1
    # Presumably the cap on generated tokens — confirm against the caller.
    max_tokens: int = 1000
    # Presumably the maximum prompt size in tokens — TODO confirm.
    prompt_token_limit: int = 3000


class OpenAISettings(BaseSettings):
    """Configuration for the OpenAI API (``OPENAI__API_KEY`` in .env.template)."""

    # No default, so a value must be supplied; SecretStr masks it when printed.
    api_key: SecretStr


class EmbeddingSettings(BaseSettings):
api_url: str = "http://localhost:8080"
api_key: SecretStr
Expand All @@ -79,40 +36,11 @@ class SpladeEmbeddingSettings(BaseSettings):
batch_size: int = 4


class PostProcessingSettings(BaseSettings):
    """Post-processing settings; every field has a default."""

    # Defaults to a "/compress" endpoint on localhost port 8000.
    api: str = "http://localhost:8000/compress"
    # Presumably a per-node token budget — confirm against the caller.
    max_tokens_per_node: int = 512
    # NOTE(review): the field name is singular, so the matching variable would be
    # POST_PROCESS__COMPRESSED_TARGET_TOKEN — keep .env.template in sync with it.
    compressed_target_token: int = 300
    # Presumably how many top-ranked sources are kept — TODO confirm.
    top_n_sources: int = 10


class TableInfoDirSettings(BaseSettings):
    """Locations of the table-info directories used by the retrieval modules.

    Both paths are relative; presumably resolved from the agency project
    root — confirm against the code that opens them.
    """

    # Table info for the clinical-trials retrieval module.
    clinical_trials: str = (
        "app/rag/retrieval/clinical_trials/AACTTableQuestions_TableInfo"
    )
    # Table info for the ChEMBL drug retrieval module.
    drug_chembl: str = "app/rag/retrieval/drug_chembl/ChEMBLTableQuestions_TableInfo"


class NebulaGraphSettings(BaseSettings):
    """Connection settings for Nebula Graph (the ``NEBULA_GRAPH__*`` variables in .env.template)."""

    # Note that the default host value includes the "http://" scheme.
    host: str = "http://127.0.0.1"
    port: int = 9669
    # Credentials have no defaults, so both must be supplied; SecretStr masks them when printed.
    user: SecretStr
    password: SecretStr
    # Graph space selected by default.
    space: str = "chembl"


class RedisSettings(BaseSettings):
    """Connection settings for Redis (the ``REDIS__*`` variables in .env.template)."""

    # No default, so a value must be supplied; held as SecretStr so it is masked when printed.
    url: SecretStr
    # Presumably seconds (86400 s = 24 h) — TODO confirm the unit against the cache code.
    default_expiry: int = 86400


class WandbSettings(BaseSettings):
    """Weights & Biases settings (the ``WANDB__*`` variables in .env.template).

    The top-level Settings class declares this section as
    ``WandbSettings | None = None``, so the whole section is optional.
    """

    # No default: required whenever this section is configured.
    api_key: SecretStr
    project: str = "pe_router"
    entity: str = "curieo"
    note: str = "Curieo Search"


class TracingSettings(BaseSettings):
sentry_dsn: SecretStr
jaeger_endpoint: str = "http://127.0.0.1:4317"
Expand All @@ -123,10 +51,6 @@ class TracingSettings(BaseSettings):
service_name: str = "agency-service"


class GroqSettings(BaseSettings):
    """Configuration for the GROQ API (``GROQ__API_KEY`` in .env.template)."""

    # No default, so a value must be supplied; SecretStr masks it when printed.
    api_key: SecretStr


class QdrantSettings(BaseSettings):
api_port: int = 6333
api_url: str = "localhost"
Expand All @@ -143,27 +67,6 @@ class PubmedRetrievalSettings(BaseSettings):
url_prefix: str = "https://pubmed.ncbi.nlm.nih.gov"


class DspySettings(BaseSettings):
    """Paths to the DSPy program JSON files under app/dspy_integration/dspy_programs.

    All paths are relative; presumably resolved from the agency project
    root — confirm against the code that loads them.
    """

    # Program for clinical-trials SQL generation.
    clinical_trial_sql_program: str = (
        "app/dspy_integration/dspy_programs/clinical_trials_sql_generation.json"
    )
    # Program for refining clinical-trials responses.
    clinical_trials_response_refinement_program: str = (
        "app/dspy_integration/dspy_programs/clinical_trials_response_refinement.json"
    )
    # Program for the orchestrator's router prompt.
    orchestrator_router_prompt_program: str = (
        "app/dspy_integration/dspy_programs/orchestrator_router_prompt.json"
    )


class AIModelsSettings(BaseSettings):
    """Default model identifiers, one per task; every field has a default."""

    # Model used for routing.
    router: str = "gpt-3.5-turbo"
    # Model used for SQL generation.
    sql_generation: str = "codellama/CodeLlama-13b-Instruct-hf"
    # NOTE(review): "trail" looks like a typo for "trial"; renaming the field
    # would change the attribute and its env-var name, so coordinate with callers.
    clinical_trail_response_synthesizer_model: str = (
        "NousResearch/Nous-Hermes-llama-2-7b"
    )
    # Model used to synthesize PubMed responses.
    pubmed_response_synthesizer_model: str = "mistralai/Mixtral-8x7B-Instruct-v0.1"


class PubmedDatabaseSettings(BaseSettings):
connection: SecretStr
children_text_table_name: str = "pubmed_text_details"
Expand All @@ -181,25 +84,13 @@ class Settings(BaseSettings):
pubmed_database: PubmedDatabaseSettings
project: ProjectSettings = ProjectSettings()
search: SearchSettings = SearchSettings()
brave: BraveSettings
together: TogetherSettings
openai: OpenAISettings
nebula_graph: NebulaGraphSettings
redis: RedisSettings
wandb: WandbSettings | None = None
tracing: TracingSettings
groq: GroqSettings
dspy: DspySettings = DspySettings()
embedding: EmbeddingSettings
spladeembedding: SpladeEmbeddingSettings
post_process: PostProcessingSettings = PostProcessingSettings()
biollm: BioLLMSettings = BioLLMSettings()
pubmed_parent_qdrant: QdrantSettings
pubmed_cluster_qdrant: QdrantSettings
clinical_trial_qdrant: QdrantSettings
pubmed_retrieval: PubmedRetrievalSettings = PubmedRetrievalSettings()
table_info_dir: TableInfoDirSettings = TableInfoDirSettings()
ai_models: AIModelsSettings = AIModelsSettings()


# Module-level settings instance, constructed once when this module is imported.
# Settings fields declared without defaults must be resolvable at that point.
app_settings = Settings()
File renamed without changes.
Loading

0 comments on commit 69d6be0

Please sign in to comment.