-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathconfig.py
69 lines (53 loc) · 2.53 KB
/
config.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
from pathlib import Path
from loguru import logger
from pydantic import SecretStr, model_validator
from pydantic_settings import BaseSettings, SettingsConfigDict
# Directory one level above the folder that contains this file; used as the project root.
ROOT_DIR = Path(__file__).parent.parent
# Location of the '.env' file that the settings are loaded from.
ENV_FILE = ROOT_DIR / ".env"

logger.info(f"Loading '.env' file from: {ENV_FILE}")

# Explicit check instead of `assert`: assert statements are removed when Python
# runs with -O, which would silently skip this validation.
if not ENV_FILE.exists():
    raise FileNotFoundError(
        f".env doesn't exist at the expected location: {ENV_FILE}"
    )
class Settings(BaseSettings):
    """Application settings, populated by pydantic-settings using the '.env' file.

    Groups the dataset/embedding options, the optional MongoDB vector database
    options and the OpenAI options.
    """

    model_config = SettingsConfigDict(
        env_file=str(ENV_FILE),
        env_file_encoding="utf-8",
    )

    # --- Superlinked ---
    # A bigger dataset can be used instead: processed_850_sample.jsonl
    PROCESSED_DATASET_PATH: Path = Path("data", "processed_300_sample.jsonl")
    GPU_EMBEDDING_THRESHOLD: int = 32

    # --- MongoDB ---
    # When 'False', an InMemory vector database that requires no credentials is used.
    USE_MONGO_VECTOR_DB: bool = False
    MONGO_CLUSTER_URL: str | None = None
    MONGO_CLUSTER_NAME: str = "free-cluster"
    MONGO_DATABASE_NAME: str = "tabular-semantic-search"
    MONGO_PROJECT_ID: str | None = None
    MONGO_API_PUBLIC_KEY: SecretStr | None = None
    MONGO_API_PRIVATE_KEY: SecretStr | None = None

    # --- OpenAI ---
    OPENAI_MODEL_ID: str = "gpt-4o"
    OPENAI_API_KEY: SecretStr

    @model_validator(mode="after")
    def validate_mongo_config(self) -> "Settings":
        """Check that every MongoDB setting is filled in when MongoDB is enabled.

        Returns:
            The validated settings instance itself.

        Raises:
            ValueError: If USE_MONGO_VECTOR_DB is truthy and at least one of
                the MongoDB settings is unset or empty.
        """
        # Nothing to verify when the MongoDB vector database is disabled.
        if not self.USE_MONGO_VECTOR_DB:
            return self

        mongo_setting_names = (
            "MONGO_CLUSTER_URL",
            "MONGO_DATABASE_NAME",
            "MONGO_CLUSTER_NAME",
            "MONGO_PROJECT_ID",
            "MONGO_API_PUBLIC_KEY",
            "MONGO_API_PRIVATE_KEY",
        )
        # A setting counts as missing when its value is falsy (None or empty).
        unset_names = [
            name for name in mongo_setting_names if not getattr(self, name)
        ]
        if unset_names:
            raise ValueError(
                f"MongoDB is enabled but the following required settings are missing: {', '.join(unset_names)}"
            )

        return self

    def validate_processed_dataset_exists(self):
        """Verify that the processed dataset is present on disk.

        Raises:
            ValueError: If PROCESSED_DATASET_PATH does not exist.
        """
        dataset_path = self.PROCESSED_DATASET_PATH
        if dataset_path.exists():
            return

        raise ValueError(
            f"Processed dataset not found at '{dataset_path}'. "
            "Please run 'make download-and-process-sample-dataset' first to download and process the Amazon dataset."
        )
# Module-level settings instance, created when this module is imported.
# OPENAI_API_KEY declares no default, so a value for it has to be supplied.
settings = Settings()