Skip to content

Commit c571e3b

Browse files
authored
Merge pull request #91 from DL4DS/dev_branch
Merge to main after dev_branch code restructure
2 parents 4bdb9ef + 558adb3 commit c571e3b

File tree

95 files changed

+1308
-400
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

95 files changed

+1308
-400
lines changed

.gitignore

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,5 +169,18 @@ code/.chainlit/translations/
169169
storage/logs/*
170170
vectorstores/*
171171

172-
*/.files/*
172+
**/.files/*
173173
code/storage/models/
174+
175+
**/translations/en-US.json
176+
**/translations/zh-CN.json
177+
178+
179+
**/vectorstores/*
180+
181+
**/private/students.json
182+
183+
**/apps/*/storage/logs/*
184+
**/apps/*/private/*
185+
186+
.idea/

Dockerfile

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,12 @@ FROM python:3.11
33
WORKDIR /code
44

55
COPY ./requirements.txt /code/requirements.txt
6+
COPY ./setup.py /code/setup.py
67

78
RUN pip install --upgrade pip
89

910
RUN pip install --no-cache-dir -r /code/requirements.txt
11+
RUN pip install -e .
1012

1113
COPY . /code
1214

@@ -17,12 +19,15 @@ RUN ls -R /code
1719
RUN chmod -R 777 /code
1820

1921
# Create a logs directory and set permissions
20-
RUN mkdir /code/logs && chmod 777 /code/logs
22+
RUN mkdir /code/apps/ai_tutor/logs && chmod 777 /code/apps/ai_tutor/logs
2123

2224
# Create a cache directory within the application's working directory
2325
RUN mkdir /.cache && chmod -R 777 /.cache
2426

25-
WORKDIR /code/code
27+
WORKDIR /code/apps/ai_tutor
28+
29+
# Expose the port the app runs on
30+
EXPOSE 7860
2631

2732
RUN --mount=type=secret,id=HUGGINGFACEHUB_API_TOKEN,mode=0444,required=true
2833
RUN --mount=type=secret,id=OPENAI_API_KEY,mode=0444,required=true
@@ -35,4 +40,4 @@ RUN --mount=type=secret,id=LITERAL_API_KEY_LOGGING,mode=0444,required=true
3540
RUN --mount=type=secret,id=CHAINLIT_AUTH_SECRET,mode=0444,required=true
3641

3742
# Default command to run the application
38-
CMD ["sh", "-c", "python -m modules.vectorstore.store_manager && uvicorn app:app --host 0.0.0.0 --port 7860"]
43+
CMD python -m modules.vectorstore.store_manager --config_file config/config.yml --project_config_file config/project_config.yml && python -m uvicorn app:app --host 0.0.0.0 --port 7860

Dockerfile.dev

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,29 +3,36 @@ FROM python:3.11
33
WORKDIR /code
44

55
COPY ./requirements.txt /code/requirements.txt
6+
COPY ./setup.py /code/setup.py
67

78
RUN pip install --upgrade pip
89

910
RUN pip install --no-cache-dir -r /code/requirements.txt
11+
RUN pip install -e .
1012

1113
COPY . /code
1214

15+
# Copy .env file to the application directory
16+
COPY .env /code/apps/ai_tutor/.env
17+
1318
# List the contents of the /code directory to verify files are copied correctly
1419
RUN ls -R /code
1520

1621
# Change permissions to allow writing to the directory
1722
RUN chmod -R 777 /code
1823

1924
# Create a logs directory and set permissions
20-
RUN mkdir /code/logs && chmod 777 /code/logs
25+
RUN mkdir /code/apps/ai_tutor/logs && chmod 777 /code/apps/ai_tutor/logs
2126

2227
# Create a cache directory within the application's working directory
2328
RUN mkdir /.cache && chmod -R 777 /.cache
2429

25-
WORKDIR /code/code
30+
WORKDIR /code/apps/ai_tutor
31+
32+
RUN ls -R /code
2633

2734
# Expose the port the app runs on
28-
EXPOSE 8000
35+
EXPOSE 7860
2936

3037
# Default command to run the application
31-
CMD ["sh", "-c", "python -m modules.vectorstore.store_manager && chainlit run main.py --host 0.0.0.0 --port 8000"]
38+
CMD python -m modules.vectorstore.store_manager --config_file config/config.yml --project_config_file config/project_config.yml && python -m uvicorn app:app --host 0.0.0.0 --port 7860

README.md

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,12 @@ app_port: 7860
99
---
1010
# DL4DS Tutor 🏃
1111

12+
![Build Status](https://github.com/DL4DS/dl4ds_tutor/actions/workflows/push_to_hf_space.yml/badge.svg)
13+
![License](https://img.shields.io/github/license/DL4DS/dl4ds_tutor)
14+
![GitHub stars](https://img.shields.io/github/stars/DL4DS/dl4ds_tutor)
15+
![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg?style=flat-square)
16+
17+
1218
Check out the configuration reference at [Hugging Face Spaces Config Reference](https://huggingface.co/docs/hub/spaces-config-reference).
1319

1420
You can find a "production" implementation of the Tutor running live at [DL4DS Tutor](https://dl4ds-dl4ds-tutor.hf.space/) from the
@@ -31,26 +37,31 @@ Please visit [setup](https://dl4ds.github.io/dl4ds_tutor/guide/setup/) for more
3137
git clone https://github.com/DL4DS/dl4ds_tutor
3238
```
3339

34-
2. **Put your data under the `storage/data` directory**
40+
2. **Create your app in the `apps` folder** (see `apps/ai_tutor` for an example)
41+
```
42+
cd apps
43+
mkdir your_app
44+
```
45+
46+
2. **Put your data under the `apps/your_app/storage/data` directory**
3547
- Add URLs in the `urls.txt` file.
36-
- Add other PDF files in the `storage/data` directory.
48+
- Add other PDF files in the `apps/your_app/storage/data` directory.
3749

3850
3. **To test Data Loading (Optional)**
3951
```bash
40-
cd code
41-
python -m modules.dataloader.data_loader --links "your_pdf_link"
52+
cd apps/your_app
53+
python -m modules.dataloader.data_loader --links "your_pdf_link" --config_file config/config.yml --project_config_file config/project_config.yml
4254
```
4355

4456
4. **Create the Vector Database**
4557
```bash
46-
cd code
47-
python -m modules.vectorstore.store_manager
58+
cd apps/your_app
59+
python -m modules.vectorstore.store_manager --config_file config/config.yml --project_config_file config/project_config.yml
4860
```
49-
- Note: You need to run the above command when you add new data to the `storage/data` directory, or if the `storage/data/urls.txt` file is updated.
5061

5162
6. **Run the FastAPI App**
5263
```bash
53-
cd code
64+
cd apps/your_app
5465
uvicorn app:app --port 7860
5566
```
5667

@@ -65,7 +76,7 @@ The HuggingFace Space is built using the `Dockerfile` in the repository. To run
6576

6677
```bash
6778
docker build --tag dev -f Dockerfile.dev .
68-
docker run -it --rm -p 8000:8000 dev
79+
docker run -it --rm -p 7860:7860 dev
6980
```
7081

7182
## Contributing

code/.chainlit/config.toml renamed to apps/ai_tutor/.chainlit/config.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ github = "https://github.com/DL4DS/dl4ds_tutor"
6969

7070
# Specify a CSS file that can be used to customize the user interface.
7171
# The CSS file can be served from the public directory or via an external link.
72-
custom_css = "/public/test.css"
72+
custom_css = "/public/files/test.css"
7373

7474
# Specify a Javascript file that can be used to customize the user interface.
7575
# The Javascript file can be served from the public directory.

apps/ai_tutor/README.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# WIP
2+
3+
4+
## Run the encrypt_students script
5+
6+
- If you don't want the emails to be public, run this script to encrypt the emails of the students.
7+
- This will create a new file (e.g. `students_encrypted.json`) in the `public/files/` directory.
8+
- Place your file with the students' emails in the private/ directory (do not commit this file to the repository).
9+
10+
```bash
11+
python encrypt_students.py --students-file private/students.json --encrypted-students-file public/files/students_encrypted.json
12+
```

code/app.py renamed to apps/ai_tutor/app.py

Lines changed: 61 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -8,24 +8,33 @@
88
import secrets
99
import json
1010
import base64
11-
from modules.config.constants import (
11+
from config.constants import (
1212
OAUTH_GOOGLE_CLIENT_ID,
1313
OAUTH_GOOGLE_CLIENT_SECRET,
1414
CHAINLIT_URL,
15-
GITHUB_REPO,
16-
DOCS_WEBSITE,
17-
ALL_TIME_TOKENS_ALLOCATED,
18-
TOKENS_LEFT,
15+
EMAIL_ENCRYPTION_KEY,
1916
)
2017
from fastapi.middleware.cors import CORSMiddleware
2118
from fastapi.staticfiles import StaticFiles
22-
from modules.chat_processor.helpers import (
23-
get_user_details,
19+
from helpers import (
2420
get_time,
2521
reset_tokens_for_user,
2622
check_user_cooldown,
27-
update_user_info,
2823
)
24+
from modules.chat_processor.helpers import get_user_details, update_user_info
25+
from config.config_manager import config_manager
26+
import hashlib
27+
28+
# set config
29+
config = config_manager.get_config().dict()
30+
31+
# set constants
32+
GITHUB_REPO = config["misc"]["github_repo"]
33+
DOCS_WEBSITE = config["misc"]["docs_website"]
34+
ALL_TIME_TOKENS_ALLOCATED = config["token_config"]["all_time_tokens_allocated"]
35+
TOKENS_LEFT = config["token_config"]["tokens_left"]
36+
COOLDOWN_TIME = config["token_config"]["cooldown_time"]
37+
REGEN_TIME = config["token_config"]["regen_time"]
2938

3039
GOOGLE_CLIENT_ID = OAUTH_GOOGLE_CLIENT_ID
3140
GOOGLE_CLIENT_SECRET = OAUTH_GOOGLE_CLIENT_SECRET
@@ -46,13 +55,8 @@
4655
CHAINLIT_PATH = "/chainlit_tutor"
4756

4857
# only admin is given any additional permissions for now -- no limits on tokens
49-
USER_ROLES = {
50-
"[email protected]": ["instructor", "bu"],
51-
"[email protected]": ["admin", "instructor", "bu"],
52-
"[email protected]": ["instructor", "bu"],
53-
"[email protected]": ["guest"],
54-
# Add more users and roles as needed
55-
}
58+
with open("public/files/students_encrypted.json", "r") as file:
59+
USER_ROLES = json.load(file)
5660

5761
# Create a Google OAuth flow
5862
flow = Flow.from_client_config(
@@ -80,7 +84,20 @@
8084

8185

8286
def get_user_role(username: str):
83-
return USER_ROLES.get(username, ["guest"]) # Default to "guest" role
87+
88+
# Function to deterministically hash emails
89+
def deterministic_hash(email, salt):
90+
return hashlib.pbkdf2_hmac("sha256", email.encode(), salt, 100000).hex()
91+
92+
# Hash the email for the role lookup (FIXME: despite the "encrypt" naming, this is a salted PBKDF2 hash, not reversible encryption)
93+
encryption_salt = EMAIL_ENCRYPTION_KEY.encode()
94+
encrypted_email = deterministic_hash(username, encryption_salt)
95+
role = USER_ROLES.get(encrypted_email, ["guest"])
96+
97+
if "guest" in role:
98+
return "unauthorized"
99+
100+
return role
84101

85102

86103
async def get_user_info_from_cookie(request: Request):
@@ -146,6 +163,11 @@ async def login_page(request: Request):
146163
# return response
147164

148165

166+
@app.get("/unauthorized", response_class=HTMLResponse)
167+
async def unauthorized(request: Request):
168+
return templates.TemplateResponse("unauthorized.html", {"request": request})
169+
170+
149171
@app.get("/login/google")
150172
async def login_google(request: Request):
151173
# Clear any existing session cookies to avoid conflicts with guest sessions
@@ -176,6 +198,9 @@ async def auth_google(request: Request):
176198
profile_image = user_info.get("picture", "")
177199
role = get_user_role(email)
178200

201+
if role == "unauthorized":
202+
return RedirectResponse("/unauthorized")
203+
179204
session_token = secrets.token_hex(16)
180205
session_store[session_token] = {
181206
"email": email,
@@ -210,7 +235,7 @@ async def cooldown(request: Request):
210235
user_details = await get_user_details(user_info["email"])
211236
current_datetime = get_time()
212237
cooldown, cooldown_end_time = await check_user_cooldown(
213-
user_details, current_datetime
238+
user_details, current_datetime, COOLDOWN_TIME, TOKENS_LEFT, REGEN_TIME
214239
)
215240
print(f"User in cooldown: {cooldown}")
216241
print(f"Cooldown end time: {cooldown_end_time}")
@@ -228,7 +253,11 @@ async def cooldown(request: Request):
228253
else:
229254
user_details.metadata["in_cooldown"] = False
230255
await update_user_info(user_details)
231-
await reset_tokens_for_user(user_details)
256+
await reset_tokens_for_user(
257+
user_details,
258+
config["token_config"]["tokens_left"],
259+
config["token_config"]["regen_time"],
260+
)
232261
return RedirectResponse("/post-signin")
233262

234263

@@ -256,13 +285,19 @@ async def post_signin(request: Request):
256285
if "last_message_time" in user_details.metadata and "admin" not in get_user_role(
257286
user_info["email"]
258287
):
259-
cooldown, _ = await check_user_cooldown(user_details, current_datetime)
288+
cooldown, _ = await check_user_cooldown(
289+
user_details, current_datetime, COOLDOWN_TIME, TOKENS_LEFT, REGEN_TIME
290+
)
260291
if cooldown:
261292
user_details.metadata["in_cooldown"] = True
262293
return RedirectResponse("/cooldown")
263294
else:
264295
user_details.metadata["in_cooldown"] = False
265-
await reset_tokens_for_user(user_details)
296+
await reset_tokens_for_user(
297+
user_details,
298+
config["token_config"]["tokens_left"],
299+
config["token_config"]["regen_time"],
300+
)
266301

267302
if user_info:
268303
username = user_info["email"]
@@ -335,15 +370,19 @@ async def get_tokens_left(request: Request):
335370
try:
336371
user_info = await get_user_info_from_cookie(request)
337372
user_details = await get_user_details(user_info["email"])
338-
await reset_tokens_for_user(user_details)
373+
await reset_tokens_for_user(
374+
user_details,
375+
config["token_config"]["tokens_left"],
376+
config["token_config"]["regen_time"],
377+
)
339378
tokens_left = user_details.metadata["tokens_left"]
340379
return {"tokens_left": tokens_left}
341380
except Exception as e:
342381
print(f"Error getting tokens left: {e}")
343382
return {"tokens_left": 0}
344383

345384

346-
mount_chainlit(app=app, target="main.py", path=CHAINLIT_PATH)
385+
mount_chainlit(app=app, target="chainlit_app.py", path=CHAINLIT_PATH)
347386

348387
if __name__ == "__main__":
349388
import uvicorn

0 commit comments

Comments
 (0)