
Commit 6a7dc1f

Authored by Lou Bichard (loujaybee) and Lou Bichard
fix: add jupyter notebooks (#2)
Co-authored-by: Lou Bichard <[email protected]>
1 parent 80795a7 commit 6a7dc1f


6 files changed, +183 −9 lines changed

.devcontainer/Dockerfile

+22-1
@@ -1 +1,22 @@
-FROM mcr.microsoft.com/vscode/devcontainers/base:ubuntu
+FROM mcr.microsoft.com/vscode/devcontainers/base:ubuntu
+
+# Install Ollama
+RUN curl -fsSL https://ollama.com/install.sh | TERM=xterm sh
+
+# Install Python 3.10
+RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
+    && apt-get -y install python3.10 python3.10-venv python3-pip
+
+# Create and activate virtual environment with Python 3.10
+RUN python3.10 -m venv /workspace/venv
+ENV PATH="/workspace/venv/bin:$PATH"
+
+# Set working directory
+WORKDIR /workspace
+
+# Install IPython kernel and its dependencies
+RUN pip install ipykernel && \
+    python -m ipykernel install --user --name python3 --display-name "Python 3.10.12"
+
+# Note: requirements.txt will be installed after container creation
+CMD [ -f "requirements.txt" ] && pip install -r requirements.txt || echo "No requirements.txt found"
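
The Dockerfile registers an IPython kernel named `python3` with the display name "Python 3.10.12". As a quick sanity check (not part of this commit), the registration can be confirmed from inside the container; this sketch assumes `jupyter_client`, which is pulled in by `ipykernel`:

```python
# Verify that the kernel registered in the Dockerfile is discoverable.
# Sketch only: assumes jupyter_client is installed (it ships with ipykernel/jupyter).
from jupyter_client.kernelspec import KernelSpecManager

specs = KernelSpecManager().get_all_specs()  # {kernel_name: {"spec": ..., "resource_dir": ...}}
for name, info in specs.items():
    print(name, "->", info["spec"]["display_name"])

assert "python3" in specs, "Expected the 'python3' kernel registered by the Dockerfile"
```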

.devcontainer/devcontainer.json

+16-2
@@ -13,5 +13,19 @@
   "workspaceFolder": "/workspace",
   "features": {
     "ghcr.io/devcontainers/features/nvidia-cuda:1": {}
-  }
-}
+  },
+  "customizations": {
+    "vscode": {
+      "extensions": [
+        "ms-python.python",
+        "ms-toolsai.jupyter"
+      ],
+      "settings": {
+        "python.defaultInterpreterPath": "/usr/bin/python3",
+        "python.formatting.provider": "black",
+        "jupyter.notebookFileRoot": "${workspaceFolder}"
+      }
+    }
+  },
+  "postCreateCommand": "pip install -r requirements.txt"
+}
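
Note that `python.defaultInterpreterPath` points at `/usr/bin/python3`, while the Dockerfile puts `/workspace/venv/bin` first on `PATH`. A trivial sketch (not part of the commit) for checking which interpreter a notebook or terminal actually resolves to:

```python
# Print the interpreter and version the current process is using.
# If this shows /usr/bin/python3 rather than /workspace/venv/bin/python,
# the VS Code default-interpreter setting (not the venv) is being picked up.
import sys

print("executable:", sys.executable)
print("version   :", sys.version.split()[0])
```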

.gitpod/automations.yaml

+32
@@ -0,0 +1,32 @@
+services:
+  ollama-server:
+    name: ollama-server
+    description: "Start ollama server"
+    triggeredBy:
+      - manual
+      - postEnvironmentStart
+    commands:
+      start: |
+        cd /workspace/
+        ollama serve
+
+  gpu-stats:
+    name: gpu-stats
+    description: "GPU stats"
+    triggeredBy:
+      - manual
+    commands:
+      start: |
+        watch -n 1 nvidia-smi
+
+tasks:
+  run-llm:
+    name: Run LLM
+    description: "Start the PHI:3 model"
+    triggeredBy:
+      - postEnvironmentStart
+      - manual
+    command: |
+      sleep 60s # Hack to wait for ollama to start
+      cd /workspace/
+      ollama run phi3:medium
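
The `run-llm` task uses `sleep 60s` as a hack to wait for the Ollama server. A more robust alternative is to poll the server until it answers; the sketch below assumes Ollama's default local endpoint (`http://localhost:11434`) and uses only the standard library:

```python
# Poll the local Ollama server until it responds, instead of sleeping a fixed 60s.
# Sketch only: assumes the default Ollama address http://localhost:11434.
import time
import urllib.error
import urllib.request


def wait_for_ollama(url="http://localhost:11434", timeout_s=120, interval_s=2):
    deadline = time.time() + timeout_s
    while time.time() < deadline:
        try:
            with urllib.request.urlopen(url, timeout=5) as resp:
                if resp.status == 200:  # Ollama replies "Ollama is running"
                    return True
        except (urllib.error.URLError, OSError):
            pass
        time.sleep(interval_s)
    return False


if __name__ == "__main__":
    print("Ollama ready" if wait_for_ollama() else "Timed out waiting for Ollama")
```

If Python is on the task's PATH, a check like this could stand in for the fixed `sleep 60s` in the `run-llm` command.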

README.md

+49-6
@@ -12,9 +12,32 @@ As a data scientist or ML engineer, you've likely faced the challenge of limited
 
 ## What's Included
 
-- `.devcontainer/` - Complete GPU-enabled development environment configuration
-  - `devcontainer.json` - Environment setup with NVIDIA CUDA support
-  - `Dockerfile` - Base container configuration using Ubuntu
+- `devcontainer.json` - Declarative and repeatable environment setup, including:
+  - NVIDIA CUDA support via features
+  - GPU requirements and access configuration
+  - VS Code Python and Jupyter extensions
+  - Python interpreter and formatting settings
+  - Automatic requirements.txt installation
+  - Kernel specification for Jupyter notebooks
+- `Dockerfile` - Base container configuration using Ubuntu with the CUDA toolkit and Python packages, including:
+  - Python 3.10 with a virtual environment
+  - IPython kernel for Jupyter notebooks
+  - Ollama for LLM inference
+- `requirements.txt` - Python dependencies to install, including (but not limited to):
+  - NumPy (>=1.24.0)
+  - Pandas (>=2.0.0)
+  - Matplotlib (>=3.7.0)
+  - Seaborn (>=0.12.0)
+  - scikit-learn (>=1.3.0)
+  - Jupyter (>=1.0.0)
+  - IPython Kernel (>=6.0.0)
+  - Plotly (>=5.0.0)
+  - Plotly Express (>=0.4.0)
+  - nbformat (>=5.0.0)
+- `.gitpod/automations.yaml` - Gitpod automation examples:
+  - Starting the Ollama server
+  - Viewing GPU stats of the environment
+  - Running an Ollama LLM
 
 ## Quick Start
 
@@ -32,15 +55,32 @@ As a data scientist or ML engineer, you've likely faced the challenge of limited
 - CUDA Toolkit
 - Python 3.x
 - Common ML libraries (PyTorch, TensorFlow)
+- Ollama for local inference
 
-## Verify Your Setup
+## Try It Out
 
-Once your environment is running:
+Once your environment is running, here are some things you can try:
 
+1. Run local inference with Ollama:
+```bash
+ollama run phi3:medium
+```
+
+2. See NVIDIA GPU performance and stats:
 ```bash
 watch -n 1 nvidia-smi
 ```
 
+3. Run a Jupyter notebook:
+```bash
+jupyter notebook
+```
+
+4. Execute a Python script:
+```bash
+python my_script.py
+```
+
 ## Customization
 
 - Modify `.devcontainer/devcontainer.json` to change GPU requirements
@@ -54,7 +94,10 @@ watch -n 1 nvidia-smi
 
 **Note:** Refer to AWS documentation for precise costs.
 
-## Learn More
+## Documentation
+
+For a full tutorial, check out: [https://www.gitpod.io/blog/gpu-dev-environments-on-aws](https://www.gitpod.io/blog/gpu-dev-environments-on-aws)
 
+Other helpful resources:
 - [Gitpod Documentation](https://www.gitpod.io/docs)
 - [Dev Container Specification](https://containers.dev)
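
Beyond `ollama run phi3:medium` in a terminal, the README's "Try It Out" steps can also be exercised programmatically. The snippet below is a sketch against Ollama's local REST API (`POST /api/generate` on port 11434); it assumes `ollama serve` is running and the `phi3:medium` model has already been pulled:

```python
# Query the locally running Ollama server from Python (standard library only).
# Sketch: assumes `ollama serve` is up and `phi3:medium` has already been pulled.
import json
import urllib.request

payload = {
    "model": "phi3:medium",
    "prompt": "Explain what a GPU dev environment is in one sentence.",
    "stream": False,  # return a single JSON object instead of a stream
}
req = urllib.request.Request(
    "http://localhost:11434/api/generate",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req, timeout=300) as resp:
    print(json.loads(resp.read())["response"])
```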

notebook.ipynb

+54
@@ -0,0 +1,54 @@
+{
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import plotly.graph_objects as go\n",
+        "import plotly.express as px\n",
+        "import pandas as pd\n",
+        "df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/finance-charts-apple.csv')\n",
+        "\n",
+        "fig = go.Figure([go.Scatter(x=df['Date'], y=df['AAPL.High'])])\n",
+        "fig.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "df = px.data.stocks(indexed=True)-1\n",
+        "fig = px.area(df, facet_col=\"company\", facet_col_wrap=2)\n",
+        "fig.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "df = px.data.stocks()\n",
+        "fig = px.line(df, x=\"date\", y=df.columns,\n",
+        "              hover_data={\"date\": \"|%B %d, %Y\"},\n",
+        "              title='custom tick labels')\n",
+        "fig.update_xaxes(\n",
+        "    dtick=\"M1\",\n",
+        "    tickformat=\"%b\\n%Y\")\n",
+        "fig.show()"
+      ]
+    }
+  ],
+  "metadata": {
+    "language_info": {
+      "name": "python"
+    },
+    "orig_nbformat": 4
+  },
+  "nbformat": 4,
+  "nbformat_minor": 2
+}
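
The added `notebook.ipynb` renders three Plotly charts from the bundled stock datasets. To smoke-test it without opening the Jupyter UI, one option (a sketch, assuming `nbconvert` is available via the `jupyter` metapackage from requirements.txt) is to execute it programmatically:

```python
# Execute notebook.ipynb headlessly and report that all cells ran.
# Sketch only: nbformat is pinned in requirements.txt; nbconvert comes with the jupyter metapackage.
import nbformat
from nbconvert.preprocessors import ExecutePreprocessor

nb = nbformat.read("notebook.ipynb", as_version=4)
ep = ExecutePreprocessor(timeout=120, kernel_name="python3")
ep.preprocess(nb, {"metadata": {"path": "."}})
nbformat.write(nb, "notebook.executed.ipynb")
print(f"Executed {len(nb.cells)} cells")
```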

requirements.txt

+10
@@ -0,0 +1,10 @@
+numpy>=1.24.0
+pandas>=2.0.0
+matplotlib>=3.7.0
+seaborn>=0.12.0
+scikit-learn>=1.3.0
+jupyter>=1.0.0
+ipykernel>=6.0.0
+plotly>=5.0.0
+plotly-express>=0.4.0
+nbformat>=5.0.0
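
A quick way to confirm that these pinned dependencies actually resolved inside the container is an import smoke test. This sketch (not part of the commit) maps each requirement to its import name and prints the installed versions:

```python
# Smoke-test the packages listed in requirements.txt by importing them
# and printing their versions (import names differ from PyPI names in places).
import importlib

packages = {
    "numpy": "numpy",
    "pandas": "pandas",
    "matplotlib": "matplotlib",
    "seaborn": "seaborn",
    "scikit-learn": "sklearn",
    "jupyter": "jupyter",
    "ipykernel": "ipykernel",
    "plotly": "plotly",
    "plotly-express": "plotly_express",
    "nbformat": "nbformat",
}
for pypi_name, import_name in packages.items():
    mod = importlib.import_module(import_name)
    print(f"{pypi_name}: {getattr(mod, '__version__', 'unknown')}")
```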
