-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathuser_data.sh
110 lines (87 loc) · 3.61 KB
/
user_data.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#!/usr/bin/env bash
# user_data script log filepath on ec2 instance: /var/log/cloud-init-output.log
set -euox pipefail
# https://docs.aws.amazon.com/ebs/latest/userguide/ebs-using-volumes.html
# EBS volume is primarly used to persist open-webui chat data,
# models downloaded by ollama, and docker images.
mount_ebs_volume() {
local volume_id="vol0f2153615108429a8"
local timeout=15
local interval=3
local start_time
start_time=$(date +%s)
while ! lsblk --nvme | grep -q "$volume_id"; do
current_time=$(date +%s)
elapsed=$((current_time - start_time))
if [ "$elapsed" -ge "$timeout" ]; then
echo "Error: Volume ($volume_id) is not attached. Ensure the EBS volume is attached."
exit 1
fi
echo "Waiting for volume ($volume_id) to be attached... Retrying in $interval seconds."
sleep $interval
done
local device_name
device_name="/dev/$(lsblk --nvme | grep "$volume_id" | awk '{print $1}')"
if file -s "$device_name" | grep "filesystem"; then
echo "Device $device_name is already formatted."
else
echo "Formatting $device_name as ext4..."
mkfs.ext4 "$device_name"
fi
local mount_point="/mnt/data"
# https://docs.docker.com/engine/daemon/#daemon-data-directory
local docker_data_directory="/var/lib/docker"
if [ ! -d "$mount_point" ]; then
echo "Creating mount point $mount_point..."
mkdir -p "$mount_point"
fi
if [ ! -d "$docker_data_directory" ]; then
echo "Creating docker mount point $docker_data_directory..."
mkdir -p "$docker_data_directory"
fi
echo "Mounting $device_name to $mount_point..."
mount "$device_name" "$mount_point"
echo "Mounting $device_name to $docker_data_directory..."
mount "$device_name" "$docker_data_directory"
if mountpoint -q "$mount_point"; then
echo "EBS volume successfully mounted at $mount_point."
else
echo "Error: Failed to mount $device_name at $mount_point."
exit 1
fi
if mountpoint -q "$docker_data_directory"; then
echo "EBS volume successfully mounted at $docker_data_directory."
else
echo "Error: Failed to mount $device_name at $docker_data_directory."
exit 1
fi
echo "Creating directories within $mount_point..."
mkdir -p "${mount_point}/ollama"
mkdir -p "${mount_point}/open-webui"
# add the device to /etc/fstab for auto-mounting on startup.
local device_uuid
device_uuid=$(blkid -s UUID -o value "$device_name")
echo "$device_uuid $mount_point ext4 defaults 0 1" | tee -a /etc/fstab
echo "$device_uuid $docker_data_directory ext4 0 1" | tee -a /etc/fstab
}
# Disable systemd cgroup management in docker.
#
# Fixes the following error:
#
# cuda driver library failed to get device context 800time=2025-02-09T08:41:25.591Z level=WARN source=gpu.go:449 msg="error looking up nvidia GPU memory"
#
# Context for this workaround: https://github.com/NVIDIA/gpu-operator/issues/485
#
# Also see:
# - https://github.com/NVIDIA/nvidia-container-toolkit/issues/538#issuecomment-2219617957
# - https://forums.developer.nvidia.com/t/nvida-container-toolkit-failed-to-initialize-nvml-unknown-error/286219/3
disable_systemd_cgroup_management_in_docker() {
jq '.["exec-opts"] |= (
(select(type == "array") // []) + ["native.cgroupdriver=cgroupfs"]
)' /etc/docker/daemon.json | tee /etc/docker/daemon.json.tmp > /dev/null
mv /etc/docker/daemon.json.tmp /etc/docker/daemon.json
systemctl restart docker
systemctl daemon-reload
}
mount_ebs_volume
disable_systemd_cgroup_management_in_docker