Skip to content

Commit 7d86fdb

Browse files
committed
fix: initial commit
1 parent 7dd141f commit 7d86fdb

File tree

7 files changed

+223
-0
lines changed

7 files changed

+223
-0
lines changed

Dockerfile

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Build a CUDA-enabled whisper.cpp image (v1.4.0) for GPU transcription.
ARG CUDA_VERSION=12.3.0
FROM docker.io/nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04

WORKDIR /usr/local/src
# Install build + media tooling in one layer and clean the apt lists so the
# package index does not bloat the image.
RUN apt-get update && \
    apt-get install --no-install-recommends -y bash git make vim wget g++ ffmpeg curl && \
    rm -rf /var/lib/apt/lists/*
RUN git clone https://github.com/ggerganov/whisper.cpp.git -b v1.4.0 --depth 1

# whisper.cpp setup
WORKDIR /usr/local/src/whisper.cpp

RUN make clean
# WHISPER_CUBLAS=1 enables the CUDA (cuBLAS) backend.
RUN WHISPER_CUBLAS=1 make -j

# transcribe.sh is provided at runtime via the ./volume bind mount.
ENTRYPOINT [ "./volume/transcribe.sh" ]

README.md

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
2+
https://github.com/ggerganov/whisper.cpp
3+
4+
## TLDR
5+
```
6+
docker compose up
7+
```
8+
or
9+
```
10+
MODEL=large-v2 LANGUAGE=ru docker compose up
11+
```
12+
13+
## Step by step
14+
#### 1. Build CUDA image (single run)
15+
```
16+
docker compose build --progress=plain
17+
```
18+
19+
#### 2. Download models (single run)
20+
You may want to do it manually in order to see the progress
21+
```
22+
./models/download.sh large-v2
23+
```
24+
This script is a plain copy of [download-ggml-model.sh](https://github.com/ggerganov/whisper.cpp/blob/master/models/download-ggml-model.sh)
25+
You may find additional information and configurations [here](https://github.com/ggerganov/whisper.cpp/tree/master/models)
26+
27+
#### 3. Prepare your files
28+
Place all the files in the ```./volume/input/``` directory
29+
30+
#### 4. Run the docker compose
31+
```
32+
docker compose up
33+
```
34+
Configure defaults
35+
```
36+
MODEL=large-v2 \
37+
LANGUAGE=ru \
38+
docker compose up
39+
```
40+
| Argument | Values | Defaults |
41+
| -------- | ------- |------- |
42+
| model | base, medium, large, [other options](https://github.com/ggerganov/whisper.cpp/blob/master/models/download-ggml-model.sh#L25) | large-v3
43+
| language | en, ru, fr, etc. (depends on the model) | ru |
44+
45+
#### 5. Result
46+
You can find the result in the ```./volume/output/``` directory

docker-compose.yml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Compose service running GPU-accelerated whisper.cpp transcription.
version: "3.3"

services:
  whisper-cpp:
    build:
      context: .
    stdin_open: true
    tty: true
    environment:
      # MODEL must be the bare model name (e.g. large-v3): the scripts expand
      # it into models/ggml-<MODEL>.bin themselves. The previous default
      # "ggml-large-v3.bin" produced the path "ggml-ggml-large-v3.bin.bin".
      - MODEL=${MODEL:-large-v3}
      - LANGUAGE=${LANGUAGE:-ru}
    volumes:
      - ./models:/usr/local/src/whisper.cpp/models
      - ./volume:/usr/local/src/whisper.cpp/volume
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ['0']
              capabilities: [gpu]
22+

models/download.sh

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
#!/bin/bash

# Fetch Whisper models that are already converted to the ggml format,
# so there is no need to run the conversion step locally.

# Previous mirror, kept for reference:
#src="https://ggml.ggerganov.com"
#pfx="ggml-model-whisper"

src="https://huggingface.co/ggerganov/whisper.cpp"
pfx="resolve/main/ggml"
11+
12+
# Resolve the absolute directory containing this script.
# Prefers realpath; falls back to a cd/pwd -P round trip when unavailable.
function get_script_path() {
    local dir
    if command -v realpath >/dev/null 2>&1; then
        dir="$(dirname "$(realpath "$0")")"
    else
        dir="$(cd -- "$(dirname "$0")" >/dev/null 2>&1; pwd -P)"
    fi
    echo "$dir"
}
21+
22+
models_path="$(get_script_path)"

# ggml Whisper model names this script knows how to download.
models=(
    "tiny.en" "tiny" "tiny-q5_1" "tiny.en-q5_1"
    "base.en" "base" "base-q5_1" "base.en-q5_1"
    "small.en" "small.en-tdrz" "small" "small-q5_1" "small.en-q5_1"
    "medium" "medium.en" "medium-q5_0" "medium.en-q5_0"
    "large-v1" "large-v2" "large-v3" "large-q5_0"
)
48+
49+
# Print the names of all supported models to stdout.
# Reads the global `models` array; produces no side effects.
function list_models {
    printf "\n"
    printf " Available models:"
    for model in "${models[@]}"; do
        # Pass the name as an argument, never as the printf format string,
        # so names containing % can never be misinterpreted.
        printf " %s" "$model"
    done
    printf "\n\n"
}
58+
59+
# Require exactly one argument: the model name to download.
if [ "$#" -ne 1 ]; then
    printf "Usage: %s <model>\n" "$0"
    list_models

    exit 1
fi

model=$1

# Reject names not present in the known-models list.
if [[ ! " ${models[*]} " =~ " ${model} " ]]; then
    printf "Invalid model: %s\n" "$model"
    list_models

    exit 1
fi

# tinydiarize (tdrz) models are hosted in a separate repository.
if [[ $model == *"tdrz"* ]]; then
    src="https://huggingface.co/akashmjn/tinydiarize-whisper.cpp"
    pfx="resolve/main/ggml"
fi

# download ggml model

printf "Downloading ggml model %s from '%s' ...\n" "$model" "$src"

cd "$models_path" || exit 1

if [ -f "ggml-$model.bin" ]; then
    printf "Model %s already exists. Skipping download.\n" "$model"
    exit 0
fi

# Prefer curl, fall back to wget. Capture the downloader's exit status
# explicitly: the previous `$?` check sat several lines below the download
# command, which is fragile if any command is ever inserted in between.
if [ -x "$(command -v curl)" ]; then
    curl -L --output "ggml-$model.bin" "$src/$pfx-$model.bin"
    status=$?
elif [ -x "$(command -v wget)" ]; then
    wget --no-config --quiet --show-progress -O "ggml-$model.bin" "$src/$pfx-$model.bin"
    status=$?
else
    printf "Either wget or curl is required to download models.\n"
    exit 1
fi

if [ "$status" -ne 0 ]; then
    printf "Failed to download ggml model %s \n" "$model"
    printf "Please try again later or download the original Whisper model files and convert them yourself.\n"
    exit 1
fi

printf "Done! Model '%s' saved in 'models/ggml-%s.bin'\n" "$model" "$model"
printf "You can now use it like this:\n\n"
printf " $ ./main -m models/ggml-%s.bin -f samples/jfk.wav\n" "$model"
printf "\n"

volume/input/.gitkeep

Whitespace-only changes.

volume/output/.gitkeep

Whitespace-only changes.

volume/transcribe.sh

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
#!/bin/bash

# Transcribe every file in ./volume/input/ with whisper.cpp, writing the
# wav conversions and transcripts (.txt, .srt) to ./volume/output/.
# Expects MODEL and LANGUAGE in the environment (set by docker-compose.yml).

echo "[logger]: configuration"
echo "${MODEL}"
echo "${LANGUAGE}"

# Model verification
echo "[logger]: model verification"
if [ ! -f "./models/ggml-$MODEL.bin" ]; then
    echo "[logger]: model not found. downloading"
    ./models/download.sh "$MODEL"
else
    echo "[logger]: model found"
fi

# Transcribe
# -f: do not error when the output directory is already empty
rm -f ./volume/output/*
# nullglob: the loop simply runs zero times when input/ is empty
shopt -s nullglob
for file in ./volume/input/*; do
    base_name=$(basename -- "$file")
    file_name="${base_name%.*}"
    echo "$file" "$file_name"

    echo "[logger]: Convert to wav $file_name"
    # whisper.cpp requires 16 kHz audio input
    ffmpeg -i "$file" -ar 16000 "./volume/output/$file_name.wav"

    echo "[logger]: Transcribe"
    # Honor the configured LANGUAGE (was hard-coded to "ru", silently
    # ignoring the env var passed in by docker-compose)
    ./main -m "./models/ggml-$MODEL.bin" -f "./volume/output/$file_name.wav" --language "$LANGUAGE" -otxt -osrt
done

0 commit comments

Comments
 (0)