Add Containerized Benchmarking Support for GuideLLM #123
Dockerfile
@@ -0,0 +1,36 @@
FROM python:3.11-slim

LABEL org.opencontainers.image.source="https://github.com/neuralmagic/guidellm"
LABEL org.opencontainers.image.description="GuideLLM Benchmark Container"

# Install system dependencies
RUN apt-get update && apt-get install -y \
    git \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Create non-root user
RUN useradd -m -u 1000 guidellm

# Set working directory
WORKDIR /app

# Install GuideLLM
RUN pip install git+https://github.com/neuralmagic/guidellm.git
Review comment: Rather than installing main, install our current HEAD.
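The attached suggestion is not reproduced here; as an illustration only, one common way to address the comment is to install from the Docker build context so the image contains the commit being built rather than upstream main. The /opt/guidellm path below is an arbitrary choice, not part of this PR.

# Illustration only, assuming the repository root is the build context:
# copy the checked-out source into the image and install that exact revision,
# so the image always reflects the PR's HEAD rather than main.
COPY . /opt/guidellm
RUN pip install /opt/guidellm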
# Copy and set up the benchmark script
COPY run_benchmark.sh /app/
RUN chmod +x /app/run_benchmark.sh
# Set ownership to non-root user
RUN chown -R guidellm:guidellm /app

# Switch to non-root user
USER guidellm

# Healthcheck
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:8000/health || exit 1
Review comment: I assume this was an accidental inclusion, since GuideLLM does not provide a web server.
Author reply: Yes, I removed this in the new build/Dockerfile.
# Set the entrypoint
ENTRYPOINT ["/app/run_benchmark.sh"]
Review comment: If this script is going to be the ENTRYPOINT and not the CMD, then it needs to be more flexible. Either add an extra-arguments variable or append command-line arguments to the command.
Author reply: Yes, I allow it to add extra arguments.
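A sketch of what that flexibility could look like inside run_benchmark.sh, not the PR's final code; the EXTRA_ARGS name is an assumption. Because the script is the image's ENTRYPOINT, anything after the image name on the docker run command line arrives in the script as positional arguments ("$@") and can be appended to the built command.

# Sketch only; EXTRA_ARGS is an assumed variable name, not part of this PR.
# Assumes CMD has already been built as in run_benchmark.sh below.
EXTRA_ARGS=${EXTRA_ARGS:-""}

# Append an optional environment-provided argument string...
if [ ! -z "${EXTRA_ARGS}" ]; then
    CMD="${CMD} ${EXTRA_ARGS}"
fi

# ...and any arguments passed after the image name on the docker run command line.
if [ "$#" -gt 0 ]; then
    CMD="${CMD} $*"
fi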
run_benchmark.sh
@@ -0,0 +1,34 @@
#!/bin/bash
# Required environment variables
TARGET=${TARGET:-"http://localhost:8000"}
MODEL=${MODEL:-"neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w4a16"}
RATE_TYPE=${RATE_TYPE:-"sweep"}
DATA=${DATA:-"prompt_tokens=256,output_tokens=128"}
MAX_REQUESTS=${MAX_REQUESTS:-"100"}
MAX_SECONDS=${MAX_SECONDS:-""}

# Output configuration
OUTPUT_PATH=${OUTPUT_PATH:-"/results/guidellm_benchmark_results"}
OUTPUT_FORMAT=${OUTPUT_FORMAT:-"json"}  # Can be json, yaml, or yml

# Build the command
CMD="guidellm benchmark --target \"${TARGET}\" --model \"${MODEL}\" --rate-type \"${RATE_TYPE}\" --data \"${DATA}\""

# Add optional parameters
if [ ! -z "${MAX_REQUESTS}" ]; then
    CMD="${CMD} --max-requests ${MAX_REQUESTS}"
fi

if [ ! -z "${MAX_SECONDS}" ]; then
    CMD="${CMD} --max-seconds ${MAX_SECONDS}"
fi

# Add output path with appropriate extension
if [ ! -z "${OUTPUT_PATH}" ]; then
    CMD="${CMD} --output-path \"${OUTPUT_PATH}.${OUTPUT_FORMAT}\""
fi

# Execute the command
echo "Running command: ${CMD}"
eval "${CMD}"
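For context, a hypothetical way to build and run the image; the guidellm-benchmark tag, target URL, and host results directory below are placeholders, not values defined by this PR.

# Placeholder image tag, target URL, and mount path.
docker build -t guidellm-benchmark .

docker run --rm \
    -e TARGET="http://vllm-server:8000" \
    -e RATE_TYPE="sweep" \
    -e MAX_REQUESTS="200" \
    -v "$(pwd)/results:/results" \
    guidellm-benchmark

Mounting a host directory at /results matches the script's default OUTPUT_PATH, so the benchmark report survives after the container exits.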