Add Marian server for model testing (#492)

* Compile marian server * Add Marian server for testing * Reformat * Update utils/marian_client.py Co-authored-by: Greg Tatum <[email protected]> * Make port configurable * Relock poetry --------- Co-authored-by: Greg Tatum <[email protected]>
mozilla · Mar 28, 2024 · 3774779 · 3774779
1 parent 7a15b5e
commit 3774779
Show file tree

Hide file tree

Showing 6 changed files with 418 additions and 291 deletions.
diff --git a/Makefile b/Makefile
@@ -5,6 +5,11 @@ SHELL=/bin/bash
 
 # task group id for downloading evals and logs
 LOGS_TASK_GROUP?=
+# An ID of a Taskcluster task with a Marian model in the artifacts
+MODEL_TASK?=
+# A command to run with run-docker; the port below is published by run-docker and passed to marian-server
+DOCKER_COMMAND=bash
+MARIAN_SERVER_PORT?=8886
 
 # OpusCleaner is a data cleaner for training corpus
 # More details are in docs/cleaning.md
@@ -103,27 +108,26 @@ run-docker:
 		--rm \
 		--volume $$(pwd):/builds/worker/checkouts \
 		--workdir /builds/worker/checkouts \
-		ftt-local bash
+		-p $(MARIAN_SERVER_PORT):$(MARIAN_SERVER_PORT) \
+		ftt-local $(DOCKER_COMMAND)
 
 # Run tests under Docker
-run-tests-docker: build-docker
-run-tests-docker:
-	# this is a mitigation to guard against build failures with the new Apple ARM processors
-	if [ -n "$$VIRTUAL_ENV" ]; then \
-		echo "Error: Virtual environment detected. Exit the poetry shell."; \
-		exit 1; \
-	fi && \
-	if [ $$(uname -m) == 'arm64' ]; then \
-		echo "setting arm64 platform"; \
-	  	export DOCKER_DEFAULT_PLATFORM=linux/amd64; \
-	fi && \
-	docker run \
-		--interactive \
-		--tty \
-		--rm \
-		--volume $$(pwd):/builds/worker/checkouts \
-		--workdir /builds/worker/checkouts \
-		 ftt-local make run-tests
+run-tests-docker: DOCKER_COMMAND=make run-tests
+run-tests-docker: run-docker
+
+# Run Marian server that loads a model from data/taskcluster-models/$(MODEL_TASK)
+# For example:
+# MODEL_TASK=ZP5V73iKTM2HCFQsCU-JBQ make download-model
+# MODEL_TASK=ZP5V73iKTM2HCFQsCU-JBQ make run-server-docker
+# Then run `python utils/marian_client.py` to test the model
+# It will be slow on a CPU under Docker; the server gets --port $(MARIAN_SERVER_PORT), which run-docker publishes to the host
+run-server-docker: DOCKER_COMMAND=/builds/worker/tools/marian-dev/build/marian-server \
+  -c /builds/worker/checkouts/data/taskcluster-models/$(MODEL_TASK)/decoder.yml \
+  -m /builds/worker/checkouts/data/taskcluster-models/$(MODEL_TASK)/model.npz \
+  -v /builds/worker/checkouts/data/taskcluster-models/$(MODEL_TASK)/vocab.spm /builds/worker/checkouts/data/taskcluster-models/$(MODEL_TASK)/vocab.spm \
+  --port $(MARIAN_SERVER_PORT)
+run-server-docker: run-docker
+
 
 # Validates Taskcluster task graph locally
 validate-taskgraph:
@@ -155,13 +159,21 @@ download-logs:
 # Downloads evaluation results from the Taskcluster task group $(LOGS_TASK_GROUP) to a CSV file
 # (output path: data/taskcluster-evals/$(LOGS_TASK_GROUP)); this includes BLEU and chrF metrics for each dataset and trained model
 download-evals:
-	mkdir -p data/taskcluster-logs
+	mkdir -p data/taskcluster-evals
 	poetry install --only taskcluster --no-root
 	poetry run python utils/taskcluster_downloader.py \
 		--output=data/taskcluster-evals/$(LOGS_TASK_GROUP) \
 		--mode=evals \
 		--task-group-id=$(LOGS_TASK_GROUP)
 
+# Downloads a trained model (decoder.yml, model.npz from the model.npz.best-chrf.npz artifact, vocab.spm) from the run 0 artifacts of a Taskcluster task into data/taskcluster-models/$(MODEL_TASK)
+# Requires MODEL_TASK, for example: `MODEL_TASK=ZP5V73iKTM2HCFQsCU-JBQ make download-model`
+download-model:
+	mkdir -p data/taskcluster-models/$(MODEL_TASK)
+	wget -O data/taskcluster-models/$(MODEL_TASK)/decoder.yml https://firefox-ci-tc.services.mozilla.com/api/queue/v1/task/$(MODEL_TASK)/runs/0/artifacts/public%2Fbuild%2Fmodel.npz.best-chrf.npz.decoder.yml
+	wget -O data/taskcluster-models/$(MODEL_TASK)/model.npz https://firefox-ci-tc.services.mozilla.com/api/queue/v1/task/$(MODEL_TASK)/runs/0/artifacts/public%2Fbuild%2Fmodel.npz.best-chrf.npz
+	wget -O data/taskcluster-models/$(MODEL_TASK)/vocab.spm https://firefox-ci-tc.services.mozilla.com/api/queue/v1/task/$(MODEL_TASK)/runs/0/artifacts/public%2Fbuild%2Fvocab.spm
+
 
 # Runs Tensorboard for Marian training logs in ./logs directory
 # then go to http://localhost:6006

diff --git a/docker/Dockerfile b/docker/Dockerfile
@@ -25,7 +25,9 @@ RUN apt-get update -qq \
                           libhunspell-dev \
                           bc \
                           libopenblas-dev \
-    && apt-get clean
+                          openssl \
+                          libssl-dev  \
+         && apt-get clean
 
 RUN mkdir /builds/worker/tools && \
     chown worker:worker /builds/worker/tools && \

diff --git a/pipeline/setup/compile-marian.sh b/pipeline/setup/compile-marian.sh
@@ -17,12 +17,14 @@ mkdir -p "${marian_dir}"
 cd "${marian_dir}"
 
 if [ "${use_gpu}" == "true" ]; then
+  # Production build that runs on GPU; `test -v CUDA_DIR` checks that CUDA_DIR is set (presumably fatal under `set -e` - TODO confirm)
   test -v CUDA_DIR
   cmake .. -DUSE_SENTENCEPIECE=on -DUSE_FBGEMM=on -DCOMPILE_CPU=on -DCMAKE_BUILD_TYPE=Release \
     -DCUDA_TOOLKIT_ROOT_DIR="${CUDA_DIR}" "${extra_args[@]}"
 else
+  # CPU-only build (CUDA off) that we use for testing; only this branch passes -DCOMPILE_SERVER=on
   cmake .. -DUSE_SENTENCEPIECE=on -DUSE_FBGEMM=on -DCOMPILE_CPU=on -DCMAKE_BUILD_TYPE=Release \
-    -DCOMPILE_CUDA=off "${extra_args[@]}"
+    -DCOMPILE_CUDA=off -DCOMPILE_SERVER=on "${extra_args[@]}"
 fi
 
 make -j "${threads}"