Skip to content

Commit

Permalink
Add Marian server for model testing (#492)
Browse files Browse the repository at this point in the history
* Compile marian server

* Add Marian server for testing

* Reformat

* Update utils/marian_client.py

Co-authored-by: Greg Tatum <[email protected]>

* Make port configurable

* Relock poetry

---------

Co-authored-by: Greg Tatum <[email protected]>
  • Loading branch information
eu9ene and gregtatum authored Mar 28, 2024
1 parent 7a15b5e commit 3774779
Show file tree
Hide file tree
Showing 6 changed files with 418 additions and 291 deletions.
52 changes: 32 additions & 20 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@ SHELL=/bin/bash

# Task group ID for downloading evals and logs
LOGS_TASK_GROUP?=
# An ID of a Taskcluster task with a Marian model in the artifacts
MODEL_TASK?=
# A command to run with run-docker (overridden per target via target-specific variables)
DOCKER_COMMAND=bash
# Port published by run-docker and passed to marian-server via --port.
# Assigned with ?= so that, like MODEL_TASK, it can be set from the environment:
#   MARIAN_SERVER_PORT=9000 MODEL_TASK=<task id> make run-server-docker
MARIAN_SERVER_PORT?=8886

# OpusCleaner is a data cleaner for training corpus
# More details are in docs/cleaning.md
Expand Down Expand Up @@ -103,27 +108,26 @@ run-docker:
--rm \
--volume $$(pwd):/builds/worker/checkouts \
--workdir /builds/worker/checkouts \
ftt-local bash
-p $(MARIAN_SERVER_PORT):$(MARIAN_SERVER_PORT) \
ftt-local $(DOCKER_COMMAND)

# Run tests under Docker
# NOTE(review): this span appears to contain both the former inline recipe and its
# replacement (target-specific DOCKER_COMMAND plus the run-docker prerequisite) — confirm which lines are live.
run-tests-docker: build-docker
run-tests-docker:
# this is a mitigation to guard against build failures with the new Apple ARM processors
if [ -n "$$VIRTUAL_ENV" ]; then \
echo "Error: Virtual environment detected. Exit the poetry shell."; \
exit 1; \
fi && \
if [ $$(uname -m) == 'arm64' ]; then \
echo "setting arm64 platform"; \
export DOCKER_DEFAULT_PLATFORM=linux/amd64; \
fi && \
docker run \
--interactive \
--tty \
--rm \
--volume $$(pwd):/builds/worker/checkouts \
--workdir /builds/worker/checkouts \
ftt-local make run-tests
# Target-specific variable: while building run-tests-docker (and its prerequisites),
# DOCKER_COMMAND is this value instead of the global default `bash`.
# NOTE(review): the double quotes are part of the make value, so the shell hands
# `make run-tests` to docker as a single argument — confirm the image runs it as intended.
run-tests-docker: DOCKER_COMMAND="make run-tests"
# Reuse the run-docker recipe, which runs $(DOCKER_COMMAND) inside the ftt-local image
run-tests-docker: run-docker

# Run Marian server that loads a model from data/taskcluster-models/$(MODEL_TASK)
# For example:
# MODEL_TASK=ZP5V73iKTM2HCFQsCU-JBQ make download-model
# MODEL_TASK=ZP5V73iKTM2HCFQsCU-JBQ make run-server-docker
# Then run `python utils/marian_client.py` to test the model
# It will be slow on a CPU under Docker
# In-container directory holding the files fetched by `make download-model`.
# The value is expanded lazily, when the run-docker recipe expands DOCKER_COMMAND,
# so an empty MODEL_TASK aborts make with a clear message before docker is started.
run-server-docker: SERVER_MODEL_DIR=$(if $(strip $(MODEL_TASK)),/builds/worker/checkouts/data/taskcluster-models/$(MODEL_TASK),$(error MODEL_TASK is required. Example: MODEL_TASK=ZP5V73iKTM2HCFQsCU-JBQ make run-server-docker))
# The same vocab.spm is listed twice after -v, exactly as before
# (presumably one entry per language side of a shared vocabulary — confirm against Marian's options).
# The server listens on MARIAN_SERVER_PORT, the same port that run-docker publishes with -p.
run-server-docker: DOCKER_COMMAND=/builds/worker/tools/marian-dev/build/marian-server \
    -c $(SERVER_MODEL_DIR)/decoder.yml \
    -m $(SERVER_MODEL_DIR)/model.npz \
    -v $(SERVER_MODEL_DIR)/vocab.spm $(SERVER_MODEL_DIR)/vocab.spm \
    --port $(MARIAN_SERVER_PORT)
# Reuse the run-docker recipe with the DOCKER_COMMAND defined above
run-server-docker: run-docker


# Validates Taskcluster task graph locally
validate-taskgraph:
Expand Down Expand Up @@ -155,13 +159,21 @@ download-logs:
# Downloads evaluation results from Taskcluster task group to a CSV file
# This includes BLEU and chrF metrics for each dataset and trained model
download-evals:
# NOTE(review): both mkdir lines are shown here, while the output below is written under
# data/taskcluster-evals — confirm whether this target still needs data/taskcluster-logs.
mkdir -p data/taskcluster-logs
mkdir -p data/taskcluster-evals
# Install only the `taskcluster` Poetry dependency group; --no-root skips installing the project package itself
poetry install --only taskcluster --no-root
# LOGS_TASK_GROUP selects both the task group to query and the output path
poetry run python utils/taskcluster_downloader.py \
--output=data/taskcluster-evals/$(LOGS_TASK_GROUP) \
--mode=evals \
--task-group-id=$(LOGS_TASK_GROUP)

# Downloads a trained model from the Taskcluster task artifacts
# For example: `MODEL_TASK=ZP5V73iKTM2HCFQsCU-JBQ make download-model`
# Local destination directory and remote artifact URL prefix, scoped to this target.
# Only artifacts of run 0 of the task are fetched.
download-model: MODEL_DOWNLOAD_DIR=data/taskcluster-models/$(MODEL_TASK)
download-model: MODEL_ARTIFACTS_URL=https://firefox-ci-tc.services.mozilla.com/api/queue/v1/task/$(MODEL_TASK)/runs/0/artifacts/public%2Fbuild%2F
download-model:
# Fail early with a clear message: an empty MODEL_TASK would otherwise create
# data/taskcluster-models/ itself and request malformed artifact URLs.
	$(if $(strip $(MODEL_TASK)),,$(error MODEL_TASK is required. Example: MODEL_TASK=ZP5V73iKTM2HCFQsCU-JBQ make download-model))
	mkdir -p $(MODEL_DOWNLOAD_DIR)
# The best-chrF checkpoint and its decoder config are saved under the short names
# that run-server-docker expects (decoder.yml, model.npz, vocab.spm).
	wget -O $(MODEL_DOWNLOAD_DIR)/decoder.yml $(MODEL_ARTIFACTS_URL)model.npz.best-chrf.npz.decoder.yml
	wget -O $(MODEL_DOWNLOAD_DIR)/model.npz $(MODEL_ARTIFACTS_URL)model.npz.best-chrf.npz
	wget -O $(MODEL_DOWNLOAD_DIR)/vocab.spm $(MODEL_ARTIFACTS_URL)vocab.spm


# Runs Tensorboard for Marian training logs in ./logs directory
# then go to http://localhost:6006
Expand Down
4 changes: 3 additions & 1 deletion docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ RUN apt-get update -qq \
libhunspell-dev \
bc \
libopenblas-dev \
&& apt-get clean
openssl \
libssl-dev \
&& apt-get clean

RUN mkdir /builds/worker/tools && \
chown worker:worker /builds/worker/tools && \
Expand Down
4 changes: 3 additions & 1 deletion pipeline/setup/compile-marian.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,14 @@ mkdir -p "${marian_dir}"
cd "${marian_dir}"

if [ "${use_gpu}" == "true" ]; then
# this is a production version that runs on GPU
# `test -v` returns non-zero when CUDA_DIR is unset (presumably aborting the script under errexit — TODO confirm)
test -v CUDA_DIR
cmake .. -DUSE_SENTENCEPIECE=on -DUSE_FBGEMM=on -DCOMPILE_CPU=on -DCMAKE_BUILD_TYPE=Release \
-DCUDA_TOOLKIT_ROOT_DIR="${CUDA_DIR}" "${extra_args[@]}"
else
# this is a CPU version that we use for testing
# -DCOMPILE_SERVER=on is passed only in this CPU configuration
# (presumably to build the marian-server binary — confirm against Marian's CMake options)
cmake .. -DUSE_SENTENCEPIECE=on -DUSE_FBGEMM=on -DCOMPILE_CPU=on -DCMAKE_BUILD_TYPE=Release \
-DCOMPILE_CUDA=off "${extra_args[@]}"
-DCOMPILE_CUDA=off -DCOMPILE_SERVER=on "${extra_args[@]}"
fi

make -j "${threads}"
Expand Down
Loading

0 comments on commit 3774779

Please sign in to comment.