
Commit 3f88489

Merge branch 'master' into secure-check
2 parents: 187ed53 + 6bdb1ba

File tree

20 files changed: +61 -33 lines


README.md

+2 -2

@@ -85,9 +85,9 @@ curl -X POST -d '{"prompt":"count from 1 to 9 in french ", "max_tokens": 100}' -
 
 ```bash
 #export token=<HUGGINGFACE_HUB_TOKEN>
-docker build --pull . -f docker/Dockerfile.llm -t ts/llm
+docker build --pull . -f docker/Dockerfile.vllm -t ts/vllm
 
-docker run --rm -ti --shm-size 10g --gpus all -e HUGGING_FACE_HUB_TOKEN=$token -p 8080:8080 -v data:/data ts/llm --model_id meta-llama/Meta-Llama-3-8B-Instruct --disable_token_auth
+docker run --rm -ti --shm-size 10g --gpus all -e HUGGING_FACE_HUB_TOKEN=$token -p 8080:8080 -v data:/data ts/vllm --model_id meta-llama/Meta-Llama-3-8B-Instruct --disable_token_auth
 
 # Try it out
 curl -X POST -d '{"model":"meta-llama/Meta-Llama-3-8B-Instruct", "prompt":"Hello, my name is", "max_tokens": 200}' --header "Content-Type: application/json" "http://localhost:8080/predictions/model/1.0/v1/completions"

benchmarks/utils/system_under_test.py

+26

@@ -113,6 +113,7 @@ def start(self):
         execute("torchserve --stop", wait=True)
         click.secho("*Setting up model store...", fg="green")
         self._prepare_local_dependency()
+        self._clear_neuron_cache_if_exists()
         click.secho("*Starting local Torchserve instance...", fg="green")
 
         ts_cmd = (
@@ -141,6 +142,31 @@ def start(self):
             if "Model server started" in str(line).strip():
                 break
 
+    def _clear_neuron_cache_if_exists(self):
+        cache_dir = "/var/tmp/neuron-compile-cache/"
+
+        # Check if the directory exists
+        if os.path.exists(cache_dir) and os.path.isdir(cache_dir):
+            click.secho(
+                f"Directory {cache_dir} exists. Clearing contents...", fg="green"
+            )
+
+            # Remove the directory contents
+            for filename in os.listdir(cache_dir):
+                file_path = os.path.join(cache_dir, filename)
+                try:
+                    if os.path.isfile(file_path) or os.path.islink(file_path):
+                        os.unlink(file_path)
+                    elif os.path.isdir(file_path):
+                        shutil.rmtree(file_path)
+                except Exception as e:
+                    click.secho(f"Failed to delete {file_path}. Reason: {e}", fg="red")
+            click.secho(f"Cache cleared: {cache_dir}", fg="green")
+        else:
+            click.secho(
+                f"Directory {cache_dir} does not exist. No action taken.", fg="green"
+            )
+
     def stop(self):
         click.secho("*Terminating Torchserve instance...", fg="green")
         execute("torchserve --stop", wait=True)

docker/Dockerfile

+2 -2

@@ -73,7 +73,7 @@ COPY ./ serve
 RUN \
     if echo "$LOCAL_CHANGES" | grep -q "false"; then \
         rm -rf serve;\
-        git clone --recursive $REPO_URL -b $BRANCH_NAME; \
+        git clone --recursive $REPO_URL -b $BRANCH_NAME serve; \
     fi
 
@@ -238,7 +238,7 @@ COPY ./ serve
 RUN \
     if echo "$LOCAL_CHANGES" | grep -q "false"; then \
        rm -rf serve;\
-       git clone --recursive $REPO_URL -b $BRANCH_NAME; \
+       git clone --recursive $REPO_URL -b $BRANCH_NAME serve; \
     fi
 
 COPY --from=compile-image /home/venv /home/venv
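Note on the clone fix: without an explicit target, git clone derives the checkout directory from $REPO_URL, which only works when the repository happens to be named serve; pinning the target directory keeps later build steps that reference serve/ working for forks or differently named repositories after the preceding rm -rf serve.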
File renamed without changes.

docs/llm_deployment.md

+2 -2

@@ -11,7 +11,7 @@ The launcher can either be used standalone or in combination with our provided T
 
 To launch the docker we first need to build it:
 ```bash
-docker build . -f docker/Dockerfile.llm -t ts/llm
+docker build . -f docker/Dockerfile.vllm -t ts/vllm
 ```
 
 Models are usually loaded from the HuggingFace hub and are cached in a [docker volume](https://docs.docker.com/storage/volumes/) for faster reload.
@@ -22,7 +22,7 @@ export token=<HUGGINGFACE_HUB_TOKEN>
 
 You can then go ahead and launch a TorchServe instance serving your selected model:
 ```bash
-docker run --rm -ti --shm-size 1g --gpus all -e HUGGING_FACE_HUB_TOKEN=$token -p 8080:8080 -v data:/data ts/llm --model_id meta-llama/Meta-Llama-3-8B-Instruct --disable_token_auth
+docker run --rm -ti --shm-size 1g --gpus all -e HUGGING_FACE_HUB_TOKEN=$token -p 8080:8080 -v data:/data ts/vllm --model_id meta-llama/Meta-Llama-3-8B-Instruct --disable_token_auth
 ```
 
 To change the model you just need to exchange the identifier given to the `--model_id` parameter.

examples/large_models/utils/test_llm_streaming_response.py

+1 -1

@@ -196,7 +196,7 @@ def parse_args():
         "--model-version",
         type=str,
         default="1.0",
-        help="Model vesion. Default: 1.0",
+        help="Model version. Default: 1.0",
     )
 
     return parser.parse_args()

examples/large_models/vllm/llama3/Readme.md

+1 -1

@@ -9,7 +9,7 @@ To leverage the power of vLLM we fist need to install it using pip in out develo
 ```bash
 python -m pip install -r ../requirements.txt
 ```
-For later deployments we can make vLLM part of the deployment environment by adding the requirements.txt while building the model archive in step 2 (see [here](../../../../model-archiver/README.md#model-specific-custom-python-requirements) for details) or we can make it part of a docker image like [here](../../../../docker/Dockerfile.llm).
+For later deployments we can make vLLM part of the deployment environment by adding the requirements.txt while building the model archive in step 2 (see [here](../../../../model-archiver/README.md#model-specific-custom-python-requirements) for details) or we can make it part of a docker image like [here](../../../../docker/Dockerfile.vllm).
 
 ### Step 1: Download Model from HuggingFace
 

examples/large_models/vllm/lora/Readme.md

+1 -1

@@ -9,7 +9,7 @@ To leverage the power of vLLM we fist need to install it using pip in out develo
 ```bash
 python -m pip install -r ../requirements.txt
 ```
-For later deployments we can make vLLM part of the deployment environment by adding the requirements.txt while building the model archive in step 2 (see [here](../../../../model-archiver/README.md#model-specific-custom-python-requirements) for details) or we can make it part of a docker image like [here](../../../../docker/Dockerfile.llm).
+For later deployments we can make vLLM part of the deployment environment by adding the requirements.txt while building the model archive in step 2 (see [here](../../../../model-archiver/README.md#model-specific-custom-python-requirements) for details) or we can make it part of a docker image like [here](../../../../docker/Dockerfile.vllm).
 
 ### Step 1: Download Model from HuggingFace
 

examples/large_models/vllm/mistral/Readme.md

+1 -1

@@ -9,7 +9,7 @@ To leverage the power of vLLM we fist need to install it using pip in out develo
 ```bash
 python -m pip install -r ../requirements.txt
 ```
-For later deployments we can make vLLM part of the deployment environment by adding the requirements.txt while building the model archive in step 2 (see [here](../../../../model-archiver/README.md#model-specific-custom-python-requirements) for details) or we can make it part of a docker image like [here](../../../../docker/Dockerfile.llm).
+For later deployments we can make vLLM part of the deployment environment by adding the requirements.txt while building the model archive in step 2 (see [here](../../../../model-archiver/README.md#model-specific-custom-python-requirements) for details) or we can make it part of a docker image like [here](../../../../docker/Dockerfile.vllm).
 
 ### Step 1: Download Model from HuggingFace
 

frontend/archive/src/main/java/org/pytorch/serve/archive/model/ModelConfig.java

+5 -2

@@ -11,6 +11,9 @@
 public class ModelConfig {
     private static final Logger logger = LoggerFactory.getLogger(ModelConfig.class);
 
+    public static final int defaultStartupTimeout = 120; // unit: sec
+    public static final int defaultResponseTimeout = 120; // unit: sec
+
     /** the minimum number of workers of a model */
     private int minWorkers;
     /** the maximum number of workers of a model */
@@ -20,9 +23,9 @@ public class ModelConfig {
     /** the maximum delay in msec of a batch of a model */
     private int maxBatchDelay;
     /** the timeout in sec of a specific model's response. */
-    private int responseTimeout = 120; // unit: sec
+    private int responseTimeout = defaultResponseTimeout;
     /** the timeout in sec of a specific model's startup. */
-    private int startupTimeout = 120; // unit: sec
+    private int startupTimeout = defaultStartupTimeout;
     /**
      * the device type where the model is loaded. It can be gpu, cpu. The model is loaded on CPU if
      * deviceType: "cpu" is set on a GPU host.

frontend/server/src/main/java/org/pytorch/serve/ModelServer.java

+2 -1

@@ -193,7 +193,8 @@ private void initModelStore() throws InvalidSnapshotException, IOException {
             String fileName = file.getName();
             if (file.isFile()
                     && !fileName.endsWith(".mar")
-                    && !fileName.endsWith(".model")) {
+                    && !fileName.endsWith(".model")
+                    && !fileName.endsWith(".tar.gz")) {
                 continue;
             }
             try {
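Since the condition skips (continues past) any regular file whose name matches none of the listed suffixes, the added check means the model-store scan now treats .tar.gz files as model archives instead of ignoring them.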

frontend/server/src/main/java/org/pytorch/serve/openapi/OpenApiUtils.java

+1 -1

@@ -203,7 +203,7 @@ private static Operation getSetDefaultOperation() {
         MediaType error = getErrorResponse();
 
         operation.addResponse(
-                new Response("200", "Default vesion succsesfully updated for model", status));
+                new Response("200", "Default version successfully updated for model", status));
         operation.addResponse(
                 new Response("404", "Model not found or Model version not found", error));
         operation.addResponse(new Response("500", "Internal Server Error", error));

frontend/server/src/main/java/org/pytorch/serve/util/ApiUtils.java

+1 -1

@@ -98,7 +98,7 @@ public static String setDefault(String modelName, String newModelVersion)
         ModelManager modelManager = ModelManager.getInstance();
         modelManager.setDefaultVersion(modelName, newModelVersion);
         String msg =
-                "Default vesion succsesfully updated for model \""
+                "Default version successfully updated for model \""
                         + modelName
                         + "\" to \""
                         + newModelVersion

frontend/server/src/main/java/org/pytorch/serve/wlm/Model.java

+10 -2

@@ -193,9 +193,17 @@ public void setModelState(JsonObject modelInfo) {
         minWorkers = modelInfo.get(MIN_WORKERS).getAsInt();
         maxWorkers = modelInfo.get(MAX_WORKERS).getAsInt();
         maxBatchDelay = modelInfo.get(MAX_BATCH_DELAY).getAsInt();
-        responseTimeout = modelInfo.get(RESPONSE_TIMEOUT).getAsInt();
-        startupTimeout = modelInfo.get(STARTUP_TIMEOUT).getAsInt();
         batchSize = modelInfo.get(BATCH_SIZE).getAsInt();
+        responseTimeout =
+                modelInfo.has(RESPONSE_TIMEOUT) && !modelInfo.get(RESPONSE_TIMEOUT).isJsonNull()
+                        ? modelInfo.get(RESPONSE_TIMEOUT).getAsInt()
+                        : modelArchive.getModelConfig()
+                                .defaultResponseTimeout; // default value for responseTimeout
+        startupTimeout =
+                modelInfo.has(STARTUP_TIMEOUT) && !modelInfo.get(STARTUP_TIMEOUT).isJsonNull()
+                        ? modelInfo.get(STARTUP_TIMEOUT).getAsInt()
+                        : modelArchive.getModelConfig()
+                                .defaultStartupTimeout; // default value for startupTimeout
 
         JsonElement runtime = modelInfo.get(RUNTIME_TYPE);
         String runtime_str = Manifest.RuntimeType.PYTHON.getValue();
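The guards matter when restoring a snapshot that lacks the timeout keys or stores them as null (presumably one written by an older TorchServe): reads now fall back to the ModelConfig defaults instead of failing. The same pattern, sketched in Python with a hypothetical snapshot payload:

```python
import json

DEFAULT_RESPONSE_TIMEOUT = 120  # sec, mirroring ModelConfig.defaultResponseTimeout

# Hypothetical snapshot that stores a null timeout.
model_info = json.loads('{"batchSize": 4, "responseTimeout": null}')

value = model_info.get("responseTimeout")  # None if absent or stored as null
response_timeout = value if value is not None else DEFAULT_RESPONSE_TIMEOUT
print(response_timeout)  # -> 120
```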

frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java

+1 -1

@@ -429,7 +429,7 @@ public void testSetDefaultVersionNoop() throws InterruptedException {
         StatusResponse resp = JsonUtils.GSON.fromJson(TestUtils.getResult(), StatusResponse.class);
         Assert.assertEquals(
                 resp.getStatus(),
-                "Default vesion succsesfully updated for model \"noopversioned\" to \"1.2.1\"");
+                "Default version successfully updated for model \"noopversioned\" to \"1.2.1\"");
     }
 
     @Test(

frontend/server/src/test/resources/management_open_api.json

+1 -1

@@ -1671,7 +1671,7 @@
         ],
         "responses": {
           "200": {
-            "description": "Default vesion succsesfully updated for model",
+            "description": "Default version successfully updated for model",
             "content": {
               "application/json": {
                 "schema": {

frontend/server/src/test/resources/model_management_api.json

+1 -1

@@ -1216,7 +1216,7 @@
         ],
         "responses": {
           "200": {
-            "description": "Default vesion succsesfully updated for model",
+            "description": "Default version successfully updated for model",
             "content": {
               "application/json": {
                 "schema": {

kubernetes/kserve/build_image.sh

+1 -1

@@ -66,5 +66,5 @@ cp -r ../../third_party .
 if [ "${MULTI}" == "true" ]; then
     DOCKER_BUILDKIT=1 docker buildx build --file "$DOCKER_FILE" --build-arg BASE_IMAGE=$BASE_IMAGE --platform "${ARCH}" -t "$DOCKER_TAG" --push .
 else
-    DOCKER_BUILDKIT=1 docker buildx build --file "$DOCKER_FILE" --build-arg BASE_IMAGE=$BASE_IMAGE -t "$DOCKER_TAG" --load .
+    DOCKER_BUILDKIT=1 docker buildx build --file "$DOCKER_FILE" --build-arg BASE_IMAGE=$BASE_IMAGE -t "$DOCKER_TAG" --push .
 fi
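For context on the flags: with docker buildx, --load imports the finished image into the local Docker engine, while --push uploads it to the registry, so after this change the single-platform branch publishes its image just like the multi-platform one.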

kubernetes/kserve/build_upload_release.py

-5

@@ -39,11 +39,6 @@
         dry_run,
     )
 
-    for image in [
-        f"{organization}/torchserve-kfs:{check_ts_version()}-gpu",
-    ]:
-        try_and_handle(f"docker push {image}", dry_run)
-
     # Cleanup built images
     if args.cleanup:
         try_and_handle(f"docker system prune --all --volumes -f", dry_run)

kubernetes/kserve/docker_nightly.py

+2 -7

@@ -43,22 +43,17 @@
         dry_run,
     )
 
-    # Push Nightly images to official PyTorch Dockerhub account
-    try_and_handle(f"docker push {organization}/{gpu_version}", dry_run)
-
     # Tag nightly images with latest
     try_and_handle(
         f"docker buildx imagetools create --tag {organization}/{project}:latest-cpu {organization}/{cpu_version}",
         dry_run,
     )
+
     try_and_handle(
-        f"docker tag {organization}/{gpu_version} {organization}/{project}:latest-gpu",
+        f"docker buildx imagetools create --tag {organization}/{project}:latest-gpu {organization}/{gpu_version}",
         dry_run,
     )
 
-    # Push images with latest tag
-    try_and_handle(f"docker push {organization}/{project}:latest-gpu", dry_run)
-
     # Cleanup built images
     if args.cleanup:
         try_and_handle(f"docker system prune --all --volumes -f", dry_run)
