
Commit 98ea356

[Lora][Frontend]Add default local directory LoRA resolver plugin. (#16855)
Signed-off-by: jberkhahn <[email protected]>
1 parent d191102 commit 98ea356

9 files changed: +146 −3 lines changed

.buildkite/test-pipeline.yaml

Lines changed: 2 additions & 1 deletion
```diff
@@ -628,7 +628,7 @@ steps:
   - vllm/plugins/
   - tests/plugins/
   commands:
-  # begin platform plugin tests, all the code in-between runs on dummy platform
+  # begin platform plugin and general plugin tests, all the code in-between runs on dummy platform
   - pip install -e ./plugins/vllm_add_dummy_platform
   - pytest -v -s plugins_tests/test_platform_plugins.py
   - pip uninstall vllm_add_dummy_platform -y
@@ -639,6 +639,7 @@ steps:
   - pytest -v -s distributed/test_distributed_oot.py
   - pytest -v -s entrypoints/openai/test_oot_registration.py # it needs a clean process
   - pytest -v -s models/test_oot_registration.py # it needs a clean process
+  - pytest -v -s plugins/lora_resolvers # unit tests for in-tree lora resolver plugins

- label: Multi-step Tests (4 GPUs) # 36min
  mirror_hardwares: [amdexperimental]
```

docs/source/features/lora.md

Lines changed: 5 additions & 2 deletions
```diff
@@ -159,9 +159,12 @@ Alternatively, you can use the LoRAResolver plugin to dynamically load LoRA adap

 You can set up multiple LoRAResolver plugins if you want to load LoRA adapters from different sources. For example, you might have one resolver for local files and another for S3 storage. vLLM will load the first LoRA adapter that it finds.

-You can either install existing plugins or implement your own.
+You can either install existing plugins or implement your own. By default, vLLM comes with a [resolver plugin to load LoRA adapters from a local directory.](https://github.com/vllm-project/vllm/tree/main/vllm/plugins/lora_resolvers)
+To enable this resolver, set `VLLM_ALLOW_RUNTIME_LORA_UPDATING` to True, set `VLLM_PLUGINS` to include `lora_filesystem_resolver`, and then set `VLLM_LORA_RESOLVER_CACHE_DIR` to a local directory. When vLLM receives a request using a LoRA adapter `foobar`,
+it will first look in the local directory for a directory `foobar`, and attempt to load the contents of that directory as a LoRA adapter. If successful, the request will complete as normal and
+that adapter will then be available for normal use on the server.

-Steps to implement your own LoRAResolver plugin:
+Alternatively, follow these example steps to implement your own plugin:
 1. Implement the LoRAResolver interface.

 Example of a simple S3 LoRAResolver implementation:
```
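For readers following along, here is a minimal usage sketch of the workflow the new documentation describes. The base model name comes from this commit's tests; the adapter name `foobar`, cache path, and port are illustrative assumptions rather than values taken from this change.

```bash
# Enable runtime LoRA updates and the bundled filesystem resolver (illustrative values)
export VLLM_ALLOW_RUNTIME_LORA_UPDATING=True
export VLLM_PLUGINS=lora_filesystem_resolver
export VLLM_LORA_RESOLVER_CACHE_DIR=/path/to/adapter_cache   # directory must already exist

# Start an OpenAI-compatible server with LoRA support
vllm serve mistralai/Mistral-7B-v0.1 --enable-lora

# Requesting an unknown adapter "foobar" makes the resolver look for
# $VLLM_LORA_RESOLVER_CACHE_DIR/foobar before the request is served
curl http://localhost:8000/v1/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "foobar", "prompt": "Hello", "max_tokens": 16}'
```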

pyproject.toml

Lines changed: 3 additions & 0 deletions
```diff
@@ -41,6 +41,9 @@ Slack="http://slack.vllm.ai/"
 [project.scripts]
 vllm = "vllm.entrypoints.cli.main:main"

+[project.entry-points."vllm.general_plugins"]
+lora_filesystem_resolver = "vllm.plugins.lora_resolvers.filesystem_resolver:register_filesystem_resolver"
+
 [tool.setuptools_scm]
 # no extra settings needed, presence enables setuptools-scm
```
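The `vllm.general_plugins` entry point above is what lets the resolver register itself at startup. The sketch below illustrates the general setuptools entry-point discovery pattern such a group relies on; it is a simplified stand-in, not vLLM's actual plugin loader, and the `allowed` filter only mimics the role `VLLM_PLUGINS` plays.

```python
# Simplified sketch of entry-point based plugin discovery (Python 3.10+ API).
# Illustration of the mechanism only, not vLLM's real loader.
from importlib.metadata import entry_points
from typing import Optional


def load_general_plugins(allowed: Optional[set] = None) -> None:
    for ep in entry_points(group="vllm.general_plugins"):
        if allowed is not None and ep.name not in allowed:
            continue  # plugin not selected, e.g. not listed in VLLM_PLUGINS
        register_fn = ep.load()  # e.g. register_filesystem_resolver
        register_fn()  # the plugin registers its resolver with the registry


# load_general_plugins({"lora_filesystem_resolver"})
```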

tests/plugins/lora_resolvers/__init__.py

Whitespace-only changes.
tests/plugins/lora_resolvers/ (new test module for the filesystem resolver)

Lines changed: 65 additions & 0 deletions
```python
# SPDX-License-Identifier: Apache-2.0
import os
import shutil

import pytest
from huggingface_hub import snapshot_download

from vllm.plugins.lora_resolvers.filesystem_resolver import FilesystemResolver

MODEL_NAME = "mistralai/Mistral-7B-v0.1"
LORA_NAME = "typeof/zephyr-7b-beta-lora"
PA_NAME = "swapnilbp/llama_tweet_ptune"


@pytest.fixture(scope='module')
def adapter_cache(request, tmpdir_factory):
    # Create dir that mimics the structure of the adapter cache
    adapter_cache = tmpdir_factory.mktemp(
        request.module.__name__) / "adapter_cache"
    return adapter_cache


@pytest.fixture(scope="module")
def zephyr_lora_files():
    return snapshot_download(repo_id=LORA_NAME)


@pytest.fixture(scope="module")
def pa_files():
    return snapshot_download(repo_id=PA_NAME)


@pytest.mark.asyncio
async def test_filesystem_resolver(adapter_cache, zephyr_lora_files):
    model_files = adapter_cache / LORA_NAME
    shutil.copytree(zephyr_lora_files, model_files)

    fs_resolver = FilesystemResolver(adapter_cache)
    assert fs_resolver is not None

    lora_request = await fs_resolver.resolve_lora(MODEL_NAME, LORA_NAME)
    assert lora_request is not None
    assert lora_request.lora_name == LORA_NAME
    assert lora_request.lora_path == os.path.join(adapter_cache, LORA_NAME)


@pytest.mark.asyncio
async def test_missing_adapter(adapter_cache):
    fs_resolver = FilesystemResolver(adapter_cache)
    assert fs_resolver is not None

    missing_lora_request = await fs_resolver.resolve_lora(MODEL_NAME, "foobar")
    assert missing_lora_request is None


@pytest.mark.asyncio
async def test_nonlora_adapter(adapter_cache, pa_files):
    model_files = adapter_cache / PA_NAME
    shutil.copytree(pa_files, model_files)

    fs_resolver = FilesystemResolver(adapter_cache)
    assert fs_resolver is not None

    pa_request = await fs_resolver.resolve_lora(MODEL_NAME, PA_NAME)
    assert pa_request is None
```

vllm/envs.py

Lines changed: 7 additions & 0 deletions
```diff
@@ -68,6 +68,7 @@
     VLLM_ALLOW_LONG_MAX_MODEL_LEN: bool = False
     VLLM_RPC_TIMEOUT: int = 10000  # ms
     VLLM_PLUGINS: Optional[list[str]] = None
+    VLLM_LORA_RESOLVER_CACHE_DIR: Optional[str] = None
     VLLM_TORCH_PROFILER_DIR: Optional[str] = None
     VLLM_USE_TRITON_AWQ: bool = False
     VLLM_ALLOW_RUNTIME_LORA_UPDATING: bool = False
@@ -503,6 +504,12 @@ def maybe_convert_int(value: Optional[str]) -> Optional[int]:
     lambda: None if "VLLM_PLUGINS" not in os.environ else os.environ[
         "VLLM_PLUGINS"].split(","),

+    # a local directory to look in for unrecognized LoRA adapters.
+    # only works if plugins are enabled and
+    # VLLM_ALLOW_RUNTIME_LORA_UPDATING is enabled.
+    "VLLM_LORA_RESOLVER_CACHE_DIR":
+    lambda: os.getenv("VLLM_LORA_RESOLVER_CACHE_DIR", None),
+
     # Enables torch profiler if set. Path to the directory where torch profiler
     # traces are saved. Note that it must be an absolute path.
     "VLLM_TORCH_PROFILER_DIR":
```

vllm/plugins/lora_resolvers/README.md

Lines changed: 15 additions & 0 deletions
```markdown
# LoRA Resolver Plugins

This directory contains vLLM general plugins for dynamically discovering and loading LoRA adapters
via the LoRAResolver plugin framework.

Note that `VLLM_ALLOW_RUNTIME_LORA_UPDATING` must be set to true to allow LoRA resolver plugins
to work, and `VLLM_PLUGINS` must be set to include the desired resolver plugins.

# lora_filesystem_resolver
This LoRA Resolver is installed with vLLM by default.
To use, set `VLLM_LORA_RESOLVER_CACHE_DIR` to a local directory. When vLLM receives a request
for a LoRA adapter `foobar` it doesn't currently recognize, it will look in that local directory
for a subdirectory `foobar` containing a LoRA adapter. If such an adapter exists, it will
load that adapter, service the request as normal, and keep the adapter available
for future requests.
```
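To make the expected cache layout concrete, a directory tree like the one below would let the resolver serve an adapter named `foobar`. The weight file name follows the usual PEFT convention and is an illustrative assumption; `adapter_config.json` is the only file the resolver itself inspects.

```
$VLLM_LORA_RESOLVER_CACHE_DIR/
└── foobar/                        # matches the adapter name in the request
    ├── adapter_config.json        # checked for peft_type and base model
    └── adapter_model.safetensors  # adapter weights (typical PEFT layout)
```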

vllm/plugins/lora_resolvers/__init__.py

Whitespace-only changes.
vllm/plugins/lora_resolvers/filesystem_resolver.py

Lines changed: 49 additions & 0 deletions
```python
# SPDX-License-Identifier: Apache-2.0
import json
import os
from typing import Optional

import vllm.envs as envs
from vllm.lora.request import LoRARequest
from vllm.lora.resolver import LoRAResolver, LoRAResolverRegistry


class FilesystemResolver(LoRAResolver):

    def __init__(self, lora_cache_dir: str):
        self.lora_cache_dir = lora_cache_dir

    async def resolve_lora(self, base_model_name: str,
                           lora_name: str) -> Optional[LoRARequest]:
        lora_path = os.path.join(self.lora_cache_dir, lora_name)
        if os.path.exists(lora_path):
            adapter_config_path = os.path.join(self.lora_cache_dir, lora_name,
                                               "adapter_config.json")
            if os.path.exists(adapter_config_path):
                with open(adapter_config_path) as file:
                    adapter_config = json.load(file)
                    if adapter_config["peft_type"] == "LORA" and adapter_config[
                            "base_model_name_or_path"] == base_model_name:
                        lora_request = LoRARequest(lora_name=lora_name,
                                                   lora_int_id=abs(
                                                       hash(lora_name)),
                                                   lora_path=lora_path)
                        return lora_request
        return None


def register_filesystem_resolver():
    """Register the filesystem LoRA Resolver with vLLM"""

    lora_cache_dir = envs.VLLM_LORA_RESOLVER_CACHE_DIR
    if lora_cache_dir:
        if not os.path.exists(lora_cache_dir) or not os.path.isdir(
                lora_cache_dir):
            raise ValueError(
                "VLLM_LORA_RESOLVER_CACHE_DIR must be set to a valid directory \
                for Filesystem Resolver plugin to function")
        fs_resolver = FilesystemResolver(lora_cache_dir)
        LoRAResolverRegistry.register_resolver("Filesystem Resolver",
                                               fs_resolver)

    return
```
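For reference, `resolve_lora` only returns a request when the adapter's `adapter_config.json` declares `peft_type` "LORA" and a `base_model_name_or_path` equal to the served base model. A config along these lines would pass that check; the base model matches the test suite's `MODEL_NAME`, while the remaining PEFT fields are illustrative assumptions.

```json
{
  "peft_type": "LORA",
  "base_model_name_or_path": "mistralai/Mistral-7B-v0.1",
  "r": 8,
  "lora_alpha": 16,
  "target_modules": ["q_proj", "v_proj"]
}
```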
