-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1 from IlyasMoutawwakil/rocm-support
Rocm and custom devices support
- Loading branch information
Showing
5 changed files
with
149 additions
and
138 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
from py_tgi import TGI | ||
from py_tgi.utils import is_nvidia_system, is_rocm_system | ||
|
||
# Example: launch a TGI server on whatever accelerator the host exposes and
# run a small batch of prompts through it.

# Each detection helper shells out to a vendor CLI, so probe the host once
# and reuse the answers instead of re-running the tools for every argument.
on_rocm = is_rocm_system()
on_nvidia = is_nvidia_system()

if on_rocm or on_nvidia:
    # NOTE(review): the checkpoint name says AWQ while `quantize` is "gptq";
    # confirm which of the two is intended before relying on this example.
    llm = TGI(
        model="TheBloke/Llama-2-7B-AWQ",  # awq model checkpoint
        devices=["/dev/kfd", "/dev/dri"] if on_rocm else None,  # custom devices (ROCm)
        gpus="all" if on_nvidia else None,  # all gpus (NVIDIA)
        quantize="gptq",  # use exllama kernels (rocm compatible)
    )
else:
    # No supported GPU tooling found: fall back to a small unsharded model.
    llm = TGI(model="gpt2", sharded=False)

output = llm.generate(["Hi, I'm a language model", "I'm fine, how are you?"])
print(output)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,48 +1,17 @@ | ||
import signal | ||
import subprocess | ||
from contextlib import contextmanager | ||
|
||
|
||
def get_nvidia_gpu_devices() -> str:
    """Return the indices of all non-display NVIDIA GPUs as a comma-separated string.

    The GPUs are listed by running ``nvidia-smi``; any device whose reported
    name contains ``"Display"`` is left out.

    Returns:
        The remaining GPU indices joined with ``","`` (for example ``"0,1"``),
        or an empty string when no GPU qualifies.

    Raises:
        FileNotFoundError: If ``nvidia-smi`` is not available on ``PATH``.
        subprocess.CalledProcessError: If ``nvidia-smi`` exits with a
            non-zero status.
    """
    # Only the index and the name are needed to build the result, so no other
    # field is queried; `noheader` removes the need to skip the title row.
    report = subprocess.check_output(
        [
            "nvidia-smi",
            "--query-gpu=index,gpu_name",
            "--format=csv,noheader",
        ],
    ).decode("utf-8")

    device_ids = []
    for row in report.splitlines():
        if not row.strip():
            continue
        # Split once on the first comma: everything after it is the GPU name.
        index, _, name = row.partition(",")
        if "Display" not in name:
            device_ids.append(str(int(index)))

    return ",".join(device_ids)
|
||
|
||
@contextmanager
def timeout(time: int):
    """
    Timeout context manager. Raises TimeoutError if the code inside the context manager takes longer than `time` seconds to execute.

    Implemented with ``SIGALRM``: a handler that raises ``TimeoutError`` is
    installed and an alarm is armed for `time` seconds. On exit the alarm is
    cancelled and the previously installed ``SIGALRM`` handler is restored.
    """

    def signal_handler(signum, frame):
        raise TimeoutError("Timed out")

    # signal.signal returns the handler it replaces; keep it so that leaving
    # the context does not permanently override the caller's handler.
    previous_handler = signal.signal(signal.SIGALRM, signal_handler)
    signal.alarm(time)

    try:
        yield
    finally:
        # Cancel any pending alarm first so it cannot fire after restoration.
        signal.alarm(0)
        # A handler that was not installed from Python is reported as None
        # and cannot be passed back to signal.signal.
        if previous_handler is not None:
            signal.signal(signal.SIGALRM, previous_handler)


def _command_succeeds(command: list) -> bool:
    """Return True if `command` can be launched and exits with status 0."""
    try:
        subprocess.run(
            command,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=True,
        )
    except (OSError, subprocess.CalledProcessError):
        # OSError covers a missing or non-executable binary;
        # CalledProcessError covers a tool that is installed but fails.
        return False
    return True


def is_rocm_system() -> bool:
    """Return True if ``rocm-smi`` is available and runs successfully."""
    return _command_succeeds(["rocm-smi"])


def is_nvidia_system() -> bool:
    """Return True if ``nvidia-smi`` is available and runs successfully."""
    return _command_succeeds(["nvidia-smi"])
Oops, something went wrong.