Skip to content

Commit

Permalink
adding torch support for qwen models
Browse files (browse the repository at this point in the history)
  • Loading branch information
risingsunomi committed Jan 31, 2025
1 parent 76e141a commit 85d25c1
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 15 deletions.
2 changes: 0 additions & 2 deletions exo/inference/torch/models/general_mha.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
GeneralMHA class
Return transformer model with MHA
"""
import re

from typing import Optional, Tuple

import torch
Expand Down
65 changes: 52 additions & 13 deletions exo/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,55 +162,94 @@
},
"qwen-2.5-1.5b": {
"layers": 28,
"repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-1.5B-Instruct-4bit",},
"repo": {
"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-1.5B-Instruct-4bit",
"TorchDynamicShardInferenceEngine": "Qwen/Qwen2.5-1.5B-Instruct"
},
},
"qwen-2.5-coder-1.5b": {
"layers": 28,
"repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-Coder-1.5B-Instruct-4bit",},
"repo": {
"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-Coder-1.5B-Instruct-4bit",
"TorchDynamicShardInferenceEngine": "Qwen/Qwen2.5-Coder-1.5B-Instruct"
},
},
"qwen-2.5-3b": {
"layers": 36,
"repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-3B-Instruct-4bit",},
"repo": {
"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-3B-Instruct-4bit",
"TorchDynamicShardInferenceEngine": "Qwen/Qwen2.5-3B-Instruct"
},
},
"qwen-2.5-coder-3b": {
"layers": 36,
"repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-Coder-3B-Instruct-4bit",},
"repo": {
"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-Coder-3B-Instruct-4bit",
"TorchDynamicShardInferenceEngine": "Qwen/Qwen2.5-Coder-3B-Instruct"
},
},
"qwen-2.5-7b": {
"layers": 28,
"repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-7B-Instruct-4bit",},
"repo": {
"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-7B-Instruct-4bit",
"TorchDynamicShardInferenceEngine": "Qwen/Qwen2.5-7B-Instruct"
},
},
"qwen-2.5-coder-7b": {
"layers": 28,
"repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-Coder-7B-Instruct-4bit",},
"repo": {
"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-Coder-7B-Instruct-4bit",
"TorchDynamicShardInferenceEngine": "Qwen/Qwen2.5-Coder-7B-Instruct"
},
},
"qwen-2.5-math-7b": {
"layers": 28,
"repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-Math-7B-Instruct-4bit",},
"repo": {
"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-Math-7B-Instruct-4bit",
"TorchDynamicShardInferenceEngine": "Qwen/Qwen2.5-Math-7B-Instruct"
},
},
"qwen-2.5-14b": {
"layers": 48,
"repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-14B-Instruct-4bit",},
"repo": {
"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-14B-Instruct-4bit",
"TorchDynamicShardInferenceEngine": "Qwen/Qwen2.5-14B-Instruct"
},
},
"qwen-2.5-coder-14b": {
"layers": 48,
"repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-Coder-14B-Instruct-4bit",},
"repo": {
"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-Coder-14B-Instruct-4bit",
"TorchDynamicShardInferenceEngine": "Qwen/Qwen2.5-Coder-14B-Instruct"
},
},
"qwen-2.5-32b": {
"layers": 64,
"repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-32B-Instruct-4bit",},
"repo": {
"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-32B-Instruct-4bit",
"TorchDynamicShardInferenceEngine": "Qwen/Qwen2.5-32B-Instruct"
},
},
"qwen-2.5-coder-32b": {
"layers": 64,
"repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-Coder-32B-Instruct-4bit",},
"repo": {
"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-Coder-32B-Instruct-4bit",
"TorchDynamicShardInferenceEngine": "Qwen/Qwen2.5-Coder-32B-Instruct"
},
},
"qwen-2.5-72b": {
"layers": 80,
"repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-72B-Instruct-4bit",},
"repo": {
"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-72B-Instruct-4bit",
"TorchDynamicShardInferenceEngine": "Qwen/Qwen2.5-72B-Instruct"
},
},
"qwen-2.5-math-72b": {
"layers": 80,
"repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-Math-72B-Instruct-4bit",},
"repo": {
"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-Math-72B-Instruct-4bit",
"TorchDynamicShardInferenceEngine": "Qwen/Qwen2.5-Math-72B-Instruct"
},
},
### nemotron
"nemotron-70b": {
Expand Down

0 comments on commit 85d25c1

Please sign in to comment.