Skip to content

Commit

Permalink
Add DeepSeek R1 Distill 8B
Browse files: browse the repository at this point in the history
  • Loading branch information
Jack-Khuu committed Feb 1, 2025
1 parent 7cbf2a3 commit 8ee137d
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 2 deletions.
10 changes: 8 additions & 2 deletions tokenizer/hf_tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,14 @@ def __init__(self, file_path: str):
if tokenizer_config_path is not None:
with open(tokenizer_config_path, "r") as handle:
tok_config = json.load(handle)
bos_token = tok_config.get("bos_token")
eos_token = tok_config.get("eos_token")

def _extract_token(identifier: str) -> Optional[str]:
    """Fetch the special-token entry stored under ``identifier`` in ``tok_config``.

    The entry may be stored either as a bare value or as a dict; when it is
    a dict, its ``"content"`` field is returned instead of the dict itself.
    Any non-dict entry (including ``None``) is handed back unchanged.
    """
    raw: Optional[Union[str, dict]] = tok_config.get(identifier)
    if isinstance(raw, dict):
        # Dict-style entry: the token text lives under "content".
        return raw.get("content")
    return raw

bos_token = _extract_token("bos_token")
eos_token = _extract_token("eos_token")

if bos_token is not None:
self._bos_id = self._tokenizer.token_to_id(bos_token)
if eos_token is not None:
Expand Down
6 changes: 6 additions & 0 deletions torchchat/model_config/models.json
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,12 @@
"distribution_path": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"transformer_params_key": "Meta-Llama-3.1-8B"
},
"deepseek-ai/DeepSeek-R1-Distill-Llama-8B": {
"aliases": ["deepseek-r1-distill", "deepseek-r1-distill-8b"],
"distribution_channel": "HuggingFaceSnapshot",
"distribution_path": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
"tokenizer_file": "tokenizer.json"
},
"meta-llama/Meta-Llama-3.1-70B-Instruct": {
"aliases": ["llama3.1-70b"],
"distribution_channel": "HuggingFaceSnapshot",
Expand Down
1 change: 1 addition & 0 deletions torchchat/model_params/DeepSeek-R1-Distill-Llama-8B.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"block_size": 131072, "dim": 4096, "ffn_dim_multiplier": 1.3, "multiple_of": 1024, "n_heads": 32, "n_local_heads": 8, "n_layers": 32, "rope_base": 500000.0, "vocab_size": 128256, "use_tiktoken": true, "use_hf_tokenizer": true, "norm_eps": 1e-05, "rope_scaling": {"factor": 8.0, "low_freq_factor": 1.0, "high_freq_factor": 4.0, "original_max_position_embeddings": 8192}}

0 comments on commit 8ee137d

Please sign in to comment.