Skip to content

Commit 3c53300

Browse files
Merge pull request #5 from DeepAuto-AI/deepauto/feat/refactor-code
Refactor code in preparation for SGLang PR
2 parents f5f5e89 + 63fee4f commit 3c53300

21 files changed

+462 additions, -2185 deletions (lines changed)

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -228,5 +228,6 @@ compile_commands.json
228228

229229
1
230230

231+
# Profiling data
231232
*.nsys-rep
232233
*.ncu-rep

python/sglang/bench_one_batch.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -137,6 +137,7 @@ def load_model(server_args, port_args, tp_rank):
137137
is_embedding=server_args.is_embedding,
138138
dtype=server_args.dtype,
139139
quantization=server_args.quantization,
140+
is_context_extended=server_args.enable_hip_attention,
140141
)
141142
model_runner = ModelRunner(
142143
model_config=model_config,

python/sglang/srt/configs/model_config.py

Lines changed: 15 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,7 @@ def __init__(
4343
is_embedding: Optional[bool] = None,
4444
dtype: str = "auto",
4545
quantization: Optional[str] = None,
46+
is_context_extended: Optional[bool] = None,
4647
) -> None:
4748
self.model_path = model_path
4849
self.revision = revision
@@ -70,21 +71,20 @@ def __init__(
7071
derived_context_len = get_context_length(self.hf_text_config)
7172
if context_length is not None:
7273
if context_length > derived_context_len:
73-
# FIXME: ignore this env flag only when HiP + context extension activated
74-
logger.warning(
75-
f"Warning: User-specified context_length ({context_length}) is greater than the derived context_length ({derived_context_len}). "
76-
f"This may lead to incorrect model outputs or CUDA errors."
77-
)
78-
self.context_len = context_length
79-
# if get_bool_env_var("SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN"):
80-
# else:
81-
# raise ValueError(
82-
# f"User-specified context_length ({context_length}) is greater than the derived context_length ({derived_context_len}). "
83-
# f"This may lead to incorrect model outputs or CUDA errors. Note that the derived context_length may differ from max_position_embeddings in the model's config. "
84-
# f"To allow overriding this maximum, set the env var SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1"
85-
# )
86-
else:
87-
self.context_len = context_length
74+
if is_context_extended:
75+
pass
76+
elif get_bool_env_var("SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN"):
77+
logger.warning(
78+
f"Warning: User-specified context_length ({context_length}) is greater than the derived context_length ({derived_context_len}). "
79+
f"This may lead to incorrect model outputs or CUDA errors."
80+
)
81+
else:
82+
raise ValueError(
83+
f"User-specified context_length ({context_length}) is greater than the derived context_length ({derived_context_len}). "
84+
f"This may lead to incorrect model outputs or CUDA errors. Note that the derived context_length may differ from max_position_embeddings in the model's config. "
85+
f"To allow overriding this maximum, set the env var SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1"
86+
)
87+
self.context_len = context_length
8888
else:
8989
self.context_len = derived_context_len
9090

python/sglang/srt/layers/attention/hip_attention/__init__.py

Lines changed: 0 additions & 4 deletions
This file was deleted.

python/sglang/srt/layers/attention/hip_attention/hip_config.py

Lines changed: 0 additions & 173 deletions
This file was deleted.

0 commit comments

Comments (0)