Skip to content

Commit

Permalink
Merge pull request #5 from DeepAuto-AI/deepauto/feat/refactor-code
Browse files: browse the repository at this point in the history
Refactor code in preparation for SGLang PR
  • Loading branch information
daniel-geon-park authored Jan 28, 2025
2 parents f5f5e89 + 63fee4f commit 3c53300
Show file tree
Hide file tree
Showing 21 changed files with 462 additions and 2,185 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -228,5 +228,6 @@ compile_commands.json

1

# Profiling data
*.nsys-rep
*.ncu-rep
1 change: 1 addition & 0 deletions python/sglang/bench_one_batch.py
Original file line number Diff line number Diff line change
@@ -137,6 +137,7 @@ def load_model(server_args, port_args, tp_rank):
is_embedding=server_args.is_embedding,
dtype=server_args.dtype,
quantization=server_args.quantization,
is_context_extended=server_args.enable_hip_attention,
)
model_runner = ModelRunner(
model_config=model_config,
Expand Down
30 changes: 15 additions & 15 deletions python/sglang/srt/configs/model_config.py
Original file line number Diff line number Diff line change
@@ -43,6 +43,7 @@ def __init__(
is_embedding: Optional[bool] = None,
dtype: str = "auto",
quantization: Optional[str] = None,
is_context_extended: Optional[bool] = None,
) -> None:
self.model_path = model_path
self.revision = revision
@@ -70,21 +71,20 @@ def __init__(
derived_context_len = get_context_length(self.hf_text_config)
if context_length is not None:
if context_length > derived_context_len:
# FIXME: ignore this env flag only when HiP + context extension activated
logger.warning(
f"Warning: User-specified context_length ({context_length}) is greater than the derived context_length ({derived_context_len}). "
f"This may lead to incorrect model outputs or CUDA errors."
)
self.context_len = context_length
# if get_bool_env_var("SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN"):
# else:
# raise ValueError(
# f"User-specified context_length ({context_length}) is greater than the derived context_length ({derived_context_len}). "
# f"This may lead to incorrect model outputs or CUDA errors. Note that the derived context_length may differ from max_position_embeddings in the model's config. "
# f"To allow overriding this maximum, set the env var SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1"
# )
else:
self.context_len = context_length
if is_context_extended:
pass
elif get_bool_env_var("SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN"):
logger.warning(
f"Warning: User-specified context_length ({context_length}) is greater than the derived context_length ({derived_context_len}). "
f"This may lead to incorrect model outputs or CUDA errors."
)
else:
raise ValueError(
f"User-specified context_length ({context_length}) is greater than the derived context_length ({derived_context_len}). "
f"This may lead to incorrect model outputs or CUDA errors. Note that the derived context_length may differ from max_position_embeddings in the model's config. "
f"To allow overriding this maximum, set the env var SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1"
)
self.context_len = context_length
else:
self.context_len = derived_context_len

Expand Down
4 changes: 0 additions & 4 deletions python/sglang/srt/layers/attention/hip_attention/__init__.py

This file was deleted.

173 changes: 0 additions & 173 deletions python/sglang/srt/layers/attention/hip_attention/hip_config.py

This file was deleted.

Loading

0 comments on commit 3c53300

Please sign in to comment.