
Commit de1ab6d: "change wording"
1 parent: 73c05a1

2 files changed: +3 / -4 lines

torchtitan/models/deepseek_v3/model/state_dict_adapter.py

Lines changed: 2 additions & 3 deletions
@@ -136,7 +136,7 @@ def _add_quantization_scale_inv_tensors(
 
     def to_hf(self, state_dict: dict[str, Any]) -> dict[str, Any]:
         """
-        1. Quantize the weights from float32 to float8.
+        1. When saving HF checkpoints, quantize the weights from float32 to float8.
         2. Convert between the HF shape and the torchtitan shape.
         3. Split the GroupedExperts' weight into separate experts' weights.
         """
@@ -149,7 +149,6 @@ def to_hf(self, state_dict: dict[str, Any]) -> dict[str, Any]:
                 continue
 
             if "moe.experts" in key:
-                # model.layers.3.mlp.experts.0.down_proj.weight
                 abstract_key = re.sub(r"(\d+)", "{}", key, count=1)
                 layer_num = re.search(r"\d+", key).group(0)
                 new_abstract_key = to_hf_map[abstract_key]
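The comment removed in this hunk shows the HF key format the mapping targets. As a sketch of the remapping plus the expert split from step 3 of the docstring (assuming a GroupedExperts weight of shape [num_experts, out_dim, in_dim]; the torchtitan key and the to_hf_map entry here are illustrative, only the HF key format comes from the removed comment):

import re
import torch

# illustrative mapping entry, not the adapter's real table
to_hf_map = {
    "model.layers.{}.moe.experts.down_proj": "model.layers.{}.mlp.experts.{}.down_proj.weight",
}

key = "model.layers.3.moe.experts.down_proj"  # assumed torchtitan key
grouped = torch.randn(4, 16, 32)              # [num_experts, out_dim, in_dim]

abstract_key = re.sub(r"(\d+)", "{}", key, count=1)  # layer number -> {}
layer_num = re.search(r"\d+", key).group(0)
new_abstract_key = to_hf_map[abstract_key]

hf_state_dict = {}
for expert_id, expert_weight in enumerate(grouped.unbind(0)):
    hf_state_dict[new_abstract_key.format(layer_num, expert_id)] = expert_weight
# keys: model.layers.3.mlp.experts.0.down_proj.weight, ...experts.1..., etc.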
@@ -188,7 +187,7 @@ def to_hf(self, state_dict: dict[str, Any]) -> dict[str, Any]:
 
     def from_hf(self, hf_state_dict: dict[str, Any]) -> dict[str, Any]:
         """
-        1. Dequantize the weights from float8 to float32.
+        1. When loading from HF checkpoint, dequantize the weights from float8 to float32.
         2. Convert between the HF shape and the torchtitan shape.
         3. Concatenate separate experts' weights into GroupedExperts' weight.
         """

torchtitan/models/deepseek_v3/train_configs/deepseek_v3_671b.toml

Lines changed: 1 addition & 1 deletion
@@ -48,7 +48,7 @@ dataset = "c4"  # supported datasets: c4_test (2K), c4 (177M)
 data_parallel_replicate_degree = 1
 data_parallel_shard_degree = -1
 fsdp_reshard_after_forward = "default"  # default / never / always
-tensor_parallel_degree = 1
+tensor_parallel_degree = 8
 enable_async_tensor_parallel = false
 expert_parallel_degree = 1
 pipeline_parallel_degree = 1
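With data_parallel_shard_degree = -1, FSDP absorbs whatever ranks remain after the other parallel dimensions, and the degrees must multiply out to the world size. On a 64-GPU job, for instance, tensor_parallel_degree = 8 would leave 8-way FSDP sharding (the GPU count is an illustration, nothing in the config pins it down). The bump from 1 to 8 presumably reflects that the 671B parameters cannot fit in per-GPU memory without tensor parallelism.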
