@@ -25,17 +25,16 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-""" PyTorch DeepSeek model."""
+"""PyTorch DeepSeek model."""
+
 import math
 from typing import Optional, Tuple
 
 import torch
 import torch.distributed as dist
-
 import torch.distributed._symmetric_memory as symm_mem
 import torch.nn.functional as F
 import torch.utils.checkpoint
-
 from attn_mask_utils import _prepare_4d_causal_attention_mask
 from model_config import ModelArgs
 from symm_mem_recipes import OnDeviceAllToAllV
@@ -401,9 +400,7 @@ def forward(self, hidden_states):
         )  # [n, n_group]
         group_idx = torch.topk(
             group_scores, k=self.topk_group, dim=-1, sorted=False
-        )[
-            1
-        ]  # [n, top_k_group]
+        )[1]  # [n, top_k_group]
         group_mask = torch.zeros_like(group_scores)  # [n, n_group]
         group_mask.scatter_(1, group_idx, 1)  # [n, n_group]
         score_mask = (
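Aside on the `forward` hunk above: `torch.topk` returns a `(values, indices)` tuple, so the reformatted `)[1]` simply selects the indices of the top-scoring expert groups. A minimal sketch of that group-selection pattern follows; the sizes (`n` tokens, `n_group` groups, `topk_group` picks) are illustrative assumptions, not values from this model.

import torch

n, n_group, topk_group = 4, 8, 3       # assumed sizes for illustration
group_scores = torch.rand(n, n_group)  # [n, n_group]

# torch.topk returns (values, indices); [1] keeps only the indices
group_idx = torch.topk(
    group_scores, k=topk_group, dim=-1, sorted=False
)[1]  # [n, topk_group]

# Scatter 1s at the selected indices to get a 0/1 mask over groups
group_mask = torch.zeros_like(group_scores)  # [n, n_group]
group_mask.scatter_(1, group_idx, 1)         # [n, n_group]
print(group_mask.sum(dim=-1))  # every row selects exactly topk_group groups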
@@ -629,7 +626,6 @@ def moe_forward(self, x, topk_ids, topk_weight):
             % self.experts_per_rank
         ) + self.ep_rank * self.experts_per_rank
 
-
         # Prepare buffer for tokens processed by experts
         if self.shuffle_method == "symm_mem":
             # Take necessary space from `token_gather_buf` symm mem because we are
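The `moe_forward` hunk only drops a blank line, but the surviving expression is the expert-parallel id mapping: taking an index modulo `self.experts_per_rank` and adding `self.ep_rank * self.experts_per_rank` folds it into the contiguous block of global expert ids owned by the current expert-parallel rank. A hedged sketch with made-up counts (4 experts per rank, rank 2):

import torch

experts_per_rank, ep_rank = 4, 2  # assumed values for illustration
idx = torch.arange(10)            # arbitrary indices

# Modulo folds into [0, experts_per_rank); the offset shifts into this
# rank's block of global expert ids, which is [8, 12) for rank 2
owned = (idx % experts_per_rank) + ep_rank * experts_per_rank
print(owned)  # tensor([ 8,  9, 10, 11,  8,  9, 10, 11,  8,  9])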
@@ -1002,9 +998,9 @@ def __init__(self, config: ModelArgs):
         self.vocab_size = config.vocab_size
 
         # Creating model parts related to my stage
-        assert (
-            config.stage_idx < config.num_stages
-        ), f"Stage {config.stage_idx} is not in the model"
+        assert config.stage_idx < config.num_stages, (
+            f"Stage {config.stage_idx} is not in the model"
+        )
         print(f"Creating model stage {config.stage_idx} of {config.num_stages}")
 
         self.embed_tokens = (