
Commit c1c2f55

[PoC] Typed JobConfig (#767)
In case there's any interest in type hinting of config args, at the expense of an added dependency (`tyro`). Re: #753

Co-authored-by: Jayson Francis <[email protected]>
1 parent 78b0d60 commit c1c2f55

24 files changed: +950 −1,061 lines

.ci/docker/requirements.txt

+1

@@ -7,3 +7,4 @@ blobfile
 tabulate
 wandb
 fsspec
+tyro
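
`tyro` is the new dependency that powers the typed config: it builds a command-line parser directly from dataclass definitions and returns a fully typed object. A minimal standalone sketch of that idea, for orientation only (this snippet is illustrative and not part of the commit):

```python
# Illustrative tyro usage; assumes only the `tyro` package added above.
from dataclasses import dataclass, field

import tyro


@dataclass
class Training:
    steps: int = 500
    """Number of training steps."""


@dataclass
class Config:
    training: Training = field(default_factory=Training)


if __name__ == "__main__":
    # tyro derives flags such as `--training.steps` from the annotations
    # and returns a typed Config instance (no manual argparse wiring).
    config = tyro.cli(Config)
    print(config.training.steps)
```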

docs/extension.md

+41 −13

@@ -35,24 +35,52 @@ This is an ongoing effort, and the level of grouping is subject to change.
 
 
 ### Extending `JobConfig`
-[`JobConfig`](../torchtitan/config_manager.py) provides an argument `--experimental.custom_args_module`. When specified, `JobConfig` attempts to import the module provided by the argument. The imported module should contain exactly one public function. `JobConfig` executes this public function, passing its own argparser as an argument. This allows you to extend `JobConfig` with custom functionality.
 
-Suppose you want to add a custom argument `--custom_args.how-is-your-day` to `JobConfig`. You can create a Python module (e.g., `custom_args.py`) with a single public function and put it to `torchtitan/experiments/your_folder/`:
+[`JobConfig`](../torchtitan/config_manager.py) supports custom extension through the `--experimental.custom_args_module` flag.
+This lets you define a custom module that extends `JobConfig` with additional fields.
 
-```
-import argparse
+When specified, your custom `JobConfig` is merged with the default:
+
+- If a field exists in both, the custom config's value replaces the default.
+- Fields unique to either config are retained.
+
+#### Example
+
+To add a custom `custom_args` section, define your own `JobConfig`:
+
+```python
+# torchtitan/experiments/your_folder/custom_args.py
+from dataclasses import dataclass, field
+
+@dataclass
+class CustomArgs:
+    how_is_your_day: str = "good"
+    """Just an example."""
 
+@dataclass
+class Training:
+    steps: int = 500
+    """Replaces the default value"""
 
-def extend_parser(parser: argparse.ArgumentParser) -> None:
-    parser.add_argument(
-        "--custom_args.how-is-your-day",
-        type=str,
-        default="good",
-        help="Just an example.",
-    )
+    my_mini_steps: int = 10000
+    """New field is added"""
+
+    ...  # Original fields are preserved
+
+@dataclass
+class JobConfig:
+    custom_args: CustomArgs = field(default_factory=CustomArgs)
+    training: Training = field(default_factory=Training)
 ```
 
-To utilize the custom argument, specify the following arguments when running the training script:
+Then run your script with:
+
+```bash
+--experimental.custom_args_module=torchtitan.experiments.your_folder.custom_args
 ```
---experimental.custom_args_module=torchtitan.experiments.your_folder.custom_args --custom_args.how-is-your-day=wonderful
+
+Or specify it in your `.toml` config:
+
+```toml
+[experimental]
+custom_args_module = "torchtitan.experiments.your_folder.custom_args"
 ```
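
For orientation, here is a hedged sketch of how the merged config surfaces at runtime, based on the documentation above (assuming `ConfigManager.parse_args` returns the merged, typed `JobConfig`; values follow the example module):

```python
# Hypothetical usage sketch derived from the docs above; not code from this commit.
from torchtitan.config_manager import ConfigManager

config_manager = ConfigManager()
config = config_manager.parse_args(
    ["--experimental.custom_args_module=torchtitan.experiments.your_folder.custom_args"]
)

print(config.custom_args.how_is_your_day)  # "good"  (new section from the custom config)
print(config.training.steps)               # 500    (custom value replaces the default)
print(config.training.my_mini_steps)       # 10000  (new field added alongside the originals)
```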

scripts/estimate/estimation.py

+3 −3

@@ -18,7 +18,7 @@
 from torchtitan.components.ft import init_ft_manager
 from torchtitan.components.lr_scheduler import build_lr_schedulers
 from torchtitan.components.optimizer import build_optimizers
-from torchtitan.config_manager import JobConfig
+from torchtitan.config_manager import ConfigManager, JobConfig
 from torchtitan.distributed import ParallelDims, utils as dist_utils
 from torchtitan.protocols.model_converter import build_model_converters
 from torchtitan.protocols.train_spec import get_train_spec

@@ -190,8 +190,8 @@ def estimate_memory(job_config: JobConfig):
 
 
 if __name__ == "__main__":
-    config = JobConfig()
-    config.parse_args()
+    config_manager = ConfigManager()
+    config = config_manager.parse_args()
     try:
         estimate_memory(config)
     finally:
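
The same call-site migration recurs in the remaining files of this commit: construct a `ConfigManager` and use the `JobConfig` returned by `parse_args`, rather than mutating a `JobConfig` in place. A minimal sketch of the recurring pattern:

```python
# Migration pattern repeated across the scripts in this commit (sketch only).
from torchtitan.config_manager import ConfigManager

# Before: config = JobConfig(); config.parse_args()   # parsed/mutated in place
# After:  parse_args returns the populated, typed JobConfig.
config_manager = ConfigManager()
config = config_manager.parse_args()  # optionally pass an explicit list of CLI args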

scripts/generate/test_generate.py

+8 −9

@@ -26,7 +26,7 @@
 )
 from torchtitan.components.metrics import build_device_memory_monitor
 
-from torchtitan.config_manager import JobConfig
+from torchtitan.config_manager import ConfigManager
 from torchtitan.distributed import ParallelDims, utils as dist_utils
 from torchtitan.protocols.train_spec import get_train_spec
 from torchtitan.tools import utils

@@ -85,9 +85,8 @@ def test_generate(
     color = utils.Color
 
     # Load configuration from toml file
-    job_config = JobConfig()
-    job_config.parse_args([f"--job.config_file={config_path}"])
-    job_config._validate_config()
+    config_manager = ConfigManager()
+    config = config_manager.parse_args([f"--job.config_file={config_path}"])
 
     if len(args.prompt) == 0:
         logger.warning(

@@ -100,16 +99,16 @@
     device_module.set_device(device)
     device_memory_monitor = build_device_memory_monitor()
 
-    train_spec = get_train_spec(job_config.model.name)
+    train_spec = get_train_spec(config.model.name)
 
     logger.info(f"World Size: {world_size}, Local Rank: {local_rank} on {device}")
 
     # Tokenizer setup
-    tokenizer = train_spec.build_tokenizer_fn(job_config)
+    tokenizer = train_spec.build_tokenizer_fn(config)
 
     model_cls = train_spec.cls
-    model_args = train_spec.config[job_config.model.flavor]
-    model_args.update_from_config(job_config, tokenizer)
+    model_args = train_spec.config[config.model.flavor]
+    model_args.update_from_config(config, tokenizer)
 
     init_device = "meta" if world_size > 1 else device
     with torch.device(init_device):

@@ -119,7 +118,7 @@
     world_mesh = None
     # Init distributed env
     if world_size > 1:
-        dist_utils.init_distributed(job_config)
+        dist_utils.init_distributed(config)
         parallel_dims = ParallelDims(
             dp_replicate=1,
             dp_shard=-1,

tests/assets/argparser_example.py

−16 (the previous example in this file was deleted) / +32 (replaced by the content below)

@@ -0,0 +1,32 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from dataclasses import dataclass, field
+
+
+@dataclass
+class CustomArgs:
+    how_is_your_day: str = "good"
+    """Just an example helptext"""
+
+    num_days: int = 7
+    """Number of days in a week"""
+
+
+@dataclass
+class Training:
+    steps: int = 99
+    my_custom_steps: int = 32
+
+
+@dataclass
+class JobConfig:
+    """
+    This is an example of how to extend the tyro parser with custom config classes.
+    """
+
+    custom_args: CustomArgs = field(default_factory=CustomArgs)
+    training: Training = field(default_factory=Training)
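
A test exercising this asset would presumably load it through the `--experimental.custom_args_module` flag documented above. A hedged sketch (the module path `tests.assets.argparser_example` is an assumption based on the filename shown in this diff):

```python
# Hypothetical test sketch; module path and exact assertions are assumptions.
from torchtitan.config_manager import ConfigManager

config_manager = ConfigManager()
config = config_manager.parse_args(
    ["--experimental.custom_args_module=tests.assets.argparser_example"]
)

assert config.custom_args.how_is_your_day == "good"
assert config.custom_args.num_days == 7
assert config.training.steps == 99  # the example's value overrides the default
```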

tests/unit_tests/test_dataset_checkpointing.py

+3 −3

@@ -5,7 +5,7 @@
 # LICENSE file in the root directory of this source tree.
 
 import torch
-from torchtitan.config_manager import JobConfig
+from torchtitan.config_manager import ConfigManager
 from torchtitan.datasets.hf_datasets import build_hf_dataloader
 from torchtitan.datasets.tokenizer.tiktoken import TikTokenizer
 

@@ -36,8 +36,8 @@ def test_c4_resumption(self):
 
     def _build_dataloader(self, dataset_name, batch_size, seq_len, world_size, rank):
         tokenizer = TikTokenizer("./tests/assets/test_tiktoken.model")
-        config = JobConfig()
-        config.parse_args(
+        config_manager = ConfigManager()
+        config = config_manager.parse_args(
             [
                 "--training.dataset",
                 dataset_name,
