Jiayi-Pan · Jiayi-Pan · Jan 23, 2025 · Jan 23, 2025
diff --git a/README.md b/README.md
@@ -67,8 +67,8 @@ PYTHONUNBUFFERE=1 python3 -m verl.trainer.main_ppo \
  trainer.default_hdfs_dir=null \
  trainer.n_gpus_per_node=$N_GPUS \
  trainer.nnodes=1 \
- trainer.save_freq=30 \
- trainer.test_freq=10 \
+ trainer.save_freq=100 \
+ trainer.test_freq=100 \
  trainer.project_name=TinyZero \
  trainer.experiment_name=$EXPERIMENT_NAME \
  trainer.total_epochs=15 2>&1 | tee verl_demo.log
@@ -109,8 +109,8 @@ python3 -m verl.trainer.main_ppo \
  trainer.default_hdfs_dir=null \
  trainer.n_gpus_per_node=$N_GPUS \
  trainer.nnodes=1 \
- trainer.save_freq=30 \
- trainer.test_freq=10 \
+ trainer.save_freq=100 \
+ trainer.test_freq=100 \
  trainer.project_name=TinyZero \
  trainer.experiment_name=$EXPERIMENT_NAME \
  trainer.total_epochs=15 2>&1 | tee verl_demo.log
@@ -151,8 +151,8 @@ python3 -m verl.trainer.main_ppo \
  trainer.default_hdfs_dir=null \
  trainer.n_gpus_per_node=$N_GPUS \
  trainer.nnodes=1 \
- trainer.save_freq=30 \
- trainer.test_freq=10 \
+ trainer.save_freq=100 \
+ trainer.test_freq=100 \
  trainer.project_name=TinyZero \
  trainer.experiment_name=$EXPERIMENT_NAME \
  trainer.total_epochs=15 2>&1 | tee verl_demo.log

diff --git a/examples/data_preprocess/countdown.py b/examples/data_preprocess/countdown.py
@@ -50,14 +50,37 @@ def gen_dataset(
 
     return samples
 
-def make_prefix(dp):
+def make_prefix(dp, template_type='base'):
+    """Build the prompt prefix for one countdown sample.
+
+    Args:
+        dp: Sample mapping; its 'target' and 'nums' entries are
+            interpolated into the prompt.
+        template_type: Name of the prompt template. Only 'base' is
+            implemented; 'qwen-chat' is reserved but not written yet.
+
+    Returns:
+        The prompt string, which ends with an opening <think> tag.
+
+    Raises:
+        NotImplementedError: If template_type is 'qwen-chat'.
+        ValueError: If template_type is any other unrecognised name.
+    """
     target = dp['target']
     numbers = dp['nums']
-
-    prefix = f"""A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer.
+    if template_type == 'base':
+        # Plain-text conversation prompt; works for any base model.
+        prefix = f"""A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer.
 User: Using the numbers {numbers}, create an equation that equals {target}. You can use basic arithmetic operations (+, -, *, /) and each number can only be used once. Show your work in <think> </think> tags. And return the final answer in <answer> </answer> tags, for example <answer> (1 + 2) / 3 </answer>.
 Assistant: Let me solve this step by step.
 <think>"""
+    elif template_type == 'qwen-chat':
+        # TODO: write a prompt for Qwen Instruct models.
+        raise NotImplementedError("Qwen-chat template WIP")
+    else:
+        # Reject unknown names here; falling through to `return prefix`
+        # would raise an UnboundLocalError instead.
+        raise ValueError(f"Unknown template_type: {template_type!r}")
     return prefix
 
 
@@ -71,7 +94,8 @@ def make_prefix(dp):
     parser.add_argument('--min_number', type=int, default=1)
     parser.add_argument('--max_number', type=int, default=100)
     parser.add_argument('--train_size', type=int, default=327680)
-    parser.add_argument('--test_size', type=int, default=4096)
+    parser.add_argument('--test_size', type=int, default=1024)
+    parser.add_argument('--template_type', type=str, default='base')
 
     args = parser.parse_args()
 
@@ -87,7 +111,7 @@ def make_prefix(dp):
 
     def make_map_fn(split):
         def process_fn(example, idx):
-            question = make_prefix(example)
+            question = make_prefix(example, template_type=args.template_type)
             solution = {
                 "target": example['target'],
                 "numbers": example['nums']