Option to resume conversion job with no other args

turboderp · turboderp · commit 6030517a6fe6 · 2024-06-08T22:15:41.000+02:00
diff --git a/convert.py b/convert.py
@@ -14,6 +14,7 @@
 parser = argparse.ArgumentParser(description = "Convert model to ExLlamaV2")
 parser.add_argument("-i", "--in_dir", type = str, help = "Input directory", default = "")
 parser.add_argument("-o", "--out_dir", type = str, help = "Output (working) directory")
+parser.add_argument("-res", "--resume", action = "store_true", help = "Resume job from specified output directory (without specifying other options)")
 parser.add_argument("-nr", "--no_resume", action = "store_true", help = "Do not resume an interrupted job (deletes all files in the output directory)")
 parser.add_argument("-cf", "--compile_full", type = str, help = "Output folder for compiled model with all config/tokenizer files")
 parser.add_argument("-c", "--cal_dataset", type = str, help = "Calibration dataset (.parquet file)")
@@ -37,12 +38,17 @@
 
 # Check some args
 
-if not args.in_dir:
-    print(" ## Please specify input model directory (-i, --in_dir)")
+resuming = False
+if args.out_dir:
+    if not args.no_resume:
+        if os.path.exists(os.path.join(args.out_dir, "job_new.json")):
+            resuming = True
+else:
+    print(" ## Please specify output/working directory (-o, --out_dir)")
     sys.exit()
 
-if not args.out_dir:
-    print(" ## Please specify output/working directory (-o, --out_dir)")
+if not args.in_dir and not resuming:
+    print(" ## Please specify input model directory (-i, --in_dir)")
     sys.exit()
 
 if args.length > 2048 or args.measurement_length > 2048:
@@ -63,17 +69,6 @@
     print(f" ## Error: Directory not found: {args.out_dir}")
     sys.exit()
 
-# Create config
-
-config = ExLlamaV2Config()
-config.model_dir = args.in_dir
-config.qkv_embed = False
-config.prepare()
-
-# Tokenizer
-
-tokenizer = ExLlamaV2Tokenizer(config)
-
 # Create job
 
 def save_job():
@@ -133,7 +128,8 @@ def save_job():
 
 else:
     print(f" -- Resuming job")
-    print(f" !! Note: Overriding options with settings from existing job")
+    if args.in_dir:
+        print(f" !! Note: Overriding options with settings from existing job")
 
     with open(job_file, "r", encoding = "utf8") as f:
         resume_job = json.load(f)
@@ -146,6 +142,10 @@ def save_job():
         print(" ** Error: Corrupted job")
         sys.exit()
 
+    if job["progress"] == "finished":
+        print(" !! Job is already finished")
+        sys.exit()
+
 # Feedback
 
 print(f" -- Input: {job['in_dir']}")
@@ -161,7 +161,6 @@ def save_job():
     print(f" -- Measurement will be saved to {job['output_measurement']}")
     print(f" !! Conversion script will end after measurement pass")
 
-
 if job['rope_scale']: print(f" -- RoPE scale: {job['rope_scale']:.2f}")
 if job['rope_alpha']: print(f" -- RoPE alpha: {job['rope_alpha']:.2f}")
 
@@ -180,6 +179,17 @@ def save_job():
 if not os.path.exists(out_tensor_dir):
     os.makedirs(out_tensor_dir)
 
+# Create config
+
+config = ExLlamaV2Config()
+config.model_dir = job['in_dir']
+config.qkv_embed = False
+config.prepare()
+
+# Tokenizer
+
+tokenizer = ExLlamaV2Tokenizer(config)
+
 # Set scaling for input model
 
 if job["rope_scale"] is not None: config.scale_pos_emb = job["rope_scale"]
diff --git a/doc/convert.md b/doc/convert.md
@@ -4,7 +4,7 @@
 
 Here are the arguments to `convert.py`:
 
-- **-i / --in_dir *directory***: _(required)_ The source model to convert, in HF format (FP16). The directory should 
+- **-i / --in_dir *directory***: _(required if not resuming)_ The source model to convert, in HF format (FP16). The directory should 
 contain at least a `config.json` file, a `tokenizer.model` file and one or more `.safetensors` files containing weights.
 If there are multiple weights files, they will all be indexed and searched for the necessary tensors, so sharded models are 
 supported.
@@ -132,6 +132,13 @@ python convert.py \
     -b 4.5
 ```
 
+If the working `-o` directory already contains a job (a `job_new.json` file) and you do not specify `-nr`,
+that existing job will be resumed. You can resume a job with no other arguments:
+
+```sh
+python convert.py -o /mnt/temp/exl2/
+```
+
 ### Notes
 
 - If the conversion script seems to stop on the "Solving..." step, give it a moment. It's attempting to find the