Version 1.0.1

ZFTurbo · Jun 20, 2023 · 358cc2f · 358cc2f
1 parent 5a13270
commit 358cc2f
Show file tree

Hide file tree

Showing 3 changed files with 260 additions and 80 deletions.
diff --git a/README.md b/README.md
@@ -10,8 +10,21 @@ Model based on [Demucs4](https://github.com/facebookresearch/demucs), [MDX](http
 
 With this command audios with names "mixture1.wav" and "mixture2.wav" will be processed and results will be stored in `./results/` folder in WAV format.
 
-* **Note 1**: If you have not enough GPU memory you can use CPU (`--cpu`), but it will be slow. Additionally you can use single ONNX (`--single_onnx`), but it will decrease quality a little bit. Also reduce of chunk size can help (`--chunk_size 200000`).
-* **Note 2**: In current revision code requires less GPU memory, but it process multiple files slower. If you want old fast method use argument `--large_gpu`. It will require > 11 GB of GPU memory, but will work faster.  
+### All available keys
+* `--input_audio` - input audio location. You can provide multiple files at once. **Required**
+* `--output_folder` - output audio folder. **Required**
+* `--cpu` - choose CPU instead of GPU for processing. Can be very slow.
+* `--overlap_large` - overlap of split audio for light models. Closer to 1.0 - slower, but better quality. Default: 0.6.
+* `--overlap_small` - overlap of split audio for heavy models. Closer to 1.0 - slower, but better quality. Default: 0.5.
+* `--single_onnx` - only use a single ONNX model for vocals. Can be useful if you do not have enough GPU memory.
+* `--chunk_size` - chunk size for ONNX models. Set lower to reduce GPU memory consumption. Default: 1000000.
+* `--large_gpu` - it will store all models on GPU for faster processing of multiple audio files. Requires at least 11 GB of free GPU memory.
+* `--use_kim_model_1` - use the first version of the Kim model (as used in the contest).
+* `--only_vocals` - only create vocals and instrumental. Skip bass, drums, other. Processing will be faster.
+
+### Notes
+* If you do not have enough GPU memory, you can use the CPU (`--cpu`), but it will be slow. Additionally, you can use a single ONNX model (`--single_onnx`), but this will decrease quality a little. Reducing the chunk size can also help (`--chunk_size 200000`).
+* In the current revision the code requires less GPU memory, but it processes multiple files more slowly. If you want the old, faster method, use the `--large_gpu` argument. It requires more than 11 GB of GPU memory, but works faster.  
 
 ## Quality comparison
 
@@ -36,6 +49,14 @@ Quality comparison with best separation models performed on [MultiSong Dataset](
 * GUI supports Drag & Drop of multiple files.
 * Progress bar available.
 
+## Changes
+
+### v1.0.1
+* GUI settings updated; you can now control all available options
+* Kim vocal model updated from version 1 to version 2; you can still use version 1 with the `--use_kim_model_1` parameter
+* Added the option to generate only the vocals/instrumental pair if you don't need the bass, drums and other stems. Use the `--only_vocals` parameter
+* The standalone program was updated and is now smaller. The GUI will download torch/cuda on the first run. 
+
 ## Citation
 
 * [arxiv paper](https://arxiv.org/abs/2305.07489)

diff --git a/gui.py b/gui.py
@@ -14,8 +14,10 @@
 from PyQt5.QtCore import *
 from PyQt5 import QtCore
 from PyQt5.QtWidgets import *
+from PyQt5.QtGui import *
 import sys
-from inference import predict_with_model
+from inference import predict_with_model, __VERSION__
+import torch
 
 
 root = dict()
@@ -48,7 +50,7 @@ def setupUi(self, Dialog):
         global root
 
         Dialog.setObjectName("Settings")
-        Dialog.resize(370, 180)
+        Dialog.resize(370, 320)
 
         self.checkbox_cpu = QCheckBox("Use CPU instead of GPU?", Dialog)
         self.checkbox_cpu.move(30, 10)
@@ -62,14 +64,70 @@ def setupUi(self, Dialog):
         if root['single_onnx']:
             self.checkbox_single_onnx.setChecked(True)
 
+        self.checkbox_large_gpu = QCheckBox("Use large GPU?", Dialog)
+        self.checkbox_large_gpu.move(30, 70)
+        self.checkbox_large_gpu.resize(320, 40)
+        if root['large_gpu']:
+            self.checkbox_large_gpu.setChecked(True)
+
+        self.checkbox_kim_1 = QCheckBox("Use old Kim Vocal model?", Dialog)
+        self.checkbox_kim_1.move(30, 100)
+        self.checkbox_kim_1.resize(320, 40)
+        if root['use_kim_model_1']:
+            self.checkbox_kim_1.setChecked(True)
+
+        self.checkbox_only_vocals = QCheckBox("Generate only vocals/instrumental?", Dialog)
+        self.checkbox_only_vocals.move(30, 130)
+        self.checkbox_only_vocals.resize(320, 40)
+        if root['only_vocals']:
+            self.checkbox_only_vocals.setChecked(True)
+
+        self.chunk_size_label = QLabel(Dialog)
+        self.chunk_size_label.setText('Chunk size')
+        self.chunk_size_label.move(30, 160)
+        self.chunk_size_label.resize(320, 40)
+
+        self.chunk_size_valid = QIntValidator(bottom=100000, top=10000000)
+        self.chunk_size = QLineEdit(Dialog)
+        self.chunk_size.setFixedWidth(140)
+        self.chunk_size.move(130, 170)
+        self.chunk_size.setValidator(self.chunk_size_valid)
+        self.chunk_size.setText(str(root['chunk_size']))
+
+        self.overlap_large_label = QLabel(Dialog)
+        self.overlap_large_label.setText('Overlap large')
+        self.overlap_large_label.move(30, 190)
+        self.overlap_large_label.resize(320, 40)
+
+        self.overlap_large_valid = QDoubleValidator(bottom=0.001, top=0.999, decimals=10)
+        self.overlap_large_valid.setNotation(QDoubleValidator.Notation.StandardNotation)
+        self.overlap_large = QLineEdit(Dialog)
+        self.overlap_large.setFixedWidth(140)
+        self.overlap_large.move(130, 200)
+        self.overlap_large.setValidator(self.overlap_large_valid)
+        self.overlap_large.setText(str(root['overlap_large']))
+
+        self.overlap_small_label = QLabel(Dialog)
+        self.overlap_small_label.setText('Overlap small')
+        self.overlap_small_label.move(30, 220)
+        self.overlap_small_label.resize(320, 40)
+
+        self.overlap_small_valid = QDoubleValidator(0.001, 0.999, 10)
+        self.overlap_small_valid.setNotation(QDoubleValidator.Notation.StandardNotation)
+        self.overlap_small = QLineEdit(Dialog)
+        self.overlap_small.setFixedWidth(140)
+        self.overlap_small.move(130, 230)
+        self.overlap_small.setValidator(self.overlap_small_valid)
+        self.overlap_small.setText(str(root['overlap_small']))
+
         self.pushButton_save = QPushButton(Dialog)
         self.pushButton_save.setObjectName("pushButton_save")
-        self.pushButton_save.move(30, 120)
+        self.pushButton_save.move(30, 280)
         self.pushButton_save.resize(150, 35)
 
         self.pushButton_cancel = QPushButton(Dialog)
         self.pushButton_cancel.setObjectName("pushButton_cancel")
-        self.pushButton_cancel.move(190, 120)
+        self.pushButton_cancel.move(190, 280)
         self.pushButton_cancel.resize(150, 35)
 
         self.retranslateUi(Dialog)
@@ -91,6 +149,32 @@ def return_save(self):
         # print("save")
         root['cpu'] = self.checkbox_cpu.isChecked()
         root['single_onnx'] = self.checkbox_single_onnx.isChecked()
+        root['large_gpu'] = self.checkbox_large_gpu.isChecked()
+        root['use_kim_model_1'] = self.checkbox_kim_1.isChecked()
+        root['only_vocals'] = self.checkbox_only_vocals.isChecked()
+
+        chunk_size_text = self.chunk_size.text()
+        state = self.chunk_size_valid.validate(chunk_size_text, 0)
+        if state[0] == QValidator.State.Acceptable:
+            root['chunk_size'] = chunk_size_text
+
+        overlap_large_text = self.overlap_large.text()
+        # locale problems... it wants comma instead of dot
+        if 0:
+            state = self.overlap_large_valid.validate(overlap_large_text, 0)
+            if state[0] == QValidator.State.Acceptable:
+                root['overlap_large'] = float(overlap_large_text)
+        else:
+            root['overlap_large'] = float(overlap_large_text)
+
+        overlap_small_text = self.overlap_small.text()
+        if 0:
+            state = self.overlap_small_valid.validate(overlap_small_text, 0)
+            if state[0] == QValidator.State.Acceptable:
+                root['overlap_small'] = float(overlap_small_text)
+        else:
+            root['overlap_small'] = float(overlap_small_text)
+
         self.Dialog.close()
 
     def return_cancel(self):
@@ -144,8 +228,12 @@ def execute_long_task(self):
             'output_folder': root['output_folder'],
             'cpu': root['cpu'],
             'single_onnx': root['single_onnx'],
-            'overlap_large': 0.6,
-            'overlap_small': 0.5,
+            'large_gpu': root['large_gpu'],
+            'chunk_size': root['chunk_size'],
+            'overlap_large': root['overlap_large'],
+            'overlap_small': root['overlap_small'],
+            'use_kim_model_1': root['use_kim_model_1'],
+            'only_vocals': root['only_vocals'],
         }
 
         self.update_progress(0)
@@ -220,7 +308,23 @@ def create_dialog():
     root['input_files'] = []
     root['output_folder'] = os.path.dirname(os.path.abspath(__file__)) + '/results/'
     root['cpu'] = False
+    root['large_gpu'] = False
     root['single_onnx'] = False
+    root['chunk_size'] = 1000000
+    root['overlap_large'] = 0.6
+    root['overlap_small'] = 0.5
+    root['use_kim_model_1'] = False
+    root['only_vocals'] = False
+
+    t = torch.cuda.get_device_properties(0).total_memory / (1024 * 1024 * 1024)
+    if t > 11.5:
+        print('You have enough GPU memory ({:.2f} GB), so we set fast GPU mode. You can change in settings!'.format(t))
+        root['large_gpu'] = True
+        root['single_onnx'] = False
+    elif t < 8:
+        root['large_gpu'] = False
+        root['single_onnx'] = True
+        root['chunk_size'] = 500000
 
     button_select_input_files = QPushButton(w)
     button_select_input_files.setText("Input audio files")
@@ -303,4 +407,5 @@ def create_dialog():
 
 
 if __name__ == '__main__':
+    print('Version: {}'.format(__VERSION__))
     create_dialog()