-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparams.yaml
81 lines (80 loc) · 3.76 KB
/
params.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# Training-loop settings.
training:
  # Batch sizes, one entry per training dataset.
  # NOTE(review): the earlier note here listed three sources
  # ([strong, weak, unlabel]) but only two sizes are given; confirm which
  # datasets the two entries map to.
  batch_size: [24, 24]
  batch_size_val: 64
  n_epochs_warmup: 50  # num epochs used for exponential warmup
  num_workers: 8  # change according to your cpu
  n_epochs: 100  # max num epochs
  # Same as n_epochs by default, so no early stopping is used.
  early_stop_patience: 100
  accumulate_batches: 1
  gradient_clip: 0.0  # 0 means no gradient clipping
  # Length of the median filter used to smooth predictions in inference
  # (number of output frames).
  median_window: 7
  # Thresholds used to compute f1 intersection in validation.
  val_thresholds: [0.5]
  # Number of thresholds used to compute psds in test.
  n_test_thresholds: 50
  # PyTorch Lightning backend: ddp, dp or none.
  # NOTE(review): a bare `None` is loaded as the string "None" by YAML
  # parsers, while `null` is the YAML null; confirm which form the consumer
  # expects before disabling the backend.
  backend: dp
  validation_interval: 1  # perform validation every X epochs (default 1)
  seed: 42
  deterministic: false
  precision: 32
  # Soft mixup gives the ratio of the mix to the labels; hard mixup gives
  # a 1 to every label present.
  mixup: soft
  obj_metric_strong_type: intersection
  enable_progress_bar: true
  weak_split: 0.9  # split weak data in train and validation
# Input normalization settings.
scaler:
  # Instance or dataset-wide statistic.
  statistic: instance
  # Normalization type: minmax, standard or mean.
  normtype: minmax
  # Dimensions over which normalization is applied.
  dims:
    - 1
    - 2
  # Path to the scaler checkpoint.
  savepath: './scaler.ckpt'
# Dataset locations and audio settings. Change the paths below if your
# data lives somewhere else.
data:
  # NOTE: if you only have 44kHz data, synth_folder is the path where the
  # resampled data will be placed.
  synth_folder: '/mnt/d/DESED_dataset/dcase_synth/audio/train/synthetic21_train/soundscapes_16k/'
  synth_tsv: '/mnt/d/DESED_dataset/dcase_synth/metadata/train/synthetic21_train/soundscapes.tsv'
  strong_folder: '/mnt/d/DESED_dataset/audio/train/strong_label_real_16k/'
  strong_tsv: '/mnt/d/DESED_dataset/metadata/train/audioset_strong.tsv'
  weak_folder: '/mnt/d/DESED_dataset/audio/train/weak_16k/'
  weak_tsv: '/mnt/d/DESED_dataset/metadata/train/weak.tsv'
  unlabeled_folder: '/mnt/d/DESED_dataset/audio/train/unlabel_in_domain_16k/'
  unlabeled_tsv: '/mnt/d/DESED_dataset/metadata/train/unlabel_in_domain.tsv'

  # Disabled alternative values for the synth_val_* keys (paths under
  # dcase_synth/.../synthetic21_validation):
  # synth_val_folder: "/mnt/d/DESED_dataset/dcase_synth/audio/validation/synthetic21_validation/soundscapes_16k/"
  # synth_val_folder_44k: "/mnt/d/DESED_dataset/dcase_synth/audio/validation/synthetic21_validation/soundscapes/"
  # synth_val_tsv: "/mnt/d/DESED_dataset/dcase_synth/metadata/validation/synthetic21_validation/soundscapes.tsv"
  # synth_val_dur: "/mnt/d/DESED_dataset/dcase_synth/metadata/validation/synthetic21_validation/durations.tsv"
  # Active synth_val_* values point at the validation_16k folder and the
  # validation metadata files.
  synth_val_folder: '/mnt/d/DESED_dataset/audio/validation/validation_16k/'
  synth_val_tsv: '/mnt/d/DESED_dataset/metadata/validation/validation.tsv'
  synth_val_dur: '/mnt/d/DESED_dataset/metadata/validation/validation_durations.tsv'

  # Disabled alternative values for the test_* keys (the validation set):
  # test_folder: "/mnt/d/DESED_dataset/audio/validation/validation_16k/"
  # test_tsv: "/mnt/d/DESED_dataset/metadata/validation/validation.tsv"
  # test_dur: "/mnt/d/DESED_dataset/metadata/validation/validation_durations.tsv"
  # Active test_* values point at the public test set.
  test_folder: '/mnt/d/DESED_dataset/audio/test/public/'
  test_tsv: '/mnt/d/DESED_dataset/metadata/test/public.tsv'
  test_dur: '/mnt/d/DESED_dataset/metadata/test/test_durations.tsv'

  # NOTE(review): unlike the other folder paths, this one has no trailing
  # slash; confirm the consumer does not depend on it.
  eval_folder: '/mnt/d/DESED_dataset/audio/eval21_16k'

  # NOTE(review): presumably the maximum clip length in seconds; confirm.
  audio_max_len: 10
  # NOTE(review): presumably the sampling rate in Hz; it has the same value
  # as feats.sample_rate.
  fs: 16000
  net_subsample: 1
# Optimizer settings.
opt:
  # Learning rate (equal to 0.001).
  lr: 1.0e-3
# Feature-extraction settings.
feats:
  # NOTE(review): presumably must match `fs` in the data settings (both are
  # 16000 here); confirm against the consumer.
  sample_rate: 16000

  # Window, filter and hop sizes; n_window and n_filters share the same
  # value here.
  n_window: 2048
  n_filters: 2048
  hop_length: 256

  # Mel settings; f_max is half of sample_rate.
  n_mels: 128
  f_min: 0
  f_max: 8000
# Network architecture settings.
net:
  dropout: 0.5
  n_layer_RNN: 2
  n_in_channel: 1
  nclass: 10
  n_RNN_cell: 128
  activation: Relu
  rnn_type: BGRU
  # The five lists below each have seven entries.
  # NOTE(review): presumably one entry per convolutional layer, so their
  # lengths should stay equal; confirm against the model code.
  kernel_size: [3, 3, 3, 3, 3, 3, 3]
  padding: [1, 1, 1, 1, 1, 1, 1]
  stride: [1, 1, 1, 1, 1, 1, 1]
  nb_filters: [16, 32, 64, 128, 128, 128, 128]
  pooling: [[1, 2], [1, 2], [1, 2], [1, 2], [1, 2], [1, 2], [1, 2]]
  dropout_recurrent: 0
  attention: true