Skip to content

Commit 1b444d8

Browse files
author
Caroline Chen
committed
Add iemocap variants (#2778)
Summary: add ability to load only improvised or only scripted utterances. Pull Request resolved: #2778 Reviewed By: nateanl Differential Revision: D40511865 Pulled By: carolineechen fbshipit-source-id: e1fe3908ac2aa306ad30c242ddd25762b2268539
1 parent ee68a98 commit 1b444d8

File tree

2 files changed

+60
-29
lines changed

2 files changed

+60
-29
lines changed

test/torchaudio_unittest/datasets/iemocap_test.py

+43-22
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from torchaudio.datasets import iemocap
55
from torchaudio_unittest.common_utils import get_whitenoise, save_wav, TempDirMixin, TorchaudioTestCase
66

7-
LABELS = ["neu", "hap", "ang", "sad", "exc", "xxx"]
7+
LABELS = ["neu", "hap", "ang", "sad", "exc", "fru", "xxx"]
88
SAMPLE_RATE = 16000
99

1010

@@ -21,8 +21,6 @@ def _save_wav(filepath: str, seed: int):
2121

2222
def _save_label(label_folder: str, filename: str, wav_stem: str):
2323
label = random.choice(LABELS)
24-
if label == "exc":
25-
label = "hap"
2624
line = f"[xxx]\t{wav_stem}\t{label}\t[yyy]"
2725
filepath = os.path.join(label_folder, filename)
2826

@@ -40,19 +38,22 @@ def _get_samples(dataset_dir: str, session: int):
4038
os.makedirs(wav_folder, exist_ok=True)
4139
os.makedirs(label_folder, exist_ok=True)
4240

43-
samples = []
4441
wav_stems = []
4542
for i in range(5):
4643
for g in ["F", "M"]:
47-
speaker = f"Ses0{session}{g}"
48-
subfolder = f"{speaker}_impro0{i}"
49-
subfolder_path = os.path.join(wav_folder, subfolder)
50-
os.makedirs(subfolder_path, exist_ok=True)
51-
52-
for j in range(5):
53-
wav_stem = f"{subfolder}_F00{j}"
54-
wav_stems.append(wav_stem)
55-
44+
for utt in ["impro", "script"]:
45+
speaker = f"Ses0{session}{g}"
46+
subfolder = f"{speaker}_{utt}0{i}"
47+
subfolder_path = os.path.join(wav_folder, subfolder)
48+
os.makedirs(subfolder_path, exist_ok=True)
49+
50+
for j in range(5):
51+
wav_stem = f"{subfolder}_F00{j}"
52+
wav_stems.append(wav_stem)
53+
54+
all_samples = []
55+
impro_samples = []
56+
script_samples = []
5657
wav_stems = sorted(wav_stems)
5758
for wav_stem in wav_stems:
5859
subfolder = wav_stem[:-5]
@@ -64,31 +65,43 @@ def _get_samples(dataset_dir: str, session: int):
6465
if label == "xxx":
6566
continue
6667
sample = (wav, SAMPLE_RATE, wav_stem, label, speaker)
67-
samples.append(sample)
68+
all_samples.append(sample)
69+
70+
if "impro" in subfolder:
71+
impro_samples.append(sample)
72+
else:
73+
script_samples.append(sample)
6874

69-
return samples
75+
return all_samples, script_samples, impro_samples
7076

7177

7278
def get_mock_dataset(dataset_dir: str):
7379
os.makedirs(dataset_dir, exist_ok=True)
7480

75-
samples = []
81+
all_samples = []
82+
script_samples = []
83+
impro_samples = []
7684
for session in range(1, 4):
77-
samples += _get_samples(dataset_dir, session)
78-
return samples
85+
samples = _get_samples(dataset_dir, session)
86+
all_samples += samples[0]
87+
script_samples += samples[1]
88+
impro_samples += samples[2]
89+
return all_samples, script_samples, impro_samples
7990

8091

8192
class TestIemocap(TempDirMixin, TorchaudioTestCase):
8293
root_dir = None
8394
backend = "default"
8495

85-
samples = []
96+
all_samples = []
97+
script_samples = []
98+
impro_samples = []
8699

87100
@classmethod
88101
def setUpClass(cls):
89102
cls.root_dir = cls.get_base_temp_dir()
90103
dataset_dir = os.path.join(cls.root_dir, "IEMOCAP")
91-
cls.samples = get_mock_dataset(dataset_dir)
104+
cls.all_samples, cls.script_samples, cls.impro_samples = get_mock_dataset(dataset_dir)
92105

93106
def _testIEMOCAP(self, dataset, samples):
94107
num_samples = 0
@@ -98,6 +111,14 @@ def _testIEMOCAP(self, dataset, samples):
98111

99112
assert num_samples == len(samples)
100113

101-
def testIEMOCAPDataset(self):
114+
def testIEMOCAPFullDataset(self):
102115
dataset = iemocap.IEMOCAP(self.root_dir)
103-
self._testIEMOCAP(dataset, self.samples)
116+
self._testIEMOCAP(dataset, self.all_samples)
117+
118+
def testIEMOCAPScriptedDataset(self):
119+
dataset = iemocap.IEMOCAP(self.root_dir, utterance_type="scripted")
120+
self._testIEMOCAP(dataset, self.script_samples)
121+
122+
def testIEMOCAPImprovisedDataset(self):
123+
dataset = iemocap.IEMOCAP(self.root_dir, utterance_type="improvised")
124+
self._testIEMOCAP(dataset, self.impro_samples)

torchaudio/datasets/iemocap.py

+17-7
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import os
22
import re
33
from pathlib import Path
4-
from typing import Tuple, Union
4+
from typing import Optional, Tuple, Union
55

66
from torch import Tensor
77
from torch.utils.data import Dataset
@@ -28,19 +28,26 @@ class IEMOCAP(Dataset):
2828
Args:
2929
root (str or Path): Root directory where the dataset's top level directory is found
3030
sessions (Tuple[int]): Tuple of sessions (1-5) to use. (Default: ``(1, 2, 3, 4, 5)``)
31+
utterance_type (str or None, optional): Which type(s) of utterances to include in the dataset.
32+
Options: ("scripted", "improvised", ``None``). If ``None``, both scripted and improvised
33+
data are used.
3134
"""
3235

3336
def __init__(
3437
self,
3538
root: Union[str, Path],
3639
sessions: Tuple[str] = (1, 2, 3, 4, 5),
40+
utterance_type: Optional[str] = None,
3741
):
3842
root = Path(root)
3943
self._path = root / "IEMOCAP"
4044

4145
if not os.path.isdir(self._path):
4246
raise RuntimeError("Dataset not found.")
4347

48+
if utterance_type not in ["scripted", "improvised", None]:
49+
raise ValueError("utterance_type must be one of ['scripted', 'improvised', or None]")
50+
4451
all_data = []
4552
self.data = []
4653
self.mapping = {}
@@ -57,7 +64,12 @@ def __init__(
5764

5865
# add labels
5966
label_dir = session_dir / "dialog" / "EmoEvaluation"
60-
label_paths = label_dir.glob("*.txt")
67+
query = "*.txt"
68+
if utterance_type == "scripted":
69+
query = "*script*.txt"
70+
elif utterance_type == "improvised":
71+
query = "*impro*.txt"
72+
label_paths = label_dir.glob(query)
6173

6274
for label_path in label_paths:
6375
with open(label_path, "r") as f:
@@ -67,11 +79,9 @@ def __init__(
6779
line = re.split("[\t\n]", line)
6880
wav_stem = line[1]
6981
label = line[2]
70-
if label == "exc":
71-
label = "hap"
7282
if wav_stem not in all_data:
7383
continue
74-
if label not in ["neu", "hap", "ang", "sad"]:
84+
if label not in ["neu", "hap", "ang", "sad", "exc", "fru"]:
7585
continue
7686
self.mapping[wav_stem] = {}
7787
self.mapping[wav_stem]["label"] = label
@@ -99,7 +109,7 @@ def get_metadata(self, n: int) -> Tuple[str, int, str, str, str]:
99109
str:
100110
File name
101111
str:
102-
Label (one of ``"neu"``, ``"hap"``, ``"ang"``, ``"sad"``)
112+
Label (one of ``"neu"``, ``"hap"``, ``"ang"``, ``"sad"``, ``"exc"``, ``"fru"``)
103113
str:
104114
Speaker
105115
"""
@@ -125,7 +135,7 @@ def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str, str]:
125135
str:
126136
File name
127137
str:
128-
Label (one of ``"neu"``, ``"hap"``, ``"ang"``, ``"sad"``)
138+
Label (one of ``"neu"``, ``"hap"``, ``"ang"``, ``"sad"``, ``"exc"``, ``"fru"``)
129139
str:
130140
Speaker
131141
"""

0 commit comments

Comments (0)