Skip to content

Commit 5be9eab

Browse files
committed
update whisper dataset folder structure
1 parent 2d10b02 commit 5be9eab

File tree

2 files changed

+28
-2
lines changed

2 files changed

+28
-2
lines changed

script/app-mlperf-inference-mlcommons-python/customize.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -574,7 +574,7 @@ def get_run_cmd_reference(
574574
cmd = f"""{x}{env['MLC_PYTHON_BIN_WITH_PATH']}{x} reference_mlperf.py \
575575
--scenario {tmp_scenario} \
576576
--dataset_dir {x}{env['MLC_DATASET_WHISPER_PATH']}{x} \
577-
--manifest {x}{os.path.join(env['MLC_DATASET_WHISPER_PATH'], "dev-all-repack.json")}{x} \
577+
--manifest {x}{os.path.join(env['MLC_DATASET_WHISPER_PATH'], "data", "dev-all-repack.json")}{x} \
578578
--log_dir {x}{env['MLC_MLPERF_OUTPUT_DIR']}{x} \
579579
--model-path {x}{env['MLC_ML_MODEL_WHISPER_PATH']}{x}"""
580580

script/get-dataset-whisper/customize.py

Lines changed: 27 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,6 @@ def preprocess(i):
1414
if env.get('MLC_DATASET_WHISPER_PATH', '') != '':
1515
return {'return': 0}
1616

17-
print(env.get('MLC_TMP_DATASET_TYPE', ''))
1817
if env.get('MLC_TMP_DATASET_TYPE', '') == "preprocessed":
1918
env['MLC_TMP_REQUIRE_DOWNLOAD'] = "yes"
2019
else:
@@ -42,4 +41,31 @@ def postprocess(i):
4241

4342
env = i['env']
4443

44+
if env.get('MLC_TMP_DATASET_TYPE', '') != "preprocessed":
45+
cwd = env.get('MLC_OUTDIRNAME', os.getcwd())
46+
data_dir = os.path.join(cwd, 'data')
47+
env['MLC_DATASET_WHISPER_PATH'] = data_dir
48+
else:
49+
# copy files to data folder
50+
tmp_src_dir = env["MLC_DATASET_WHISPER_PATH"]
51+
tmp_dest_dir = os.path.join(tmp_src_dir, "data")
52+
53+
os.makedirs(tmp_dest_dir, exist_ok=True)
54+
55+
items_to_copy = [
56+
"LibriSpeech",
57+
"dev-all",
58+
"dev-all-repack",
59+
"dev-all-repack.json"
60+
]
61+
62+
for item in items_to_copy:
63+
src_path = os.path.join(tmp_src_dir, item)
64+
dst_path = os.path.join(tmp_dest_dir, item)
65+
if os.path.isdir(src_path):
66+
shutil.copytree(src_path, dst_path, dirs_exist_ok=True)
67+
elif os.path.isfile(src_path):
68+
shutil.copy2(src_path, dst_path)
69+
70+
4571
return {'return': 0}

0 commit comments

Comments
 (0)