keras-team · jaytiwarihub · Jan 3, 2026 · Jan 3, 2026 · Jan 5, 2026 · Jan 9, 2026
diff --git a/examples/audio/speaker_recognition_using_cnn.py b/examples/audio/speaker_recognition_using_cnn.py
@@ -45,22 +45,34 @@
 os.environ["KERAS_BACKEND"] = "tensorflow"
 
 import shutil
+import zipfile
 import numpy as np
-
 import tensorflow as tf
 import keras
-
 from pathlib import Path
 from IPython.display import display, Audio
 
-# Get the data from https://www.kaggle.com/kongaevans/speaker-recognition-dataset/
-# and save it to ./speaker-recognition-dataset.zip
-# then unzip it to ./16000_pcm_speeches
-"""shell
-kaggle datasets download -d kongaevans/speaker-recognition-dataset
-unzip -qq speaker-recognition-dataset.zip
-"""
-
+DATASET_ROOT = Path("16000_pcm_speeches")
+ZIP_FILE = Path("speaker-recognition-dataset.zip")
+
+# Extract the dataset on first run; skip the work if it is already on disk.
+if not DATASET_ROOT.exists():
+    if ZIP_FILE.exists():
+        print(f"Extracting {ZIP_FILE}...")
+        with zipfile.ZipFile(ZIP_FILE, "r") as zip_ref:
+            # The archive already holds a top-level 16000_pcm_speeches/
+            # folder (a plain `unzip` produces it), so extract next to it
+            # rather than into DATASET_ROOT, which would nest it twice.
+            zip_ref.extractall(DATASET_ROOT.parent)
+        print("Extraction complete.")
+    else:
+        # Neither the folder nor the archive exists: tell the user how to
+        # get the data and stop with a non-zero exit status.
+        print("Dataset not found. Please download it from:")
+        print("https://www.kaggle.com/kongaevans/speaker-recognition-dataset")
+        print(f"Save it as '{ZIP_FILE}' in this directory and run again.")
+        raise SystemExit(1)
+
 DATASET_ROOT = "16000_pcm_speeches"
 
 # The folders in which we will put the audio samples and the noise samples