From 20de20abdee3215cc751e2eaa0de156ac924d3a5 Mon Sep 17 00:00:00 2001 From: William Wong Date: Fri, 7 Feb 2025 21:34:05 +0000 Subject: [PATCH 01/43] Add mock SpeechSynthesis --- .../bargeIn/js/MockedSpeechSynthesis.js | 117 +++++++++++++ .../bargeIn/js/MockedSpeechSynthesisEvent.js | 47 ++++++ .../js/MockedSpeechSynthesisUtterance.js | 121 ++++++++++++++ __tests__/html2/speech/bargeIn/js/fn.js | 16 ++ __tests__/html2/speech/bargeIn/js/index.js | 4 + __tests__/html2/speech/bargeIn/mockSetup.html | 157 ++++++++++++++++++ 6 files changed, 462 insertions(+) create mode 100644 __tests__/html2/speech/bargeIn/js/MockedSpeechSynthesis.js create mode 100644 __tests__/html2/speech/bargeIn/js/MockedSpeechSynthesisEvent.js create mode 100644 __tests__/html2/speech/bargeIn/js/MockedSpeechSynthesisUtterance.js create mode 100644 __tests__/html2/speech/bargeIn/js/fn.js create mode 100644 __tests__/html2/speech/bargeIn/js/index.js create mode 100644 __tests__/html2/speech/bargeIn/mockSetup.html diff --git a/__tests__/html2/speech/bargeIn/js/MockedSpeechSynthesis.js b/__tests__/html2/speech/bargeIn/js/MockedSpeechSynthesis.js new file mode 100644 index 0000000000..cdc3436376 --- /dev/null +++ b/__tests__/html2/speech/bargeIn/js/MockedSpeechSynthesis.js @@ -0,0 +1,117 @@ +import { EventTargetProperties } from 'https://unpkg.com/event-target-properties@latest/dist/event-target-properties.mjs'; +import SpeechSynthesisEvent from './MockedSpeechSynthesisEvent.js'; + +export default class SpeechSynthesis extends EventTarget { + constructor() { + super(); + + this.#eventTargetProperties = new EventTargetProperties(this); + } + + /** @type {SpeechSynthesisUtterance} */ + #currentUtterance; + /** @type {EventTargetProperties} */ + #eventTargetProperties; + /** @type {boolean} */ + #paused = false; + // #pending = false; + /** @type {SpeechSynthesisUtterance[]} */ + #queue = []; + /** @type {boolean} */ + #speaking = false; + + get onvoiceschanged() { + return this.#eventTargetProperties.getProperty('voiceschanged'); + } + + set onvoiceschanged(value) { + this.#eventTargetProperties.setProperty('voiceschanged', value); + } + + /** @type {boolean} */ + get paused() { + return this.#paused; + } + + /** @type {boolean} */ + get pending() { + return !!this.#queue.length; + } + + /** @type {boolean} */ + get speaking() { + return !this.paused && this.#speaking; + } + + cancel() { + this.#paused = false; + this.#speaking = false; + this.#queue.splice(0); + + this.#currentUtterance?.dispatchEvent(new SpeechSynthesisEvent('end', { utterance: this.#currentUtterance })); + } + + getVoices() { + return []; + } + + pause() { + if (this.#paused) { + return; + } + + this.#paused = true; + + this.#currentUtterance?.dispatchEvent(new SpeechSynthesisEvent('pause', { utterance: this.#currentUtterance })); + } + + resume() { + if (!this.#paused) { + return; + } + + this.#paused = false; + + if (this.#currentUtterance) { + this.#currentUtterance.dispatchEvent(new SpeechSynthesisEvent('resume', { utterance: this.#currentUtterance })); + } else { + this.#next(); + } + } + + speak(/** @type {SpeechSynthesisUtterance} */ utterance) { + this.#queue.push(/** @type {SpeechSynthesisUtterance} */ utterance); + + !this.#paused && !this.#speaking && this.#next(); + } + + #next() { + if (this.#paused) { + throw new Error('Should not call #next() when it is paused.'); + } + + this.#currentUtterance = this.#queue.shift(); + + if (!this.#currentUtterance) { + this.#paused = false; + this.#speaking = false; + + return; + } + + this.#speaking = true; 
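+    // Wire up the utterance just dequeued: advance the queue when it ends,
+    // flush everything on error, then announce that synthesis has started.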
+ + this.#currentUtterance.addEventListener('end', () => this.#next(), { once: true }); + this.#currentUtterance.addEventListener( + 'error', + () => { + this.#paused = false; + this.#speaking = false; + this.#queue.splice(0); + }, + { once: true } + ); + + this.#currentUtterance.dispatchEvent(new SpeechSynthesisEvent('start', { utterance: this.#currentUtterance })); + } +} diff --git a/__tests__/html2/speech/bargeIn/js/MockedSpeechSynthesisEvent.js b/__tests__/html2/speech/bargeIn/js/MockedSpeechSynthesisEvent.js new file mode 100644 index 0000000000..bd99d28cd0 --- /dev/null +++ b/__tests__/html2/speech/bargeIn/js/MockedSpeechSynthesisEvent.js @@ -0,0 +1,47 @@ +export default class SpeechSynthesisEvent extends Event { + constructor( + /** @type {string} */ + type, + /** @type {EventInitDict} */ + eventInitDict + ) { + super(type, eventInitDict); + + this.#charIndex = eventInitDict.charIndex || 0; + this.#charLength = eventInitDict.charLength || 0; + this.#elapsedTime = eventInitDict.elapsedTime || 0; + this.#name = eventInitDict.name || ''; + this.#utterance = eventInitDict.utterance; + } + + /** @type {number} */ + #charIndex; + /** @type {number} */ + #charLength; + /** @type {number} */ + #elapsedTime; + /** @type {string} */ + #name; + /** @type {SpeechSynthesisUtterance | undefined} */ + #utterance; + + get charIndex() { + return this.#charIndex; + } + + get charLength() { + return this.#charLength; + } + + get elapsedTime() { + return this.#elapsedTime; + } + + get name() { + return this.#name; + } + + get utterance() { + return this.#utterance; + } +} diff --git a/__tests__/html2/speech/bargeIn/js/MockedSpeechSynthesisUtterance.js b/__tests__/html2/speech/bargeIn/js/MockedSpeechSynthesisUtterance.js new file mode 100644 index 0000000000..b1090deeb2 --- /dev/null +++ b/__tests__/html2/speech/bargeIn/js/MockedSpeechSynthesisUtterance.js @@ -0,0 +1,121 @@ +import { EventTargetProperties } from 'https://unpkg.com/event-target-properties@latest/dist/event-target-properties.mjs'; + +export default class SpeechSynthesisUtterance extends EventTarget { + constructor() { + super(); + + this.#eventTargetProperties = new EventTargetProperties(this); + } + + #eventTargetProperties; + #lang; + #pitch; + #rate; + #text; + #voice; + #volume; + + get lang() { + return this.#lang; + } + + set lang(value) { + this.#lang = value; + } + + get pitch() { + return this.#pitch; + } + + set pitch(value) { + this.#pitch = value; + } + + get rate() { + return this.#rate; + } + + set rate(value) { + this.#rate = value; + } + + get text() { + return this.#text; + } + + set text(value) { + this.#text = value; + } + + get voice() { + return this.#voice; + } + + set voice(value) { + this.#voice = value; + } + + get volume() { + return this.#volume; + } + + set volume(value) { + this.#volume = value; + } + + get onboundary() { + return this.#eventTargetProperties.getProperty('boundary'); + } + + set onboundary(value) { + this.#eventTargetProperties.setProperty('boundary', value); + } + + get onend() { + return this.#eventTargetProperties.getProperty('end'); + } + + set onend(value) { + this.#eventTargetProperties.setProperty('end', value); + } + + get onerror() { + return this.#eventTargetProperties.getProperty('error'); + } + + set onerror(value) { + this.#eventTargetProperties.setProperty('error', value); + } + + get onmark() { + return this.#eventTargetProperties.getProperty('mark'); + } + + set onmark(value) { + this.#eventTargetProperties.setProperty('mark', value); + } + + get onpause() { + return 
this.#eventTargetProperties.getProperty('pause'); + } + + set onpause(value) { + this.#eventTargetProperties.setProperty('pause', value); + } + + get onresume() { + return this.#eventTargetProperties.getProperty('resume'); + } + + set onresume(value) { + this.#eventTargetProperties.setProperty('resume', value); + } + + get onstart() { + return this.#eventTargetProperties.getProperty('start'); + } + + set onstart(value) { + this.#eventTargetProperties.setProperty('start', value); + } +} diff --git a/__tests__/html2/speech/bargeIn/js/fn.js b/__tests__/html2/speech/bargeIn/js/fn.js new file mode 100644 index 0000000000..c9c7d72f2d --- /dev/null +++ b/__tests__/html2/speech/bargeIn/js/fn.js @@ -0,0 +1,16 @@ +export default function fn( + /** @type {Function | undefined} */ + impl +) { + const fn = (...args) => { + fn.mock.calls.push(args); + + return impl?.(...args); + }; + + fn._isMockFunction = true; + fn.getMockName = () => 'mock'; + fn.mock = { calls: [] }; + + return fn; +} diff --git a/__tests__/html2/speech/bargeIn/js/index.js b/__tests__/html2/speech/bargeIn/js/index.js new file mode 100644 index 0000000000..1ca0c22132 --- /dev/null +++ b/__tests__/html2/speech/bargeIn/js/index.js @@ -0,0 +1,4 @@ +export { default as fn } from './fn.js'; +export { default as SpeechSynthesis } from './MockedSpeechSynthesis.js'; +export { default as SpeechSynthesisEvent } from './MockedSpeechSynthesisEvent.js'; +export { default as SpeechSynthesisUtterance } from './MockedSpeechSynthesisUtterance.js'; diff --git a/__tests__/html2/speech/bargeIn/mockSetup.html b/__tests__/html2/speech/bargeIn/mockSetup.html new file mode 100644 index 0000000000..e509a04217 --- /dev/null +++ b/__tests__/html2/speech/bargeIn/mockSetup.html @@ -0,0 +1,157 @@ + + + + + + + + + +
This test validates the mock of SpeechSynthesis.
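The mock above mirrors the queue semantics of the real speechSynthesis: speak() enqueues, #next() dequeues the next utterance and dispatches "start", and the queue only advances once the current utterance dispatches "end". A minimal sketch of driving the mock by hand, assuming the module layout this patch creates; the logging and variable names are illustrative and not part of the patch:

    import SpeechSynthesis from './js/MockedSpeechSynthesis.js';
    import SpeechSynthesisEvent from './js/MockedSpeechSynthesisEvent.js';
    import SpeechSynthesisUtterance from './js/MockedSpeechSynthesisUtterance.js';

    const speechSynthesis = new SpeechSynthesis();
    const utterance = new SpeechSynthesisUtterance();

    utterance.text = 'Hello, World!';
    utterance.onstart = () => console.log('start');
    utterance.onend = () => console.log('end');

    // The queue is empty, so speak() dequeues immediately and dispatches "start".
    speechSynthesis.speak(utterance);
    console.log(speechSynthesis.speaking); // true

    // Nothing actually plays: tests finish an utterance by dispatching "end" themselves,
    // which triggers the mock's once-only listener and advances the queue.
    utterance.dispatchEvent(new SpeechSynthesisEvent('end', { utterance }));
    console.log(speechSynthesis.speaking); // false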
+ + + From f352b5a29740ea89526d6e8c4723767d442de948 Mon Sep 17 00:00:00 2001 From: William Wong Date: Fri, 7 Feb 2025 22:02:15 +0000 Subject: [PATCH 02/43] Clean up --- __tests__/html2/speech/bargeIn/mockSetup.html | 7 ------- 1 file changed, 7 deletions(-) diff --git a/__tests__/html2/speech/bargeIn/mockSetup.html b/__tests__/html2/speech/bargeIn/mockSetup.html index e509a04217..cab3a72c4f 100644 --- a/__tests__/html2/speech/bargeIn/mockSetup.html +++ b/__tests__/html2/speech/bargeIn/mockSetup.html @@ -18,13 +18,6 @@ import { fn, SpeechSynthesis, SpeechSynthesisEvent, SpeechSynthesisUtterance } from './js/index.js'; run(async function () { - const mockedWebSpeech = { - SpeechGrammarList: SpeechGrammarList, - SpeechRecognition: SpeechRecognition, - speechSynthesis: new SpeechSynthesis(), - SpeechSynthesisUtterance: SpeechSynthesisUtterance - }; - const speechSynthesis = new SpeechSynthesis(); expect(speechSynthesis.paused).toBe(false); From 8019538e71fe4482ef52b8ab9dbda06d86ccf420 Mon Sep 17 00:00:00 2001 From: William Wong Date: Fri, 7 Feb 2025 22:26:07 +0000 Subject: [PATCH 03/43] Use jest-mock --- __tests__/html2/speech/bargeIn/js/fn.js | 16 ----------- __tests__/html2/speech/bargeIn/js/index.js | 1 - __tests__/html2/speech/bargeIn/mockSetup.html | 27 ++++++++++++++++++- 3 files changed, 26 insertions(+), 18 deletions(-) delete mode 100644 __tests__/html2/speech/bargeIn/js/fn.js diff --git a/__tests__/html2/speech/bargeIn/js/fn.js b/__tests__/html2/speech/bargeIn/js/fn.js deleted file mode 100644 index c9c7d72f2d..0000000000 --- a/__tests__/html2/speech/bargeIn/js/fn.js +++ /dev/null @@ -1,16 +0,0 @@ -export default function fn( - /** @type {Function | undefined} */ - impl -) { - const fn = (...args) => { - fn.mock.calls.push(args); - - return impl?.(...args); - }; - - fn._isMockFunction = true; - fn.getMockName = () => 'mock'; - fn.mock = { calls: [] }; - - return fn; -} diff --git a/__tests__/html2/speech/bargeIn/js/index.js b/__tests__/html2/speech/bargeIn/js/index.js index 1ca0c22132..f04e5a9372 100644 --- a/__tests__/html2/speech/bargeIn/js/index.js +++ b/__tests__/html2/speech/bargeIn/js/index.js @@ -1,4 +1,3 @@ -export { default as fn } from './fn.js'; export { default as SpeechSynthesis } from './MockedSpeechSynthesis.js'; export { default as SpeechSynthesisEvent } from './MockedSpeechSynthesisEvent.js'; export { default as SpeechSynthesisUtterance } from './MockedSpeechSynthesisUtterance.js'; diff --git a/__tests__/html2/speech/bargeIn/mockSetup.html b/__tests__/html2/speech/bargeIn/mockSetup.html index cab3a72c4f..67017d1014 100644 --- a/__tests__/html2/speech/bargeIn/mockSetup.html +++ b/__tests__/html2/speech/bargeIn/mockSetup.html @@ -15,7 +15,8 @@ SpeechGrammarList, SpeechRecognition } from 'https://unpkg.com/react-dictate-button@3.0.0/dist/react-dictate-button.internal.mjs'; - import { fn, SpeechSynthesis, SpeechSynthesisEvent, SpeechSynthesisUtterance } from './js/index.js'; + import { fn } from 'https://esm.sh/jest-mock'; + import { SpeechSynthesis, SpeechSynthesisEvent, SpeechSynthesisUtterance } from './js/index.js'; run(async function () { const speechSynthesis = new SpeechSynthesis(); @@ -62,14 +63,23 @@ expect(utterance1.onend).toHaveBeenCalledTimes(0); expect(utterance1.onpause).toHaveBeenCalledTimes(1); + expect(utterance1.onpause).toHaveBeenLastCalledWith( + expect.objectContaining({ type: 'pause', utterance: utterance1 }) + ); expect(utterance1.onresume).toHaveBeenCalledTimes(0); expect(utterance1.onstart).toHaveBeenCalledTimes(1); + 
expect(utterance1.onstart).toHaveBeenLastCalledWith( + expect.objectContaining({ type: 'start', utterance: utterance1 }) + ); speechSynthesis.resume(); expect(utterance1.onend).toHaveBeenCalledTimes(0); expect(utterance1.onpause).toHaveBeenCalledTimes(1); expect(utterance1.onresume).toHaveBeenCalledTimes(1); + expect(utterance1.onresume).toHaveBeenLastCalledWith( + expect.objectContaining({ type: 'resume', utterance: utterance1 }) + ); expect(utterance1.onstart).toHaveBeenCalledTimes(1); utterance1.dispatchEvent(new SpeechSynthesisEvent('end', { utterance: utterance1 })); @@ -79,6 +89,9 @@ expect(speechSynthesis.speaking).toBe(true); expect(utterance1.onend).toHaveBeenCalledTimes(1); + expect(utterance1.onend).toHaveBeenLastCalledWith( + expect.objectContaining({ type: 'end', utterance: utterance1 }) + ); expect(utterance1.onpause).toHaveBeenCalledTimes(1); expect(utterance1.onresume).toHaveBeenCalledTimes(1); expect(utterance1.onstart).toHaveBeenCalledTimes(1); @@ -87,6 +100,9 @@ expect(utterance2.onpause).toHaveBeenCalledTimes(0); expect(utterance2.onresume).toHaveBeenCalledTimes(0); expect(utterance2.onstart).toHaveBeenCalledTimes(1); + expect(utterance2.onstart).toHaveBeenLastCalledWith( + expect.objectContaining({ type: 'start', utterance: utterance2 }) + ); utterance2.dispatchEvent(new SpeechSynthesisEvent('end', { utterance: utterance2 })); @@ -95,6 +111,9 @@ expect(speechSynthesis.speaking).toBe(false); expect(utterance2.onend).toHaveBeenCalledTimes(1); + expect(utterance2.onend).toHaveBeenLastCalledWith( + expect.objectContaining({ type: 'end', utterance: utterance2 }) + ); expect(utterance2.onpause).toHaveBeenCalledTimes(0); expect(utterance2.onresume).toHaveBeenCalledTimes(0); expect(utterance2.onstart).toHaveBeenCalledTimes(1); @@ -133,6 +152,9 @@ expect(utterance3.onpause).toHaveBeenCalledTimes(0); expect(utterance3.onresume).toHaveBeenCalledTimes(0); expect(utterance3.onstart).toHaveBeenCalledTimes(1); + expect(utterance3.onstart).toHaveBeenLastCalledWith( + expect.objectContaining({ type: 'start', utterance: utterance3 }) + ); utterance3.dispatchEvent(new SpeechSynthesisEvent('end', { utterance: utterance3 })); @@ -141,6 +163,9 @@ expect(speechSynthesis.speaking).toBe(false); expect(utterance3.onend).toHaveBeenCalledTimes(1); + expect(utterance3.onend).toHaveBeenLastCalledWith( + expect.objectContaining({ type: 'end', utterance: utterance3 }) + ); expect(utterance3.onpause).toHaveBeenCalledTimes(0); expect(utterance3.onresume).toHaveBeenCalledTimes(0); expect(utterance3.onstart).toHaveBeenCalledTimes(1); From 34d0ee60c1196fa36e9fc1275a8e1907b934d6c5 Mon Sep 17 00:00:00 2001 From: William Wong Date: Sat, 8 Feb 2025 02:53:39 +0000 Subject: [PATCH 04/43] Add expectingInput --- .../html2/speech/bargeIn/comprehensive.html | 147 ++++++++++++++++++ __tests__/html2/speech/bargeIn/mockSetup.html | 2 +- .../{bargeIn => }/js/MockedSpeechSynthesis.js | 0 .../js/MockedSpeechSynthesisEvent.js | 0 .../js/MockedSpeechSynthesisUtterance.js | 3 +- .../html2/speech/{bargeIn => }/js/index.js | 0 6 files changed, 150 insertions(+), 2 deletions(-) create mode 100644 __tests__/html2/speech/bargeIn/comprehensive.html rename __tests__/html2/speech/{bargeIn => }/js/MockedSpeechSynthesis.js (100%) rename __tests__/html2/speech/{bargeIn => }/js/MockedSpeechSynthesisEvent.js (100%) rename __tests__/html2/speech/{bargeIn => }/js/MockedSpeechSynthesisUtterance.js (97%) rename __tests__/html2/speech/{bargeIn => }/js/index.js (100%) diff --git a/__tests__/html2/speech/bargeIn/comprehensive.html 
b/__tests__/html2/speech/bargeIn/comprehensive.html new file mode 100644 index 0000000000..90ffa21817 --- /dev/null +++ b/__tests__/html2/speech/bargeIn/comprehensive.html @@ -0,0 +1,147 @@ + + + + + + + + + +
+ + + diff --git a/__tests__/html2/speech/bargeIn/mockSetup.html b/__tests__/html2/speech/bargeIn/mockSetup.html index 67017d1014..1f8d8faf9a 100644 --- a/__tests__/html2/speech/bargeIn/mockSetup.html +++ b/__tests__/html2/speech/bargeIn/mockSetup.html @@ -14,7 +14,7 @@ import { SpeechGrammarList, SpeechRecognition - } from 'https://unpkg.com/react-dictate-button@3.0.0/dist/react-dictate-button.internal.mjs'; + } from 'https://unpkg.com/react-dictate-button@main/dist/react-dictate-button.internal.mjs'; import { fn } from 'https://esm.sh/jest-mock'; import { SpeechSynthesis, SpeechSynthesisEvent, SpeechSynthesisUtterance } from './js/index.js'; diff --git a/__tests__/html2/speech/bargeIn/js/MockedSpeechSynthesis.js b/__tests__/html2/speech/js/MockedSpeechSynthesis.js similarity index 100% rename from __tests__/html2/speech/bargeIn/js/MockedSpeechSynthesis.js rename to __tests__/html2/speech/js/MockedSpeechSynthesis.js diff --git a/__tests__/html2/speech/bargeIn/js/MockedSpeechSynthesisEvent.js b/__tests__/html2/speech/js/MockedSpeechSynthesisEvent.js similarity index 100% rename from __tests__/html2/speech/bargeIn/js/MockedSpeechSynthesisEvent.js rename to __tests__/html2/speech/js/MockedSpeechSynthesisEvent.js diff --git a/__tests__/html2/speech/bargeIn/js/MockedSpeechSynthesisUtterance.js b/__tests__/html2/speech/js/MockedSpeechSynthesisUtterance.js similarity index 97% rename from __tests__/html2/speech/bargeIn/js/MockedSpeechSynthesisUtterance.js rename to __tests__/html2/speech/js/MockedSpeechSynthesisUtterance.js index b1090deeb2..025b5d510e 100644 --- a/__tests__/html2/speech/bargeIn/js/MockedSpeechSynthesisUtterance.js +++ b/__tests__/html2/speech/js/MockedSpeechSynthesisUtterance.js @@ -1,10 +1,11 @@ import { EventTargetProperties } from 'https://unpkg.com/event-target-properties@latest/dist/event-target-properties.mjs'; export default class SpeechSynthesisUtterance extends EventTarget { - constructor() { + constructor(text) { super(); this.#eventTargetProperties = new EventTargetProperties(this); + this.#text = text || ''; } #eventTargetProperties; diff --git a/__tests__/html2/speech/bargeIn/js/index.js b/__tests__/html2/speech/js/index.js similarity index 100% rename from __tests__/html2/speech/bargeIn/js/index.js rename to __tests__/html2/speech/js/index.js From 379881e513faa548fba9c663cc14edbf4dcee1af Mon Sep 17 00:00:00 2001 From: William Wong Date: Sat, 8 Feb 2025 07:23:21 +0000 Subject: [PATCH 05/43] Complete the test --- .../html2/speech/bargeIn/comprehensive.html | 147 ------------- __tests__/html2/speech/comprehensive.html | 198 ++++++++++++++++++ 2 files changed, 198 insertions(+), 147 deletions(-) delete mode 100644 __tests__/html2/speech/bargeIn/comprehensive.html create mode 100644 __tests__/html2/speech/comprehensive.html diff --git a/__tests__/html2/speech/bargeIn/comprehensive.html b/__tests__/html2/speech/bargeIn/comprehensive.html deleted file mode 100644 index 90ffa21817..0000000000 --- a/__tests__/html2/speech/bargeIn/comprehensive.html +++ /dev/null @@ -1,147 +0,0 @@ - - - - - - - - - -
- - - diff --git a/__tests__/html2/speech/comprehensive.html b/__tests__/html2/speech/comprehensive.html new file mode 100644 index 0000000000..2338642722 --- /dev/null +++ b/__tests__/html2/speech/comprehensive.html @@ -0,0 +1,198 @@ + + + + + + + + + +
+ + + From 1a48a920ede6b961c6fa79f1d843067b1d78dfa9 Mon Sep 17 00:00:00 2001 From: William Wong Date: Sat, 8 Feb 2025 07:44:22 +0000 Subject: [PATCH 06/43] Add import map --- __tests__/html2/speech/comprehensive.html | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/__tests__/html2/speech/comprehensive.html b/__tests__/html2/speech/comprehensive.html index 2338642722..8e9d06774c 100644 --- a/__tests__/html2/speech/comprehensive.html +++ b/__tests__/html2/speech/comprehensive.html @@ -8,6 +8,15 @@
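The body of this "Add import map" hunk did not survive above. Going by the import map that appears verbatim as context in the behavior.html diffs further down (patch 12), the block it added to comprehensive.html plausibly looked like the following; this is a labeled reconstruction, not the original hunk:

    <script type="importmap">
      {
        "imports": {
          "@testduet/wait-for": "https://unpkg.com/@testduet/wait-for@main/dist/wait-for.mjs",
          "jest-mock": "https://esm.sh/jest-mock",
          "react-dictate-button/internal": "https://unpkg.com/react-dictate-button@main/dist/react-dictate-button.internal.mjs"
        }
      }
    </script>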
+ + + + From 5c2cb3569f71fdf583a9113a2b5e75f13a691af6 Mon Sep 17 00:00:00 2001 From: William Wong Date: Mon, 10 Feb 2025 18:33:18 +0000 Subject: [PATCH 10/43] No need to wait for send when barge-in --- __tests__/html2/speech/bargeIn/behavior.html | 23 ++++++++------------ 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/__tests__/html2/speech/bargeIn/behavior.html b/__tests__/html2/speech/bargeIn/behavior.html index 5795df98d3..a6a5a7c77b 100644 --- a/__tests__/html2/speech/bargeIn/behavior.html +++ b/__tests__/html2/speech/bargeIn/behavior.html @@ -123,21 +123,16 @@ expect(speechSynthesisSpeak).toHaveBeenLastCalledWith(expect.any(SpeechSynthesisUtterance)); expect(speechSynthesisSpeak).toHaveBeenLastCalledWith(expect.objectContaining({ text: 'Aloha!' })); - // WHEN: Barge-in. - await ( - await directLine.actPostActivity(async () => { - const { value: speechRecognition1 } = ponyfill.SpeechRecognition.mock.results[0]; - - // WHEN: Interims are dispatched. - speechRecognition1.dispatchEvent( - new SpeechRecognitionEvent('result', { - results: new SpeechRecognitionResultList( - new SpeechRecognitionResult(new SpeechRecognitionAlternative(0, 'Good')) - ) - }) - ); + // WHEN: Barge-in with interims. + const { value: speechRecognition1 } = ponyfill.SpeechRecognition.mock.results[0]; + + speechRecognition1.dispatchEvent( + new SpeechRecognitionEvent('result', { + results: new SpeechRecognitionResultList( + new SpeechRecognitionResult(new SpeechRecognitionAlternative(0, 'Good')) + ) }) - ).resolveAll(); + ); // THEN: Should cancel speech synthesis. expect(speechSynthesisCancel).toHaveBeenCalledTimes(1); From 6fa485760106b20706e4632d793210c2332a4c73 Mon Sep 17 00:00:00 2001 From: William Wong Date: Mon, 10 Feb 2025 18:36:50 +0000 Subject: [PATCH 11/43] Add interims --- __tests__/html2/speech/bargeIn/behavior.html | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/__tests__/html2/speech/bargeIn/behavior.html b/__tests__/html2/speech/bargeIn/behavior.html index a6a5a7c77b..7ca202a29f 100644 --- a/__tests__/html2/speech/bargeIn/behavior.html +++ b/__tests__/html2/speech/bargeIn/behavior.html @@ -86,6 +86,20 @@ speechRecognition1.dispatchEvent(new Event('audiostart')); speechRecognition1.dispatchEvent(new Event('soundstart')); speechRecognition1.dispatchEvent(new Event('speechstart')); + speechRecognition1.dispatchEvent( + new SpeechRecognitionEvent('result', { + results: new SpeechRecognitionResultList( + new SpeechRecognitionResult(new SpeechRecognitionAlternative(0, 'Hello')) + ) + }) + ); + speechRecognition1.dispatchEvent( + new SpeechRecognitionEvent('result', { + results: new SpeechRecognitionResultList( + new SpeechRecognitionResult(new SpeechRecognitionAlternative(0, 'Hello, World!')) + ) + }) + ); speechRecognition1.dispatchEvent( new SpeechRecognitionEvent('result', { results: new SpeechRecognitionResultList( From a3da8a81a6c48ee01d4a182a42da5659d93eec59 Mon Sep 17 00:00:00 2001 From: William Wong Date: Tue, 11 Feb 2025 02:16:38 +0000 Subject: [PATCH 12/43] Support barge-in --- __tests__/html2/speech/bargeIn/behavior.html | 45 +++++++++++++---- package-lock.json | 48 +++++++------------ packages/api/src/StyleOptions.ts | 2 + packages/api/src/defaultStyleOptions.ts | 5 +- packages/api/src/hooks/Composer.tsx | 17 ++++++- packages/component/package.json | 2 +- packages/component/src/Composer.tsx | 3 +- packages/component/src/Dictation.js | 39 +++++++-------- ...DictateOnIncomingActivityFromOthersSaga.js | 12 +++-- .../src/sagas/stopDictateOnCardActionSaga.js | 5 +- 
.../sagas/stopSpeakingActivityOnInputSaga.js | 5 +- 11 files changed, 111 insertions(+), 72 deletions(-) diff --git a/__tests__/html2/speech/bargeIn/behavior.html b/__tests__/html2/speech/bargeIn/behavior.html index 7ca202a29f..4e19c05e0c 100644 --- a/__tests__/html2/speech/bargeIn/behavior.html +++ b/__tests__/html2/speech/bargeIn/behavior.html @@ -13,7 +13,7 @@ "imports": { "@testduet/wait-for": "https://unpkg.com/@testduet/wait-for@main/dist/wait-for.mjs", "jest-mock": "https://esm.sh/jest-mock", - "react-dictate-button/internal": "https://unpkg.com/react-dictate-button@main/dist/react-dictate-button.internal.mjs" + "react-dictate-button/internal": "https://unpkg.com/react-dictate-button@3.0.1-main.1bbfe61/dist/react-dictate-button.internal.mjs" } } @@ -22,6 +22,7 @@ SpeechGrammarList, SpeechRecognition, SpeechRecognitionAlternative, + SpeechRecognitionErrorEvent, SpeechRecognitionEvent, SpeechRecognitionResult, SpeechRecognitionResultList @@ -36,13 +37,16 @@ } = window; run(async function () { + let speechRecognitionAbort; + let speechRecognitionStart; + const speechSynthesis = new SpeechSynthesis(); const ponyfill = { SpeechGrammarList, SpeechRecognition: fn().mockImplementation(() => { const speechRecognition = new SpeechRecognition(); - speechRecognition.continuous = true; + speechRecognitionAbort = spyOn(speechRecognition, 'abort'); speechRecognitionStart = spyOn(speechRecognition, 'start'); return speechRecognition; @@ -50,7 +54,6 @@ speechSynthesis, SpeechSynthesisUtterance }; - let speechRecognitionStart; const speechSynthesisCancel = spyOn(speechSynthesis, 'cancel'); const speechSynthesisSpeak = spyOn(speechSynthesis, 'speak'); @@ -61,6 +64,7 @@ { directLine, store, + styleOptions: { speechRecognitionContinuous: true }, webSpeechPonyfillFactory: () => ponyfill }, document.getElementById('webchat') @@ -80,6 +84,7 @@ // THEN: Should call SpeechRecognition.start(). expect(speechRecognition1.start).toHaveBeenCalledTimes(1); + expect(speechRecognition1).toHaveProperty('continuous', true); // WHEN: Recognition started and interims result is dispatched. speechRecognition1.dispatchEvent(new Event('start')); @@ -107,11 +112,6 @@ ) }) ); - - // speechRecognition1.dispatchEvent(new Event('speechend')); - // speechRecognition1.dispatchEvent(new Event('soundend')); - // speechRecognition1.dispatchEvent(new Event('audioend')); - // speechRecognition1.dispatchEvent(new Event('end')); }) ).resolveAll(); @@ -137,6 +137,14 @@ expect(speechSynthesisSpeak).toHaveBeenLastCalledWith(expect.any(SpeechSynthesisUtterance)); expect(speechSynthesisSpeak).toHaveBeenLastCalledWith(expect.objectContaining({ text: 'Aloha!' })); + // WHEN: "start" event is emitted for synthesis. + speechSynthesisSpeak.mock.calls[1][0].dispatchEvent( + new SpeechSynthesisEvent('start', { utterance: speechSynthesisSpeak.mock.calls[1] }) + ); + + // THEN: Should start speaking. + expect(speechSynthesis).toHaveProperty('speaking', true); + // WHEN: Barge-in with interims. const { value: speechRecognition1 } = ponyfill.SpeechRecognition.mock.results[0]; @@ -149,7 +157,7 @@ ); // THEN: Should cancel speech synthesis. - expect(speechSynthesisCancel).toHaveBeenCalledTimes(1); + await waitFor(() => expect(speechSynthesisCancel).toHaveBeenCalledTimes(1)); // WHEN: Completed recognition. await ( @@ -170,6 +178,25 @@ // THEN: Should have send the activity. 
await pageConditions.numActivitiesShown(3); expect(pageElements.activityContents()[2]).toHaveProperty('textContent', 'Good morning!'); + + // THEN: Should not have called SpeechRecognition.abort(). + expect(speechRecognitionAbort).toHaveBeenCalledTimes(0); + + // WHEN: Click on the microphone button. + await pageObjects.clickMicrophoneButton(); + + // THEN: Should have called SpeechRecognition.abort() once. + expect(speechRecognitionAbort).toHaveBeenCalledTimes(1); + + // WHEN: Speech recognition dispatch abort error event. + speechRecognition1.dispatchEvent(new Event('speechend')); + speechRecognition1.dispatchEvent(new Event('soundend')); + speechRecognition1.dispatchEvent(new Event('audioend')); + speechRecognition1.dispatchEvent(new SpeechRecognitionErrorEvent('error', { error: 'aborted' })); + speechRecognition1.dispatchEvent(new Event('end')); + + // THEN: Should go back to text input mode. + expect(pageElements.sendBoxTextBox()).toBeTruthy(); }); diff --git a/package-lock.json b/package-lock.json index 4f09cfc9b2..de06102643 100644 --- a/package-lock.json +++ b/package-lock.json @@ -18609,37 +18609,6 @@ "react": ">=16.8.0" } }, - "node_modules/react-dictate-button": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/react-dictate-button/-/react-dictate-button-2.0.1.tgz", - "integrity": "sha512-cLVxzjEy/I5IdOhZHedSbMwPIV62cQHUj09kvHm6XyRpycX7j3efLRRm661HO9zZM3ZtYT+Sy4j7F5eJaAWBug==", - "license": "MIT", - "dependencies": { - "@babel/runtime-corejs3": "^7.14.0", - "core-js": "^3.12.1", - "prop-types": "15.7.2" - }, - "peerDependencies": { - "react": ">= 16.8.0" - } - }, - "node_modules/react-dictate-button/node_modules/prop-types": { - "version": "15.7.2", - "resolved": "https://registry.npmjs.org/prop-types/-/prop-types-15.7.2.tgz", - "integrity": "sha512-8QQikdH7//R2vurIJSutZ1smHYTcLpRWEOlHnzcWHmBYrOGUysKwSsrC89BCiFj3CbrfJ/nXFdJepOVrY1GCHQ==", - "license": "MIT", - "dependencies": { - "loose-envify": "^1.4.0", - "object-assign": "^4.1.1", - "react-is": "^16.8.1" - } - }, - "node_modules/react-dictate-button/node_modules/react-is": { - "version": "16.13.1", - "resolved": "https://registry.npmjs.org/react-is/-/react-is-16.13.1.tgz", - "integrity": "sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ==", - "license": "MIT" - }, "node_modules/react-dom": { "version": "16.8.6", "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-16.8.6.tgz", @@ -24271,7 +24240,7 @@ "prop-types": "15.8.1", "punycode": "2.3.1", "react-chain-of-responsibility": "0.2.0-main.3cb47ab", - "react-dictate-button": "2.0.1", + "react-dictate-button": "^3.0.1-main.1bbfe61", "react-film": "3.1.1-main.f623bf6", "react-redux": "7.2.9", "react-say": "2.1.0", @@ -24317,6 +24286,21 @@ "node": ">=6" } }, + "packages/component/node_modules/react-dictate-button": { + "version": "3.0.1-main.1bbfe61", + "resolved": "https://registry.npmjs.org/react-dictate-button/-/react-dictate-button-3.0.1-main.1bbfe61.tgz", + "integrity": "sha512-wVWQNDRLMOOOiAfPHRENdSA3Jpd7+TA6KEEl+fxpeGdNKtTXL6BQ9nQQo1E82YX8Mn3iZrZzsGVsCbGEmbbkcA==", + "license": "MIT", + "dependencies": { + "@babel/runtime-corejs3": "^7.14.0", + "core-js": "^3.12.1", + "react-dictate-button": "^3.0.1-main.1bbfe61", + "use-ref-from": "^0.1.0" + }, + "peerDependencies": { + "react": ">=16.8.6" + } + }, "packages/component/node_modules/type-fest": { "version": "4.26.1", "dev": true, diff --git a/packages/api/src/StyleOptions.ts b/packages/api/src/StyleOptions.ts index 70dfb01051..bf5783ad72 100644 --- 
a/packages/api/src/StyleOptions.ts +++ b/packages/api/src/StyleOptions.ts @@ -942,6 +942,8 @@ type StyleOptions = { * New in 4.19.0. */ feedbackActionsPlacement?: 'activity-actions' | 'activity-status'; + + speechRecognitionContinuous?: boolean | undefined; }; // StrictStyleOptions is only used internally in Web Chat and for simplifying our code: diff --git a/packages/api/src/defaultStyleOptions.ts b/packages/api/src/defaultStyleOptions.ts index 80d4d11649..8fa1ef4e6b 100644 --- a/packages/api/src/defaultStyleOptions.ts +++ b/packages/api/src/defaultStyleOptions.ts @@ -307,7 +307,10 @@ const DEFAULT_OPTIONS: Required = { codeBlockTheme: 'github-light-default' as const, - feedbackActionsPlacement: 'activity-status' as const + feedbackActionsPlacement: 'activity-status' as const, + + // Speech recognition + speechRecognitionContinuous: false }; export default DEFAULT_OPTIONS; diff --git a/packages/api/src/hooks/Composer.tsx b/packages/api/src/hooks/Composer.tsx index 6ae1f9340d..99f7e21513 100644 --- a/packages/api/src/hooks/Composer.tsx +++ b/packages/api/src/hooks/Composer.tsx @@ -116,12 +116,14 @@ const EMPTY_ARRAY: readonly [] = Object.freeze([]); function createCardActionContext({ cardActionMiddleware, + continuous, directLine, dispatch, markAllAsAcknowledged, ponyfill }: { cardActionMiddleware: readonly CardActionMiddleware[]; + continuous: boolean; directLine: DirectLineJSBotConnection; dispatch: Function; markAllAsAcknowledged: () => void; @@ -137,6 +139,11 @@ function createCardActionContext({ onCardAction: (cardAction, { target }: { target?: any } = {}) => { markAllAsAcknowledged(); + // Stop speech recognition only if under interactive mode. + if (!continuous) { + dispatch(stopDictate()); + } + return runMiddleware({ cardAction, getSignInUrl: @@ -340,12 +347,20 @@ const ComposerCore = ({ () => createCardActionContext({ cardActionMiddleware: Object.freeze([...singleToArray(cardActionMiddleware)]), + continuous: !!styleOptions.speechRecognitionContinuous, directLine, dispatch, markAllAsAcknowledged, ponyfill }), - [cardActionMiddleware, directLine, dispatch, markAllAsAcknowledged, ponyfill] + [ + cardActionMiddleware, + directLine, + dispatch, + markAllAsAcknowledged, + ponyfill, + styleOptions.speechRecognitionContinuous + ] ); const patchedSelectVoice = useMemo( diff --git a/packages/component/package.json b/packages/component/package.json index 1edef023d6..9c63dd2395 100644 --- a/packages/component/package.json +++ b/packages/component/package.json @@ -151,7 +151,7 @@ "prop-types": "15.8.1", "punycode": "2.3.1", "react-chain-of-responsibility": "0.2.0-main.3cb47ab", - "react-dictate-button": "2.0.1", + "react-dictate-button": "^3.0.1-main.1bbfe61", "react-film": "3.1.1-main.f623bf6", "react-redux": "7.2.9", "react-say": "2.1.0", diff --git a/packages/component/src/Composer.tsx b/packages/component/src/Composer.tsx index 7847f5917f..cfd7507e00 100644 --- a/packages/component/src/Composer.tsx +++ b/packages/component/src/Composer.tsx @@ -87,7 +87,8 @@ const ComposerCoreUI = memo(({ children }: ComposerCoreUIProps) => { const rootClassName = useStyleToEmotionObject()(ROOT_STYLE) + ''; const dictationOnError = useCallback(err => { - console.error(err); + // Ignore aborted error as it is likely user clicking on the microphone button to abort recognition. 
+ err.error === 'aborted' || console.error(err); }, []); return ( diff --git a/packages/component/src/Dictation.js b/packages/component/src/Dictation.js index 3934350a91..e5ca1acf2e 100644 --- a/packages/component/src/Dictation.js +++ b/packages/component/src/Dictation.js @@ -3,7 +3,7 @@ import { hooks } from 'botframework-webchat-api'; import { useSetDictateState } from 'botframework-webchat-api/internal'; import { Constants } from 'botframework-webchat-core'; import PropTypes from 'prop-types'; -import React, { useCallback, useEffect, useMemo } from 'react'; +import React, { useCallback, useEffect } from 'react'; import { Composer as DictateComposer } from 'react-dictate-button'; import useResumeAudioContext from './hooks/internal/useResumeAudioContext'; @@ -11,7 +11,7 @@ import useSettableDictateAbortable from './hooks/internal/useSettableDictateAbor import useWebSpeechPonyfill from './hooks/useWebSpeechPonyfill'; const { - useActivities, + // useActivities, useDictateInterims, useDictateState, useEmitTypingIndicator, @@ -20,6 +20,7 @@ const { useSendTypingIndicator, useShouldSpeakIncomingActivity, useStopDictate, + useStyleOptions, useSubmitSendBox, useUIState } = hooks; @@ -34,7 +35,8 @@ const Dictation = ({ onError }) => { const [, setSendBox] = useSendBoxValue(); const [, setShouldSpeakIncomingActivity] = useShouldSpeakIncomingActivity(); const [{ SpeechGrammarList, SpeechRecognition } = {}] = useWebSpeechPonyfill(); - const [activities] = useActivities(); + const [{ speechRecognitionContinuous }] = useStyleOptions(); + // const [activities] = useActivities(); const [dictateState] = useDictateState(); const [sendTypingIndicator] = useSendTypingIndicator(); const [speechLanguage] = useLanguage('speech'); @@ -45,17 +47,15 @@ const Dictation = ({ onError }) => { const stopDictate = useStopDictate(); const submitSendBox = useSubmitSendBox(); - const numSpeakingActivities = useMemo( - () => activities.filter(({ channelData: { speak } = {} }) => speak).length, - [activities] - ); + // const numSpeakingActivities = useMemo( + // () => activities.filter(({ channelData: { speak } = {} }) => speak).length, + // [activities] + // ); const handleDictate = useCallback( ({ result: { confidence, transcript } = {} }) => { if (dictateState === DICTATING || dictateState === STARTING) { setDictateInterims([]); - setDictateState(IDLE); - stopDictate(); if (transcript) { setSendBox(transcript); @@ -64,15 +64,7 @@ const Dictation = ({ onError }) => { } } }, - [ - dictateState, - setDictateInterims, - setDictateState, - stopDictate, - setSendBox, - submitSendBox, - setShouldSpeakIncomingActivity - ] + [dictateState, setDictateInterims, setSendBox, submitSendBox, setShouldSpeakIncomingActivity] ); const handleDictating = useCallback( @@ -89,6 +81,11 @@ const Dictation = ({ onError }) => { [dictateState, emitTypingIndicator, sendTypingIndicator, setDictateAbortable, setDictateInterims, setDictateState] ); + const handleEnd = useCallback(() => { + dictateState !== IDLE && setDictateState(IDLE); + (dictateState === DICTATING || dictateState === STARTING) && stopDictate(); + }, [dictateState, setDictateState, stopDictate]); + const handleError = useCallback( event => { dictateState !== IDLE && setDictateState(IDLE); @@ -107,15 +104,15 @@ const Dictation = ({ onError }) => { return ( ); }; diff --git a/packages/core/src/sagas/speakActivityAndStartDictateOnIncomingActivityFromOthersSaga.js b/packages/core/src/sagas/speakActivityAndStartDictateOnIncomingActivityFromOthersSaga.js index 41aa2f3e58..1e8153884c 
100644 --- a/packages/core/src/sagas/speakActivityAndStartDictateOnIncomingActivityFromOthersSaga.js +++ b/packages/core/src/sagas/speakActivityAndStartDictateOnIncomingActivityFromOthersSaga.js @@ -1,12 +1,13 @@ import { put, select, takeEvery } from 'redux-saga/effects'; import { INCOMING_ACTIVITY } from '../actions/incomingActivity'; -import { WILL_START } from '../constants/DictateState'; import markActivity from '../actions/markActivity'; import setDictateState from '../actions/setDictateState'; -import shouldSpeakIncomingActivitySelector from '../selectors/shouldSpeakIncomingActivity'; -import speakableActivity from '../definitions/speakableActivity'; import stopDictate from '../actions/stopDictate'; +import { IDLE, WILL_START } from '../constants/DictateState'; +import speakableActivity from '../definitions/speakableActivity'; +import dictateStateSelector from '../selectors/dictateState'; +import shouldSpeakIncomingActivitySelector from '../selectors/shouldSpeakIncomingActivity'; import whileConnected from './effects/whileConnected'; function* speakActivityAndStartDictateOnIncomingActivityFromOthers({ userID }) { @@ -18,6 +19,7 @@ function* speakActivityAndStartDictateOnIncomingActivityFromOthers({ userID }) { type === INCOMING_ACTIVITY && payload.activity.from.id !== userID && payload.activity.from.role !== 'user', function* ({ payload: { activity } }) { const shouldSpeakIncomingActivity = yield select(shouldSpeakIncomingActivitySelector); + const dictateState = yield select(dictateStateSelector); const shouldSpeak = speakableActivity(activity) && shouldSpeakIncomingActivity; if ( @@ -30,7 +32,9 @@ function* speakActivityAndStartDictateOnIncomingActivityFromOthers({ userID }) { } if (shouldSpeak && activity.inputHint === 'expectingInput') { - yield put(setDictateState(WILL_START)); + if (dictateState === IDLE) { + yield put(setDictateState(WILL_START)); + } } else if (activity.inputHint === 'ignoringInput') { yield put(stopDictate()); } diff --git a/packages/core/src/sagas/stopDictateOnCardActionSaga.js b/packages/core/src/sagas/stopDictateOnCardActionSaga.js index 4981c00f14..6e85a3c3a6 100644 --- a/packages/core/src/sagas/stopDictateOnCardActionSaga.js +++ b/packages/core/src/sagas/stopDictateOnCardActionSaga.js @@ -12,7 +12,10 @@ function* stopDictateOnCardAction() { // Currently, there are no actions that are related to card input // For now, we are using POST_ACTIVITY of a "message" activity // In the future, if we have an action for card input, we should use that instead - ({ payload, type }) => type === POST_ACTIVITY_PENDING && payload.activity.type === 'message', + + // [P*] TODO: Think about if we should stop dictate on card action. 
+ // XXXXXXXXXXX + ({ payload, type }) => false && type === POST_ACTIVITY_PENDING && payload.activity.type === 'message', function* putStopDictate() { yield put(stopDictate()); } diff --git a/packages/core/src/sagas/stopSpeakingActivityOnInputSaga.js b/packages/core/src/sagas/stopSpeakingActivityOnInputSaga.js index e0b5661021..2db7a0192c 100644 --- a/packages/core/src/sagas/stopSpeakingActivityOnInputSaga.js +++ b/packages/core/src/sagas/stopSpeakingActivityOnInputSaga.js @@ -1,6 +1,7 @@ import { put, takeEvery } from 'redux-saga/effects'; import { POST_ACTIVITY_PENDING } from '../actions/postActivity'; +import { SET_DICTATE_INTERIMS } from '../actions/setDictateInterims'; import { SET_SEND_BOX } from '../actions/setSendBox'; import stopSpeakingActivity from '../actions/stopSpeakingActivity'; import whileConnected from './effects/whileConnected'; @@ -13,7 +14,9 @@ function* stopSpeakingActivityOnInput() { // But currently there are no actions generated out of a card action // So, right now, we are using best-effort by listening to POST_ACTIVITY_PENDING with a "message" event // We filter out speech because we will call startSpeakingActivity() for POST_ACTIVITY_PENDING dispatched by speech - (type === POST_ACTIVITY_PENDING && meta.method !== 'speech' && payload.activity.type === 'message'), + (type === POST_ACTIVITY_PENDING && meta.method !== 'speech' && payload.activity.type === 'message') || + // We want to stop speaking activity on barge-in. + type === SET_DICTATE_INTERIMS, function* () { yield put(stopSpeakingActivity()); } From 42cde33a9b3a5b5be410e038aea405f0fddd067a Mon Sep 17 00:00:00 2001 From: William Wong Date: Tue, 11 Feb 2025 07:44:41 +0000 Subject: [PATCH 13/43] Bump version --- __tests__/html2/speech/bargeIn/behavior.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/__tests__/html2/speech/bargeIn/behavior.html b/__tests__/html2/speech/bargeIn/behavior.html index 4e19c05e0c..39537957f1 100644 --- a/__tests__/html2/speech/bargeIn/behavior.html +++ b/__tests__/html2/speech/bargeIn/behavior.html @@ -13,7 +13,7 @@ "imports": { "@testduet/wait-for": "https://unpkg.com/@testduet/wait-for@main/dist/wait-for.mjs", "jest-mock": "https://esm.sh/jest-mock", - "react-dictate-button/internal": "https://unpkg.com/react-dictate-button@3.0.1-main.1bbfe61/dist/react-dictate-button.internal.mjs" + "react-dictate-button/internal": "https://unpkg.com/react-dictate-button@main/dist/react-dictate-button.internal.mjs" } } From b5d215d433670e7b942cfda0dbe96f28434b863e Mon Sep 17 00:00:00 2001 From: William Wong Date: Tue, 11 Feb 2025 07:44:46 +0000 Subject: [PATCH 14/43] Bump version --- package-lock.json | 32 ++++++++++++++++---------------- packages/component/package.json | 2 +- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/package-lock.json b/package-lock.json index de06102643..4e8a7d1a2c 100644 --- a/package-lock.json +++ b/package-lock.json @@ -18609,6 +18609,21 @@ "react": ">=16.8.0" } }, + "node_modules/react-dictate-button": { + "version": "3.0.1-main.ed9daf3", + "resolved": "https://registry.npmjs.org/react-dictate-button/-/react-dictate-button-3.0.1-main.ed9daf3.tgz", + "integrity": "sha512-kQjCDtHvwstdlhzq9NsMPFvCCk8X4pkBAQ7MUqeIyXwyfIi41FIza93kYWb7Xb0USvDfZ/ZS5v5Eb/e4R1CLCA==", + "license": "MIT", + "dependencies": { + "@babel/runtime-corejs3": "^7.14.0", + "core-js": "^3.12.1", + "react-dictate-button": "^3.0.1-main.ed9daf3", + "use-ref-from": "^0.1.0" + }, + "peerDependencies": { + "react": ">=16.8.6" + } + }, "node_modules/react-dom": { 
"version": "16.8.6", "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-16.8.6.tgz", @@ -24240,7 +24255,7 @@ "prop-types": "15.8.1", "punycode": "2.3.1", "react-chain-of-responsibility": "0.2.0-main.3cb47ab", - "react-dictate-button": "^3.0.1-main.1bbfe61", + "react-dictate-button": "^3.0.1-main.ed9daf3", "react-film": "3.1.1-main.f623bf6", "react-redux": "7.2.9", "react-say": "2.1.0", @@ -24286,21 +24301,6 @@ "node": ">=6" } }, - "packages/component/node_modules/react-dictate-button": { - "version": "3.0.1-main.1bbfe61", - "resolved": "https://registry.npmjs.org/react-dictate-button/-/react-dictate-button-3.0.1-main.1bbfe61.tgz", - "integrity": "sha512-wVWQNDRLMOOOiAfPHRENdSA3Jpd7+TA6KEEl+fxpeGdNKtTXL6BQ9nQQo1E82YX8Mn3iZrZzsGVsCbGEmbbkcA==", - "license": "MIT", - "dependencies": { - "@babel/runtime-corejs3": "^7.14.0", - "core-js": "^3.12.1", - "react-dictate-button": "^3.0.1-main.1bbfe61", - "use-ref-from": "^0.1.0" - }, - "peerDependencies": { - "react": ">=16.8.6" - } - }, "packages/component/node_modules/type-fest": { "version": "4.26.1", "dev": true, diff --git a/packages/component/package.json b/packages/component/package.json index 9c63dd2395..2f1d83db67 100644 --- a/packages/component/package.json +++ b/packages/component/package.json @@ -151,7 +151,7 @@ "prop-types": "15.8.1", "punycode": "2.3.1", "react-chain-of-responsibility": "0.2.0-main.3cb47ab", - "react-dictate-button": "^3.0.1-main.1bbfe61", + "react-dictate-button": "^3.0.1-main.ed9daf3", "react-film": "3.1.1-main.f623bf6", "react-redux": "7.2.9", "react-say": "2.1.0", From da5fa78e8a993fd31f552e1109fd3fe9daf2ea8e Mon Sep 17 00:00:00 2001 From: William Wong Date: Tue, 11 Feb 2025 21:22:36 +0000 Subject: [PATCH 15/43] Continue to show "Listening..." --- __tests__/html2/speech/bargeIn/behavior.html | 125 +++++++++++++----- .../component/src/SendBox/BasicSendBox.tsx | 15 +-- .../src/SendBox/DictationInterims.tsx | 20 ++- packages/component/src/testIds.ts | 1 + 4 files changed, 110 insertions(+), 51 deletions(-) diff --git a/__tests__/html2/speech/bargeIn/behavior.html b/__tests__/html2/speech/bargeIn/behavior.html index 39537957f1..d0225e2098 100644 --- a/__tests__/html2/speech/bargeIn/behavior.html +++ b/__tests__/html2/speech/bargeIn/behavior.html @@ -33,7 +33,7 @@ const { testHelpers: { createDirectLineEmulator }, - WebChat: { renderWebChat } + WebChat: { renderWebChat, testIds } } = window; run(async function () { @@ -72,32 +72,76 @@ await pageConditions.uiConnected(); - await ( - await directLine.actPostActivity(async () => { - // WHEN: Microphone button is clicked. - await pageObjects.clickMicrophoneButton(); + // WHEN: Microphone button is clicked. + await pageObjects.clickMicrophoneButton(); - // THEN: Should construct SpeechRecognition(). - expect(ponyfill.SpeechRecognition).toHaveBeenCalledTimes(1); + // THEN: Should call SpeechSynthesis.start() for fulfilling user-gesture requirement for speech synthesis by synthesizing an empty utterance. + await waitFor(() => expect(speechSynthesisSpeak).toHaveBeenCalledTimes(1)); - const { value: speechRecognition1 } = ponyfill.SpeechRecognition.mock.results[0]; + expect(speechSynthesisSpeak).toHaveBeenLastCalledWith(expect.any(SpeechSynthesisUtterance)); + expect(speechSynthesisSpeak).toHaveBeenLastCalledWith(expect.objectContaining({ text: '' })); - // THEN: Should call SpeechRecognition.start(). - expect(speechRecognition1.start).toHaveBeenCalledTimes(1); - expect(speechRecognition1).toHaveProperty('continuous', true); + // WHEN: Synthesized the empty utterance. 
+ speechSynthesisSpeak.mock.calls[0][0].dispatchEvent( + new SpeechSynthesisEvent('start', { utterance: speechSynthesisSpeak.mock.calls[0] }) + ); + speechSynthesisSpeak.mock.calls[0][0].dispatchEvent( + new SpeechSynthesisEvent('end', { utterance: speechSynthesisSpeak.mock.calls[0] }) + ); - // WHEN: Recognition started and interims result is dispatched. - speechRecognition1.dispatchEvent(new Event('start')); - speechRecognition1.dispatchEvent(new Event('audiostart')); - speechRecognition1.dispatchEvent(new Event('soundstart')); - speechRecognition1.dispatchEvent(new Event('speechstart')); - speechRecognition1.dispatchEvent( - new SpeechRecognitionEvent('result', { - results: new SpeechRecognitionResultList( - new SpeechRecognitionResult(new SpeechRecognitionAlternative(0, 'Hello')) - ) - }) - ); + // THEN: Should have stopped synthesis. + expect(speechSynthesis).toHaveProperty('paused', false); + expect(speechSynthesis).toHaveProperty('pending', false); + expect(speechSynthesis).toHaveProperty('speaking', false); + + // AFTER: Microphone button is clicked. + // THEN: Should construct a SpeechRecognition() instance. + expect(ponyfill.SpeechRecognition).toHaveBeenCalledTimes(1); + + const { value: speechRecognition1 } = ponyfill.SpeechRecognition.mock.results[0]; + + // THEN: Should call SpeechRecognition.start(). + expect(speechRecognition1.start).toHaveBeenCalledTimes(1); + expect(speechRecognition1).toHaveProperty('continuous', true); + + // THEN: Send box should say "Listening…" and hide the original text box. + expect(pageElements.sendBoxTextBox()).toBeFalsy(); + expect(document.querySelector(`[data-testid="${testIds.sendBoxSpeechBox}"]`)).toHaveProperty( + 'textContent', + 'Starting…' + ); + + // WHEN: Recognition started and interims result is dispatched. + speechRecognition1.dispatchEvent(new Event('start')); + speechRecognition1.dispatchEvent(new Event('audiostart')); + speechRecognition1.dispatchEvent(new Event('soundstart')); + speechRecognition1.dispatchEvent(new Event('speechstart')); + + // THEN: Send box should say "Listening…" and hide the original text box. + expect(pageElements.sendBoxTextBox()).toBeFalsy(); + expect(document.querySelector(`[data-testid="${testIds.sendBoxSpeechBox}"]`)).toHaveProperty( + 'textContent', + 'Listening…' + ); + + // WHEN: Recognized interims of "Hello". + speechRecognition1.dispatchEvent( + new SpeechRecognitionEvent('result', { + results: new SpeechRecognitionResultList( + new SpeechRecognitionResult(new SpeechRecognitionAlternative(0, 'Hello')) + ) + }) + ); + + // THEN: Should display "Hello" in the send box. + expect(document.querySelector(`[data-testid="${testIds.sendBoxSpeechBox}"]`)).toHaveProperty( + 'textContent', + expect.stringMatching(/^Hello\s/u) + ); + + // WHEN: Completed recognizing "Hello, World!". + await ( + await directLine.actPostActivity(async () => { speechRecognition1.dispatchEvent( new SpeechRecognitionEvent('result', { results: new SpeechRecognitionResultList( @@ -119,21 +163,26 @@ await pageConditions.numActivitiesShown(1); expect(pageElements.activityContents()[0]).toHaveProperty('textContent', 'Hello, World!'); - // WHEN: Bot replied. + // THEN: Send box should say "Listening…" and hide the original text box. + expect(pageElements.sendBoxTextBox()).toBeFalsy(); + expect(document.querySelector(`[data-testid="${testIds.sendBoxSpeechBox}"]`)).toHaveProperty( + 'textContent', + 'Listening…' + ); + + // WHEN: Bot replied "Aloha!" 
await directLine.emulateIncomingActivity({ inputHint: 'expectingInput', text: 'Aloha!', type: 'message' }); - // THEN: Should send a message and the reply from the bot. + // THEN: Should show the reply "Aloha!" await pageConditions.numActivitiesShown(2); expect(pageElements.activityContents()[1]).toHaveProperty('textContent', 'Aloha!'); - // THEN: Should call SpeechSynthesis.start() again. + // THEN: Should call SpeechSynthesis.speak() again with utterance of "Aloha!" await waitFor(() => expect(speechSynthesisSpeak).toHaveBeenCalledTimes(2)); - - // THEN: Should synthesize "Aloha!". expect(speechSynthesisSpeak).toHaveBeenLastCalledWith(expect.any(SpeechSynthesisUtterance)); expect(speechSynthesisSpeak).toHaveBeenLastCalledWith(expect.objectContaining({ text: 'Aloha!' })); @@ -143,11 +192,18 @@ ); // THEN: Should start speaking. + expect(speechSynthesis).toHaveProperty('paused', false); + expect(speechSynthesis).toHaveProperty('pending', false); expect(speechSynthesis).toHaveProperty('speaking', true); - // WHEN: Barge-in with interims. - const { value: speechRecognition1 } = ponyfill.SpeechRecognition.mock.results[0]; + // THEN: Send box should continue to show "Listening…" + expect(pageElements.sendBoxTextBox()).toBeFalsy(); + expect(document.querySelector(`[data-testid="${testIds.sendBoxSpeechBox}"]`)).toHaveProperty( + 'textContent', + 'Listening…' + ); + // WHEN: Barge-in with interims of "Good". speechRecognition1.dispatchEvent( new SpeechRecognitionEvent('result', { results: new SpeechRecognitionResultList( @@ -157,9 +213,12 @@ ); // THEN: Should cancel speech synthesis. - await waitFor(() => expect(speechSynthesisCancel).toHaveBeenCalledTimes(1)); + expect(speechSynthesisCancel).toHaveBeenCalledTimes(1); + expect(speechSynthesis).toHaveProperty('paused', false); + expect(speechSynthesis).toHaveProperty('pending', false); + expect(speechSynthesis).toHaveProperty('speaking', false); - // WHEN: Completed recognition. + // WHEN: Completed recognition of "Good morning!" await ( await directLine.actPostActivity(async () => { const { value: speechRecognition1 } = ponyfill.SpeechRecognition.mock.results[0]; @@ -182,7 +241,7 @@ // THEN: Should not have called SpeechRecognition.abort(). expect(speechRecognitionAbort).toHaveBeenCalledTimes(0); - // WHEN: Click on the microphone button. + // WHEN: Click on the microphone button to stop recognition. await pageObjects.clickMicrophoneButton(); // THEN: Should have called SpeechRecognition.abort() once. 
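These behavior tests lean on jest-mock loaded as an ES module: fn() builds the recording SpeechRecognition factory, and spyOn() wraps methods such as abort(), start(), speak() and cancel() while still calling through to the mock's real implementation. The pages import fn from esm.sh's jest-mock build; spyOn appears to come from the test harness, but the same npm package also exports it, which this compact, illustrative sketch assumes:

    import { fn, spyOn } from 'jest-mock'; // Resolved to https://esm.sh/jest-mock by the import map.

    const synthesis = {
      cancel() {
        console.log('really cancelled');
      }
    };

    // spyOn() records calls but, by default, still invokes the original method.
    const cancelSpy = spyOn(synthesis, 'cancel');

    synthesis.cancel(); // Logs "really cancelled".
    console.log(cancelSpy.mock.calls.length); // 1

    // fn() records every call and its return value under .mock.calls / .mock.results.
    const factory = fn().mockImplementation(() => ({ kind: 'recognizer' }));
    const recognizer = factory();

    console.log(factory.mock.results[0].value === recognizer); // true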
diff --git a/packages/component/src/SendBox/BasicSendBox.tsx b/packages/component/src/SendBox/BasicSendBox.tsx index e4acd9b869..864de878b2 100644 --- a/packages/component/src/SendBox/BasicSendBox.tsx +++ b/packages/component/src/SendBox/BasicSendBox.tsx @@ -13,13 +13,11 @@ import SendButton from './SendButton'; import SuggestedActions from './SuggestedActions'; import TextBox from './TextBox'; -import type { WebChatActivity } from 'botframework-webchat-core'; - const { DictateState: { DICTATING, STARTING } } = Constants; -const { useActivities, useDirection, useDictateState, useStyleOptions } = hooks; +const { useDirection, useDictateState, useStyleOptions } = hooks; const ROOT_STYLE = { '&.webchat__send-box': { @@ -31,19 +29,10 @@ const ROOT_STYLE = { } }; -// TODO: [P3] We should consider exposing core/src/definitions and use it instead -function activityIsSpeakingOrQueuedToSpeak(activity: WebChatActivity) { - return activity.type === 'message' && activity.channelData?.speak; -} - function useSendBoxSpeechInterimsVisible(): [boolean] { - const [activities] = useActivities(); const [dictateState] = useDictateState(); - return [ - (dictateState === STARTING || dictateState === DICTATING) && - !activities.filter(activityIsSpeakingOrQueuedToSpeak).length - ]; + return [dictateState === STARTING || dictateState === DICTATING]; } type BasicSendBoxProps = Readonly<{ diff --git a/packages/component/src/SendBox/DictationInterims.tsx b/packages/component/src/SendBox/DictationInterims.tsx index ff69dd9c42..340af54cda 100644 --- a/packages/component/src/SendBox/DictationInterims.tsx +++ b/packages/component/src/SendBox/DictationInterims.tsx @@ -1,13 +1,14 @@ /* eslint react/no-array-index-key: "off" */ -import { Constants } from 'botframework-webchat-core'; import { hooks } from 'botframework-webchat-api'; +import { Constants } from 'botframework-webchat-core'; import classNames from 'classnames'; import PropTypes from 'prop-types'; import React, { FC } from 'react'; -import useStyleSet from '../hooks/useStyleSet'; import { useStyleToEmotionObject } from '../hooks/internal/styleToEmotionObject'; +import useStyleSet from '../hooks/useStyleSet'; +import testIds from '../testIds'; const { DictateState: { DICTATING, STARTING, STOPPING } @@ -32,13 +33,19 @@ const DictationInterims: FC = ({ className }) => { const rootClassName = useStyleToEmotionObject()(ROOT_STYLE) + ''; return dictateState === STARTING || dictateState === STOPPING ? ( -

+

{dictateState === STARTING && localize('SPEECH_INPUT_STARTING')}

) : ( dictateState === DICTATING && (dictateInterims.length ? ( -

+

{dictateInterims.map((interim, index) => ( {interim} @@ -47,7 +54,10 @@ const DictationInterims: FC = ({ className }) => { ))}

) : ( -

+

{localize('SPEECH_INPUT_LISTENING')}

)) diff --git a/packages/component/src/testIds.ts b/packages/component/src/testIds.ts index f72e256c8c..d58f326907 100644 --- a/packages/component/src/testIds.ts +++ b/packages/component/src/testIds.ts @@ -1,6 +1,7 @@ const testIds = { codeBlockCopyButton: 'code block copy button', copyButton: 'copy button', + sendBoxSpeechBox: 'send box speech box', sendBoxTextBox: 'send box text area', viewCodeButton: 'view code button' }; From 8ef07afafa10d04b8b3c686bdf9c737bbf8d50e3 Mon Sep 17 00:00:00 2001 From: William Wong Date: Wed, 12 Feb 2025 09:20:47 +0000 Subject: [PATCH 16/43] Bump react-dictate-button --- package-lock.json | 32 ++++++++++++++++---------------- packages/component/package.json | 2 +- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/package-lock.json b/package-lock.json index 4e8a7d1a2c..524c567bb0 100644 --- a/package-lock.json +++ b/package-lock.json @@ -18609,21 +18609,6 @@ "react": ">=16.8.0" } }, - "node_modules/react-dictate-button": { - "version": "3.0.1-main.ed9daf3", - "resolved": "https://registry.npmjs.org/react-dictate-button/-/react-dictate-button-3.0.1-main.ed9daf3.tgz", - "integrity": "sha512-kQjCDtHvwstdlhzq9NsMPFvCCk8X4pkBAQ7MUqeIyXwyfIi41FIza93kYWb7Xb0USvDfZ/ZS5v5Eb/e4R1CLCA==", - "license": "MIT", - "dependencies": { - "@babel/runtime-corejs3": "^7.14.0", - "core-js": "^3.12.1", - "react-dictate-button": "^3.0.1-main.ed9daf3", - "use-ref-from": "^0.1.0" - }, - "peerDependencies": { - "react": ">=16.8.6" - } - }, "node_modules/react-dom": { "version": "16.8.6", "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-16.8.6.tgz", @@ -24255,7 +24240,7 @@ "prop-types": "15.8.1", "punycode": "2.3.1", "react-chain-of-responsibility": "0.2.0-main.3cb47ab", - "react-dictate-button": "^3.0.1-main.ed9daf3", + "react-dictate-button": "^3.0.1-main.24a5a4e", "react-film": "3.1.1-main.f623bf6", "react-redux": "7.2.9", "react-say": "2.1.0", @@ -24301,6 +24286,21 @@ "node": ">=6" } }, + "packages/component/node_modules/react-dictate-button": { + "version": "3.0.1-main.24a5a4e", + "resolved": "https://registry.npmjs.org/react-dictate-button/-/react-dictate-button-3.0.1-main.24a5a4e.tgz", + "integrity": "sha512-ReknENPEH0CCMWtA/wLYg7MNVkB5sz2UMGXis9SatC6JDHbuy2s2rBBkBmUN6kWTG4NIdzjSQpxrLeCNth4mMw==", + "license": "MIT", + "dependencies": { + "@babel/runtime-corejs3": "^7.14.0", + "core-js": "^3.12.1", + "react-dictate-button": "^3.0.1-main.24a5a4e", + "use-ref-from": "^0.1.0" + }, + "peerDependencies": { + "react": ">=16.8.6" + } + }, "packages/component/node_modules/type-fest": { "version": "4.26.1", "dev": true, diff --git a/packages/component/package.json b/packages/component/package.json index 2f1d83db67..b1e0d631a3 100644 --- a/packages/component/package.json +++ b/packages/component/package.json @@ -151,7 +151,7 @@ "prop-types": "15.8.1", "punycode": "2.3.1", "react-chain-of-responsibility": "0.2.0-main.3cb47ab", - "react-dictate-button": "^3.0.1-main.ed9daf3", + "react-dictate-button": "^3.0.1-main.24a5a4e", "react-film": "3.1.1-main.f623bf6", "react-redux": "7.2.9", "react-say": "2.1.0", From 5ca1ea9e4210a9bbf0832310c37e69f67a012085 Mon Sep 17 00:00:00 2001 From: William Wong Date: Wed, 12 Feb 2025 09:21:35 +0000 Subject: [PATCH 17/43] Add more expectations --- __tests__/html2/speech/comprehensive.html | 74 ++++++++++++++--------- 1 file changed, 45 insertions(+), 29 deletions(-) diff --git a/__tests__/html2/speech/comprehensive.html b/__tests__/html2/speech/comprehensive.html index 84b63bc878..e4fa56e6b6 100644 --- 
a/__tests__/html2/speech/comprehensive.html +++ b/__tests__/html2/speech/comprehensive.html @@ -32,7 +32,7 @@ const { testHelpers: { createDirectLineEmulator }, - WebChat: { renderWebChat } + WebChat: { renderWebChat, testIds } } = window; run(async function () { @@ -66,38 +66,51 @@ await pageConditions.uiConnected(); - await ( - await directLine.actPostActivity(async () => { - // WHEN: Microphone button is clicked. - await pageObjects.clickMicrophoneButton(); - - // THEN: Should create new SpeechSynthesis() for priming user gesture requirement. - await waitFor(() => expect(speechSynthesisSpeak).toHaveBeenCalledTimes(1)); - expect(speechSynthesis).toHaveProperty('paused', false); - expect(speechSynthesis).toHaveProperty('pending', false); - expect(speechSynthesis).toHaveProperty('speaking', true); - - // WHEN: Priming is done. - speechSynthesisSpeak.mock.calls[0][0].dispatchEvent( - new SpeechSynthesisEvent('end', { utterance: speechSynthesisSpeak.mock.calls[0] }) - ); - expect(speechSynthesis).toHaveProperty('paused', false); - expect(speechSynthesis).toHaveProperty('pending', false); - expect(speechSynthesis).toHaveProperty('speaking', false); + // WHEN: Microphone button is clicked. + await pageObjects.clickMicrophoneButton(); - // THEN: Should construct SpeechRecognition(). - expect(ponyfill.SpeechRecognition).toHaveBeenCalledTimes(1); + // THEN: Should create new SpeechSynthesis() for priming user gesture requirement. + await waitFor(() => expect(speechSynthesisSpeak).toHaveBeenCalledTimes(1)); + expect(speechSynthesis).toHaveProperty('paused', false); + expect(speechSynthesis).toHaveProperty('pending', false); + expect(speechSynthesis).toHaveProperty('speaking', true); + expect(speechSynthesisSpeak).toHaveBeenLastCalledWith(expect.any(SpeechSynthesisUtterance)); + expect(speechSynthesisSpeak).toHaveBeenLastCalledWith(expect.objectContaining({ text: '' })); - const { value: speechRecognition1 } = ponyfill.SpeechRecognition.mock.results[0]; + // WHEN: Priming is done. + speechSynthesisSpeak.mock.calls[0][0].dispatchEvent( + new SpeechSynthesisEvent('start', { utterance: speechSynthesisSpeak.mock.calls[0] }) + ); + speechSynthesisSpeak.mock.calls[0][0].dispatchEvent( + new SpeechSynthesisEvent('end', { utterance: speechSynthesisSpeak.mock.calls[0] }) + ); + expect(speechSynthesis).toHaveProperty('paused', false); + expect(speechSynthesis).toHaveProperty('pending', false); + expect(speechSynthesis).toHaveProperty('speaking', false); + + // THEN: Should construct SpeechRecognition(). + expect(ponyfill.SpeechRecognition).toHaveBeenCalledTimes(1); + + const { value: speechRecognition1 } = ponyfill.SpeechRecognition.mock.results[0]; - // THEN: Should call SpeechRecognition.start(). - expect(speechRecognition1.start).toHaveBeenCalledTimes(1); + // THEN: Should call SpeechRecognition.start(). + expect(speechRecognition1.start).toHaveBeenCalledTimes(1); + + // THEN: Send box should say "Starting…" and hide the original text box. + expect(pageElements.sendBoxTextBox()).toBeFalsy(); + expect(document.querySelector(`[data-testid="${testIds.sendBoxSpeechBox}"]`)).toHaveProperty( + 'textContent', + 'Starting…' + ); - // WHEN: Recognition started and interims result is dispatched. - speechRecognition1.dispatchEvent(new Event('start')); - speechRecognition1.dispatchEvent(new Event('audiostart')); - speechRecognition1.dispatchEvent(new Event('soundstart')); - speechRecognition1.dispatchEvent(new Event('speechstart')); + // WHEN: Recognition started and interims result is dispatched.
+ speechRecognition1.dispatchEvent(new Event('start')); + speechRecognition1.dispatchEvent(new Event('audiostart')); + speechRecognition1.dispatchEvent(new Event('soundstart')); + speechRecognition1.dispatchEvent(new Event('speechstart')); + + await ( + await directLine.actPostActivity(async () => { speechRecognition1.dispatchEvent( new SpeechRecognitionEvent('result', { results: new SpeechRecognitionResultList( @@ -143,6 +156,9 @@ await pageConditions.numActivitiesShown(1); expect(pageElements.activityContents()[0]).toHaveProperty('textContent', 'Hello, World!'); + // THEN: Send box should go back to input mode. + expect(pageElements.sendBoxTextBox()).toBeTruthy(); + // WHEN: Bot replied. await directLine.emulateIncomingActivity({ inputHint: 'expectingInput', From c9c9e69d0d861c2a5296a533c0096580a83432de Mon Sep 17 00:00:00 2001 From: William Wong Date: Wed, 12 Feb 2025 19:34:55 +0000 Subject: [PATCH 18/43] Clean up --- __tests__/html2/speech/bargeIn/behavior.html | 5 ++-- __tests__/html2/speech/comprehensive.html | 27 ++++++++++++-------- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/__tests__/html2/speech/bargeIn/behavior.html b/__tests__/html2/speech/bargeIn/behavior.html index d0225e2098..9836aec468 100644 --- a/__tests__/html2/speech/bargeIn/behavior.html +++ b/__tests__/html2/speech/bargeIn/behavior.html @@ -75,7 +75,7 @@ // WHEN: Microphone button is clicked. await pageObjects.clickMicrophoneButton(); - // THEN: Should call SpeechSynthesis.start() for fulfilling user-gesture requirement for speech synthesis by synthesizing an empty utterance. + // THEN: Should call SpeechSynthesis.speak() for fulfilling user gesture requirement for speech synthesis by synthesizing an empty utterance. await waitFor(() => expect(speechSynthesisSpeak).toHaveBeenCalledTimes(1)); expect(speechSynthesisSpeak).toHaveBeenLastCalledWith(expect.any(SpeechSynthesisUtterance)); @@ -172,6 +172,7 @@ // WHEN: Bot replied "Aloha!" await directLine.emulateIncomingActivity({ + // "expectingInput" should allow barge-in during speech synthesis. inputHint: 'expectingInput', text: 'Aloha!', type: 'message' }); @@ -196,7 +197,7 @@ expect(speechSynthesis).toHaveProperty('pending', false); expect(speechSynthesis).toHaveProperty('speaking', true); - // THEN: Send box should continue to show "Listening…" + // THEN: Send box should continue to show "Listening…" as barge-in is allowed through "expectingInput". expect(pageElements.sendBoxTextBox()).toBeFalsy(); expect(document.querySelector(`[data-testid="${testIds.sendBoxSpeechBox}"]`)).toHaveProperty( 'textContent', diff --git a/__tests__/html2/speech/comprehensive.html b/__tests__/html2/speech/comprehensive.html index e4fa56e6b6..51fd7125e3 100644 --- a/__tests__/html2/speech/comprehensive.html +++ b/__tests__/html2/speech/comprehensive.html @@ -161,7 +161,7 @@ // WHEN: Bot replied. await directLine.emulateIncomingActivity({ - inputHint: 'expectingInput', + inputHint: 'expectingInput', // "expectingInput" should turn the microphone back on after synthesis completed. text: 'Aloha!', type: 'message' }); @@ -170,31 +170,38 @@ await pageConditions.numActivitiesShown(2); expect(pageElements.activityContents()[1]).toHaveProperty('textContent', 'Aloha!'); - // THEN: Should call SpeechSynthesis.start() again. + // THEN: Should call SpeechSynthesis.speak() again. await waitFor(() => expect(speechSynthesisSpeak).toHaveBeenCalledTimes(2)); - // THEN: Should synthesize "Aloha!". + // THEN: Should start synthesizing "Aloha!".
expect(speechSynthesisSpeak).toHaveBeenLastCalledWith(expect.any(SpeechSynthesisUtterance)); expect(speechSynthesisSpeak).toHaveBeenLastCalledWith(expect.objectContaining({ text: 'Aloha!' })); + // THEN: SpeechSynthesis.speaking should be true. + expect(speechSynthesis).toHaveProperty('paused', false); + expect(speechSynthesis).toHaveProperty('pending', false); + expect(speechSynthesis).toHaveProperty('speaking', true); + // WHEN: Synthesis completed. speechSynthesisSpeak.mock.calls[1][0].dispatchEvent( new SpeechSynthesisEvent('end', { utterance: speechSynthesisSpeak.mock.calls[1] }) ); + + // THEN: SpeechSynthesis.speaking should be false. expect(speechSynthesis).toHaveProperty('paused', false); expect(speechSynthesis).toHaveProperty('pending', false); expect(speechSynthesis).toHaveProperty('speaking', false); - await ( - await directLine.actPostActivity(async () => { - // THEN: Should create new SpeechRecognition() again. - await waitFor(() => expect(ponyfill.SpeechRecognition).toHaveBeenCalledTimes(2)); + // THEN: Should create new SpeechRecognition() again as "expectingInput" should restart speech recognition. + await waitFor(() => expect(ponyfill.SpeechRecognition).toHaveBeenCalledTimes(2)); - const { value: speechRecognition2 } = ponyfill.SpeechRecognition.mock.results[1]; + const { value: speechRecognition2 } = ponyfill.SpeechRecognition.mock.results[1]; - // THEN: Should have called SpeechRecognition.start(). - expect(speechRecognition2.start).toHaveBeenCalledTimes(1); + // THEN: Should have called SpeechRecognition.start(). + expect(speechRecognition2.start).toHaveBeenCalledTimes(1); + await ( + await directLine.actPostActivity(async () => { // WHEN: Recognized as "Good morning!" without interims. speechRecognition2.dispatchEvent(new Event('start')); speechRecognition2.dispatchEvent(new Event('audiostart')); From 14822c062dbdc213b4ffb70907c273e698a711d7 Mon Sep 17 00:00:00 2001 From: William Wong Date: Wed, 12 Feb 2025 20:40:06 +0000 Subject: [PATCH 19/43] Clean up --- __tests__/html2/speech/bargeIn/behavior.html | 16 ++-------------- __tests__/html2/speech/comprehensive.html | 6 +----- 2 files changed, 3 insertions(+), 19 deletions(-) diff --git a/__tests__/html2/speech/bargeIn/behavior.html b/__tests__/html2/speech/bargeIn/behavior.html index 9836aec468..753389a34b 100644 --- a/__tests__/html2/speech/bargeIn/behavior.html +++ b/__tests__/html2/speech/bargeIn/behavior.html @@ -37,17 +37,14 @@ } = window; run(async function () { - let speechRecognitionAbort; - let speechRecognitionStart; - const speechSynthesis = new SpeechSynthesis(); const ponyfill = { SpeechGrammarList, SpeechRecognition: fn().mockImplementation(() => { const speechRecognition = new SpeechRecognition(); - speechRecognitionAbort = spyOn(speechRecognition, 'abort'); - speechRecognitionStart = spyOn(speechRecognition, 'start'); + spyOn(speechRecognition, 'abort'); + spyOn(speechRecognition, 'start'); return speechRecognition; }), @@ -82,9 +79,6 @@ expect(speechSynthesisSpeak).toHaveBeenLastCalledWith(expect.objectContaining({ text: '' })); // WHEN: Synthesized the empty utterance. 
- speechSynthesisSpeak.mock.calls[0][0].dispatchEvent( - new SpeechSynthesisEvent('start', { utterance: speechSynthesisSpeak.mock.calls[0] }) - ); speechSynthesisSpeak.mock.calls[0][0].dispatchEvent( new SpeechSynthesisEvent('end', { utterance: speechSynthesisSpeak.mock.calls[0] }) ); @@ -187,12 +181,6 @@ expect(speechSynthesisSpeak).toHaveBeenLastCalledWith(expect.any(SpeechSynthesisUtterance)); expect(speechSynthesisSpeak).toHaveBeenLastCalledWith(expect.objectContaining({ text: 'Aloha!' })); - // WHEN: "start" event is emitted for synthesis. - speechSynthesisSpeak.mock.calls[1][0].dispatchEvent( - new SpeechSynthesisEvent('start', { utterance: speechSynthesisSpeak.mock.calls[1] }) - ); - - // THEN: Should start speaking. expect(speechSynthesis).toHaveProperty('paused', false); expect(speechSynthesis).toHaveProperty('pending', false); expect(speechSynthesis).toHaveProperty('speaking', true); diff --git a/__tests__/html2/speech/comprehensive.html b/__tests__/html2/speech/comprehensive.html index 51fd7125e3..53931ad16f 100644 --- a/__tests__/html2/speech/comprehensive.html +++ b/__tests__/html2/speech/comprehensive.html @@ -42,14 +42,13 @@ SpeechRecognition: fn().mockImplementation(() => { const speechRecognition = new SpeechRecognition(); - speechRecognitionStart = spyOn(speechRecognition, 'start'); + spyOn(speechRecognition, 'start'); return speechRecognition; }), speechSynthesis, SpeechSynthesisUtterance }; - let speechRecognitionStart; const speechSynthesisSpeak = spyOn(speechSynthesis, 'speak'); @@ -78,9 +77,6 @@ expect(speechSynthesisSpeak).toHaveBeenLastCalledWith(expect.objectContaining({ text: '' })); // WHEN: Priming is done. - speechSynthesisSpeak.mock.calls[0][0].dispatchEvent( - new SpeechSynthesisEvent('start', { utterance: speechSynthesisSpeak.mock.calls[0] }) - ); speechSynthesisSpeak.mock.calls[0][0].dispatchEvent( new SpeechSynthesisEvent('end', { utterance: speechSynthesisSpeak.mock.calls[0] }) ); From 1c620855ca7105dd7f538013a35dd027ba3e6ea5 Mon Sep 17 00:00:00 2001 From: William Wong Date: Wed, 12 Feb 2025 20:40:09 +0000 Subject: [PATCH 20/43] Add tests --- .../speech/inputHint.acceptingInput.html | 150 +++++++++++++++++ .../html2/speech/inputHint.ignoringInput.html | 152 ++++++++++++++++++ 2 files changed, 302 insertions(+) create mode 100644 __tests__/html2/speech/inputHint.acceptingInput.html create mode 100644 __tests__/html2/speech/inputHint.ignoringInput.html diff --git a/__tests__/html2/speech/inputHint.acceptingInput.html b/__tests__/html2/speech/inputHint.acceptingInput.html new file mode 100644 index 0000000000..767aeacc09 --- /dev/null +++ b/__tests__/html2/speech/inputHint.acceptingInput.html @@ -0,0 +1,150 @@ + + + + + + + + + +
+ + + + diff --git a/__tests__/html2/speech/inputHint.ignoringInput.html b/__tests__/html2/speech/inputHint.ignoringInput.html new file mode 100644 index 0000000000..dd3d5d3518 --- /dev/null +++ b/__tests__/html2/speech/inputHint.ignoringInput.html @@ -0,0 +1,152 @@ + + + + + + + + + +
+ + + + From 1815c5758851de4bf1616bb4b6ad6bfeabf43254 Mon Sep 17 00:00:00 2001 From: William Wong Date: Wed, 12 Feb 2025 20:48:57 +0000 Subject: [PATCH 21/43] Clean up --- __tests__/html2/speech/bargeIn/behavior.html | 26 ++++++++++---------- __tests__/html2/speech/comprehensive.html | 22 ++++++++--------- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/__tests__/html2/speech/bargeIn/behavior.html b/__tests__/html2/speech/bargeIn/behavior.html index 753389a34b..dd7463aa18 100644 --- a/__tests__/html2/speech/bargeIn/behavior.html +++ b/__tests__/html2/speech/bargeIn/behavior.html @@ -52,8 +52,8 @@ SpeechSynthesisUtterance }; - const speechSynthesisCancel = spyOn(speechSynthesis, 'cancel'); - const speechSynthesisSpeak = spyOn(speechSynthesis, 'speak'); + spyOn(speechSynthesis, 'cancel'); + spyOn(speechSynthesis, 'speak'); const { directLine, store } = createDirectLineEmulator(); @@ -73,14 +73,14 @@ await pageObjects.clickMicrophoneButton(); // THEN: Should call SpeechSynthesis.speak() for fulfilling user gesture requirement for speech synthesis by synthesizing an empty utterance. - await waitFor(() => expect(speechSynthesisSpeak).toHaveBeenCalledTimes(1)); + await waitFor(() => expect(speechSynthesis.speak).toHaveBeenCalledTimes(1)); - expect(speechSynthesisSpeak).toHaveBeenLastCalledWith(expect.any(SpeechSynthesisUtterance)); - expect(speechSynthesisSpeak).toHaveBeenLastCalledWith(expect.objectContaining({ text: '' })); + expect(speechSynthesis.speak).toHaveBeenLastCalledWith(expect.any(SpeechSynthesisUtterance)); + expect(speechSynthesis.speak).toHaveBeenLastCalledWith(expect.objectContaining({ text: '' })); // WHEN: Synthesized the empty utterance. - speechSynthesisSpeak.mock.calls[0][0].dispatchEvent( - new SpeechSynthesisEvent('end', { utterance: speechSynthesisSpeak.mock.calls[0] }) + speechSynthesis.speak.mock.calls[0][0].dispatchEvent( + new SpeechSynthesisEvent('end', { utterance: speechSynthesis.speak.mock.calls[0] }) ); // THEN: Should have stopped synthesis. @@ -177,9 +177,9 @@ expect(pageElements.activityContents()[1]).toHaveProperty('textContent', 'Aloha!'); // THEN: Should call SpeechSynthesis.speak() again with utterance of "Aloha!" - await waitFor(() => expect(speechSynthesisSpeak).toHaveBeenCalledTimes(2)); - expect(speechSynthesisSpeak).toHaveBeenLastCalledWith(expect.any(SpeechSynthesisUtterance)); - expect(speechSynthesisSpeak).toHaveBeenLastCalledWith(expect.objectContaining({ text: 'Aloha!' })); + await waitFor(() => expect(speechSynthesis.speak).toHaveBeenCalledTimes(2)); + expect(speechSynthesis.speak).toHaveBeenLastCalledWith(expect.any(SpeechSynthesisUtterance)); + expect(speechSynthesis.speak).toHaveBeenLastCalledWith(expect.objectContaining({ text: 'Aloha!' })); expect(speechSynthesis).toHaveProperty('paused', false); expect(speechSynthesis).toHaveProperty('pending', false); @@ -202,7 +202,7 @@ ); // THEN: Should cancel speech synthesis. - expect(speechSynthesisCancel).toHaveBeenCalledTimes(1); + expect(speechSynthesis.cancel).toHaveBeenCalledTimes(1); expect(speechSynthesis).toHaveProperty('paused', false); expect(speechSynthesis).toHaveProperty('pending', false); expect(speechSynthesis).toHaveProperty('speaking', false); @@ -228,13 +228,13 @@ expect(pageElements.activityContents()[2]).toHaveProperty('textContent', 'Good morning!'); // THEN: Should not have called SpeechRecognition.abort(). 
- expect(speechRecognitionAbort).toHaveBeenCalledTimes(0); + expect(speechRecognition1.abort).toHaveBeenCalledTimes(0); // WHEN: Click on the microphone button to stop recognition. await pageObjects.clickMicrophoneButton(); // THEN: Should have called SpeechRecognition.abort() once. - expect(speechRecognitionAbort).toHaveBeenCalledTimes(1); + expect(speechRecognition1.abort).toHaveBeenCalledTimes(1); // WHEN: Speech recognition dispatches abort error event. speechRecognition1.dispatchEvent(new Event('speechend')); diff --git a/__tests__/html2/speech/comprehensive.html index 53931ad16f..2a68058645 100644 --- a/__tests__/html2/speech/comprehensive.html +++ b/__tests__/html2/speech/comprehensive.html @@ -50,7 +50,7 @@ SpeechSynthesisUtterance }; - const speechSynthesisSpeak = spyOn(speechSynthesis, 'speak'); + spyOn(speechSynthesis, 'speak'); const { directLine, store } = createDirectLineEmulator(); @@ -69,16 +69,16 @@ await pageObjects.clickMicrophoneButton(); // THEN: Should create new SpeechSynthesis() for priming user gesture requirement. - await waitFor(() => expect(speechSynthesisSpeak).toHaveBeenCalledTimes(1)); + await waitFor(() => expect(speechSynthesis.speak).toHaveBeenCalledTimes(1)); expect(speechSynthesis).toHaveProperty('paused', false); expect(speechSynthesis).toHaveProperty('pending', false); expect(speechSynthesis).toHaveProperty('speaking', true); - expect(speechSynthesisSpeak).toHaveBeenLastCalledWith(expect.any(SpeechSynthesisUtterance)); - expect(speechSynthesisSpeak).toHaveBeenLastCalledWith(expect.objectContaining({ text: '' })); + expect(speechSynthesis.speak).toHaveBeenLastCalledWith(expect.any(SpeechSynthesisUtterance)); + expect(speechSynthesis.speak).toHaveBeenLastCalledWith(expect.objectContaining({ text: '' })); // WHEN: Priming is done. - speechSynthesisSpeak.mock.calls[0][0].dispatchEvent( - new SpeechSynthesisEvent('end', { utterance: speechSynthesisSpeak.mock.calls[0] }) + speechSynthesis.speak.mock.calls[0][0].dispatchEvent( + new SpeechSynthesisEvent('end', { utterance: speechSynthesis.speak.mock.calls[0] }) ); expect(speechSynthesis).toHaveProperty('paused', false); expect(speechSynthesis).toHaveProperty('pending', false); @@ -167,11 +167,11 @@ expect(pageElements.activityContents()[1]).toHaveProperty('textContent', 'Aloha!'); // THEN: Should call SpeechSynthesis.speak() again. - await waitFor(() => expect(speechSynthesisSpeak).toHaveBeenCalledTimes(2)); + await waitFor(() => expect(speechSynthesis.speak).toHaveBeenCalledTimes(2)); // THEN: Should start synthesizing "Aloha!". - expect(speechSynthesisSpeak).toHaveBeenLastCalledWith(expect.any(SpeechSynthesisUtterance)); - expect(speechSynthesisSpeak).toHaveBeenLastCalledWith(expect.objectContaining({ text: 'Aloha!' })); + expect(speechSynthesis.speak).toHaveBeenLastCalledWith(expect.any(SpeechSynthesisUtterance)); + expect(speechSynthesis.speak).toHaveBeenLastCalledWith(expect.objectContaining({ text: 'Aloha!' })); // THEN: SpeechSynthesis.speaking should be true. @@ -179,8 +179,8 @@ expect(speechSynthesis).toHaveProperty('speaking', true); // WHEN: Synthesis completed. - speechSynthesisSpeak.mock.calls[1][0].dispatchEvent( - new SpeechSynthesisEvent('end', { utterance: speechSynthesisSpeak.mock.calls[1] }) + speechSynthesis.speak.mock.calls[1][0].dispatchEvent( + new SpeechSynthesisEvent('end', { utterance: speechSynthesis.speak.mock.calls[1] }) ); // THEN: SpeechSynthesis.speaking should be false.
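The clean-up in [PATCH 21/43] drops the named spy variables and asserts through the object property (`speechSynthesis.speak.mock.calls`) instead. That pattern only works when the harness's `spyOn()` installs the mock function onto the target object itself; a minimal sketch of such a helper (hypothetical; the suite's real implementation lives in its `fn.js` module) could look like this:

    // Minimal sketch only: a fn() mock that records calls, and a spyOn() that
    // installs it as target[key] so assertions can reach it via the property.
    function fn(implementation = () => {}) {
      const calls = [];
      const mock = (...args) => {
        calls.push(args);

        return implementation(...args);
      };

      mock.mock = { calls };

      return mock;
    }

    function spyOn(target, key) {
      const spy = fn(target[key].bind(target)); // Call through to the original method.

      target[key] = spy; // Later reachable as, e.g., speechSynthesis.speak.mock.calls.

      return spy;
    }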
From de01d5abb1473b264f82c74c61e2f1c74219232a Mon Sep 17 00:00:00 2001 From: William Wong Date: Wed, 12 Feb 2025 20:49:01 +0000 Subject: [PATCH 22/43] Clean up --- .../speech/inputHint.acceptingInput.html | 19 +++++++++---------- .../html2/speech/inputHint.ignoringInput.html | 19 +++++++++---------- 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/__tests__/html2/speech/inputHint.acceptingInput.html b/__tests__/html2/speech/inputHint.acceptingInput.html index 767aeacc09..c06ccef6f0 100644 --- a/__tests__/html2/speech/inputHint.acceptingInput.html +++ b/__tests__/html2/speech/inputHint.acceptingInput.html @@ -52,8 +52,7 @@ SpeechSynthesisUtterance }; - const speechSynthesisCancel = spyOn(speechSynthesis, 'cancel'); - const speechSynthesisSpeak = spyOn(speechSynthesis, 'speak'); + spyOn(speechSynthesis, 'speak'); const { directLine, store } = createDirectLineEmulator(); @@ -70,9 +69,9 @@ // WHEN: Microphone button is clicked and synthesized empty utterance for user gesture requirement. await pageObjects.clickMicrophoneButton(); - await waitFor(() => expect(speechSynthesisSpeak).toHaveBeenCalledTimes(1)); - speechSynthesisSpeak.mock.calls[0][0].dispatchEvent( - new SpeechSynthesisEvent('end', { utterance: speechSynthesisSpeak.mock.calls[0] }) + await waitFor(() => expect(speechSynthesis.speak).toHaveBeenCalledTimes(1)); + speechSynthesis.speak.mock.calls[0][0].dispatchEvent( + new SpeechSynthesisEvent('end', { utterance: speechSynthesis.speak.mock.calls[0] }) ); // THEN: Should construct the SpeechRecognition() instance and call start(). @@ -124,17 +123,17 @@ expect(pageElements.activityContents()[1]).toHaveProperty('textContent', 'Aloha!'); // THEN: Should call SpeechSynthesis.speak(). - await waitFor(() => expect(speechSynthesisSpeak).toHaveBeenCalledTimes(2)); - expect(speechSynthesisSpeak).toHaveBeenLastCalledWith(expect.any(SpeechSynthesisUtterance)); - expect(speechSynthesisSpeak).toHaveBeenLastCalledWith(expect.objectContaining({ text: 'Aloha!' })); + await waitFor(() => expect(speechSynthesis.speak).toHaveBeenCalledTimes(2)); + expect(speechSynthesis.speak).toHaveBeenLastCalledWith(expect.any(SpeechSynthesisUtterance)); + expect(speechSynthesis.speak).toHaveBeenLastCalledWith(expect.objectContaining({ text: 'Aloha!' })); expect(speechSynthesis).toHaveProperty('paused', false); expect(speechSynthesis).toHaveProperty('pending', false); expect(speechSynthesis).toHaveProperty('speaking', true); // WHEN: After synthesis completed. - speechSynthesisSpeak.mock.calls[1][0].dispatchEvent( - new SpeechSynthesisEvent('end', { utterance: speechSynthesisSpeak.mock.calls[1] }) + speechSynthesis.speak.mock.calls[1][0].dispatchEvent( + new SpeechSynthesisEvent('end', { utterance: speechSynthesis.speak.mock.calls[1] }) ); // THEN: SpeechSynthesis.speaking should become false. diff --git a/__tests__/html2/speech/inputHint.ignoringInput.html b/__tests__/html2/speech/inputHint.ignoringInput.html index dd3d5d3518..16e619371f 100644 --- a/__tests__/html2/speech/inputHint.ignoringInput.html +++ b/__tests__/html2/speech/inputHint.ignoringInput.html @@ -52,8 +52,7 @@ SpeechSynthesisUtterance }; - const speechSynthesisCancel = spyOn(speechSynthesis, 'cancel'); - const speechSynthesisSpeak = spyOn(speechSynthesis, 'speak'); + spyOn(speechSynthesis, 'speak'); const { directLine, store } = createDirectLineEmulator(); @@ -70,9 +69,9 @@ // WHEN: Microphone button is clicked and synthesized empty utterance for user gesture requirement.
await pageObjects.clickMicrophoneButton(); - await waitFor(() => expect(speechSynthesisSpeak).toHaveBeenCalledTimes(1)); - speechSynthesisSpeak.mock.calls[0][0].dispatchEvent( - new SpeechSynthesisEvent('end', { utterance: speechSynthesisSpeak.mock.calls[0] }) + await waitFor(() => expect(speechSynthesis.speak).toHaveBeenCalledTimes(1)); + speechSynthesis.speak.mock.calls[0][0].dispatchEvent( + new SpeechSynthesisEvent('end', { utterance: speechSynthesis.speak.mock.calls[0] }) ); // THEN: Should construct the SpeechRecognition() instance and call start(). @@ -124,17 +123,17 @@ expect(pageElements.activityContents()[1]).toHaveProperty('textContent', 'Aloha!'); // THEN: Should call SpeechSynthesis.speak(). - await waitFor(() => expect(speechSynthesisSpeak).toHaveBeenCalledTimes(2)); - expect(speechSynthesisSpeak).toHaveBeenLastCalledWith(expect.any(SpeechSynthesisUtterance)); - expect(speechSynthesisSpeak).toHaveBeenLastCalledWith(expect.objectContaining({ text: 'Aloha!' })); + await waitFor(() => expect(speechSynthesis.speak).toHaveBeenCalledTimes(2)); + expect(speechSynthesis.speak).toHaveBeenLastCalledWith(expect.any(SpeechSynthesisUtterance)); + expect(speechSynthesis.speak).toHaveBeenLastCalledWith(expect.objectContaining({ text: 'Aloha!' })); expect(speechSynthesis).toHaveProperty('paused', false); expect(speechSynthesis).toHaveProperty('pending', false); expect(speechSynthesis).toHaveProperty('speaking', true); // WHEN: After synthesis completed. - speechSynthesisSpeak.mock.calls[1][0].dispatchEvent( - new SpeechSynthesisEvent('end', { utterance: speechSynthesisSpeak.mock.calls[1] }) + speechSynthesis.speak.mock.calls[1][0].dispatchEvent( + new SpeechSynthesisEvent('end', { utterance: speechSynthesis.speak.mock.calls[1] }) ); // THEN: SpeechSynthesis.speaking should become false. From b4edd0a0249d3493e6e77ab0859595684f73b1b2 Mon Sep 17 00:00:00 2001 From: William Wong Date: Wed, 12 Feb 2025 21:05:06 +0000 Subject: [PATCH 23/43] Add more scenarios --- __tests__/html2/speech/bargeIn/behavior.html | 65 ++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/__tests__/html2/speech/bargeIn/behavior.html b/__tests__/html2/speech/bargeIn/behavior.html index dd7463aa18..a2d4497aa0 100644 --- a/__tests__/html2/speech/bargeIn/behavior.html +++ b/__tests__/html2/speech/bargeIn/behavior.html @@ -245,6 +245,71 @@ // THEN: Should go back to text input mode. expect(pageElements.sendBoxTextBox()).toBeTruthy(); + + // WHEN: Click on the microphone button to start recognition again. + await pageObjects.clickMicrophoneButton(); + + // THEN: Should create a new instance of SpeechRecognition. + expect(ponyfill.SpeechRecognition).toHaveBeenCalledTimes(2); + + const { value: speechRecognition2 } = ponyfill.SpeechRecognition.mock.results[1]; + + // THEN: Should call SpeechRecognition.start(). + expect(speechRecognition2.start).toHaveBeenCalledTimes(1); + expect(speechRecognition2).toHaveProperty('continuous', true); + + // THEN: Send box should say "Starting…" and hide the original text box. + expect(pageElements.sendBoxTextBox()).toBeFalsy(); + expect(document.querySelector(`[data-testid="${testIds.sendBoxSpeechBox}"]`)).toHaveProperty( + 'textContent', + 'Starting…' + ); + + // WHEN: Speech recognition started.
+ speechRecognition2.dispatchEvent(new Event('start')); + speechRecognition2.dispatchEvent(new Event('audiostart')); + speechRecognition2.dispatchEvent(new Event('soundstart')); + speechRecognition2.dispatchEvent(new Event('speechstart')); + + // THEN: Send box should say "Listening…" and hide the original text box. + expect(pageElements.sendBoxTextBox()).toBeFalsy(); + expect(document.querySelector(`[data-testid="${testIds.sendBoxSpeechBox}"]`)).toHaveProperty( + 'textContent', + 'Listening…' + ); + + // WHEN: Recognized result of "What's the weather?". + await ( + await directLine.actPostActivity(async () => { + speechRecognition2.dispatchEvent( + new SpeechRecognitionEvent('result', { + results: new SpeechRecognitionResultList( + SpeechRecognitionResult.fromFinalized(new SpeechRecognitionAlternative(0, "What's the weather?")) + ) + }) + ); + }) + ).resolveAll(); + + // THEN: Should have sent the activity. + await pageConditions.numActivitiesShown(4); + expect(pageElements.activityContents()[3]).toHaveProperty('textContent', "What's the weather?"); + + // THEN: Send box should keep saying "Listening…" and hide the original text box. + expect(pageElements.sendBoxTextBox()).toBeFalsy(); + expect(document.querySelector(`[data-testid="${testIds.sendBoxSpeechBox}"]`)).toHaveProperty( + 'textContent', + 'Listening…' + ); + + // WHEN: Web Speech provider abruptly stopped recognition. + speechRecognition2.dispatchEvent(new Event('speechend')); + speechRecognition2.dispatchEvent(new Event('soundend')); + speechRecognition2.dispatchEvent(new Event('audioend')); + speechRecognition2.dispatchEvent(new Event('end')); + + // THEN: Should go back to text input mode. + expect(pageElements.sendBoxTextBox()).toBeTruthy(); }); From ce4af279e770401eee287fdda5685510ac5b8611 Mon Sep 17 00:00:00 2001 From: William Wong Date: Wed, 12 Feb 2025 21:28:39 +0000 Subject: [PATCH 24/43] Ignore html2 --- jest.legacy.config.js | 1 + 1 file changed, 1 insertion(+) diff --git a/jest.legacy.config.js b/jest.legacy.config.js index 55f203263e..d255c6788e 100644 --- a/jest.legacy.config.js +++ b/jest.legacy.config.js @@ -66,6 +66,7 @@ module.exports = { '/__tests__/html/__dist__', '/__tests__/html/__jest__', '/__tests__/html/assets', + '/__tests__/html2/', // Will be tested by jest.html2.config.js.
'/__tests__/setup/', '/packages/bundle/__tests__/types/__typescript__/', '/packages/core/__tests__/types/__typescript__/', From d5fc3c27dc9ef9b765e3381006fc09ccf4c52ced Mon Sep 17 00:00:00 2001 From: William Wong Date: Wed, 12 Feb 2025 23:29:13 +0000 Subject: [PATCH 25/43] Ported test --- __tests__/hooks/useDictateState.js | 39 ---- .../html2/speech/hooks/useDictateState.html | 220 ++++++++++++++++++ __tests__/html2/speech/js/index.js | 1 + __tests__/html2/speech/js/renderHook.js | 77 ++++++ 4 files changed, 298 insertions(+), 39 deletions(-) delete mode 100644 __tests__/hooks/useDictateState.js create mode 100644 __tests__/html2/speech/hooks/useDictateState.html create mode 100644 __tests__/html2/speech/js/renderHook.js diff --git a/__tests__/hooks/useDictateState.js b/__tests__/hooks/useDictateState.js deleted file mode 100644 index fef95100dc..0000000000 --- a/__tests__/hooks/useDictateState.js +++ /dev/null @@ -1,39 +0,0 @@ -import { timeouts } from '../constants.json'; - -import uiConnected from '../setup/conditions/uiConnected'; - -// selenium-webdriver API doc: -// https://seleniumhq.github.io/selenium/docs/api/javascript/module/selenium-webdriver/index_exports_WebDriver.html - -jest.setTimeout(timeouts.test); - -test('getter should return dictate state', async () => { - const { driver, pageObjects } = await setupWebDriver({ - props: { - webSpeechPonyfillFactory: () => window.WebSpeechMock - } - }); - - await driver.wait(uiConnected(), timeouts.directLine); - - expect((await pageObjects.runHook('useDictateState'))[0]).toMatchInlineSnapshot(`0`); - - await pageObjects.clickMicrophoneButton(); - - // Dictate state "1" is for "automatic turning on microphone after current synthesis completed". - expect((await pageObjects.runHook('useDictateState'))[0]).toMatchInlineSnapshot(`2`); - - await pageObjects.putSpeechRecognitionResult('recognizing', 'Hello'); - - expect((await pageObjects.runHook('useDictateState'))[0]).toMatchInlineSnapshot(`3`); - - await pageObjects.clickMicrophoneButton(); - - expect((await pageObjects.runHook('useDictateState'))[0]).toMatchInlineSnapshot(`4`); -}); - -test('setter should throw exception', async () => { - const { pageObjects } = await setupWebDriver(); - - await expect(pageObjects.runHook('useDictateState', [], dictateState => dictateState[1]())).rejects.toThrow(); -}); diff --git a/__tests__/html2/speech/hooks/useDictateState.html b/__tests__/html2/speech/hooks/useDictateState.html new file mode 100644 index 0000000000..a137ab5485 --- /dev/null +++ b/__tests__/html2/speech/hooks/useDictateState.html @@ -0,0 +1,220 @@ + + + + + + + + + + + +
+ + + + diff --git a/__tests__/html2/speech/js/index.js b/__tests__/html2/speech/js/index.js index f04e5a9372..b1e1af9816 100644 --- a/__tests__/html2/speech/js/index.js +++ b/__tests__/html2/speech/js/index.js @@ -1,3 +1,4 @@ export { default as SpeechSynthesis } from './MockedSpeechSynthesis.js'; export { default as SpeechSynthesisEvent } from './MockedSpeechSynthesisEvent.js'; export { default as SpeechSynthesisUtterance } from './MockedSpeechSynthesisUtterance.js'; +export { default as renderHook } from './renderHook.js'; diff --git a/__tests__/html2/speech/js/renderHook.js b/__tests__/html2/speech/js/renderHook.js new file mode 100644 index 0000000000..1593e69293 --- /dev/null +++ b/__tests__/html2/speech/js/renderHook.js @@ -0,0 +1,77 @@ +// Adopted from https://github.com/testing-library/react-testing-library/blob/main/src/pure.js#L292C1-L329C2 + +/*! + * The MIT License (MIT) + * Copyright (c) 2017-Present Kent C. Dodds + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +function wrapUiIfNeeded(innerElement, wrapperComponent) { + return wrapperComponent ? React.createElement(wrapperComponent, null, innerElement) : innerElement; +} + +export default function renderHook( + /** @type {(props: RenderCallbackProps) => any} */ renderCallback, + /** @type {{}} */ options = {} +) { + const { initialProps, ...renderOptions } = options; + + if (renderOptions.legacyRoot && typeof ReactDOM.render !== 'function') { + const error = new Error( + '`legacyRoot: true` is not supported in this version of React. ' + + 'If your app runs React 19 or later, you should remove this flag. ' + + 'If your app runs React 18 or earlier, visit https://react.dev/blog/2022/03/08/react-18-upgrade-guide for upgrade instructions.' + ); + Error.captureStackTrace(error, renderHook); + throw error; + } + + const result = React.createRef(); + + function TestComponent({ renderCallbackProps }) { + const pendingResult = renderCallback(renderCallbackProps); + + React.useEffect(() => { + result.current = pendingResult; + }); + + return null; + } + + // A stripped down version of render() from `@testing-library/react`. 
+ const render = ({ renderCallbackProps }) => { + const element = document.querySelector('main'); + + ReactDOM.render(wrapUiIfNeeded(React.createElement(TestComponent, renderCallbackProps), renderOptions.wrapper), element); + + return { rerender: render, unmount: () => ReactDOM.unmountComponentAtNode(element) }; + }; + + const { rerender: baseRerender, unmount } = render( + React.createElement(TestComponent, { renderCallbackProps: initialProps }), + renderOptions + ); + + function rerender(rerenderCallbackProps) { + return baseRerender(React.createElement(TestComponent, { renderCallbackProps: rerenderCallbackProps })); + } + + return { result, rerender, unmount }; +} From 8cb2ae5c541e8c8f13801e6ff61b4d602ae06a94 Mon Sep 17 00:00:00 2001 From: William Wong Date: Wed, 12 Feb 2025 23:29:21 +0000 Subject: [PATCH 26/43] Added test --- __tests__/html2/speech/hooks/useDictateState.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/__tests__/html2/speech/hooks/useDictateState.html b/__tests__/html2/speech/hooks/useDictateState.html index a137ab5485..8aad482740 100644 --- a/__tests__/html2/speech/hooks/useDictateState.html +++ b/__tests__/html2/speech/hooks/useDictateState.html @@ -203,7 +203,7 @@ // THEN: `useDictateState` should returns STOPPING. renderResult.rerender(); - // await waitFor(() => expect(renderResult).toHaveProperty('result.current', 4)); + await waitFor(() => expect(renderResult).toHaveProperty('result.current', 4)); // WHEN: Recognition ended. speechRecognition2.dispatchEvent(new Event('speechend')); From 206c6f20e2b95ded6afa4b99e225e728e7d7770f Mon Sep 17 00:00:00 2001 From: William Wong Date: Thu, 13 Feb 2025 07:39:52 +0000 Subject: [PATCH 27/43] Bump react-dictate-button --- package-lock.json | 32 ++++++++++++++++---------------- packages/component/package.json | 2 +- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/package-lock.json b/package-lock.json index 524c567bb0..0f18a68b11 100644 --- a/package-lock.json +++ b/package-lock.json @@ -18609,6 +18609,21 @@ "react": ">=16.8.0" } }, + "node_modules/react-dictate-button": { + "version": "3.0.1-main.4f01cd5", + "resolved": "https://registry.npmjs.org/react-dictate-button/-/react-dictate-button-3.0.1-main.4f01cd5.tgz", + "integrity": "sha512-JzDlCTBkVdRGGW3lpaq+2YpebgXr1wA8mT+4IkFXiSbKdfWAXpyNMInKvlXYzXOwKC2YCxbnuoceI7++k03n5Q==", + "license": "MIT", + "dependencies": { + "@babel/runtime-corejs3": "^7.14.0", + "core-js": "^3.12.1", + "react-dictate-button": "^3.0.1-main.4f01cd5", + "use-ref-from": "^0.1.0" + }, + "peerDependencies": { + "react": ">=16.8.6" + } + }, "node_modules/react-dom": { "version": "16.8.6", "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-16.8.6.tgz", @@ -24240,7 +24255,7 @@ "prop-types": "15.8.1", "punycode": "2.3.1", "react-chain-of-responsibility": "0.2.0-main.3cb47ab", - "react-dictate-button": "^3.0.1-main.24a5a4e", + "react-dictate-button": "^3.0.1-main.4f01cd5", "react-film": "3.1.1-main.f623bf6", "react-redux": "7.2.9", "react-say": "2.1.0", @@ -24286,21 +24301,6 @@ "node": ">=6" } }, - "packages/component/node_modules/react-dictate-button": { - "version": "3.0.1-main.24a5a4e", - "resolved": "https://registry.npmjs.org/react-dictate-button/-/react-dictate-button-3.0.1-main.24a5a4e.tgz", - "integrity": "sha512-ReknENPEH0CCMWtA/wLYg7MNVkB5sz2UMGXis9SatC6JDHbuy2s2rBBkBmUN6kWTG4NIdzjSQpxrLeCNth4mMw==", - "license": "MIT", - "dependencies": { - "@babel/runtime-corejs3": "^7.14.0", - "core-js": "^3.12.1", - "react-dictate-button": "^3.0.1-main.24a5a4e", - 
"use-ref-from": "^0.1.0" - }, - "peerDependencies": { - "react": ">=16.8.6" - } - }, "packages/component/node_modules/type-fest": { "version": "4.26.1", "dev": true, diff --git a/packages/component/package.json b/packages/component/package.json index b1e0d631a3..0290921435 100644 --- a/packages/component/package.json +++ b/packages/component/package.json @@ -151,7 +151,7 @@ "prop-types": "15.8.1", "punycode": "2.3.1", "react-chain-of-responsibility": "0.2.0-main.3cb47ab", - "react-dictate-button": "^3.0.1-main.24a5a4e", + "react-dictate-button": "^3.0.1-main.4f01cd5", "react-film": "3.1.1-main.f623bf6", "react-redux": "7.2.9", "react-say": "2.1.0", From 1e0f1a30d4a8f4e349510010e784978b217fee1d Mon Sep 17 00:00:00 2001 From: William Wong Date: Thu, 13 Feb 2025 08:30:27 +0000 Subject: [PATCH 28/43] Add entry --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d0a086d369..73f14896cd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -78,6 +78,8 @@ Notes: web developers are advised to use [`~` (tilde range)](https://github.com/ - New style option supports two values: `'activity-actions'` and `'activity-status'` (default) - When set to `'activity-actions'`, feedback buttons are displayed in the activity actions toolbar - When set to `'activity-status'`, feedback buttons appear in the activity status area (default behavior) +- Added speech recognition continuous mode with barge-in support, in PR [#5426](https://github.com/microsoft/BotFramework-WebChat/pull/5426), by [@RushikeshGavali](https://github.com/RushikeshGavali) and [@compulim](https://github.com/compulim) + - Set `styleOptions.speechRecognitionContinuous` to `true` and use a Web Speech API provider which supports continuous mode ### Changed From 282cd99bf16d44b4e94e1a99bf56714ce3cc3c90 Mon Sep 17 00:00:00 2001 From: William Wong Date: Thu, 13 Feb 2025 08:34:20 +0000 Subject: [PATCH 29/43] Update entries --- CHANGELOG.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 73f14896cd..2c737fffb6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -78,8 +78,8 @@ Notes: web developers are advised to use [`~` (tilde range)](https://github.com/ - New style option supports two values: `'activity-actions'` and `'activity-status'` (default) - When set to `'activity-actions'`, feedback buttons are displayed in the activity actions toolbar - When set to `'activity-status'`, feedback buttons appear in the activity status area (default behavior) -- Added speech recognition continuous mode with barge-in support, in PR [#5426](https://github.com/microsoft/BotFramework-WebChat/pull/5426), by [@RushikeshGavali](https://github.com/RushikeshGavali) and [@compulim](https://github.com/compulim) - - Set `styleOptions.speechRecognitionContinuous` to `true` and use a Web Speech API provider which supports continuous mode +- Resolved [#2661](https://github.com/microsoft/BotFramework-WebChat/issues/2661) and [#5352](https://github.com/microsoft/BotFramework-WebChat/issues/5352). 
Added speech recognition continuous mode with barge-in support, in PR [#5426](https://github.com/microsoft/BotFramework-WebChat/pull/5426), by [@RushikeshGavali](https://github.com/RushikeshGavali) and [@compulim](https://github.com/compulim) + - Set `styleOptions.speechRecognitionContinuous` to `true` with a Web Speech API provider with continuous mode support ### Changed @@ -101,9 +101,10 @@ Notes: web developers are advised to use [`~` (tilde range)](https://github.com/ - Switched math block syntax from `$$` to Tex-style `\[ \]` and `\( \)` delimiters with improved rendering and error handling, in PR [#5353](https://github.com/microsoft/BotFramework-WebChat/pull/5353), by [@OEvgeny](https://github.com/OEvgeny) - Improved avatar display and grouping behavior by fixing rendering issues and activity sender identification, in PR [#5346](https://github.com/microsoft/BotFramework-WebChat/pull/5346), by [@OEvgeny](https://github.com/OEvgeny) - Activity "copy" button will use `outerHTML` and `textContent` for clipboard content, in PR [#5378](https://github.com/microsoft/BotFramework-WebChat/pull/5378), by [@compulim](https://github.com/compulim) -- Bumped dependencies to the latest versions, by [@compulim](https://github.com/compulim) in PR [#5385](https://github.com/microsoft/BotFramework-WebChat/pull/5385) and [#5400](https://github.com/microsoft/BotFramework-WebChat/pull/5400) +- Bumped dependencies to the latest versions, by [@compulim](https://github.com/compulim) in PR [#5385](https://github.com/microsoft/BotFramework-WebChat/pull/5385), [#5400](https://github.com/microsoft/BotFramework-WebChat/pull/5400), and [#5426](https://github.com/microsoft/BotFramework-WebChat/pull/5426) - Production dependencies - [`web-speech-cognitive-services@8.1.0`](https://npmjs.com/package/web-speech-cognitive-services) + - [`react-dictate-button@x.x.x](https://npmjs.com/package/react-dictate-button) - Enabled icon customization in Fluent theme through CSS variables, in PR [#5413](https://github.com/microsoft/BotFramework-WebChat/pull/5413), by [@OEvgeny](https://github.com/OEvgeny) ### Fixed From 4fd345f93044ae54faef7834530d468a891c41b1 Mon Sep 17 00:00:00 2001 From: William Wong Date: Thu, 13 Feb 2025 08:47:19 +0000 Subject: [PATCH 30/43] Bump to react-dictate-button@4.0.0 --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2c737fffb6..6003c8884c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -104,7 +104,7 @@ Notes: web developers are advised to use [`~` (tilde range)](https://github.com/ - Bumped dependencies to the latest versions, by [@compulim](https://github.com/compulim) in PR [#5385](https://github.com/microsoft/BotFramework-WebChat/pull/5385), [#5400](https://github.com/microsoft/BotFramework-WebChat/pull/5400), and [#5426](https://github.com/microsoft/BotFramework-WebChat/pull/5426) - Production dependencies - [`web-speech-cognitive-services@8.1.0`](https://npmjs.com/package/web-speech-cognitive-services) - - [`react-dictate-button@x.x.x](https://npmjs.com/package/react-dictate-button) + - [`react-dictate-button@4.0.0`](https://npmjs.com/package/react-dictate-button) - Enabled icon customization in Fluent theme through CSS variables, in PR [#5413](https://github.com/microsoft/BotFramework-WebChat/pull/5413), by [@OEvgeny](https://github.com/OEvgeny) ### Fixed From e88f40e962d305b49c900c7761e7846c3e0ce3a7 Mon Sep 17 00:00:00 2001 From: William Wong Date: Thu, 13 Feb 2025 08:47:27 +0000 Subject: [PATCH 31/43] Bump to 
react-dictate-button@4.0.0 --- package-lock.json | 10 +++++----- packages/component/package.json | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/package-lock.json b/package-lock.json index 0f18a68b11..1076c37de6 100644 --- a/package-lock.json +++ b/package-lock.json @@ -18610,14 +18610,14 @@ } }, "node_modules/react-dictate-button": { - "version": "3.0.1-main.4f01cd5", - "resolved": "https://registry.npmjs.org/react-dictate-button/-/react-dictate-button-3.0.1-main.4f01cd5.tgz", - "integrity": "sha512-JzDlCTBkVdRGGW3lpaq+2YpebgXr1wA8mT+4IkFXiSbKdfWAXpyNMInKvlXYzXOwKC2YCxbnuoceI7++k03n5Q==", + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/react-dictate-button/-/react-dictate-button-4.0.0.tgz", + "integrity": "sha512-v+92/yHShDzCapCZm2Y6UoKEKzt32gCJWFTIcJxRTwySfP8+eulUG/2U2ttu74YD6i0z9dYsRfFEHHwC+UfwKQ==", "license": "MIT", "dependencies": { "@babel/runtime-corejs3": "^7.14.0", "core-js": "^3.12.1", - "react-dictate-button": "^3.0.1-main.4f01cd5", + "react-dictate-button": "^4.0.0", "use-ref-from": "^0.1.0" }, "peerDependencies": { @@ -24255,7 +24255,7 @@ "prop-types": "15.8.1", "punycode": "2.3.1", "react-chain-of-responsibility": "0.2.0-main.3cb47ab", - "react-dictate-button": "^3.0.1-main.4f01cd5", + "react-dictate-button": "4.0.0", "react-film": "3.1.1-main.f623bf6", "react-redux": "7.2.9", "react-say": "2.1.0", diff --git a/packages/component/package.json b/packages/component/package.json index 0290921435..127a93c351 100644 --- a/packages/component/package.json +++ b/packages/component/package.json @@ -151,7 +151,7 @@ "prop-types": "15.8.1", "punycode": "2.3.1", "react-chain-of-responsibility": "0.2.0-main.3cb47ab", - "react-dictate-button": "^3.0.1-main.4f01cd5", + "react-dictate-button": "4.0.0", "react-film": "3.1.1-main.f623bf6", "react-redux": "7.2.9", "react-say": "2.1.0", From 07c84b3b4813c5c6dfc5c9f78e3d46bd83f828d3 Mon Sep 17 00:00:00 2001 From: William Wong Date: Thu, 13 Feb 2025 09:07:17 +0000 Subject: [PATCH 32/43] Clean up --- __tests__/html2/{speech/js => hooks/private}/renderHook.js | 0 __tests__/html2/{speech => }/hooks/useDictateState.html | 7 ++++--- __tests__/html2/speech/bargeIn/behavior.html | 4 ++-- __tests__/html2/speech/comprehensive.html | 4 ++-- __tests__/html2/speech/inputHint.acceptingInput.html | 4 ++-- __tests__/html2/speech/inputHint.ignoringInput.html | 4 ++-- __tests__/html2/speech/js/index.js | 1 - __tests__/html2/speech/mockSetup.html | 2 +- 8 files changed, 13 insertions(+), 13 deletions(-) rename __tests__/html2/{speech/js => hooks/private}/renderHook.js (100%) rename __tests__/html2/{speech => }/hooks/useDictateState.html (98%) diff --git a/__tests__/html2/speech/js/renderHook.js b/__tests__/html2/hooks/private/renderHook.js similarity index 100% rename from __tests__/html2/speech/js/renderHook.js rename to __tests__/html2/hooks/private/renderHook.js diff --git a/__tests__/html2/speech/hooks/useDictateState.html b/__tests__/html2/hooks/useDictateState.html similarity index 98% rename from __tests__/html2/speech/hooks/useDictateState.html rename to __tests__/html2/hooks/useDictateState.html index 8aad482740..838bc22fba 100644 --- a/__tests__/html2/speech/hooks/useDictateState.html +++ b/__tests__/html2/hooks/useDictateState.html @@ -20,6 +20,8 @@ } + + + + +
+ + + + diff --git a/__tests__/html2/speech/performCardAction.interactive.html b/__tests__/html2/speech/performCardAction.interactive.html index 1031923b1f..f0153896bc 100644 --- a/__tests__/html2/speech/performCardAction.interactive.html +++ b/__tests__/html2/speech/performCardAction.interactive.html @@ -8,3 +8,127 @@ - EXPECT: The reply should NOT be synthesized - EXPECT: Should not be listening --> + + + + + + + + + + +
+ + + + diff --git a/packages/core/src/sagas/stopSpeakingActivityOnInputSaga.js b/packages/core/src/sagas/stopSpeakingActivityOnInputSaga.js index 2db7a0192c..35d94e58dc 100644 --- a/packages/core/src/sagas/stopSpeakingActivityOnInputSaga.js +++ b/packages/core/src/sagas/stopSpeakingActivityOnInputSaga.js @@ -1,23 +1,38 @@ -import { put, takeEvery } from 'redux-saga/effects'; +import { put, select, takeEvery } from 'redux-saga/effects'; import { POST_ACTIVITY_PENDING } from '../actions/postActivity'; import { SET_DICTATE_INTERIMS } from '../actions/setDictateInterims'; import { SET_SEND_BOX } from '../actions/setSendBox'; import stopSpeakingActivity from '../actions/stopSpeakingActivity'; +import { DICTATING } from '../constants/DictateState'; +import dictateStateSelector from '../selectors/dictateState'; import whileConnected from './effects/whileConnected'; function* stopSpeakingActivityOnInput() { yield takeEvery( - ({ meta, payload, type }) => + ({ payload, type }) => (type === SET_SEND_BOX && payload.text) || // We want to stop speaking activity when the user click on a card action // But currently there are no actions generated out of a card action // So, right now, we are using best-effort by listening to POST_ACTIVITY_PENDING with a "message" event // We filter out speech because we will call startSpeakingActivity() for POST_ACTIVITY_PENDING dispatched by speech - (type === POST_ACTIVITY_PENDING && meta.method !== 'speech' && payload.activity.type === 'message') || + type === POST_ACTIVITY_PENDING || // We want to stop speaking activity on barge-in. type === SET_DICTATE_INTERIMS, - function* () { + function* ({ meta, payload, type }) { + const dictateState = yield select(dictateStateSelector); + + // If input is post activity, do not stop if: + // - In continuous mode, or + // - Posting via speech, or + // - Posting a non-message. + if ( + type === POST_ACTIVITY_PENDING && + (dictateState === DICTATING || meta.method === 'speech' || payload.activity.type !== 'message') + ) { + return; + } + yield put(stopSpeakingActivity()); } ); From d8608d1d046274ebe8dfc34e3045d884207aa4d7 Mon Sep 17 00:00:00 2001 From: William Wong Date: Thu, 13 Feb 2025 10:33:10 +0000 Subject: [PATCH 37/43] Add test --- .../speech/performCardAction.interactive.html | 113 ++++++++++++++---- .../sagas/startDictateOnSpeakCompleteSaga.js | 6 +- 2 files changed, 91 insertions(+), 28 deletions(-) diff --git a/__tests__/html2/speech/performCardAction.interactive.html b/__tests__/html2/speech/performCardAction.interactive.html index f0153896bc..05d55ea472 100644 --- a/__tests__/html2/speech/performCardAction.interactive.html +++ b/__tests__/html2/speech/performCardAction.interactive.html @@ -1,14 +1,3 @@ - - @@ -78,6 +67,42 @@ await pageConditions.uiConnected(); + // WHEN: Click on the microphone and send a speech. + await pageObjects.clickMicrophoneButton(); + await waitFor(() => expect(speechSynthesis.speak).toHaveBeenCalledTimes(1)); + speechSynthesis.speak.mock.calls[0][0].dispatchEvent( + new SpeechSynthesisEvent('end', { utterance: speechSynthesis.speak.mock.calls[0] }) + ); + + // THEN: Should construct the SpeechRecognition() instance and call start(). 
+ expect(ponyfill.SpeechRecognition).toHaveBeenCalledTimes(1); + + const { value: speechRecognition1 } = ponyfill.SpeechRecognition.mock.results[0]; + + expect(speechRecognition1.start).toHaveBeenCalledTimes(1); + + speechRecognition1.dispatchEvent(new Event('start')); + speechRecognition1.dispatchEvent(new Event('audiostart')); + speechRecognition1.dispatchEvent(new Event('soundstart')); + speechRecognition1.dispatchEvent(new Event('speechstart')); + + await ( + await directLine.actPostActivity(async () => { + speechRecognition1.dispatchEvent( + new SpeechRecognitionEvent('result', { + results: new SpeechRecognitionResultList( + SpeechRecognitionResult.fromFinalized(new SpeechRecognitionAlternative(0.9, 'Hello, World!')) + ) + }) + ); + }) + ).resolveAll(); + + speechRecognition1.dispatchEvent(new Event('speechend')); + speechRecognition1.dispatchEvent(new Event('soundend')); + speechRecognition1.dispatchEvent(new Event('audioend')); + speechRecognition1.dispatchEvent(new Event('end')); + // WHEN: Bot send a card. await directLine.emulateIncomingActivity({ attachments: [ @@ -93,30 +118,59 @@ } } ], + inputHint: 'expectingInput', type: 'message' }); // THEN: Should show the card. - await pageConditions.numActivitiesShown(1); + await pageConditions.numActivitiesShown(2); - // WHEN: Microphone button is clicked and synthesized empty utterace for user gesture requirement. - await pageObjects.clickMicrophoneButton(); - await waitFor(() => expect(speechSynthesis.speak).toHaveBeenCalledTimes(1)); - speechSynthesis.speak.mock.calls[0][0].dispatchEvent( - new SpeechSynthesisEvent('end', { utterance: speechSynthesis.speak.mock.calls[0] }) + // THEN: Should call SpeechSynthesis.speak(). + await waitFor(() => expect(speechSynthesis.speak).toHaveBeenCalledTimes(2)); + expect(speechSynthesis.speak).toHaveBeenLastCalledWith(expect.any(SpeechSynthesisUtterance)); + expect(speechSynthesis.speak).toHaveBeenLastCalledWith(expect.objectContaining({ text: 'This is a card.' })); + + expect(speechSynthesis).toHaveProperty('paused', false); + expect(speechSynthesis).toHaveProperty('pending', false); + expect(speechSynthesis).toHaveProperty('speaking', true); + + // WHEN: After synthesis completed. + speechSynthesis.speak.mock.calls[1][0].dispatchEvent( + new SpeechSynthesisEvent('end', { utterance: speechSynthesis.speak.mock.calls[1] }) ); - // THEN: Should construct the SpeechRecognition() instance and call start(). - expect(ponyfill.SpeechRecognition).toHaveBeenCalledTimes(1); + // THEN: SpeechSynthesis.speaking should become false. + expect(speechSynthesis).toHaveProperty('paused', false); + expect(speechSynthesis).toHaveProperty('pending', false); + expect(speechSynthesis).toHaveProperty('speaking', false); - const { value: speechRecognition1 } = ponyfill.SpeechRecognition.mock.results[0]; + // THEN: Should resume speech recognition. 
+ await waitFor(() => expect(ponyfill.SpeechRecognition).toHaveBeenCalledTimes(2)); - expect(speechRecognition1.start).toHaveBeenCalledTimes(1); + const { value: speechRecognition2 } = ponyfill.SpeechRecognition.mock.results[1]; - speechRecognition1.dispatchEvent(new Event('start')); - speechRecognition1.dispatchEvent(new Event('audiostart')); - speechRecognition1.dispatchEvent(new Event('soundstart')); - speechRecognition1.dispatchEvent(new Event('speechstart')); + expect(speechRecognition2.start).toHaveBeenCalledTimes(1); + + speechRecognition2.dispatchEvent(new Event('start')); + speechRecognition2.dispatchEvent(new Event('audiostart')); + speechRecognition2.dispatchEvent(new Event('soundstart')); + speechRecognition2.dispatchEvent(new Event('speechstart')); + + // WHEN: Interim is recognized. + speechRecognition2.dispatchEvent( + new SpeechRecognitionEvent('result', { + results: new SpeechRecognitionResultList( + new SpeechRecognitionResult(new SpeechRecognitionAlternative(0.9, 'Good')) + ) + }) + ); + + // THEN: Should display interim. + expect(pageElements.sendBoxTextBox()).toBeFalsy(); + expect(document.querySelector(`[data-testid="${testIds.sendBoxSpeechBox}"]`)).toHaveProperty( + 'textContent', + expect.stringMatching(/^Good\s/u) + ); // WHEN: Click on the card. await ( @@ -128,6 +182,15 @@ // THEN: Should go back to text mode. expect(pageElements.sendBoxTextBox()).toBeTruthy(); + + // WHEN: Bot replied. + await directLine.emulateIncomingActivity({ + text: 'Aloha!', + type: 'message' + }); + + // THEN: Should not call SpeechSynthesis.speak(). + await waitFor(() => expect(speechSynthesis.speak).toHaveBeenCalledTimes(2)); }); diff --git a/packages/core/src/sagas/startDictateOnSpeakCompleteSaga.js b/packages/core/src/sagas/startDictateOnSpeakCompleteSaga.js index 8ae3cddc9f..bb2be23faa 100644 --- a/packages/core/src/sagas/startDictateOnSpeakCompleteSaga.js +++ b/packages/core/src/sagas/startDictateOnSpeakCompleteSaga.js @@ -1,12 +1,12 @@ import { put, select, takeEvery } from 'redux-saga/effects'; import { MARK_ACTIVITY } from '../actions/markActivity'; -import { of as activitiesOf } from '../selectors/activities'; import { SET_DICTATE_STATE } from '../actions/setDictateState'; +import startDictate from '../actions/startDictate'; import { WILL_START } from '../constants/DictateState'; -import dictateStateSelector from '../selectors/dictateState'; import speakingActivity from '../definitions/speakingActivity'; -import startDictate from '../actions/startDictate'; +import { of as activitiesOf } from '../selectors/activities'; +import dictateStateSelector from '../selectors/dictateState'; function* startDictateOnSpeakComplete() { const speakingActivities = yield select(activitiesOf(speakingActivity)); From 2e2797efd28633ecc828da721502c1f28f6840e7 Mon Sep 17 00:00:00 2001 From: William Wong Date: Thu, 13 Feb 2025 10:37:37 +0000 Subject: [PATCH 38/43] More scenarios --- .../speech/performCardAction.continuous.html | 77 +++++++++++++------ 1 file changed, 54 insertions(+), 23 deletions(-) diff --git a/__tests__/html2/speech/performCardAction.continuous.html b/__tests__/html2/speech/performCardAction.continuous.html index 89b087f5bb..7723b0d60e 100644 --- a/__tests__/html2/speech/performCardAction.continuous.html +++ b/__tests__/html2/speech/performCardAction.continuous.html @@ -68,6 +68,37 @@ await pageConditions.uiConnected(); + // WHEN: Click on the microphone and send a speech. 
+      await pageObjects.clickMicrophoneButton();
+      await waitFor(() => expect(speechSynthesis.speak).toHaveBeenCalledTimes(1));
+      speechSynthesis.speak.mock.calls[0][0].dispatchEvent(
+        new SpeechSynthesisEvent('end', { utterance: speechSynthesis.speak.mock.calls[0][0] })
+      );
+
+      // THEN: Should construct the SpeechRecognition() instance and call start().
+      expect(ponyfill.SpeechRecognition).toHaveBeenCalledTimes(1);
+
+      const { value: speechRecognition1 } = ponyfill.SpeechRecognition.mock.results[0];
+
+      expect(speechRecognition1.start).toHaveBeenCalledTimes(1);
+
+      speechRecognition1.dispatchEvent(new Event('start'));
+      speechRecognition1.dispatchEvent(new Event('audiostart'));
+      speechRecognition1.dispatchEvent(new Event('soundstart'));
+      speechRecognition1.dispatchEvent(new Event('speechstart'));
+
+      await (
+        await directLine.actPostActivity(async () => {
+          speechRecognition1.dispatchEvent(
+            new SpeechRecognitionEvent('result', {
+              results: new SpeechRecognitionResultList(
+                SpeechRecognitionResult.fromFinalized(new SpeechRecognitionAlternative(0.9, 'Hello, World!'))
+              )
+            })
+          );
+        })
+      ).resolveAll();
+
       // WHEN: Bot sends a card.
       await directLine.emulateIncomingActivity({
         attachments: [
@@ -87,26 +118,26 @@
       });

       // THEN: Should show the card.
-      await pageConditions.numActivitiesShown(1);
-
-      // WHEN: Microphone button is clicked and synthesized empty utterace for user gesture requirement.
-      await pageObjects.clickMicrophoneButton();
-      await waitFor(() => expect(speechSynthesis.speak).toHaveBeenCalledTimes(1));
-      speechSynthesis.speak.mock.calls[0][0].dispatchEvent(
-        new SpeechSynthesisEvent('end', { utterance: speechSynthesis.speak.mock.calls[0] })
-      );
+      await pageConditions.numActivitiesShown(2);

-      // THEN: Should construct the SpeechRecognition() instance and call start().
-      expect(ponyfill.SpeechRecognition).toHaveBeenCalledTimes(1);
+      // THEN: Should call SpeechSynthesis.speak().
+      await waitFor(() => expect(speechSynthesis.speak).toHaveBeenCalledTimes(2));
+      expect(speechSynthesis.speak).toHaveBeenLastCalledWith(expect.any(SpeechSynthesisUtterance));
+      expect(speechSynthesis.speak).toHaveBeenLastCalledWith(expect.objectContaining({ text: 'This is a card.' }));

-      const { value: speechRecognition1 } = ponyfill.SpeechRecognition.mock.results[0];
+      expect(speechSynthesis).toHaveProperty('paused', false);
+      expect(speechSynthesis).toHaveProperty('pending', false);
+      expect(speechSynthesis).toHaveProperty('speaking', true);

-      expect(speechRecognition1.start).toHaveBeenCalledTimes(1);
+      // WHEN: After synthesis completed.
+      speechSynthesis.speak.mock.calls[1][0].dispatchEvent(
+        new SpeechSynthesisEvent('end', { utterance: speechSynthesis.speak.mock.calls[1][0] })
+      );

-      speechRecognition1.dispatchEvent(new Event('start'));
-      speechRecognition1.dispatchEvent(new Event('audiostart'));
-      speechRecognition1.dispatchEvent(new Event('soundstart'));
-      speechRecognition1.dispatchEvent(new Event('speechstart'));
+      // THEN: SpeechSynthesis.speaking should become false.
+      expect(speechSynthesis).toHaveProperty('paused', false);
+      expect(speechSynthesis).toHaveProperty('pending', false);
+      expect(speechSynthesis).toHaveProperty('speaking', false);

       // WHEN: Click on the card.
       await (
@@ -120,11 +151,11 @@
       });

       // THEN: Should show bot reply.
-      await pageConditions.numActivitiesShown(2);
-      expect(pageElements.activityContents()[1]).toHaveProperty('textContent', 'Aloha!');
+      await pageConditions.numActivitiesShown(3);
+      expect(pageElements.activityContents()[2]).toHaveProperty('textContent', 'Aloha!');

       // THEN: Should call SpeechSynthesis.speak().
-      await waitFor(() => expect(speechSynthesis.speak).toHaveBeenCalledTimes(2));
+      await waitFor(() => expect(speechSynthesis.speak).toHaveBeenCalledTimes(3));
       expect(speechSynthesis.speak).toHaveBeenLastCalledWith(expect.any(SpeechSynthesisUtterance));
       expect(speechSynthesis.speak).toHaveBeenLastCalledWith(expect.objectContaining({ text: 'Aloha!' }));

@@ -133,8 +164,8 @@
       expect(speechSynthesis).toHaveProperty('speaking', true);

       // WHEN: After synthesis completed.
-      speechSynthesis.speak.mock.calls[1][0].dispatchEvent(
-        new SpeechSynthesisEvent('end', { utterance: speechSynthesis.speak.mock.calls[1] })
+      speechSynthesis.speak.mock.calls[2][0].dispatchEvent(
+        new SpeechSynthesisEvent('end', { utterance: speechSynthesis.speak.mock.calls[2][0] })
       );

       // THEN: SpeechSynthesis.speaking should become false.
@@ -168,8 +199,8 @@
       ).resolveAll();

       // THEN: Should send the recognized message.
-      await pageConditions.numActivitiesShown(3);
-      expect(pageElements.activityContents()[2]).toHaveProperty('textContent', 'Good morning!');
+      await pageConditions.numActivitiesShown(4);
+      expect(pageElements.activityContents()[3]).toHaveProperty('textContent', 'Good morning!');

       // WHEN: Click on microphone button.
       await pageObjects.clickMicrophoneButton();

From 6cd4a83b41decccabeb2c8776b78507d20cace9d Mon Sep 17 00:00:00 2001
From: William Wong
Date: Thu, 13 Feb 2025 10:52:17 +0000
Subject: [PATCH 39/43] Better comments

---
 ...vityAndStartDictateOnIncomingActivityFromOthersSaga.js | 3 ++-
 .../src/sagas/startSpeakActivityOnPostActivitySaga.js | 2 +-
 packages/core/src/sagas/stopDictateOnCardActionSaga.js | 5 +++--
 .../core/src/sagas/stopSpeakingActivityOnInputSaga.js | 8 ++++----
 4 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/packages/core/src/sagas/speakActivityAndStartDictateOnIncomingActivityFromOthersSaga.js b/packages/core/src/sagas/speakActivityAndStartDictateOnIncomingActivityFromOthersSaga.js
index aeb01ca4d5..87279fa768 100644
--- a/packages/core/src/sagas/speakActivityAndStartDictateOnIncomingActivityFromOthersSaga.js
+++ b/packages/core/src/sagas/speakActivityAndStartDictateOnIncomingActivityFromOthersSaga.js
@@ -32,7 +32,8 @@ function* speakActivityAndStartDictateOnIncomingActivityFromOthers({ userID }) {
   }

   if (shouldSpeak && activity.inputHint === 'expectingInput') {
-    // In continuous mode, dictateState === LISTENING and we shouldn't set it to WILL_START, which means auto start after synthesis completed.
+    // In continuous mode (dictateState === LISTENING), we shouldn't set it to WILL_START.
+    // WILL_START means recognition will auto-start after synthesis completes.
     if (dictateState === IDLE) {
       yield put(setDictateState(WILL_START));
     }
diff --git a/packages/core/src/sagas/startSpeakActivityOnPostActivitySaga.js b/packages/core/src/sagas/startSpeakActivityOnPostActivitySaga.js
index f3eb61d8c1..a3aa338514 100644
--- a/packages/core/src/sagas/startSpeakActivityOnPostActivitySaga.js
+++ b/packages/core/src/sagas/startSpeakActivityOnPostActivitySaga.js
@@ -15,7 +15,7 @@ function* startSpeakActivityOnPostActivity() {
   if (
     // In continuous mode (speech recognition is active), we should speak everything.
     dictateState === DICTATING ||
-    // If last user message was sent via speech, we should speak bot response.
+    // Otherwise, in interactive mode, if last message was sent via speech, we should speak bot response.
     (meta.method === 'speech' && payload.activity.type === 'message')
   ) {
     yield put(startSpeakingActivity());
diff --git a/packages/core/src/sagas/stopDictateOnCardActionSaga.js b/packages/core/src/sagas/stopDictateOnCardActionSaga.js
index ebb303104b..216ee876dd 100644
--- a/packages/core/src/sagas/stopDictateOnCardActionSaga.js
+++ b/packages/core/src/sagas/stopDictateOnCardActionSaga.js
@@ -17,8 +17,9 @@ function* stopDictateOnCardAction() {
 function* putStopDictate() {
   const dictateState = yield select(dictateStateSelector);

-  // In continuous mode (speech recognition is active) and it should not be stopped by performing card action.
-  // Otherwise, stop dictation.
+  // When performing card action:
+  // - In continuous mode (speech recognition is active), speech recognition should not be stopped
+  // - Otherwise, in interactive mode, speech recognition should be stopped
   if (dictateState !== DICTATING) {
     yield put(stopDictate());
   }
diff --git a/packages/core/src/sagas/stopSpeakingActivityOnInputSaga.js b/packages/core/src/sagas/stopSpeakingActivityOnInputSaga.js
index 35d94e58dc..63f854f5c3 100644
--- a/packages/core/src/sagas/stopSpeakingActivityOnInputSaga.js
+++ b/packages/core/src/sagas/stopSpeakingActivityOnInputSaga.js
@@ -22,10 +22,10 @@ function* stopSpeakingActivityOnInput() {
     function* ({ meta, payload, type }) {
       const dictateState = yield select(dictateStateSelector);

-      // If input is post activity, do not stop if:
-      // - In continuous mode, or
-      // - Posting via speech, or
-      // - Posting a non-message.
+      // If input is post activity, do not stop speaking if any one of the following:
+      // - In continuous mode (speech should kept as active as long as possible)
+      // - Posting via speech (interactive mode, should speak bot response)
+      // - Posting a non-message (interactive mode, not typing on keyboard, should be ignored)
       if (
         type === POST_ACTIVITY_PENDING &&
         (dictateState === DICTATING || meta.method === 'speech' || payload.activity.type !== 'message')

From 1311d17b06ed7d67e61a7f474d05776e040d926c Mon Sep 17 00:00:00 2001
From: William Wong
Date: Thu, 13 Feb 2025 11:01:43 +0000
Subject: [PATCH 40/43] Better comment

---
 packages/core/src/sagas/stopSpeakingActivityOnInputSaga.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/core/src/sagas/stopSpeakingActivityOnInputSaga.js b/packages/core/src/sagas/stopSpeakingActivityOnInputSaga.js
index 63f854f5c3..aa7843a985 100644
--- a/packages/core/src/sagas/stopSpeakingActivityOnInputSaga.js
+++ b/packages/core/src/sagas/stopSpeakingActivityOnInputSaga.js
@@ -23,7 +23,7 @@ function* stopSpeakingActivityOnInput() {
       const dictateState = yield select(dictateStateSelector);

       // If input is post activity, do not stop speaking if any one of the following:
-      // - In continuous mode (speech should kept as active as long as possible)
+      // - In continuous mode (speech recognition should stay active for as long as possible)
       // - Posting via speech (interactive mode, should speak bot response)
       // - Posting a non-message (interactive mode, not typing on keyboard, should be ignored)
       if (
         type === POST_ACTIVITY_PENDING &&
         (dictateState === DICTATING || meta.method === 'speech' || payload.activity.type !== 'message')

From 50f1628af06368f31dacc37678e4df0d01b0cd10 Mon Sep 17 00:00:00 2001
From: William Wong
Date: Thu, 13 Feb 2025 18:32:35 +0000
Subject: [PATCH 41/43] Add comment

---
 packages/api/src/StyleOptions.ts | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/packages/api/src/StyleOptions.ts b/packages/api/src/StyleOptions.ts
index
bf5783ad72..f229984adc 100644
--- a/packages/api/src/StyleOptions.ts
+++ b/packages/api/src/StyleOptions.ts
@@ -943,6 +943,14 @@ type StyleOptions = {
    */
   feedbackActionsPlacement?: 'activity-actions' | 'activity-status';

+  /**
+   * Use continuous mode for speech recognition. Defaults to `false`.
+   *
+   * - `true` to use continuous mode, which favors a hands-off experience: speech recognition stays active for extended periods, barge-in is supported, and non-speech interactions will not stop speech recognition
+   * - `false` to use interactive mode, which favors privacy: speech recognition stays active only for the minimal time required, barge-in is not supported, and non-speech interactions will stop speech recognition
+   *
+   * @see https://github.com/microsoft/BotFramework-WebChat/pull/5426
+   */
   speechRecognitionContinuous?: boolean | undefined;
 };

From 5be4691773b3fa931169b8306e0002898e0dd655 Mon Sep 17 00:00:00 2001
From: William Wong
Date: Thu, 13 Feb 2025 18:53:05 +0000
Subject: [PATCH 42/43] Add speech error telemetry

---
 __tests__/html2/speech/errorTelemetry.html | 115 +++++++++++++++++++++
 packages/component/src/Composer.tsx | 21 +++-
 2 files changed, 131 insertions(+), 5 deletions(-)
 create mode 100644 __tests__/html2/speech/errorTelemetry.html

diff --git a/__tests__/html2/speech/errorTelemetry.html b/__tests__/html2/speech/errorTelemetry.html
new file mode 100644
index 0000000000..f456f1d6f0
--- /dev/null
+++ b/__tests__/html2/speech/errorTelemetry.html
@@ -0,0 +1,115 @@
+
+
+
+
+
+
+
+
+
+
+
+
+

diff --git a/packages/component/src/Composer.tsx b/packages/component/src/Composer.tsx
index cfd7507e00..6cd0d91b01 100644
--- a/packages/component/src/Composer.tsx
+++ b/packages/component/src/Composer.tsx
@@ -57,7 +57,7 @@ import addTargetBlankToHyperlinksMarkdown from './Utils/addTargetBlankToHyperlin
 import downscaleImageToDataURL from './Utils/downscaleImageToDataURL';
 import mapMap from './Utils/mapMap';

-const { useGetActivityByKey, useReferenceGrammarID, useStyleOptions } = hooks;
+const { useGetActivityByKey, useReferenceGrammarID, useStyleOptions, useTrackException } = hooks;

 const node_env = process.env.node_env || process.env.NODE_ENV;

@@ -85,11 +85,22 @@ const ComposerCoreUI = memo(({ children }: ComposerCoreUIProps) => {
   const [{ internalLiveRegionFadeAfter }] = useStyleOptions();
   const [customPropertiesClassName] = useCustomPropertiesClassName();
   const rootClassName = useStyleToEmotionObject()(ROOT_STYLE) + '';
+  const trackException = useTrackException();

-  const dictationOnError = useCallback(err => {
-    // Ignore aborted error as it is likely user clicking on the microphone button to abort recognition.
-    err.error === 'aborted' || console.error(err);
-  }, []);
+  const dictationOnError = useCallback(
+    (errorEvent: SpeechRecognitionErrorEvent) => {
+      // Ignore aborted error as it is likely the user clicking on the microphone button to abort recognition.
+      if (errorEvent.error !== 'aborted') {
+        const nativeError = new Error('Speech recognition failed');
+
+        nativeError.cause = errorEvent;
+
+        trackException(nativeError, false);
+        console.error(nativeError);
+      }
+    },
+    [trackException]
+  );

   return (
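Taken together, patches 41 and 42 expose the new continuous-mode switch and surface recognition failures as non-fatal telemetry. Below is a minimal host-page sketch of how the two might be consumed together. It assumes Web Chat's usual `renderWebChat`, `styleOptions`, `webSpeechPonyfillFactory`, and `onTelemetry` entry points; the exact shape of the telemetry event (`type`, `fatal`, `error`) and the `token` variable are assumptions for illustration, not confirmed by this diff:

  // Hypothetical host-page wiring (sketch only, not part of this patch series).
  window.WebChat.renderWebChat(
    {
      directLine: window.WebChat.createDirectLine({ token }),
      // Use the browser's Web Speech API for recognition/synthesis.
      webSpeechPonyfillFactory: window.WebChat.createBrowserWebSpeechPonyfillFactory(),
      styleOptions: {
        // Opt in to the hands-off experience documented in StyleOptions.ts above.
        speechRecognitionContinuous: true
      },
      onTelemetry: event => {
        // Patch 42 reports recognition failures as non-fatal exceptions; "aborted"
        // (the user releasing the microphone) is already filtered out upstream.
        // The original SpeechRecognitionErrorEvent rides along as error.cause.
        if (event.type === 'exception' && !event.fatal) {
          console.warn('Speech recognition failed:', event.error);
        }
      }
    },
    document.getElementById('webchat')
  );
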
From 0e65ee54e890b8bb801e9e82bf8ad5c9b87bcd37 Mon Sep 17 00:00:00 2001 From: William Wong Date: Thu, 13 Feb 2025 18:54:55 +0000 Subject: [PATCH 43/43] Add types --- .../js/MockedSpeechSynthesisUtterance.js | 67 ++++--------------- 1 file changed, 13 insertions(+), 54 deletions(-) diff --git a/__tests__/html2/speech/js/MockedSpeechSynthesisUtterance.js b/__tests__/html2/speech/js/MockedSpeechSynthesisUtterance.js index 025b5d510e..d8c3bb423d 100644 --- a/__tests__/html2/speech/js/MockedSpeechSynthesisUtterance.js +++ b/__tests__/html2/speech/js/MockedSpeechSynthesisUtterance.js @@ -5,64 +5,23 @@ export default class SpeechSynthesisUtterance extends EventTarget { super(); this.#eventTargetProperties = new EventTargetProperties(this); - this.#text = text || ''; + this.text = text || ''; } #eventTargetProperties; - #lang; - #pitch; - #rate; - #text; - #voice; - #volume; - - get lang() { - return this.#lang; - } - - set lang(value) { - this.#lang = value; - } - - get pitch() { - return this.#pitch; - } - - set pitch(value) { - this.#pitch = value; - } - - get rate() { - return this.#rate; - } - - set rate(value) { - this.#rate = value; - } - get text() { - return this.#text; - } - - set text(value) { - this.#text = value; - } - - get voice() { - return this.#voice; - } - - set voice(value) { - this.#voice = value; - } - - get volume() { - return this.#volume; - } - - set volume(value) { - this.#volume = value; - } + /** @type {string} */ + lang; + /** @type {number} */ + pitch; + /** @type {number} */ + rate; + /** @type {string} */ + text; + /** @type {any} */ + voice; + /** @type {number} */ + volume; get onboundary() { return this.#eventTargetProperties.getProperty('boundary');