diff --git a/CHANGELOG.md b/CHANGELOG.md index d3c0f6c1af..3002a355a2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -80,6 +80,8 @@ Notes: web developers are advised to use [`~` (tilde range)](https://github.com/ - When set to `'activity-status'`, feedback buttons appear in the activity status area (default behavior) - Added support for including activity ID and key into form data indicated by `data-webchat-include-activity-id` and `data-webchat-include-activity-key` attributes, in PR [#5418](https://github.com/microsoft/BotFramework-WebChat/pull/5418), by [@OEvgeny](https://github.com/OEvgeny) - Added dedicated loading animation for messages in preparing state for Fluent theme, in PR [#5423](https://github.com/microsoft/BotFramework-WebChat/pull/5423), by [@OEvgeny](https://github.com/OEvgeny) +- Resolved [#2661](https://github.com/microsoft/BotFramework-WebChat/issues/2661) and [#5352](https://github.com/microsoft/BotFramework-WebChat/issues/5352). Added speech recognition continuous mode with barge-in support, in PR [#5426](https://github.com/microsoft/BotFramework-WebChat/pull/5426), by [@RushikeshGavali](https://github.com/RushikeshGavali) and [@compulim](https://github.com/compulim) + - Set `styleOptions.speechRecognitionContinuous` to `true` with a Web Speech API provider with continuous mode support ### Changed @@ -101,9 +103,10 @@ Notes: web developers are advised to use [`~` (tilde range)](https://github.com/ - Switched math block syntax from `$$` to Tex-style `\[ \]` and `\( \)` delimiters with improved rendering and error handling, in PR [#5353](https://github.com/microsoft/BotFramework-WebChat/pull/5353), by [@OEvgeny](https://github.com/OEvgeny) - Improved avatar display and grouping behavior by fixing rendering issues and activity sender identification, in PR [#5346](https://github.com/microsoft/BotFramework-WebChat/pull/5346), by [@OEvgeny](https://github.com/OEvgeny) - Activity "copy" button will use `outerHTML` and `textContent` for clipboard content, in PR [#5378](https://github.com/microsoft/BotFramework-WebChat/pull/5378), by [@compulim](https://github.com/compulim) -- Bumped dependencies to the latest versions, by [@compulim](https://github.com/compulim) in PR [#5385](https://github.com/microsoft/BotFramework-WebChat/pull/5385) and [#5400](https://github.com/microsoft/BotFramework-WebChat/pull/5400) +- Bumped dependencies to the latest versions, by [@compulim](https://github.com/compulim) in PR [#5385](https://github.com/microsoft/BotFramework-WebChat/pull/5385), [#5400](https://github.com/microsoft/BotFramework-WebChat/pull/5400), and [#5426](https://github.com/microsoft/BotFramework-WebChat/pull/5426) - Production dependencies - [`web-speech-cognitive-services@8.1.0`](https://npmjs.com/package/web-speech-cognitive-services) + - [`react-dictate-button@4.0.0`](https://npmjs.com/package/react-dictate-button) - Enabled icon customization in Fluent theme through CSS variables, in PR [#5413](https://github.com/microsoft/BotFramework-WebChat/pull/5413), by [@OEvgeny](https://github.com/OEvgeny) ### Fixed diff --git a/__tests__/hooks/useDictateState.js b/__tests__/hooks/useDictateState.js deleted file mode 100644 index fef95100dc..0000000000 --- a/__tests__/hooks/useDictateState.js +++ /dev/null @@ -1,39 +0,0 @@ -import { timeouts } from '../constants.json'; - -import uiConnected from '../setup/conditions/uiConnected'; - -// selenium-webdriver API doc: -// https://seleniumhq.github.io/selenium/docs/api/javascript/module/selenium-webdriver/index_exports_WebDriver.html - 
-jest.setTimeout(timeouts.test); - -test('getter should return dictate state', async () => { - const { driver, pageObjects } = await setupWebDriver({ - props: { - webSpeechPonyfillFactory: () => window.WebSpeechMock - } - }); - - await driver.wait(uiConnected(), timeouts.directLine); - - expect((await pageObjects.runHook('useDictateState'))[0]).toMatchInlineSnapshot(`0`); - - await pageObjects.clickMicrophoneButton(); - - // Dictate state "1" is for "automatic turning on microphone after current synthesis completed". - expect((await pageObjects.runHook('useDictateState'))[0]).toMatchInlineSnapshot(`2`); - - await pageObjects.putSpeechRecognitionResult('recognizing', 'Hello'); - - expect((await pageObjects.runHook('useDictateState'))[0]).toMatchInlineSnapshot(`3`); - - await pageObjects.clickMicrophoneButton(); - - expect((await pageObjects.runHook('useDictateState'))[0]).toMatchInlineSnapshot(`4`); -}); - -test('setter should throw exception', async () => { - const { pageObjects } = await setupWebDriver(); - - await expect(pageObjects.runHook('useDictateState', [], dictateState => dictateState[1]())).rejects.toThrow(); -}); diff --git a/__tests__/html2/hooks/private/renderHook.js b/__tests__/html2/hooks/private/renderHook.js new file mode 100644 index 0000000000..1593e69293 --- /dev/null +++ b/__tests__/html2/hooks/private/renderHook.js @@ -0,0 +1,77 @@ +// Adopted from https://github.com/testing-library/react-testing-library/blob/main/src/pure.js#L292C1-L329C2 + +/*! + * The MIT License (MIT) + * Copyright (c) 2017-Present Kent C. Dodds + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +function wrapUiIfNeeded(innerElement, wrapperComponent) { + return wrapperComponent ? React.createElement(wrapperComponent, null, innerElement) : innerElement; +} + +export default function renderHook( + /** @type {(props: RenderCallbackProps) => any} */ renderCallback, + /** @type {{}} */ options = {} +) { + const { initialProps, ...renderOptions } = options; + + if (renderOptions.legacyRoot && typeof ReactDOM.render !== 'function') { + const error = new Error( + '`legacyRoot: true` is not supported in this version of React. ' + + 'If your app runs React 19 or later, you should remove this flag. ' + + 'If your app runs React 18 or earlier, visit https://react.dev/blog/2022/03/08/react-18-upgrade-guide for upgrade instructions.' 
+ ); + Error.captureStackTrace(error, renderHook); + throw error; + } + + const result = React.createRef(); + + function TestComponent({ renderCallbackProps }) { + const pendingResult = renderCallback(renderCallbackProps); + + React.useEffect(() => { + result.current = pendingResult; + }); + + return null; + } + + // A stripped down version of render() from `@testing-library/react`. + const render = ({ renderCallbackProps }) => { + const element = document.querySelector('main'); + + ReactDOM.render(wrapUiIfNeeded(React.createElement(TestComponent, renderCallbackProps), renderOptions.wrapper), element); + + return { rerender: render, unmount: () => ReactDOM.unmountComponentAtNode(element) }; + }; + + const { rerender: baseRerender, unmount } = render( + React.createElement(TestComponent, { renderCallbackProps: initialProps }), + renderOptions + ); + + function rerender(rerenderCallbackProps) { + return baseRerender(React.createElement(TestComponent, { renderCallbackProps: rerenderCallbackProps })); + } + + return { result, rerender, unmount }; +} diff --git a/__tests__/html2/hooks/useDictateState.html b/__tests__/html2/hooks/useDictateState.html new file mode 100644 index 0000000000..838bc22fba --- /dev/null +++ b/__tests__/html2/hooks/useDictateState.html @@ -0,0 +1,221 @@ + + + + + + + + + + + +
+ + + + diff --git a/__tests__/html2/speech/bargeIn/behavior.html b/__tests__/html2/speech/bargeIn/behavior.html new file mode 100644 index 0000000000..5e31da29f1 --- /dev/null +++ b/__tests__/html2/speech/bargeIn/behavior.html @@ -0,0 +1,316 @@ + + + + + + + + + +
+ + + + diff --git a/__tests__/html2/speech/comprehensive.html b/__tests__/html2/speech/comprehensive.html new file mode 100644 index 0000000000..712a86f323 --- /dev/null +++ b/__tests__/html2/speech/comprehensive.html @@ -0,0 +1,226 @@ + + + + + + + + + +
+ + + + diff --git a/__tests__/html2/speech/errorTelemetry.html b/__tests__/html2/speech/errorTelemetry.html new file mode 100644 index 0000000000..f456f1d6f0 --- /dev/null +++ b/__tests__/html2/speech/errorTelemetry.html @@ -0,0 +1,115 @@ + + + + + + + + + +
+ + + + diff --git a/__tests__/html2/speech/inputHint.acceptingInput.html b/__tests__/html2/speech/inputHint.acceptingInput.html new file mode 100644 index 0000000000..e491c498d9 --- /dev/null +++ b/__tests__/html2/speech/inputHint.acceptingInput.html @@ -0,0 +1,149 @@ + + + + + + + + + +
+ + + + diff --git a/__tests__/html2/speech/inputHint.ignoringInput.html b/__tests__/html2/speech/inputHint.ignoringInput.html new file mode 100644 index 0000000000..a210f51ecf --- /dev/null +++ b/__tests__/html2/speech/inputHint.ignoringInput.html @@ -0,0 +1,151 @@ + + + + + + + + + +
+ + + + diff --git a/__tests__/html2/speech/js/MockedSpeechSynthesis.js b/__tests__/html2/speech/js/MockedSpeechSynthesis.js new file mode 100644 index 0000000000..cdc3436376 --- /dev/null +++ b/__tests__/html2/speech/js/MockedSpeechSynthesis.js @@ -0,0 +1,117 @@ +import { EventTargetProperties } from 'https://unpkg.com/event-target-properties@latest/dist/event-target-properties.mjs'; +import SpeechSynthesisEvent from './MockedSpeechSynthesisEvent.js'; + +export default class SpeechSynthesis extends EventTarget { + constructor() { + super(); + + this.#eventTargetProperties = new EventTargetProperties(this); + } + + /** @type {SpeechSynthesisUtterance} */ + #currentUtterance; + /** @type {EventTargetProperties} */ + #eventTargetProperties; + /** @type {boolean} */ + #paused = false; + // #pending = false; + /** @type {SpeechSynthesisUtterance[]} */ + #queue = []; + /** @type {boolean} */ + #speaking = false; + + get onvoiceschanged() { + return this.#eventTargetProperties.getProperty('voiceschanged'); + } + + set onvoiceschanged(value) { + this.#eventTargetProperties.setProperty('voiceschanged', value); + } + + /** @type {boolean} */ + get paused() { + return this.#paused; + } + + /** @type {boolean} */ + get pending() { + return !!this.#queue.length; + } + + /** @type {boolean} */ + get speaking() { + return !this.paused && this.#speaking; + } + + cancel() { + this.#paused = false; + this.#speaking = false; + this.#queue.splice(0); + + this.#currentUtterance?.dispatchEvent(new SpeechSynthesisEvent('end', { utterance: this.#currentUtterance })); + } + + getVoices() { + return []; + } + + pause() { + if (this.#paused) { + return; + } + + this.#paused = true; + + this.#currentUtterance?.dispatchEvent(new SpeechSynthesisEvent('pause', { utterance: this.#currentUtterance })); + } + + resume() { + if (!this.#paused) { + return; + } + + this.#paused = false; + + if (this.#currentUtterance) { + this.#currentUtterance.dispatchEvent(new SpeechSynthesisEvent('resume', { utterance: this.#currentUtterance })); + } else { + this.#next(); + } + } + + speak(/** @type {SpeechSynthesisUtterance} */ utterance) { + this.#queue.push(/** @type {SpeechSynthesisUtterance} */ utterance); + + !this.#paused && !this.#speaking && this.#next(); + } + + #next() { + if (this.#paused) { + throw new Error('Should not call #next() when it is paused.'); + } + + this.#currentUtterance = this.#queue.shift(); + + if (!this.#currentUtterance) { + this.#paused = false; + this.#speaking = false; + + return; + } + + this.#speaking = true; + + this.#currentUtterance.addEventListener('end', () => this.#next(), { once: true }); + this.#currentUtterance.addEventListener( + 'error', + () => { + this.#paused = false; + this.#speaking = false; + this.#queue.splice(0); + }, + { once: true } + ); + + this.#currentUtterance.dispatchEvent(new SpeechSynthesisEvent('start', { utterance: this.#currentUtterance })); + } +} diff --git a/__tests__/html2/speech/js/MockedSpeechSynthesisEvent.js b/__tests__/html2/speech/js/MockedSpeechSynthesisEvent.js new file mode 100644 index 0000000000..bd99d28cd0 --- /dev/null +++ b/__tests__/html2/speech/js/MockedSpeechSynthesisEvent.js @@ -0,0 +1,47 @@ +export default class SpeechSynthesisEvent extends Event { + constructor( + /** @type {string} */ + type, + /** @type {EventInitDict} */ + eventInitDict + ) { + super(type, eventInitDict); + + this.#charIndex = eventInitDict.charIndex || 0; + this.#charLength = eventInitDict.charLength || 0; + this.#elapsedTime = eventInitDict.elapsedTime || 0; + this.#name = 
eventInitDict.name || ''; + this.#utterance = eventInitDict.utterance; + } + + /** @type {number} */ + #charIndex; + /** @type {number} */ + #charLength; + /** @type {number} */ + #elapsedTime; + /** @type {string} */ + #name; + /** @type {SpeechSynthesisUtterance | undefined} */ + #utterance; + + get charIndex() { + return this.#charIndex; + } + + get charLength() { + return this.#charLength; + } + + get elapsedTime() { + return this.#elapsedTime; + } + + get name() { + return this.#name; + } + + get utterance() { + return this.#utterance; + } +} diff --git a/__tests__/html2/speech/js/MockedSpeechSynthesisUtterance.js b/__tests__/html2/speech/js/MockedSpeechSynthesisUtterance.js new file mode 100644 index 0000000000..d8c3bb423d --- /dev/null +++ b/__tests__/html2/speech/js/MockedSpeechSynthesisUtterance.js @@ -0,0 +1,81 @@ +import { EventTargetProperties } from 'https://unpkg.com/event-target-properties@latest/dist/event-target-properties.mjs'; + +export default class SpeechSynthesisUtterance extends EventTarget { + constructor(text) { + super(); + + this.#eventTargetProperties = new EventTargetProperties(this); + this.text = text || ''; + } + + #eventTargetProperties; + + /** @type {string} */ + lang; + /** @type {number} */ + pitch; + /** @type {number} */ + rate; + /** @type {string} */ + text; + /** @type {any} */ + voice; + /** @type {number} */ + volume; + + get onboundary() { + return this.#eventTargetProperties.getProperty('boundary'); + } + + set onboundary(value) { + this.#eventTargetProperties.setProperty('boundary', value); + } + + get onend() { + return this.#eventTargetProperties.getProperty('end'); + } + + set onend(value) { + this.#eventTargetProperties.setProperty('end', value); + } + + get onerror() { + return this.#eventTargetProperties.getProperty('error'); + } + + set onerror(value) { + this.#eventTargetProperties.setProperty('error', value); + } + + get onmark() { + return this.#eventTargetProperties.getProperty('mark'); + } + + set onmark(value) { + this.#eventTargetProperties.setProperty('mark', value); + } + + get onpause() { + return this.#eventTargetProperties.getProperty('pause'); + } + + set onpause(value) { + this.#eventTargetProperties.setProperty('pause', value); + } + + get onresume() { + return this.#eventTargetProperties.getProperty('resume'); + } + + set onresume(value) { + this.#eventTargetProperties.setProperty('resume', value); + } + + get onstart() { + return this.#eventTargetProperties.getProperty('start'); + } + + set onstart(value) { + this.#eventTargetProperties.setProperty('start', value); + } +} diff --git a/__tests__/html2/speech/js/index.js b/__tests__/html2/speech/js/index.js new file mode 100644 index 0000000000..f04e5a9372 --- /dev/null +++ b/__tests__/html2/speech/js/index.js @@ -0,0 +1,3 @@ +export { default as SpeechSynthesis } from './MockedSpeechSynthesis.js'; +export { default as SpeechSynthesisEvent } from './MockedSpeechSynthesisEvent.js'; +export { default as SpeechSynthesisUtterance } from './MockedSpeechSynthesisUtterance.js'; diff --git a/__tests__/html2/speech/mockSetup.html b/__tests__/html2/speech/mockSetup.html new file mode 100644 index 0000000000..7441e121ed --- /dev/null +++ b/__tests__/html2/speech/mockSetup.html @@ -0,0 +1,183 @@ + + + + + + + + + +
+

This test validates the mock of SpeechRecognition.
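Only the speech synthesis half of the mock is visible in this diff. The mocked classes are driven entirely by events; a minimal sketch of exercising them, assuming the page loads the modules above:

```js
import { SpeechSynthesis, SpeechSynthesisEvent, SpeechSynthesisUtterance } from './js/index.js';

const speechSynthesis = new SpeechSynthesis();
const utterance = new SpeechSynthesisUtterance('Hello, World!');

utterance.onstart = () => console.log('utterance started');
utterance.onend = () => console.log('utterance ended');

// speak() queues the utterance and dispatches "start" on it synchronously.
speechSynthesis.speak(utterance);
console.log(speechSynthesis.speaking); // true

// The mock never completes an utterance on its own: the test signals completion
// by dispatching "end" on the utterance, which advances the internal queue.
utterance.dispatchEvent(new SpeechSynthesisEvent('end', { utterance }));
console.log(speechSynthesis.speaking); // false
```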

+
+ + + + diff --git a/__tests__/html2/speech/performCardAction.continuous.html b/__tests__/html2/speech/performCardAction.continuous.html new file mode 100644 index 0000000000..7723b0d60e --- /dev/null +++ b/__tests__/html2/speech/performCardAction.continuous.html @@ -0,0 +1,213 @@ + + + + + + + + + +
+ + + + diff --git a/__tests__/html2/speech/performCardAction.interactive.html b/__tests__/html2/speech/performCardAction.interactive.html new file mode 100644 index 0000000000..05d55ea472 --- /dev/null +++ b/__tests__/html2/speech/performCardAction.interactive.html @@ -0,0 +1,197 @@ + + + + + + + + + +
+ + + + diff --git a/jest.legacy.config.js b/jest.legacy.config.js index 55f203263e..d255c6788e 100644 --- a/jest.legacy.config.js +++ b/jest.legacy.config.js @@ -66,6 +66,7 @@ module.exports = { '/__tests__/html/__dist__', '/__tests__/html/__jest__', '/__tests__/html/assets', + '/__tests__/html2/', // Will be tested by jest.html2.config.js. '/__tests__/setup/', '/packages/bundle/__tests__/types/__typescript__/', '/packages/core/__tests__/types/__typescript__/', diff --git a/package-lock.json b/package-lock.json index 4f09cfc9b2..1076c37de6 100644 --- a/package-lock.json +++ b/package-lock.json @@ -18610,36 +18610,20 @@ } }, "node_modules/react-dictate-button": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/react-dictate-button/-/react-dictate-button-2.0.1.tgz", - "integrity": "sha512-cLVxzjEy/I5IdOhZHedSbMwPIV62cQHUj09kvHm6XyRpycX7j3efLRRm661HO9zZM3ZtYT+Sy4j7F5eJaAWBug==", + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/react-dictate-button/-/react-dictate-button-4.0.0.tgz", + "integrity": "sha512-v+92/yHShDzCapCZm2Y6UoKEKzt32gCJWFTIcJxRTwySfP8+eulUG/2U2ttu74YD6i0z9dYsRfFEHHwC+UfwKQ==", "license": "MIT", "dependencies": { "@babel/runtime-corejs3": "^7.14.0", "core-js": "^3.12.1", - "prop-types": "15.7.2" + "react-dictate-button": "^4.0.0", + "use-ref-from": "^0.1.0" }, "peerDependencies": { - "react": ">= 16.8.0" - } - }, - "node_modules/react-dictate-button/node_modules/prop-types": { - "version": "15.7.2", - "resolved": "https://registry.npmjs.org/prop-types/-/prop-types-15.7.2.tgz", - "integrity": "sha512-8QQikdH7//R2vurIJSutZ1smHYTcLpRWEOlHnzcWHmBYrOGUysKwSsrC89BCiFj3CbrfJ/nXFdJepOVrY1GCHQ==", - "license": "MIT", - "dependencies": { - "loose-envify": "^1.4.0", - "object-assign": "^4.1.1", - "react-is": "^16.8.1" + "react": ">=16.8.6" } }, - "node_modules/react-dictate-button/node_modules/react-is": { - "version": "16.13.1", - "resolved": "https://registry.npmjs.org/react-is/-/react-is-16.13.1.tgz", - "integrity": "sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ==", - "license": "MIT" - }, "node_modules/react-dom": { "version": "16.8.6", "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-16.8.6.tgz", @@ -24271,7 +24255,7 @@ "prop-types": "15.8.1", "punycode": "2.3.1", "react-chain-of-responsibility": "0.2.0-main.3cb47ab", - "react-dictate-button": "2.0.1", + "react-dictate-button": "4.0.0", "react-film": "3.1.1-main.f623bf6", "react-redux": "7.2.9", "react-say": "2.1.0", diff --git a/packages/api/src/StyleOptions.ts b/packages/api/src/StyleOptions.ts index 70dfb01051..f229984adc 100644 --- a/packages/api/src/StyleOptions.ts +++ b/packages/api/src/StyleOptions.ts @@ -942,6 +942,16 @@ type StyleOptions = { * New in 4.19.0. */ feedbackActionsPlacement?: 'activity-actions' | 'activity-status'; + + /** + * Use continuous mode for speech recognition. Default to `false`. 
+ * + * - `true` to use continuous mode which focuses on a hands-off experience, keeping speech recognition active for extended periods, supporting barge-in, non-speech interactions will not stop speech recognition + * - `false` to use interactive mode which focuses on privacy, keeping speech recognition active only for the minimal time required, no barge-in, non-speech interactions will stop speech recognition + * + * @see https://github.com/microsoft/BotFramework-WebChat/pull/5426 + */ + speechRecognitionContinuous?: boolean | undefined; }; // StrictStyleOptions is only used internally in Web Chat and for simplifying our code: diff --git a/packages/api/src/defaultStyleOptions.ts b/packages/api/src/defaultStyleOptions.ts index 80d4d11649..8fa1ef4e6b 100644 --- a/packages/api/src/defaultStyleOptions.ts +++ b/packages/api/src/defaultStyleOptions.ts @@ -307,7 +307,10 @@ const DEFAULT_OPTIONS: Required = { codeBlockTheme: 'github-light-default' as const, - feedbackActionsPlacement: 'activity-status' as const + feedbackActionsPlacement: 'activity-status' as const, + + // Speech recognition + speechRecognitionContinuous: false }; export default DEFAULT_OPTIONS; diff --git a/packages/api/src/hooks/Composer.tsx b/packages/api/src/hooks/Composer.tsx index 6ae1f9340d..99f7e21513 100644 --- a/packages/api/src/hooks/Composer.tsx +++ b/packages/api/src/hooks/Composer.tsx @@ -116,12 +116,14 @@ const EMPTY_ARRAY: readonly [] = Object.freeze([]); function createCardActionContext({ cardActionMiddleware, + continuous, directLine, dispatch, markAllAsAcknowledged, ponyfill }: { cardActionMiddleware: readonly CardActionMiddleware[]; + continuous: boolean; directLine: DirectLineJSBotConnection; dispatch: Function; markAllAsAcknowledged: () => void; @@ -137,6 +139,11 @@ function createCardActionContext({ onCardAction: (cardAction, { target }: { target?: any } = {}) => { markAllAsAcknowledged(); + // Stop speech recognition only if under interactive mode. 
+ if (!continuous) { + dispatch(stopDictate()); + } + return runMiddleware({ cardAction, getSignInUrl: @@ -340,12 +347,20 @@ const ComposerCore = ({ () => createCardActionContext({ cardActionMiddleware: Object.freeze([...singleToArray(cardActionMiddleware)]), + continuous: !!styleOptions.speechRecognitionContinuous, directLine, dispatch, markAllAsAcknowledged, ponyfill }), - [cardActionMiddleware, directLine, dispatch, markAllAsAcknowledged, ponyfill] + [ + cardActionMiddleware, + directLine, + dispatch, + markAllAsAcknowledged, + ponyfill, + styleOptions.speechRecognitionContinuous + ] ); const patchedSelectVoice = useMemo( diff --git a/packages/component/package.json b/packages/component/package.json index 1edef023d6..127a93c351 100644 --- a/packages/component/package.json +++ b/packages/component/package.json @@ -151,7 +151,7 @@ "prop-types": "15.8.1", "punycode": "2.3.1", "react-chain-of-responsibility": "0.2.0-main.3cb47ab", - "react-dictate-button": "2.0.1", + "react-dictate-button": "4.0.0", "react-film": "3.1.1-main.f623bf6", "react-redux": "7.2.9", "react-say": "2.1.0", diff --git a/packages/component/src/Composer.tsx b/packages/component/src/Composer.tsx index 7847f5917f..6cd0d91b01 100644 --- a/packages/component/src/Composer.tsx +++ b/packages/component/src/Composer.tsx @@ -57,7 +57,7 @@ import addTargetBlankToHyperlinksMarkdown from './Utils/addTargetBlankToHyperlin import downscaleImageToDataURL from './Utils/downscaleImageToDataURL'; import mapMap from './Utils/mapMap'; -const { useGetActivityByKey, useReferenceGrammarID, useStyleOptions } = hooks; +const { useGetActivityByKey, useReferenceGrammarID, useStyleOptions, useTrackException } = hooks; const node_env = process.env.node_env || process.env.NODE_ENV; @@ -85,10 +85,22 @@ const ComposerCoreUI = memo(({ children }: ComposerCoreUIProps) => { const [{ internalLiveRegionFadeAfter }] = useStyleOptions(); const [customPropertiesClassName] = useCustomPropertiesClassName(); const rootClassName = useStyleToEmotionObject()(ROOT_STYLE) + ''; + const trackException = useTrackException(); - const dictationOnError = useCallback(err => { - console.error(err); - }, []); + const dictationOnError = useCallback( + (errorEvent: SpeechRecognitionErrorEvent) => { + // Ignore aborted error as it is likely user clicking on the microphone button to abort recognition. + if (errorEvent.error !== 'aborted') { + const nativeError = new Error('Speech recognition failed'); + + nativeError.cause = errorEvent; + + trackException(nativeError, false); + console.error(nativeError); + } + }, + [trackException] + ); return (
diff --git a/packages/component/src/Dictation.js b/packages/component/src/Dictation.js index 3934350a91..48a1efecd4 100644 --- a/packages/component/src/Dictation.js +++ b/packages/component/src/Dictation.js @@ -3,7 +3,7 @@ import { hooks } from 'botframework-webchat-api'; import { useSetDictateState } from 'botframework-webchat-api/internal'; import { Constants } from 'botframework-webchat-core'; import PropTypes from 'prop-types'; -import React, { useCallback, useEffect, useMemo } from 'react'; +import React, { useCallback, useEffect } from 'react'; import { Composer as DictateComposer } from 'react-dictate-button'; import useResumeAudioContext from './hooks/internal/useResumeAudioContext'; @@ -11,7 +11,6 @@ import useSettableDictateAbortable from './hooks/internal/useSettableDictateAbor import useWebSpeechPonyfill from './hooks/useWebSpeechPonyfill'; const { - useActivities, useDictateInterims, useDictateState, useEmitTypingIndicator, @@ -20,6 +19,7 @@ const { useSendTypingIndicator, useShouldSpeakIncomingActivity, useStopDictate, + useStyleOptions, useSubmitSendBox, useUIState } = hooks; @@ -34,7 +34,7 @@ const Dictation = ({ onError }) => { const [, setSendBox] = useSendBoxValue(); const [, setShouldSpeakIncomingActivity] = useShouldSpeakIncomingActivity(); const [{ SpeechGrammarList, SpeechRecognition } = {}] = useWebSpeechPonyfill(); - const [activities] = useActivities(); + const [{ speechRecognitionContinuous }] = useStyleOptions(); const [dictateState] = useDictateState(); const [sendTypingIndicator] = useSendTypingIndicator(); const [speechLanguage] = useLanguage('speech'); @@ -45,17 +45,10 @@ const Dictation = ({ onError }) => { const stopDictate = useStopDictate(); const submitSendBox = useSubmitSendBox(); - const numSpeakingActivities = useMemo( - () => activities.filter(({ channelData: { speak } = {} }) => speak).length, - [activities] - ); - const handleDictate = useCallback( ({ result: { confidence, transcript } = {} }) => { if (dictateState === DICTATING || dictateState === STARTING) { setDictateInterims([]); - setDictateState(IDLE); - stopDictate(); if (transcript) { setSendBox(transcript); @@ -64,15 +57,7 @@ const Dictation = ({ onError }) => { } } }, - [ - dictateState, - setDictateInterims, - setDictateState, - stopDictate, - setSendBox, - submitSendBox, - setShouldSpeakIncomingActivity - ] + [dictateState, setDictateInterims, setSendBox, submitSendBox, setShouldSpeakIncomingActivity] ); const handleDictating = useCallback( @@ -89,6 +74,11 @@ const Dictation = ({ onError }) => { [dictateState, emitTypingIndicator, sendTypingIndicator, setDictateAbortable, setDictateInterims, setDictateState] ); + const handleEnd = useCallback(() => { + dictateState !== IDLE && setDictateState(IDLE); + (dictateState === DICTATING || dictateState === STARTING) && stopDictate(); + }, [dictateState, setDictateState, stopDictate]); + const handleError = useCallback( event => { dictateState !== IDLE && setDictateState(IDLE); @@ -107,15 +97,15 @@ const Dictation = ({ onError }) => { return ( ); }; diff --git a/packages/component/src/SendBox/BasicSendBox.tsx b/packages/component/src/SendBox/BasicSendBox.tsx index e4acd9b869..864de878b2 100644 --- a/packages/component/src/SendBox/BasicSendBox.tsx +++ b/packages/component/src/SendBox/BasicSendBox.tsx @@ -13,13 +13,11 @@ import SendButton from './SendButton'; import SuggestedActions from './SuggestedActions'; import TextBox from './TextBox'; -import type { WebChatActivity } from 'botframework-webchat-core'; - const { DictateState: { DICTATING, 
STARTING } } = Constants; -const { useActivities, useDirection, useDictateState, useStyleOptions } = hooks; +const { useDirection, useDictateState, useStyleOptions } = hooks; const ROOT_STYLE = { '&.webchat__send-box': { @@ -31,19 +29,10 @@ const ROOT_STYLE = { } }; -// TODO: [P3] We should consider exposing core/src/definitions and use it instead -function activityIsSpeakingOrQueuedToSpeak(activity: WebChatActivity) { - return activity.type === 'message' && activity.channelData?.speak; -} - function useSendBoxSpeechInterimsVisible(): [boolean] { - const [activities] = useActivities(); const [dictateState] = useDictateState(); - return [ - (dictateState === STARTING || dictateState === DICTATING) && - !activities.filter(activityIsSpeakingOrQueuedToSpeak).length - ]; + return [dictateState === STARTING || dictateState === DICTATING]; } type BasicSendBoxProps = Readonly<{ diff --git a/packages/component/src/SendBox/DictationInterims.tsx b/packages/component/src/SendBox/DictationInterims.tsx index ff69dd9c42..340af54cda 100644 --- a/packages/component/src/SendBox/DictationInterims.tsx +++ b/packages/component/src/SendBox/DictationInterims.tsx @@ -1,13 +1,14 @@ /* eslint react/no-array-index-key: "off" */ -import { Constants } from 'botframework-webchat-core'; import { hooks } from 'botframework-webchat-api'; +import { Constants } from 'botframework-webchat-core'; import classNames from 'classnames'; import PropTypes from 'prop-types'; import React, { FC } from 'react'; -import useStyleSet from '../hooks/useStyleSet'; import { useStyleToEmotionObject } from '../hooks/internal/styleToEmotionObject'; +import useStyleSet from '../hooks/useStyleSet'; +import testIds from '../testIds'; const { DictateState: { DICTATING, STARTING, STOPPING } @@ -32,13 +33,19 @@ const DictationInterims: FC = ({ className }) => { const rootClassName = useStyleToEmotionObject()(ROOT_STYLE) + ''; return dictateState === STARTING || dictateState === STOPPING ? ( -
    <p className={classNames(dictationInterimsStyleSet + '', rootClassName, (className || '') + '', 'status')}>
+    <p
+      className={classNames(dictationInterimsStyleSet + '', rootClassName, (className || '') + '', 'status')}
+      data-testid={testIds.sendBoxSpeechBox}
+    >
       {dictateState === STARTING && localize('SPEECH_INPUT_STARTING')}
     </p>
) : ( dictateState === DICTATING && (dictateInterims.length ? ( -
      <p className={classNames(dictationInterimsStyleSet + '', rootClassName, (className || '') + '', 'dictating')}>
+      <p
+        className={classNames(dictationInterimsStyleSet + '', rootClassName, (className || '') + '', 'dictating')}
+        data-testid={testIds.sendBoxSpeechBox}
+      >
         {dictateInterims.map((interim, index) => (
           <span key={index}>
             {interim}
             &nbsp;
           </span>
         ))}
       </p>
) : ( -
      <p className={classNames(dictationInterimsStyleSet + '', rootClassName, (className || '') + '', 'status')}>
+      <p
+        className={classNames(dictationInterimsStyleSet + '', rootClassName, (className || '') + '', 'status')}
+        data-testid={testIds.sendBoxSpeechBox}
+      >
         {localize('SPEECH_INPUT_LISTENING')}
       </p>
)) diff --git a/packages/component/src/testIds.ts b/packages/component/src/testIds.ts index f72e256c8c..d58f326907 100644 --- a/packages/component/src/testIds.ts +++ b/packages/component/src/testIds.ts @@ -1,6 +1,7 @@ const testIds = { codeBlockCopyButton: 'code block copy button', copyButton: 'copy button', + sendBoxSpeechBox: 'send box speech box', sendBoxTextBox: 'send box text area', viewCodeButton: 'view code button' }; diff --git a/packages/core/src/sagas/speakActivityAndStartDictateOnIncomingActivityFromOthersSaga.js b/packages/core/src/sagas/speakActivityAndStartDictateOnIncomingActivityFromOthersSaga.js index 41aa2f3e58..87279fa768 100644 --- a/packages/core/src/sagas/speakActivityAndStartDictateOnIncomingActivityFromOthersSaga.js +++ b/packages/core/src/sagas/speakActivityAndStartDictateOnIncomingActivityFromOthersSaga.js @@ -1,12 +1,13 @@ import { put, select, takeEvery } from 'redux-saga/effects'; import { INCOMING_ACTIVITY } from '../actions/incomingActivity'; -import { WILL_START } from '../constants/DictateState'; import markActivity from '../actions/markActivity'; import setDictateState from '../actions/setDictateState'; -import shouldSpeakIncomingActivitySelector from '../selectors/shouldSpeakIncomingActivity'; -import speakableActivity from '../definitions/speakableActivity'; import stopDictate from '../actions/stopDictate'; +import { IDLE, WILL_START } from '../constants/DictateState'; +import speakableActivity from '../definitions/speakableActivity'; +import dictateStateSelector from '../selectors/dictateState'; +import shouldSpeakIncomingActivitySelector from '../selectors/shouldSpeakIncomingActivity'; import whileConnected from './effects/whileConnected'; function* speakActivityAndStartDictateOnIncomingActivityFromOthers({ userID }) { @@ -18,6 +19,7 @@ function* speakActivityAndStartDictateOnIncomingActivityFromOthers({ userID }) { type === INCOMING_ACTIVITY && payload.activity.from.id !== userID && payload.activity.from.role !== 'user', function* ({ payload: { activity } }) { const shouldSpeakIncomingActivity = yield select(shouldSpeakIncomingActivitySelector); + const dictateState = yield select(dictateStateSelector); const shouldSpeak = speakableActivity(activity) && shouldSpeakIncomingActivity; if ( @@ -30,7 +32,11 @@ function* speakActivityAndStartDictateOnIncomingActivityFromOthers({ userID }) { } if (shouldSpeak && activity.inputHint === 'expectingInput') { - yield put(setDictateState(WILL_START)); + // In continuous mode (dictateState === LISTENING), we shouldn't set it to WILL_START. + // WILL_START means auto start after synthesis completed. 
+ if (dictateState === IDLE) { + yield put(setDictateState(WILL_START)); + } } else if (activity.inputHint === 'ignoringInput') { yield put(stopDictate()); } diff --git a/packages/core/src/sagas/startDictateOnSpeakCompleteSaga.js b/packages/core/src/sagas/startDictateOnSpeakCompleteSaga.js index 8ae3cddc9f..bb2be23faa 100644 --- a/packages/core/src/sagas/startDictateOnSpeakCompleteSaga.js +++ b/packages/core/src/sagas/startDictateOnSpeakCompleteSaga.js @@ -1,12 +1,12 @@ import { put, select, takeEvery } from 'redux-saga/effects'; import { MARK_ACTIVITY } from '../actions/markActivity'; -import { of as activitiesOf } from '../selectors/activities'; import { SET_DICTATE_STATE } from '../actions/setDictateState'; +import startDictate from '../actions/startDictate'; import { WILL_START } from '../constants/DictateState'; -import dictateStateSelector from '../selectors/dictateState'; import speakingActivity from '../definitions/speakingActivity'; -import startDictate from '../actions/startDictate'; +import { of as activitiesOf } from '../selectors/activities'; +import dictateStateSelector from '../selectors/dictateState'; function* startDictateOnSpeakComplete() { const speakingActivities = yield select(activitiesOf(speakingActivity)); diff --git a/packages/core/src/sagas/startSpeakActivityOnPostActivitySaga.js b/packages/core/src/sagas/startSpeakActivityOnPostActivitySaga.js index bcba0a9feb..a3aa338514 100644 --- a/packages/core/src/sagas/startSpeakActivityOnPostActivitySaga.js +++ b/packages/core/src/sagas/startSpeakActivityOnPostActivitySaga.js @@ -1,15 +1,25 @@ -import { put, takeEvery } from 'redux-saga/effects'; +import { put, select, takeEvery } from 'redux-saga/effects'; import { POST_ACTIVITY_PENDING } from '../actions/postActivity'; import startSpeakingActivity from '../actions/startSpeakingActivity'; +import { DICTATING } from '../constants/DictateState'; +import dictateStateSelector from '../selectors/dictateState'; import whileConnected from './effects/whileConnected'; function* startSpeakActivityOnPostActivity() { yield takeEvery( - ({ meta, payload, type }) => - type === POST_ACTIVITY_PENDING && meta.method === 'speech' && payload.activity.type === 'message', - function* () { - yield put(startSpeakingActivity()); + ({ type }) => type === POST_ACTIVITY_PENDING, + function* ({ meta, payload }) { + const dictateState = yield select(dictateStateSelector); + + if ( + // In continuous mode (speech recognition is active), we should speak everything. + dictateState === DICTATING || + // Otherwise, in interactive mode, if last message was sent via speech, we should speak bot response. 
+ (meta.method === 'speech' && payload.activity.type === 'message') + ) { + yield put(startSpeakingActivity()); + } } ); } diff --git a/packages/core/src/sagas/stopDictateOnCardActionSaga.js b/packages/core/src/sagas/stopDictateOnCardActionSaga.js index 4981c00f14..216ee876dd 100644 --- a/packages/core/src/sagas/stopDictateOnCardActionSaga.js +++ b/packages/core/src/sagas/stopDictateOnCardActionSaga.js @@ -1,20 +1,28 @@ -import { put, takeEvery } from 'redux-saga/effects'; +import { put, select, takeEvery } from 'redux-saga/effects'; import { POST_ACTIVITY_PENDING } from '../actions/postActivity'; import stopDictate from '../actions/stopDictate'; +import { DICTATING } from '../constants/DictateState'; +import dictateStateSelector from '../selectors/dictateState'; import whileConnected from './effects/whileConnected'; function* stopDictateOnCardAction() { // TODO: [P2] We should stop speech input when the user click on anything on a card, including open URL which doesn't generate postActivity // This functionality was not implemented in v3 - yield takeEvery( // Currently, there are no actions that are related to card input // For now, we are using POST_ACTIVITY of a "message" activity // In the future, if we have an action for card input, we should use that instead ({ payload, type }) => type === POST_ACTIVITY_PENDING && payload.activity.type === 'message', function* putStopDictate() { - yield put(stopDictate()); + const dictateState = yield select(dictateStateSelector); + + // When performing card action: + // - In continuous mode (speech recognition is active), speech recognition should not be stopped + // - Otherwise, in interactive mode, speech recognition should be stopped + if (dictateState !== DICTATING) { + yield put(stopDictate()); + } } ); } diff --git a/packages/core/src/sagas/stopSpeakingActivityOnInputSaga.js b/packages/core/src/sagas/stopSpeakingActivityOnInputSaga.js index e0b5661021..aa7843a985 100644 --- a/packages/core/src/sagas/stopSpeakingActivityOnInputSaga.js +++ b/packages/core/src/sagas/stopSpeakingActivityOnInputSaga.js @@ -1,20 +1,38 @@ -import { put, takeEvery } from 'redux-saga/effects'; +import { put, select, takeEvery } from 'redux-saga/effects'; import { POST_ACTIVITY_PENDING } from '../actions/postActivity'; +import { SET_DICTATE_INTERIMS } from '../actions/setDictateInterims'; import { SET_SEND_BOX } from '../actions/setSendBox'; import stopSpeakingActivity from '../actions/stopSpeakingActivity'; +import { DICTATING } from '../constants/DictateState'; +import dictateStateSelector from '../selectors/dictateState'; import whileConnected from './effects/whileConnected'; function* stopSpeakingActivityOnInput() { yield takeEvery( - ({ meta, payload, type }) => + ({ payload, type }) => (type === SET_SEND_BOX && payload.text) || // We want to stop speaking activity when the user click on a card action // But currently there are no actions generated out of a card action // So, right now, we are using best-effort by listening to POST_ACTIVITY_PENDING with a "message" event // We filter out speech because we will call startSpeakingActivity() for POST_ACTIVITY_PENDING dispatched by speech - (type === POST_ACTIVITY_PENDING && meta.method !== 'speech' && payload.activity.type === 'message'), - function* () { + type === POST_ACTIVITY_PENDING || + // We want to stop speaking activity on barge-in. 
      type === SET_DICTATE_INTERIMS,
+    function* ({ meta, payload, type }) {
+      const dictateState = yield select(dictateStateSelector);
+
+      // If the input is a post activity, do not stop speaking when any of the following holds:
+      // - In continuous mode (speech recognition should stay active for as long as possible)
+      // - Posting via speech (interactive mode, should speak the bot response)
+      // - Posting a non-message (interactive mode, not typing on the keyboard, should be ignored)
+      if (
+        type === POST_ACTIVITY_PENDING &&
+        (dictateState === DICTATING || meta.method === 'speech' || payload.activity.type !== 'message')
+      ) {
+        return;
+      }
+
       yield put(stopSpeakingActivity());
     }
   );
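Taken together, the new behavior is opted into through a single style option. A minimal sketch of enabling continuous mode, assuming a module script with top-level `await`, plus a Direct Line token and Cognitive Services Speech credentials (`directLineToken` and `speechSubscriptionKey` below are placeholders):

```js
const { createCognitiveServicesSpeechServicesPonyfillFactory, createDirectLine, renderWebChat } =
  window.WebChat;

renderWebChat(
  {
    directLine: createDirectLine({ token: directLineToken }), // placeholder token
    // Continuous mode keeps recognition active across turns and enables barge-in.
    // It requires a Web Speech API provider with continuous mode support.
    styleOptions: { speechRecognitionContinuous: true },
    webSpeechPonyfillFactory: await createCognitiveServicesSpeechServicesPonyfillFactory({
      credentials: { region: 'westus2', subscriptionKey: speechSubscriptionKey } // placeholder credentials
    })
  },
  document.getElementById('webchat')
);
```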