-
Notifications
You must be signed in to change notification settings - Fork 66
/
Copy pathGrijjy.TextToSpeech.iOS.pas
233 lines (195 loc) · 7.97 KB
/
Grijjy.TextToSpeech.iOS.pas
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
unit Grijjy.TextToSpeech.iOS;
{< Text To Speech engine implementation for iOS }
interface
uses
Macapi.ObjectiveC,
iOSapi.Foundation,
iOSapi.CocoaTypes,
iOSapi.AVFoundation,
Grijjy.TextToSpeech.Base;
{ These declarations are missing from iOSapi.AVFoundation }
{ Boundary argument accepted by AVSpeechSynthesizer.stopSpeakingAtBoundary and
AVSpeechSynthesizer.pauseSpeakingAtBoundary. Declared as a plain NSInteger
alias, so the values below are passed through to Objective-C unchanged. }
type
AVSpeechBoundary = NSInteger;
const
{ NOTE(review): going by the names (and Apple's AVSpeechBoundary enum),
"Immediate" acts right away and "Word" waits for the end of the current
word - confirm against the AVFoundation documentation. }
AVSpeechBoundaryImmediate = 0;
AVSpeechBoundaryWord = 1;
{ Import declarations for the Objective-C class AVSpeechSynthesisVoice.
The method names below are the Objective-C selectors, so they must not be
renamed. }
type
AVSpeechSynthesisVoice = interface;
{ Class-level (static) methods of AVSpeechSynthesisVoice. }
AVSpeechSynthesisVoiceClass = interface(NSObjectClass)
['{A2006345-086C-4416-AAE7-3B1DD6B47BE1}']
{ Returns an NSArray whose elements are AVSpeechSynthesisVoice objects. }
{class} function speechVoices: NSArray{<AVSpeechSynthesisVoice>}; cdecl;
{class} function currentLanguageCode: NSString; cdecl;
{ Returns a voice object for the given language string. }
{class} function voiceWithLanguage(language: NSString): AVSpeechSynthesisVoice; cdecl;
end;
{ Instance-level methods of AVSpeechSynthesisVoice. }
AVSpeechSynthesisVoice = interface(NSObject)
['{FBFD24DF-08F6-43A3-8A9B-32D583B0B8B5}']
function language: NSString; cdecl;
end;
{ Import class that ties the class-level and instance-level interfaces
together; use its OCClass/Create members to reach the native class. }
TAVSpeechSynthesisVoice = class(TOCGenericImport<AVSpeechSynthesisVoiceClass, AVSpeechSynthesisVoice>) end;
{ Import declarations for the Objective-C class AVSpeechUtterance.
The method names below are the Objective-C selectors, so they must not be
renamed. }
type
AVSpeechUtterance = interface;
{ Class-level (static) methods of AVSpeechUtterance. }
AVSpeechUtteranceClass = interface(NSObjectClass)
['{E6695EAF-6909-4D1E-AFFA-DFB7CDC256EF}']
{ Factory method: builds an utterance from the given string. This is the
method used by TgoTextToSpeechImplementation.Speak. }
{class} function speechUtteranceWithString(str: NSString): AVSpeechUtterance; cdecl;
end;
{ Instance-level methods of AVSpeechUtterance: getter/setter pairs for the
voice, rate, pitch multiplier, volume and pre/post utterance delays, plus
read-only access to the string to speak. }
AVSpeechUtterance = interface(NSObject)
['{5D2DDD5B-688B-4193-B0F3-26C6C755AEDC}']
function initWithString(str: NSString): AVSpeechUtterance; cdecl;
function voice: AVSpeechSynthesisVoice; cdecl;
procedure setVoice(voice: AVSpeechSynthesisVoice); cdecl;
function speechString: NSString; cdecl;
function rate: Single; cdecl;
procedure setRate(rate: Single); cdecl;
function pitchMultiplier: Single; cdecl;
procedure setPitchMultiplier(pitchMultiplier: Single); cdecl;
function volume: Single; cdecl;
procedure setVolume(volume: Single); cdecl;
function preUtteranceDelay: NSTimeInterval; cdecl;
procedure setPreUtteranceDelay(preUtteranceDelay: NSTimeInterval); cdecl;
function postUtteranceDelay: NSTimeInterval; cdecl;
procedure setPostUtteranceDelay(postUtteranceDelay: NSTimeInterval); cdecl;
end;
{ Import class that ties the class-level and instance-level interfaces
together; use its OCClass/Create members to reach the native class. }
TAVSpeechUtterance = class(TOCGenericImport<AVSpeechUtteranceClass, AVSpeechUtterance>) end;
{ Import declarations for the Objective-C class AVSpeechSynthesizer and its
delegate protocol. The method names below are the Objective-C selectors, so
they must not be renamed. }
type
AVSpeechSynthesizer = interface;
AVSpeechSynthesizerDelegate = interface;
{ Class-level interface of AVSpeechSynthesizer; no class methods are
imported. }
AVSpeechSynthesizerClass = interface(NSObjectClass)
['{4F761699-0210-47EB-802B-DAC900C9979B}']
end;
{ Instance-level methods of AVSpeechSynthesizer. }
AVSpeechSynthesizer = interface(NSObject)
['{EC1850A7-B7EA-4C5D-A47B-D3EDDC3D4146}']
{ The getter returns the raw pointer, whereas the setter accepts the
Delphi-side delegate interface. }
function delegate: Pointer; cdecl;
procedure setDelegate(delegate: AVSpeechSynthesizerDelegate); cdecl;
function isSpeaking: Boolean; cdecl;
function isPaused: Boolean; cdecl;
procedure speakUtterance(utterance: AVSpeechUtterance); cdecl;
{ The boundary argument is one of the AVSpeechBoundaryXxx constants. }
function stopSpeakingAtBoundary(boundary: AVSpeechBoundary): Boolean; cdecl;
function pauseSpeakingAtBoundary(boundary: AVSpeechBoundary): Boolean; cdecl;
function continueSpeaking: Boolean; cdecl;
end;
{ Import class that ties the class-level and instance-level interfaces
together; TAVSpeechSynthesizer.Create yields a new native synthesizer. }
TAVSpeechSynthesizer = class(TOCGenericImport<AVSpeechSynthesizerClass, AVSpeechSynthesizer>) end;
{ Delegate protocol of AVSpeechSynthesizer. Each Delphi method is mapped to
its two-part Objective-C selector through the MethodName attribute; only
the start, finish and cancel notifications are declared here. }
AVSpeechSynthesizerDelegate = interface(IObjectiveC)
['{EF579B2B-6CB1-47E4-AD77-07F580876F8F}']
[MethodName('speechSynthesizer:didStartSpeechUtterance:')]
procedure speechSynthesizerDidStartSpeechUtterance(synthesizer: AVSpeechSynthesizer; utterance: AVSpeechUtterance); cdecl;
[MethodName('speechSynthesizer:didFinishSpeechUtterance:')]
procedure speechSynthesizerDidFinishSpeechUtterance(synthesizer: AVSpeechSynthesizer; utterance: AVSpeechUtterance); cdecl;
[MethodName('speechSynthesizer:didCancelSpeechUtterance:')]
procedure speechSynthesizerDidCancelSpeechUtterance(synthesizer: AVSpeechSynthesizer; utterance: AVSpeechUtterance); cdecl;
end;
type
{ IgoTextToSpeech implementation for iOS, built on AVSpeechSynthesizer.
Owns one native synthesizer plus a delegate object that translates the
synthesizer's start/finish/cancel callbacks into DoSpeechStarted and
DoSpeechFinished calls on this object. }
TgoTextToSpeechImplementation = class(TgoTextToSpeechBase)
{$REGION 'Internal Declarations'}
private const
{ AVSpeechUtterance.Rate ranges from 0.0 to 1.0, where 0.5 is the default.
On iOS 9 (and up?), the default rate is fine.
On iOS 8 and earlier, it is much too fast. }
DEFAULT_SPEECH_RATE_IOS8_DOWN = 0.1;
private type
{ Delphi-side object registered as the synthesizer's delegate. }
TDelegate = class(TOCLocal, AVSpeechSynthesizerDelegate)
private
{ Back-reference to the owning engine; every callback checks it with
Assigned before using it. }
[weak] FTextToSpeech: TgoTextToSpeechImplementation;
{ When False, finish/cancel callbacks are swallowed. Speak clears it
before interrupting a running utterance; the next "did start" callback
sets it again. }
FFireEvents: Boolean;
public
constructor Create(const ATextToSpeech: TgoTextToSpeechImplementation);
public
{ AVSpeechSynthesizerDelegate }
[MethodName('speechSynthesizer:didStartSpeechUtterance:')]
procedure speechSynthesizerDidStartSpeechUtterance(synthesizer: AVSpeechSynthesizer; utterance: AVSpeechUtterance); cdecl;
[MethodName('speechSynthesizer:didFinishSpeechUtterance:')]
procedure speechSynthesizerDidFinishSpeechUtterance(synthesizer: AVSpeechSynthesizer; utterance: AVSpeechUtterance); cdecl;
[MethodName('speechSynthesizer:didCancelSpeechUtterance:')]
procedure speechSynthesizerDidCancelSpeechUtterance(synthesizer: AVSpeechSynthesizer; utterance: AVSpeechUtterance); cdecl;
end;
private
{ Native synthesizer; created in Create and released in Destroy. }
FSpeechSynthesizer: AVSpeechSynthesizer;
{ Delegate attached to FSpeechSynthesizer in Create. }
FDelegate: TDelegate;
protected
{ IgoTextToSpeech }
function Speak(const AText: String): Boolean; override;
procedure Stop; override;
function IsSpeaking: Boolean; override;
{$ENDREGION 'Internal Declarations'}
public
constructor Create;
destructor Destroy; override;
end;
implementation
uses
System.SysUtils,
Macapi.Helpers;
{ TgoTextToSpeechImplementation }
{ Sets up the iOS speech engine: creates the native synthesizer, creates the
  delegate that forwards its callbacks to this object, connects the two, and
  finally flags the engine as available. }
constructor TgoTextToSpeechImplementation.Create;
begin
  inherited;

  { Native AVSpeechSynthesizer instance; released again in Destroy. }
  FSpeechSynthesizer := TAVSpeechSynthesizer.Create;

  { The delegate keeps a back-reference to Self so it can raise our events. }
  FDelegate := TDelegate.Create(Self);
  FSpeechSynthesizer.setDelegate(FDelegate);

  Self.Available := True;
end;
{ Releases the native synthesizer.

  The delegate is detached from the synthesizer before the synthesizer is
  released. Without that step the native object keeps pointing at our delegate;
  if it outlives this object (NOTE(review): e.g. because it is still retained
  elsewhere while finishing or cancelling an utterance - confirm), a late
  start/finish/cancel callback would be routed to a delegate whose
  back-reference targets an object that is being destroyed. }
destructor TgoTextToSpeechImplementation.Destroy;
begin
  if (FSpeechSynthesizer <> nil) then
  begin
    { No more callbacks into FDelegate from this point on. }
    FSpeechSynthesizer.setDelegate(nil);
    { Balances the TAVSpeechSynthesizer.Create call in the constructor. }
    FSpeechSynthesizer.release;
  end;
  inherited;
end;
{ Reports whether the engine is currently speaking, by forwarding the query to
  the native synthesizer's isSpeaking method.
  Returns: the value reported by the synthesizer. }
function TgoTextToSpeechImplementation.IsSpeaking: Boolean;
begin
  Exit(FSpeechSynthesizer.isSpeaking);
end;
{ Speaks the given text, interrupting anything that is currently being spoken.

  Parameters:
    AText: the text to speak. Text that is empty after trimming is ignored.

  Returns:
    Always True. }
function TgoTextToSpeechImplementation.Speak(const AText: String): Boolean;
var
  Speech: AVSpeechUtterance;
begin
  Result := True;

  { Nothing to say: report success without touching the synthesizer. }
  if AText.Trim.IsEmpty then
    Exit;

  if FSpeechSynthesizer.isSpeaking then
  begin
    { Interrupting the current utterance makes the synthesizer deliver a
      "did cancel" callback at some later point. That callback must not be
      reported as a finished speech, so event firing is switched off here. The
      delegate switches it back on when the next utterance starts. }
    FDelegate.FFireEvents := False;
    FSpeechSynthesizer.stopSpeakingAtBoundary(AVSpeechBoundaryImmediate);
  end;

  Speech := TAVSpeechUtterance.OCClass.speechUtteranceWithString(StrToNSStr(AText));

  { Before iOS 9 the default rate is much too fast, so slow it down there. }
  if not TOSVersion.Check(9) then
    Speech.setRate(DEFAULT_SPEECH_RATE_IOS8_DOWN);

  FSpeechSynthesizer.speakUtterance(Speech);
end;
{ Stops the current speech immediately. Does nothing when the synthesizer is
  not speaking. }
procedure TgoTextToSpeechImplementation.Stop;
begin
  if (not FSpeechSynthesizer.isSpeaking) then
    Exit;

  { Stopping also leads to a speechSynthesizerDidCancelSpeechUtterance
    callback on the delegate. }
  FSpeechSynthesizer.stopSpeakingAtBoundary(AVSpeechBoundaryImmediate);
end;
{ TgoTextToSpeechImplementation.TDelegate }
{ Creates the delegate and remembers the engine it reports to.

  Parameters:
    ATextToSpeech: the owning text-to-speech engine. Must be assigned; this is
      checked with an assertion. }
constructor TgoTextToSpeechImplementation.TDelegate.Create(
  const ATextToSpeech: TgoTextToSpeechImplementation);
begin
  Assert(ATextToSpeech <> nil);

  inherited Create;

  FTextToSpeech := ATextToSpeech;
end;
{ Delegate callback for 'speechSynthesizer:didCancelSpeechUtterance:'.
  A cancelled utterance is reported to the owner as a finished speech, unless
  event firing is switched off or the owner is no longer assigned. Neither
  parameter is used. }
procedure TgoTextToSpeechImplementation.TDelegate.speechSynthesizerDidCancelSpeechUtterance(
  synthesizer: AVSpeechSynthesizer; utterance: AVSpeechUtterance);
begin
  if (FTextToSpeech = nil) or (not FFireEvents) then
    Exit;

  FTextToSpeech.DoSpeechFinished;
end;
{ Delegate callback for 'speechSynthesizer:didFinishSpeechUtterance:'.
  Tells the owner that speech has finished, unless event firing is switched
  off or the owner is no longer assigned. Neither parameter is used. }
procedure TgoTextToSpeechImplementation.TDelegate.speechSynthesizerDidFinishSpeechUtterance(
  synthesizer: AVSpeechSynthesizer; utterance: AVSpeechUtterance);
begin
  if (FTextToSpeech = nil) or (not FFireEvents) then
    Exit;

  FTextToSpeech.DoSpeechFinished;
end;
{ Delegate callback for 'speechSynthesizer:didStartSpeechUtterance:'.
  Re-enables finish/cancel event firing (Speak may have switched it off while
  interrupting a previous utterance) and then tells the owner, if still
  assigned, that speech has started. Neither parameter is used. }
procedure TgoTextToSpeechImplementation.TDelegate.speechSynthesizerDidStartSpeechUtterance(
  synthesizer: AVSpeechSynthesizer; utterance: AVSpeechUtterance);
begin
  FFireEvents := True;

  if (FTextToSpeech = nil) then
    Exit;

  FTextToSpeech.DoSpeechStarted;
end;
end.