-
Notifications
You must be signed in to change notification settings - Fork 19
/
Speech Recognition.ahk
executable file
·272 lines (214 loc) · 8.64 KB
/
Speech Recognition.ahk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
#NoEnv
#Warn All
#Warn LocalSameAsGlobal, Off
#Persistent
/*
Speech Recognition
==================
A class providing access to Microsoft's SAPI. Requires the SAPI SDK.
Reference
---------
### Recognizer := new SpeechRecognizer
Creates a new speech recognizer instance.
The instance starts off listening to any phrases.
### Recognizer.Recognize(Values = True)
Set the values that can be recognized by the recognizer.
If `Values` is an array of strings, the array is interpreted as a list of possible phrases to recognize. Phrases not in the array will not be recognized. This provides a relatively high degree of recognition accuracy compared to dictation mode.
If `Values` is otherwise truthy, dictation mode is enabled, which means that the speech recognizer will attempt to recognize any phrases spoken.
If `Values` is falsy, the speech recognizer will be disabled and will stop listening if currently doing so.
Returns the speech recognizer instance.
### Recognizer.Listen(State = True)
Set the state of the recognizer.
If `State` is truthy, then the recognizer will start listening if not already doing so.
If `State` is falsy, then the recognizer will stop listening if currently doing so.
Returns the speech recognizer instance.
### Text := Recognizer.Prompt(Timeout = -1)
Obtains the next phrase spoken as plain text.
If `Timeout` is zero or a positive number, it is interpreted as a duration in milliseconds: the function will stop and return a blank string if the user has not said anything within this interval.
If `Timeout` is a negative number, the function will wait indefinitely for the user to speak a phrase.
Returns the text spoken.
### Recognizer.OnRecognize(Text)
A callback invoked immediately upon any phrases being recognized.
The `Text` parameter receives the phrase spoken.
This function is meant to be overridden in subclasses. By default, it does nothing.
The return value is discarded.
*/
/* Example: recognizing a specific list of phrases
TrayTip, Speech Recognition, Say a number between 0 and 9 inclusive
s := new SpeechRecognizer
s.Recognize(["zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"])
Text := s.Prompt()
TrayTip, Speech Recognition, You said: %Text%
Sleep, 3000
ExitApp
*/
/* Example: recognizing any phrase
TrayTip, Speech Recognition, Say something
s := new SpeechRecognizer
s.Recognize(True)
Text := s.Prompt()
TrayTip, Speech Recognition, You said: %Text%
Sleep, 3000
ExitApp
*/
/* Example: custom behaviour upon phrase recognition
TrayTip, Speech Recognition, Say something (press Escape to close)
s := new CustomSpeech ;create the custom speech recognizer
s.Recognize(True)
Esc::ExitApp
class CustomSpeech extends SpeechRecognizer
{
OnRecognize(Text)
{
static cSpeaker := ComObjCreate("SAPI.SpVoice")
TrayTip, Speech Recognition, You said: %Text%
cSpeaker.Speak("You said: " . Text)
}
}
*/
class SpeechRecognizer
{ ;speech recognition class by Uberi
    ;Maps the address of each recognition context wrapper to the address of the
    ;instance that owns it. Only addresses are stored (weak references), so this
    ;table never keeps an instance alive on its own.
    static Contexts := {}

    ;Creates the SAPI recognizer, recognition context, grammar and the dynamic
    ;"WordsRule" rule, then switches to dictation mode and hooks up the
    ;recognition events.
    ;Throws an Exception describing the failing step if any COM call fails.
    __New()
    {
        try
        {
            this.cListener := ComObjCreate("SAPI.SpInprocRecognizer") ;obtain speech recognizer (ISpeechRecognizer object)
            cAudioInputs := this.cListener.GetAudioInputs() ;obtain list of audio inputs (ISpeechObjectTokens object)
            this.cListener.AudioInput := cAudioInputs.Item(0) ;set audio device to first input
        }
        catch e
            throw Exception("Could not create recognizer: " . e.Message)

        try this.cContext := this.cListener.CreateRecoContext() ;obtain speech recognition context (ISpeechRecoContext object)
        catch e
            throw Exception("Could not create recognition context: " . e.Message)

        try this.cGrammar := this.cContext.CreateGrammar() ;obtain phrase manager (ISpeechRecoGrammar object)
        catch e
            throw Exception("Could not create recognition grammar: " . e.Message)

        ;create rule to use when dictation mode is off
        try
        {
            this.cRules := this.cGrammar.Rules() ;obtain list of grammar rules (ISpeechGrammarRules object)
            this.cRule := this.cRules.Add("WordsRule",0x1 | 0x20) ;add a new grammar rule (SRATopLevel | SRADynamic)
        }
        catch e
            throw Exception("Could not create speech recognition grammar rules: " . e.Message)

        ;populate the rule with a few default phrases (this turns dictation off),
        ;then turn dictation back on so the instance starts out recognizing any phrase
        this.Phrases(["hello", "hi", "greetings", "salutations"])
        this.Dictate(True)

        SpeechRecognizer.Contexts[&this.cContext] := &this ;store a weak reference to the instance so event callbacks can obtain this instance
        this.Prompting := False ;prompting defaults to inactive
        ComObjConnect(this.cContext, "SpeechRecognizer_") ;connect the recognition context events to functions
    }

    ;Selects what the recognizer listens for.
    ;  Values - array of phrases: only those phrases are recognized
    ;           any other truthy value: dictation mode (any phrase)
    ;           falsy value: the recognizer stops listening
    ;Returns this instance.
    Recognize(Values = True)
    {
        If Values ;enable speech recognition
        {
            this.Listen(True)
            If IsObject(Values) ;list of phrases to use
                this.Phrases(Values)
            Else ;recognize any phrase
                this.Dictate(True)
        }
        Else ;disable speech recognition
            this.Listen(False)
        Return, this
    }

    ;Starts (truthy State) or stops (falsy State) the recognizer.
    ;Throws an Exception if the recognizer state cannot be set.
    ;Returns this instance.
    Listen(State = True)
    {
        try
        {
            If State
                this.cListener.State := 1 ;SRSActive
            Else
                this.cListener.State := 0 ;SRSInactive
        }
        catch e
            throw Exception("Could not set listener state: " . e.Message)
        Return, this
    }

    ;Blocks until the next phrase is recognized and returns its text.
    ;  Timeout - negative: wait indefinitely
    ;            otherwise: maximum time to wait, in milliseconds (compared
    ;            against A_TickCount); a blank string is returned on expiry
    Prompt(Timeout = -1)
    {
        this.Prompting := True
        this.SpokenText := ""
        If (Timeout < 0) ;no timeout
        {
            ;the recognition event handler clears Prompting once a phrase arrives
            While, this.Prompting
                Sleep, 0
        }
        Else
        {
            StartTime := A_TickCount
            ;keep waiting only while the elapsed time is still below the timeout
            While, this.Prompting && (A_TickCount - StartTime) < Timeout
                Sleep, 0
            ;if the wait timed out, leave prompting mode so a later phrase is not
            ;recorded as the answer to this (already finished) prompt
            this.Prompting := False
        }
        Return, this.SpokenText
    }

    ;Replaces the contents of the phrase rule with PhraseList (an array of
    ;strings) and switches from dictation mode to phrase-list mode.
    ;Throws an Exception naming the failing step if any COM call fails.
    ;Returns this instance.
    Phrases(PhraseList)
    {
        try this.cRule.Clear() ;reset rule to initial state
        catch e
            throw Exception("Could not reset rule: " . e.Message)

        try cState := this.cRule.InitialState() ;obtain rule initial state (ISpeechGrammarRuleState object)
        catch e
            throw Exception("Could not obtain rule initial state: " . e.Message)

        ;add rules to recognize
        cNull := ComObjParameter(13,0) ;null IUnknown pointer
        For Index, Phrase In PhraseList
        {
            try cState.AddWordTransition(cNull, Phrase) ;add a no-op rule state transition triggered by a phrase
            catch e
                throw Exception("Could not add rule """ . Phrase . """: " . e.Message)
        }

        try this.cRules.Commit() ;compile all rules in the rule collection
        catch e
            throw Exception("Could not update rule: " . e.Message)

        this.Dictate(False) ;disable dictation mode
        Return, this
    }

    ;Toggles between dictation mode and the phrase rule; exactly one of the two
    ;is active after this call.
    ;Throws an Exception if the grammar state cannot be changed.
    ;Returns this instance.
    Dictate(Enable = True)
    {
        try
        {
            If Enable ;enable dictation mode
            {
                this.cGrammar.DictationSetState(1) ;enable dictation mode (SGDSActive)
                this.cGrammar.CmdSetRuleState("WordsRule", 0) ;disable the rule (SGDSInactive)
            }
            Else ;disable dictation mode
            {
                this.cGrammar.DictationSetState(0) ;disable dictation mode (SGDSInactive)
                this.cGrammar.CmdSetRuleState("WordsRule", 1) ;enable the rule (SGDSActive)
            }
        }
        catch e
            throw Exception("Could not set grammar dictation state: " . e.Message)
        Return, this
    }

    ;Callback invoked with the recognized text for every recognition.
    OnRecognize(Text)
    {
        ;placeholder function meant to be overridden in subclasses
    }

    ;Removes this instance's entry from the Contexts table.
    __Delete()
    {
        ;__New may have thrown before the context was created, in which case
        ;there is no context wrapper and nothing was registered
        If IsObject(this.cContext)
            this.base.Contexts.Remove(&this.cContext, "") ;remove weak reference to the instance
    }
}
;Event handler for the recognition context's Recognition event; it is bound by
;name through the "SpeechRecognizer_" prefix passed to ComObjConnect.
;Extracts the recognized text from cResult, completes a pending Prompt() on the
;owning SpeechRecognizer instance (if any) and then invokes its OnRecognize callback.
;cContext is used as the lookup key into SpeechRecognizer.Contexts.
;Throws an Exception if the recognized text cannot be read from cResult.
SpeechRecognizer_Recognition(StreamNumber, StreamPosition, RecognitionType, cResult, cContext) ;speech recognition engine produced a recognition
{
    try
    {
        pPhrase := cResult.PhraseInfo() ;obtain detailed information about recognized phrase (ISpeechPhraseInfo object from ISpeechRecoResult object)
        Text := pPhrase.GetText() ;obtain the spoken text
    }
    catch e
        throw Exception("Could not obtain recognition result text: " . e.Message)

    ;look up the weak reference (a raw address) registered for this context
    Address := SpeechRecognizer.Contexts[&cContext]
    If (!Address) ;no instance registered for this context; nothing to notify
        Return
    Instance := Object(Address) ;obtain reference to the recognizer

    ;handle prompting mode
    If (Instance.Prompting)
    {
        Instance.SpokenText := Text
        Instance.Prompting := False
    }

    Instance.OnRecognize(Text) ;invoke callback in recognizer
}