问题
I am working on Bot app and here I have 2 features
- Speech to Text
- Text to Speech
Both are working as expected but I want to detect that when user stop speaking at that time I want to stop detection and send that data to server.
Is there any way to get that user is not speaking ?
I am using below code for speech detection :
// Starts an AVAudio Session
NSError *error;
AVAudioSession *audioSession = [AVAudioSession sharedInstance];
[audioSession setCategory:AVAudioSessionCategoryPlayAndRecord error:&error];
[audioSession setActive:YES withOptions:AVAudioSessionSetActiveOptionNotifyOthersOnDeactivation error:&error];
// Starts a recognition process, in the block it logs the input or stops the audio
// process if there's an error.
recognitionRequest = [[SFSpeechAudioBufferRecognitionRequest alloc] init];
AVAudioInputNode *inputNode = audioEngine.inputNode;
recognitionRequest.shouldReportPartialResults = YES;
recognitionTask = [speechRecognizer recognitionTaskWithRequest:recognitionRequest resultHandler:^(SFSpeechRecognitionResult * _Nullable result, NSError * _Nullable error) {
BOOL isFinal = NO;
if (result) {
// Whatever you say in the microphone after pressing the button should be being logged
// in the console.
NSLog(@"RESULT:%@",result.bestTranscription.formattedString);
self.inputToolbar.contentView.textView.text = result.bestTranscription.formattedString;
self.inputToolbar.contentView.rightBarButtonItem.enabled = YES;
isFinal = !result.isFinal;
}
if (error) {
if (audioEngine != NULL) {
[audioEngine stop];
[inputNode removeTapOnBus:0];
recognitionRequest = nil;
recognitionTask = nil;
}
}
}];
// Sets the recording format
AVAudioFormat *recordingFormat = [inputNode outputFormatForBus:0]; //[[AVAudioFormat alloc] initStandardFormatWithSampleRate:44100 channels:1];
[inputNode installTapOnBus:0 bufferSize:1024 format:recordingFormat block:^(AVAudioPCMBuffer * _Nonnull buffer, AVAudioTime * _Nonnull when) {
[recognitionRequest appendAudioPCMBuffer:buffer];
}];
// Starts the audio engine, i.e. it starts listening.
[audioEngine prepare];
[audioEngine startAndReturnError:&error];
NSLog(@"Say Something, I'm listening");
Let me know if any one required more detail on this.
Thanks in Advance.
回答1:
try using this:
AVAudioRecorder *recorder;
NSTimer *levelTimer;
double lowPassResults;
-(void)configureRecorder{
// AVAudioSession already set in your code, so no need for these 2 lines.
[[AVAudioSession sharedInstance] setCategory:AVAudioSessionCategoryPlayAndRecord error:nil];
[[AVAudioSession sharedInstance] setActive:YES error:nil];
NSURL *url = [NSURL fileURLWithPath:@"/dev/null"];
NSDictionary *settings = [NSDictionary dictionaryWithObjectsAndKeys:
[NSNumber numberWithFloat: 44100.0], AVSampleRateKey,
[NSNumber numberWithInt: kAudioFormatAppleLossless], AVFormatIDKey,
[NSNumber numberWithInt: 1], AVNumberOfChannelsKey,
[NSNumber numberWithInt: AVAudioQualityMax], AVEncoderAudioQualityKey,
nil];
NSError *error;
lowPassResults = 0;
recorder = [[AVAudioRecorder alloc] initWithURL:url settings:settings error:&error];
if (recorder) {
[recorder prepareToRecord];
recorder.meteringEnabled = YES;
[recorder record];
levelTimer = [NSTimer scheduledTimerWithTimeInterval: 0.05 target: self selector: @selector(levelTimerCallback:) userInfo: nil repeats: YES];
} else
NSLog(@"%@", [error description]);
}
}
- (void)levelTimerCallback:(NSTimer *)timer {
[recorder updateMeters];
const double ALPHA = 0.05;
double peakPowerForChannel = pow(10, (0.05 * [recorder peakPowerForChannel:0]));
lowPassResults = ALPHA * peakPowerForChannel + (1.0 - ALPHA) * lowPassResults;
NSLog(@"lowPassResults: %f",lowPassResults);
// Use here a threshold value to stablish if there is silence or speech
if (lowPassResults < 0.1) {
NSLog(@"Silence");
} else if(lowPassResults > 0.5){
NSLog(@"Speech");
}
}
Reference: http://codedrago.com/q/200783/ios-objective-c-speech-recognition-how-to-detect-speech-start-on-ios-speech-api
来源:https://stackoverflow.com/questions/46337097/how-to-auto-stop-speech-recognition-if-user-stop-speaking