Continuous speech recognition with SFSpeechRecognizer (iOS 10 beta)
I converted the SpeakToMe sample Swift code from the Speech Recognition WWDC developer talk to Objective-C, and it worked for me. For Swift, see https://developer.apple.com/videos/play/wwdc2016/509/, or for Objective-C see below.
// Set up the speech recognizer, request user authorization, and create the
// audio engine and speech synthesizer once the view is on screen.
- (void)viewDidAppear:(BOOL)animated {
    [super viewDidAppear:animated];  // BUG FIX: the original omitted the super call.

    _recognizer = [[SFSpeechRecognizer alloc]
        initWithLocale:[NSLocale localeWithLocaleIdentifier:@"en-US"]];
    [_recognizer setDelegate:self];

    [SFSpeechRecognizer requestAuthorization:^(SFSpeechRecognizerAuthorizationStatus authStatus) {
        switch (authStatus) {
            case SFSpeechRecognizerAuthorizationStatusAuthorized:
                // User gave access to speech recognition.
                NSLog(@"Authorized");
                break;
            case SFSpeechRecognizerAuthorizationStatusDenied:
                // User denied access to speech recognition.
                NSLog(@"SFSpeechRecognizerAuthorizationStatusDenied");
                break;
            case SFSpeechRecognizerAuthorizationStatusRestricted:
                // Speech recognition restricted on this device.
                NSLog(@"SFSpeechRecognizerAuthorizationStatusRestricted");
                break;
            case SFSpeechRecognizerAuthorizationStatusNotDetermined:
                // Speech recognition not yet authorized.
                break;
            default:
                NSLog(@"Default");
                break;
        }
    }];

    audioEngine = [[AVAudioEngine alloc] init];
    _speechSynthesizer = [[AVSpeechSynthesizer alloc] init];
    [_speechSynthesizer setDelegate:self];
}

// Configure the audio session for recording, create a buffer-based
// recognition request, and start streaming microphone buffers into it
// via a tap on the audio engine's input node.
- (void)startRecording {
    [self clearLogs:nil];

    NSError *outError = nil;
    AVAudioSession *audioSession = [AVAudioSession sharedInstance];
    // BUG FIX: check each method's BOOL return value instead of silently
    // discarding it — the error pointer alone is not a reliable signal.
    if (![audioSession setCategory:AVAudioSessionCategoryRecord error:&outError]) {
        NSLog(@"Failed to set audio session category: %@", outError);
    }
    if (![audioSession setMode:AVAudioSessionModeMeasurement error:&outError]) {
        NSLog(@"Failed to set audio session mode: %@", outError);
    }
    if (![audioSession setActive:YES
                     withOptions:AVAudioSessionSetActiveOptionNotifyOthersOnDeactivation
                           error:&outError]) {
        NSLog(@"Failed to activate audio session: %@", outError);
    }

    request2 = [[SFSpeechAudioBufferRecognitionRequest alloc] init];
    inputNode = [audioEngine inputNode];
    // BUG FIX: the original logged these failures but carried on; bail out
    // instead of installing a tap that feeds a nil request.
    if (request2 == nil) {
        NSLog(@"Unable to create a SFSpeechAudioBufferRecognitionRequest object");
        return;
    }
    if (inputNode == nil) {
        NSLog(@"Unable to create an inputNode object");
        return;
    }

    request2.shouldReportPartialResults = YES;
    _currentTask = [_recognizer recognitionTaskWithRequest:request2 delegate:self];

    // Capture a local strong reference for the tap block: referencing the
    // ivar `request2` directly would capture `self` strongly and create a
    // retain cycle for as long as the tap is installed.
    SFSpeechAudioBufferRecognitionRequest *request = request2;
    [inputNode installTapOnBus:0
                    bufferSize:4096
                        format:[inputNode outputFormatForBus:0]
                         block:^(AVAudioPCMBuffer *buffer, AVAudioTime *when) {
                             // NOTE: this block fires many times per second;
                             // the original's per-buffer NSLog was removed.
                             [request appendAudioPCMBuffer:buffer];
                         }];

    [audioEngine prepare];
    // BUG FIX: the original logged outError unconditionally, even on success.
    if (![audioEngine startAndReturnError:&outError]) {
        NSLog(@"Error %@", outError);
    }
}

// SFSpeechRecognitionTaskDelegate: receives a transcription result. Logs the
// trimmed best transcription and, on the final result, tears down the audio
// engine, removes the tap, and releases the task and request.
- (void)speechRecognitionTask:(SFSpeechRecognitionTask *)task
         didFinishRecognition:(SFSpeechRecognitionResult *)result {
    NSLog(@"speechRecognitionTask:(SFSpeechRecognitionTask *)task didFinishRecognition");

    NSString *translatedString =
        [[[result bestTranscription] formattedString]
            stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]];
    [self log:translatedString];

    if ([result isFinal]) {
        [audioEngine stop];
        [inputNode removeTapOnBus:0];
        _currentTask = nil;
        request2 = nil;
    }
}
I have had success using SFSpeechRecognizer continuously. The main point is to use AVCaptureSession to capture audio and transfer it to the SpeechRecognizer. Sorry, I am not strong in Swift, so here is just the Objective-C version.
Here is my sample code (leave out some UI code,some important has marked):
@interface ViewController () <AVCaptureAudioDataOutputSampleBufferDelegate, SFSpeechRecognitionTaskDelegate>

// Capture session that delivers microphone audio to the delegate callback.
@property (nonatomic, strong) AVCaptureSession *capture;
// Buffer-based recognition request fed by the capture callback.
@property (nonatomic, strong) SFSpeechAudioBufferRecognitionRequest *speechRequest;

@end

@implementation ViewController

// Request speech-recognition authorization and, once granted, start a
// recognition task that is fed by the AVCaptureSession microphone audio.
- (void)startRecognizer {
    [SFSpeechRecognizer requestAuthorization:^(SFSpeechRecognizerAuthorizationStatus status) {
        if (status == SFSpeechRecognizerAuthorizationStatusAuthorized) {
            NSLocale *locale = [[NSLocale alloc] initWithLocaleIdentifier:@"fr_FR"];
            SFSpeechRecognizer *recognizer = [[SFSpeechRecognizer alloc] initWithLocale:locale];
            self.speechRequest = [[SFSpeechAudioBufferRecognitionRequest alloc] init];
            [recognizer recognitionTaskWithRequest:self.speechRequest delegate:self];
            // Should call startCapture on the main queue or it may crash.
            dispatch_async(dispatch_get_main_queue(), ^{
                [self startCapture];
            });
        }
    }];
}

// END capture and END voice recognition explicitly —
// otherwise Apple will terminate this task after 30000 ms.
- (void)endRecognizer {
    [self endCapture];
    [self.speechRequest endAudio];
}

// Build an AVCaptureSession whose sole output is microphone sample buffers
// delivered to captureOutput:didOutputSampleBuffer:fromConnection:.
- (void)startCapture {
    NSError *error = nil;
    self.capture = [[AVCaptureSession alloc] init];

    AVCaptureDevice *audioDev = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeAudio];
    if (audioDev == nil) {
        NSLog(@"Couldn't create audio capture device");
        return;
    }

    // Create the mic input device.
    // BUG FIX: check the returned object, not the error pointer — the error
    // is only meaningful when the method signals failure by returning nil.
    AVCaptureDeviceInput *audioIn = [AVCaptureDeviceInput deviceInputWithDevice:audioDev
                                                                          error:&error];
    if (audioIn == nil) {
        NSLog(@"Couldn't create audio input: %@", error);
        return;
    }

    // Add the mic device to the capture session.
    if (![self.capture canAddInput:audioIn]) {
        NSLog(@"Couldn't add audio input");
        return;
    }
    [self.capture addInput:audioIn];

    // Export audio data to ourselves on the main queue.
    AVCaptureAudioDataOutput *audioOutput = [[AVCaptureAudioDataOutput alloc] init];
    [audioOutput setSampleBufferDelegate:self queue:dispatch_get_main_queue()];
    if (![self.capture canAddOutput:audioOutput]) {
        NSLog(@"Couldn't add audio output");
        return;
    }
    [self.capture addOutput:audioOutput];
    [audioOutput connectionWithMediaType:AVMediaTypeAudio];

    [self.capture startRunning];
}

- (void)endCapture {
    if (self.capture != nil && [self.capture isRunning]) {
        [self.capture stopRunning];
    }
}

// AVCaptureAudioDataOutputSampleBufferDelegate: forward every microphone
// sample buffer into the active recognition request.
- (void)captureOutput:(AVCaptureOutput *)captureOutput
    didOutputSampleBuffer:(CMSampleBufferRef)sampleBuffer
           fromConnection:(AVCaptureConnection *)connection {
    [self.speechRequest appendAudioSampleBuffer:sampleBuffer];
}

// some Recognition Delegate

@end
Here is the Swift (3.0) implementation of @cube's answer:
import UIKit
import Speech
import AVFoundation

class ViewController: UIViewController {

    @IBOutlet weak var console: UITextView!

    /// Capture session that delivers microphone audio to the delegate callback.
    var capture: AVCaptureSession?
    /// Buffer-based recognition request fed by the capture callback.
    var speechRequest: SFSpeechAudioBufferRecognitionRequest?

    override func viewDidLoad() {
        super.viewDidLoad()
    }

    override func viewDidAppear(_ animated: Bool) {
        super.viewDidAppear(animated)
        startRecognizer()
    }

    /// Requests speech-recognition authorization and, once granted, starts a
    /// recognition task fed by the capture session's microphone audio.
    func startRecognizer() {
        SFSpeechRecognizer.requestAuthorization { (status) in
            switch status {
            case .authorized:
                let locale = NSLocale(localeIdentifier: "fr_FR")
                let sf = SFSpeechRecognizer(locale: locale as Locale)
                self.speechRequest = SFSpeechAudioBufferRecognitionRequest()
                sf?.recognitionTask(with: self.speechRequest!, delegate: self)
                // BUG FIX: the original left this closure empty, so capture
                // never started and the recognizer received no audio. The
                // Objective-C version this code ports calls startCapture here
                // (on the main queue, as required).
                DispatchQueue.main.async {
                    self.startCapture()
                }
            case .denied: fallthrough
            case .notDetermined: fallthrough
            case .restricted:
                print("User Autorization Issue.")
            }
        }
    }

    /// Stops capturing and finishes the recognition request. Call this
    /// explicitly; otherwise the task is terminated after roughly 30 s.
    func endRecognizer() {
        endCapture()
        speechRequest?.endAudio()
    }

    /// Builds an AVCaptureSession whose sole output is microphone sample
    /// buffers delivered to the AVCaptureAudioDataOutputSampleBufferDelegate.
    func startCapture() {
        capture = AVCaptureSession()

        guard let audioDev = AVCaptureDevice.defaultDevice(withMediaType: AVMediaTypeAudio) else {
            print("Could not get capture device.")
            return
        }
        guard let audioIn = try? AVCaptureDeviceInput(device: audioDev) else {
            print("Could not create input device.")
            return
        }
        guard true == capture?.canAddInput(audioIn) else {
            print("Couls not add input device")
            return
        }
        capture?.addInput(audioIn)

        let audioOut = AVCaptureAudioDataOutput()
        audioOut.setSampleBufferDelegate(self, queue: DispatchQueue.main)
        guard true == capture?.canAddOutput(audioOut) else {
            print("Could not add audio output")
            return
        }
        capture?.addOutput(audioOut)
        audioOut.connection(withMediaType: AVMediaTypeAudio)

        capture?.startRunning()
    }

    func endCapture() {
        if true == capture?.isRunning {
            capture?.stopRunning()
        }
    }
}

extension ViewController: AVCaptureAudioDataOutputSampleBufferDelegate {
    /// Forwards every microphone sample buffer into the recognition request.
    func captureOutput(_ captureOutput: AVCaptureOutput!,
                       didOutputSampleBuffer sampleBuffer: CMSampleBuffer!,
                       from connection: AVCaptureConnection!) {
        speechRequest?.appendAudioSampleBuffer(sampleBuffer)
    }
}

extension ViewController: SFSpeechRecognitionTaskDelegate {
    /// Appends each finished transcription to the on-screen console.
    func speechRecognitionTask(_ task: SFSpeechRecognitionTask,
                               didFinishRecognition recognitionResult: SFSpeechRecognitionResult) {
        console.text = console.text + "\n" + recognitionResult.bestTranscription.formattedString
    }
}
Don't forget to add a value for the NSSpeechRecognitionUsageDescription key in the Info.plist file, otherwise the app will crash.