
Continuous speech recognition with SFSpeechRecognizer (iOS 10 beta)


I converted the SpeakToMe Swift sample code from the Speech Recognition WWDC developer talk to Objective-C, and it works for me. For the Swift version, see https://developer.apple.com/videos/play/wwdc2016/509/; the Objective-C version is below.

- (void)viewDidAppear:(BOOL)animated {
    _recognizer = [[SFSpeechRecognizer alloc] initWithLocale:[NSLocale localeWithLocaleIdentifier:@"en-US"]];
    [_recognizer setDelegate:self];

    [SFSpeechRecognizer requestAuthorization:^(SFSpeechRecognizerAuthorizationStatus authStatus) {
        switch (authStatus) {
            case SFSpeechRecognizerAuthorizationStatusAuthorized:
                // User gave access to speech recognition
                NSLog(@"Authorized");
                break;
            case SFSpeechRecognizerAuthorizationStatusDenied:
                // User denied access to speech recognition
                NSLog(@"SFSpeechRecognizerAuthorizationStatusDenied");
                break;
            case SFSpeechRecognizerAuthorizationStatusRestricted:
                // Speech recognition restricted on this device
                NSLog(@"SFSpeechRecognizerAuthorizationStatusRestricted");
                break;
            case SFSpeechRecognizerAuthorizationStatusNotDetermined:
                // Speech recognition not yet authorized
                break;
            default:
                NSLog(@"Default");
                break;
        }
    }];

    audioEngine = [[AVAudioEngine alloc] init];
    _speechSynthesizer = [[AVSpeechSynthesizer alloc] init];
    [_speechSynthesizer setDelegate:self];
}

- (void)startRecording {
    [self clearLogs:nil];

    NSError *outError;

    AVAudioSession *audioSession = [AVAudioSession sharedInstance];
    [audioSession setCategory:AVAudioSessionCategoryRecord error:&outError];
    [audioSession setMode:AVAudioSessionModeMeasurement error:&outError];
    [audioSession setActive:true withOptions:AVAudioSessionSetActiveOptionNotifyOthersOnDeactivation error:&outError];

    request2 = [[SFSpeechAudioBufferRecognitionRequest alloc] init];
    inputNode = [audioEngine inputNode];

    if (request2 == nil) {
        NSLog(@"Unable to create a SFSpeechAudioBufferRecognitionRequest object");
    }
    if (inputNode == nil) {
        NSLog(@"Unable to create an inputNode object");
    }

    request2.shouldReportPartialResults = true;
    _currentTask = [_recognizer recognitionTaskWithRequest:request2 delegate:self];

    [inputNode installTapOnBus:0 bufferSize:4096 format:[inputNode outputFormatForBus:0] block:^(AVAudioPCMBuffer *buffer, AVAudioTime *when) {
        NSLog(@"Block tap!");
        [request2 appendAudioPCMBuffer:buffer];
    }];

    [audioEngine prepare];
    [audioEngine startAndReturnError:&outError];
    NSLog(@"Error %@", outError);
}

- (void)speechRecognitionTask:(SFSpeechRecognitionTask *)task didFinishRecognition:(SFSpeechRecognitionResult *)result {
    NSLog(@"speechRecognitionTask:didFinishRecognition:");
    NSString *translatedString = [[[result bestTranscription] formattedString] stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]];
    [self log:translatedString];

    if ([result isFinal]) {
        [audioEngine stop];
        [inputNode removeTapOnBus:0];
        _currentTask = nil;
        request2 = nil;
    }
}
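The code above only tears everything down when a final result arrives. If you want to stop recording on demand, a minimal sketch of a stopRecording counterpart could look like the following. It assumes the same audioEngine, inputNode, request2, and _currentTask variables used above (their declarations are not shown in this answer):

- (void)stopRecording {
    // Stop pulling audio from the microphone and remove the tap
    [audioEngine stop];
    [inputNode removeTapOnBus:0];

    // Tell the request that no more audio is coming, so the recognizer
    // can deliver its final result to the delegate
    [request2 endAudio];

    // Optionally finish the task explicitly instead of waiting for isFinal
    [_currentTask finish];
}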


I have had success using SFSpeechRecognizer continuously. The main point is to use an AVCaptureSession to capture the audio and pass it to the speech recognizer. Sorry, my Swift is weak, so this is just the Objective-C version.

Here is my sample code (some UI code is left out; the important parts are marked):

@interface ViewController () <AVCaptureAudioDataOutputSampleBufferDelegate, SFSpeechRecognitionTaskDelegate>

@property (nonatomic, strong) AVCaptureSession *capture;
@property (nonatomic, strong) SFSpeechAudioBufferRecognitionRequest *speechRequest;

@end

@implementation ViewController

- (void)startRecognizer {
    [SFSpeechRecognizer requestAuthorization:^(SFSpeechRecognizerAuthorizationStatus status) {
        if (status == SFSpeechRecognizerAuthorizationStatusAuthorized) {
            NSLocale *local = [[NSLocale alloc] initWithLocaleIdentifier:@"fr_FR"];
            SFSpeechRecognizer *sf = [[SFSpeechRecognizer alloc] initWithLocale:local];
            self.speechRequest = [[SFSpeechAudioBufferRecognitionRequest alloc] init];
            [sf recognitionTaskWithRequest:self.speechRequest delegate:self];
            // startCapture should be called on the main queue, or it may crash
            dispatch_async(dispatch_get_main_queue(), ^{
                [self startCapture];
            });
        }
    }];
}

- (void)endRecognizer {
    // End the capture session AND end the audio of the recognition request,
    // or Apple will terminate the task after 30000 ms.
    [self endCapture];
    [self.speechRequest endAudio];
}

- (void)startCapture {
    NSError *error;

    self.capture = [[AVCaptureSession alloc] init];

    AVCaptureDevice *audioDev = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeAudio];
    if (audioDev == nil) {
        NSLog(@"Couldn't create audio capture device");
        return;
    }

    // Create the microphone input
    AVCaptureDeviceInput *audioIn = [AVCaptureDeviceInput deviceInputWithDevice:audioDev error:&error];
    if (error != nil) {
        NSLog(@"Couldn't create audio input");
        return;
    }

    // Add the microphone input to the capture session
    if ([self.capture canAddInput:audioIn] == NO) {
        NSLog(@"Couldn't add audio input");
        return;
    }
    [self.capture addInput:audioIn];

    // Export the audio data
    AVCaptureAudioDataOutput *audioOutput = [[AVCaptureAudioDataOutput alloc] init];
    [audioOutput setSampleBufferDelegate:self queue:dispatch_get_main_queue()];
    if ([self.capture canAddOutput:audioOutput] == NO) {
        NSLog(@"Couldn't add audio output");
        return;
    }
    [self.capture addOutput:audioOutput];
    [audioOutput connectionWithMediaType:AVMediaTypeAudio];

    [self.capture startRunning];
}

- (void)endCapture {
    if (self.capture != nil && [self.capture isRunning]) {
        [self.capture stopRunning];
    }
}

- (void)captureOutput:(AVCaptureOutput *)captureOutput didOutputSampleBuffer:(CMSampleBufferRef)sampleBuffer fromConnection:(AVCaptureConnection *)connection {
    // Feed every captured audio sample buffer to the recognition request
    [self.speechRequest appendAudioSampleBuffer:sampleBuffer];
}

// SFSpeechRecognitionTaskDelegate methods go here (see the sketch below)

@end
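For the delegate placeholder at the end, a minimal sketch of the SFSpeechRecognitionTaskDelegate callbacks could look like this (the NSLog output is just illustrative; replace it with whatever your UI needs):

// Called repeatedly with partial (hypothesized) transcriptions while audio keeps arriving
- (void)speechRecognitionTask:(SFSpeechRecognitionTask *)task didHypothesizeTranscription:(SFTranscription *)transcription {
    NSLog(@"Partial: %@", transcription.formattedString);
}

// Called with the best transcription the task produced
- (void)speechRecognitionTask:(SFSpeechRecognitionTask *)task didFinishRecognition:(SFSpeechRecognitionResult *)recognitionResult {
    NSLog(@"Final: %@", recognitionResult.bestTranscription.formattedString);
}

// Called when the task ends, successfully or not
- (void)speechRecognitionTask:(SFSpeechRecognitionTask *)task didFinishSuccessfully:(BOOL)successfully {
    if (!successfully) {
        NSLog(@"Recognition task failed: %@", task.error);
    }
}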


Here is the Swift (3.0) implementation of @cube's answer:

import UIKit
import Speech
import AVFoundation

class ViewController: UIViewController {

  @IBOutlet weak var console: UITextView!

  var capture: AVCaptureSession?
  var speechRequest: SFSpeechAudioBufferRecognitionRequest?

  override func viewDidLoad() {
    super.viewDidLoad()
  }

  override func viewDidAppear(_ animated: Bool) {
    super.viewDidAppear(animated)
    startRecognizer()
  }

  func startRecognizer() {
    SFSpeechRecognizer.requestAuthorization { (status) in
      switch status {
      case .authorized:
        let locale = NSLocale(localeIdentifier: "fr_FR")
        let sf = SFSpeechRecognizer(locale: locale as Locale)
        self.speechRequest = SFSpeechAudioBufferRecognitionRequest()
        sf?.recognitionTask(with: self.speechRequest!, delegate: self)
        // Start the capture session on the main queue
        DispatchQueue.main.async {
          self.startCapture()
        }
      case .denied:
        fallthrough
      case .notDetermined:
        fallthrough
      case .restricted:
        print("User Authorization Issue.")
      }
    }
  }

  func endRecognizer() {
    endCapture()
    speechRequest?.endAudio()
  }

  func startCapture() {
    capture = AVCaptureSession()

    guard let audioDev = AVCaptureDevice.defaultDevice(withMediaType: AVMediaTypeAudio) else {
      print("Could not get capture device.")
      return
    }

    guard let audioIn = try? AVCaptureDeviceInput(device: audioDev) else {
      print("Could not create input device.")
      return
    }

    guard true == capture?.canAddInput(audioIn) else {
      print("Could not add input device")
      return
    }
    capture?.addInput(audioIn)

    let audioOut = AVCaptureAudioDataOutput()
    audioOut.setSampleBufferDelegate(self, queue: DispatchQueue.main)

    guard true == capture?.canAddOutput(audioOut) else {
      print("Could not add audio output")
      return
    }
    capture?.addOutput(audioOut)
    audioOut.connection(withMediaType: AVMediaTypeAudio)

    capture?.startRunning()
  }

  func endCapture() {
    if true == capture?.isRunning {
      capture?.stopRunning()
    }
  }
}

extension ViewController: AVCaptureAudioDataOutputSampleBufferDelegate {
  func captureOutput(_ captureOutput: AVCaptureOutput!, didOutputSampleBuffer sampleBuffer: CMSampleBuffer!, from connection: AVCaptureConnection!) {
    speechRequest?.appendAudioSampleBuffer(sampleBuffer)
  }
}

extension ViewController: SFSpeechRecognitionTaskDelegate {
  func speechRecognitionTask(_ task: SFSpeechRecognitionTask, didFinishRecognition recognitionResult: SFSpeechRecognitionResult) {
    console.text = console.text + "\n" + recognitionResult.bestTranscription.formattedString
  }
}

Don't forget to add a value for NSSpeechRecognitionUsageDescription to the Info.plist file, or the app will crash. Since these samples also record from the microphone, NSMicrophoneUsageDescription is required as well.
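For example, the Info.plist entries could look like this (the description strings are just placeholders; use your own wording):

<key>NSSpeechRecognitionUsageDescription</key>
<string>Speech recognition is used to transcribe what you say.</string>
<key>NSMicrophoneUsageDescription</key>
<string>The microphone is used to capture your speech.</string>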