Speech to text and Text to speech at same time

问题

INTRODUCTION

I'm developing an app where I need to use booth SpeechRecognizer and TTS. But I'm facing some problems while trying this. The main one is that if I initialize TTS, SpeechRecgonizer seems not to work, and If I disable TTS, then SpeechRecognizer works fine. Next there is code snipet with the relevant code:

CODE

public class GameActivity extends Activity implements OnInitListener {

    private static TextToSpeech tts;

    @Override
    public void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.game);

        /*SPEECH RECOGNIZER INSTANT*/
        Log.d("SPEECH", "speech recognition available: " + SpeechRecognizer.isRecognitionAvailable(this));
        mSpeechRecognizer = SpeechRecognizer.createSpeechRecognizer(this);
        mSpeechRecognizer.setRecognitionListener(new SpeechListener());

        mSpeechRecognizerIntent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
        mSpeechRecognizerIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL,
            RecognizerIntent.LANGUAGE_MODEL_FREE_FORM);
        mSpeechRecognizerIntent.putExtra(RecognizerIntent.EXTRA_CALLING_PACKAGE,
            this.getPackageName());

        /*START LISTENING*/
        mSpeechRecognizer.startListening(mSpeechRecognizerIntent);

        /*CHECK TTS AVAILABLE*/
        Intent checkIntent = new Intent();
        checkIntent.setAction( TextToSpeech.Engine.ACTION_CHECK_TTS_DATA );
        startActivityForResult(checkIntent, CHECK_TTS_DATA);
    }


    @Override
    protected void onActivityResult(int requestCode, int resultCode, Intent data) {
        switch (requestCode) {
            case CHECK_TTS_DATA: {
                /*IF OK, INITIALIZES TTS*/
                if (resultCode == TextToSpeech.Engine.CHECK_VOICE_DATA_PASS) {
                    Log.d("TTS", "TTS available");
                    tts = new TextToSpeech(this, this);
                }
                else {
                    Log.d("TTS", "TTS NOT available");
                    Intent promptInstall = new Intent();
                    promptInstall.setAction(TextToSpeech.Engine.ACTION_INSTALL_TTS_DATA);
                    startActivity(promptInstall);
                }
                break;
            }
        }
    }


    /**OnInitListener implementation*/
    @Override
    public void onInit(int status) {
        if (status == TextToSpeech.SUCCESS) {
            /*set Language*/
            tts.setLanguage(Locale.getDefault());
            /*STARTS MAIN METHOD*/
            Log.d("SPEECH", "Initialized, starting main method");
            SpeechWhenMotion();
        } else {
            Log.e("TTS", "Initilization Failed");
        }
    }


    /**INNER LISTENER CLASS*/
    private class SpeechListener implements RecognitionListener {
        @Override
        public void onBufferReceived(byte[] buffer) {
            Log.d("SR_LISTENER", "onBufferReceived");
        }
        @Override
        public void onError(int error) {
            Log.d("SR_LISTENER", "onError: " + error);
            mSpeechRecognizer.startListening(mSpeechRecognizerIntent);
        }
        @Override
        public void onEvent(int eventType, Bundle params) {
            Log.d("SR_LISTENER", "onEvent");
        }
        @Override
        public void onPartialResults(Bundle partialResults) {
            Log.d("SR_LISTENER", "onPartialResults");
        }
        @Override
        public void onReadyForSpeech(Bundle params) {
            Log.d("SR_LISTENER", "onReadyForSpeech");
        }
        @Override
        public void onResults(Bundle results) {
            Log.d("SR_LISTENER", "onResult");

        matches = results.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION);
        }
        @Override
        public void onRmsChanged(float rmsdB) {
            Log.d("SR_LISTENER", "onRmsChanged");
        }
        @Override
        public void onBeginningOfSpeech() {
            Log.d("SR_LISTENER", "onBeginningOfSpeech");
        }
        @Override
        public void onEndOfSpeech() {
            Log.d("SR_LISTENER", "onEndOfSpeech");
        }
    }

So basically what I'm facing is what explained in the introduction. If I don't do tts = new TextToSpeech(this, this);, then I see in the LogCat the logs refering to the speech listener, but If I initialize the TTS I don't see this logs.

Also, I suppose that when I'm able to make them work, I should stop listening when TTS speaks.

UPDATE -- COMPLETE SIMPLE PROJECT CODE

Main Activity:

public class MainActivity extends Activity implements OnInitListener {

    /*Voice and speech variables*/
    private static TextToSpeech tts;

    public static final int CHECK_TTS_DATA = 1;

    public static boolean fase0 = true;
    public static boolean fase1 = false;

    /*Service*/
    private int mBindFlag;
    public static Messenger mActivityMessenger;

    static int result;
    private final boolean mFinished = false;



    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);

        mActivityMessenger = new Messenger(new IncomingHandler());

        Intent serviceIntent = new Intent(this, SpeechRecognitionService.class);
        serviceIntent.putExtra("Messenger", mActivityMessenger);
        startService(serviceIntent);
        mBindFlag = Build.VERSION.SDK_INT < Build.VERSION_CODES.ICE_CREAM_SANDWICH ? 0 : Context.BIND_ABOVE_CLIENT;

        Intent checkIntent = new Intent();
        checkIntent.setAction( TextToSpeech.Engine.ACTION_CHECK_TTS_DATA );
        startActivityForResult(checkIntent, CHECK_TTS_DATA);
    }



    @Override
    protected void onActivityResult(int requestCode, int resultCode, Intent data) {
        switch (requestCode) {
            case CHECK_TTS_DATA: {
                if (resultCode == TextToSpeech.Engine.CHECK_VOICE_DATA_PASS) {
                    Log.d("TTS", "TTS available");
                    tts = new TextToSpeech(this, this);
                }
                else {
                    Log.d("TTS", "TTS NOT available");
                    Intent promptInstall = new Intent();
                    promptInstall.setAction(TextToSpeech.Engine.ACTION_INSTALL_TTS_DATA);
                    startActivity(promptInstall);
                }
                break;
            }
        }
    }



    /******************
     * 
     * MAIN METHOD
     * 
     ******************/

    public void SpeechWhenMotion() {
        while (!mFinished) {
            if (fase0) {
                tts.speak("Initializing", TextToSpeech.QUEUE_FLUSH, null);
                fase0 = false;
            }
            else if (fase1) {
                if (result == SpeechRecognitionService.CONTINUE) {
                    tts.speak("Correct", TextToSpeech.QUEUE_FLUSH, null);
                    mFinished = true;
                }
                else if (result == SpeechRecognitionService.NO_MATCH) {
                    tts.speak("No matching", TextToSpeech.QUEUE_FLUSH, null);
                }
                else {
                    Log.d("RESULT", String.valueOf(result));
                }
            }
        }
    }



    /******************
     * 
     * SERVICE LINK
     * 
     ******************/

    private final ServiceConnection mServiceConnection = new ServiceConnection() {
        @Override
        public void onServiceConnected(ComponentName name, IBinder service) {
            Log.d("SERVICE", "onServiceConnected"); //$NON-NLS-1$

            SpeechRecognitionService.mServiceMessenger = new Messenger(service);
            Message msg = new Message();
            msg.what = SpeechRecognitionService.MSG_RECOGNIZER_START_LISTENING;
            try {
                SpeechRecognitionService.mServiceMessenger.send(msg);
            }
            catch (RemoteException e) {
                e.printStackTrace();
            }
        }

        @Override
        public void onServiceDisconnected(ComponentName name) {
            Log.d("SERVICE", "onServiceDisconnected");
            SpeechRecognitionService.mServiceMessenger = null;
        }
    };


    private static class IncomingHandler extends Handler {
        @Override
        public void handleMessage(Message msg) {
            switch (msg.what) {
                case SpeechRecognitionService.MSG_SPEECH_RESULT:
                    Log.d("MESSENGER", "Message received");
                    result = msg.arg1;
                    break;
                default:
                    super.handleMessage(msg);
            }
        }
    }



    /******************
     * 
     * IMPLEMENTED METHODS
     * 
     ******************/

    @Override
    public void onInit(int status) {
        if (status == TextToSpeech.SUCCESS) {
            /*set Language*/
            tts.setLanguage(Locale.getDefault());
            // tts.setPitch(5); // set pitch level
            // tts.setSpeechRate(2); // set speech speed rate
            /*Start main method*/
            Log.d("TTS", "Initialized, starting main method");
            SpeechWhenMotion();
        } else {
            Log.e("TTS", "Initilization Failed");
        }

    }



    /******************
     * 
     * LIFE CYCLE
     * 
     ******************/

    @Override
    public void onDestroy() {
        super.onDestroy();

        if (tts != null) {
            tts.stop();
            tts.shutdown();
        }
    }

    @Override
    protected void onStart() {
        super.onStart();

        /*Bind SpeechRecognition service*/
        bindService(new Intent(this, SpeechRecognitionService.class), mServiceConnection, mBindFlag);
    }

    @Override
    protected void onStop() {
        super.onStop();

        /*Unbind SpeechRecognition service*/
        if (mActivityMessenger != null) {
            unbindService(mServiceConnection);
            mActivityMessenger = null;
        }
    }
}

Service:

public class SpeechRecognitionService extends Service {

    protected static AudioManager mAudioManager;
    protected SpeechRecognizer mSpeechRecognizer;
    protected Intent mSpeechRecognizerIntent;
    public static Messenger mServiceMessenger;

    protected boolean mIsListening;
    protected volatile boolean mIsCountDownOn;
    private static boolean mIsStreamSolo;

    static final int MSG_RECOGNIZER_START_LISTENING = 1;
    static final int MSG_RECOGNIZER_CANCEL = 2;
    static final int MSG_SPEECH_RESULT = 3;

    static ArrayList<String> matches;
    static final int CONTINUE = 11;
    static final int NO_MATCH = 12;



    /******************
     * 
     * ONCREATE
     * 
     ******************/

    @Override
    public void onCreate() {
        super.onCreate();

        mServiceMessenger = new Messenger(new IncomingHandler(this));

        mAudioManager = (AudioManager) getSystemService(Context.AUDIO_SERVICE);
        mSpeechRecognizer = SpeechRecognizer.createSpeechRecognizer(this);
        mSpeechRecognizer.setRecognitionListener(new SpeechRecognitionListener());

        mSpeechRecognizerIntent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
        mSpeechRecognizerIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL,
                RecognizerIntent.LANGUAGE_MODEL_FREE_FORM);
        mSpeechRecognizerIntent.putExtra(RecognizerIntent.EXTRA_CALLING_PACKAGE,
                this.getPackageName());
    }


    @Override
    public int onStartCommand(Intent intent, int flags, int startId) {
        Log.d("SERVICE", "onStartCommand");
        MainActivity.mActivityMessenger = intent.getParcelableExtra("Messenger");
        return super.onStartCommand(intent, flags, startId);
    }



    /******************
     * 
     * METHODS
     * 
     ******************/

    @Override
    public IBinder onBind(Intent intent) {
        Log.d("BIND", "onBind");
        return mServiceMessenger.getBinder();
    }


    /**Count down timer for Jelly Bean work around*/
    protected CountDownTimer mNoSpeechCountDown = new CountDownTimer(5000, 5000) {
        @Override
        public void onTick(long millisUntilFinished) {
            // TODO Auto-generated method stub
        }

        @Override
        public void onFinish() {
            mIsCountDownOn = false;
            Message message = Message.obtain(null, MSG_RECOGNIZER_CANCEL);
            try {
                mServiceMessenger.send(message);
                message = Message.obtain(null, MSG_RECOGNIZER_START_LISTENING);
                mServiceMessenger.send(message);
            }
            catch (RemoteException e) {
            }
        }
    };



    public void sendMessageToUI(int result) {
        Message msg = new Message();
        msg = Message.obtain(null, MSG_SPEECH_RESULT);
        msg.arg1 = result;
        try {
            MainActivity.mActivityMessenger.send(msg);
        } catch (RemoteException e) {
            e.printStackTrace();
        }
    }



    /******************
     * 
     * INNER CLASSES/LISTENERS
     * 
     ******************/

    protected static class IncomingHandler extends Handler {
        private final WeakReference<SpeechRecognitionService> mtarget;

        IncomingHandler(SpeechRecognitionService target) {
            mtarget = new WeakReference<SpeechRecognitionService>(target);
        }

        @Override
        public void handleMessage(Message msg) {
            final SpeechRecognitionService target = mtarget.get();

            switch (msg.what) {
                case MSG_RECOGNIZER_START_LISTENING:

                    if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.JELLY_BEAN) {
                        /*turn off beep sound*/
                        if (!mIsStreamSolo) {
                            mAudioManager.setStreamSolo(AudioManager.STREAM_VOICE_CALL, true);
                            mIsStreamSolo = true;
                        }
                    }
                    if (!target.mIsListening) {
                        target.mSpeechRecognizer.startListening(target.mSpeechRecognizerIntent);
                        target.mIsListening = true;
                        Log.d("SERVICE", "message start listening"); //$NON-NLS-1$
                    }
                    break;

                case MSG_RECOGNIZER_CANCEL:
                    if (mIsStreamSolo) {
                        mAudioManager.setStreamSolo(AudioManager.STREAM_VOICE_CALL, false);
                        mIsStreamSolo = false;
                    }
                    target.mSpeechRecognizer.cancel();
                    target.mIsListening = false;
                    Log.d(TAG, "message canceled recognizer"); //$NON-NLS-1$
                    break;
            }
        }
    }


    protected class SpeechRecognitionListener implements RecognitionListener {

        @Override
        public void onBeginningOfSpeech() {
            /*speech input will be processed, so there is no need for count down anymore*/
            if (mIsCountDownOn) {
                mIsCountDownOn = false;
                mNoSpeechCountDown.cancel();
            }
            Log.d("SR_LISTENER", "onBeginingOfSpeech"); //$NON-NLS-1$
        }

        @Override
        public void onBufferReceived(byte[] buffer) {

        }

        @Override
        public void onEndOfSpeech() {
            Log.d("SR_LISTENER", "onEndOfSpeech"); //$NON-NLS-1$
        }

        @Override
        public void onError(int error) {
            if (mIsCountDownOn) {
                mIsCountDownOn = false;
                mNoSpeechCountDown.cancel();
            }
            mIsListening = false;
            Message message = Message.obtain(null, MSG_RECOGNIZER_START_LISTENING);
            try {
                mServiceMessenger.send(message);
            }
            catch (RemoteException e) {

            }
            String issue = null;
            if (error == 1) {
                issue = "ERROR_NETWORK_TIMEOUT";
            }
            if (error == 2) {
                issue = "ERROR_NETWORK";
            }
            if (error == 3) {
                issue = "ERROR_AUDIO";
            }
            if (error == 4) {
                issue = "ERROR_SERVER";
            }
            if (error == 5) {
                issue = "ERROR_CLIENT";
            }
            if (error == 6) {
                issue = "ERROR_SPEECH_TIMEOUT";
            }
            if (error == 7) {
                issue = "ERROR_NO_MATCH";
            }
            if (error == 8) {
                issue = "ERROR_RECOGNIZER_BUSY";
            }
            if (error == 9) {
                issue = "ERROR_INSUFFICIENT_PERMISSIONS";
            }
            Log.d("SR_LISTENER", "error = " + issue); //$NON-NLS-1$
        }

        @Override
        public void onEvent(int eventType, Bundle params) {

        }

        @Override
        public void onPartialResults(Bundle partialResults) {

        }

        @Override
        public void onReadyForSpeech(Bundle params) {
            if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.JELLY_BEAN) {
                mIsCountDownOn = true;
                mNoSpeechCountDown.start();
            }
            Log.d("SR_LISTENER", "onReadyForSpeech"); //$NON-NLS-1$
        }

        @Override
        public void onResults(Bundle results) {
            Log.d("SR_LISTENER", "onResults"); //$NON-NLS-1$

            matches = results.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION);

            if (matches.contains("continue")) {
                MainActivity.fase1 = true;
                sendMessageToUI(CONTINUE);
            }
            else {
                sendMessageToUI(NO_MATCH);
            }
        }

        @Override
        public void onRmsChanged(float rmsdB) {

        }
    }



    /******************
     * 
     * LIFE CYCLE
     * 
     ******************/

    @Override
    public void onDestroy() {
        super.onDestroy();

        if (mIsCountDownOn) {
            mNoSpeechCountDown.cancel();
        }
        if (mSpeechRecognizer != null) {
            mSpeechRecognizer.destroy();
        }
    }
}

Log from de debuger:

As seen, the service listener starts and shows onReadyForSpeech, but next to this the TTS is initialized and the listener stops showing logs even if I speak, that then should show onBeginingOfSpeech

回答1:

You should implement onUtteranceCompletedListener and send the MSG_RECOGNIZER_START_LISTENING in onUtteranceCompleted.

来源：https://stackoverflow.com/questions/23650973/speech-to-text-and-text-to-speech-at-same-time

标签

android

speech-recognition

text-to-speech

voice-recognition

speech-to-text