Question
The Android Dev website provides an example of doing speech input using the built-in Google Speech Input Activity. The activity displays a pre-configured pop-up with the mic and passes its results using onActivityResult().

My question: Is there a way to use the SpeechRecognizer class directly to do speech input without displaying the canned activity? This would let me build my own Activity for voice input.
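For reference, the built-in-activity approach mentioned above looks roughly like this (a minimal sketch; the request code 1234 is arbitrary):

// Launch the built-in Google speech input activity (shows the canned pop-up).
Intent intent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL,
        RecognizerIntent.LANGUAGE_MODEL_FREE_FORM);
startActivityForResult(intent, 1234);

// The results arrive here once the pop-up finishes.
@Override
protected void onActivityResult(int requestCode, int resultCode, Intent data) {
    if (requestCode == 1234 && resultCode == RESULT_OK) {
        ArrayList<String> matches =
                data.getStringArrayListExtra(RecognizerIntent.EXTRA_RESULTS);
        // matches.get(0) is the most likely transcription
    }
    super.onActivityResult(requestCode, resultCode, data);
}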
Answer 1:
Here is the code using the SpeechRecognizer class (sourced from here and here):
import android.app.Activity;
import android.content.Intent;
import android.os.Bundle;
import android.view.View;
import android.view.View.OnClickListener;
import android.speech.RecognitionListener;
import android.speech.RecognizerIntent;
import android.speech.SpeechRecognizer;
import android.widget.Button;
import android.widget.TextView;
import java.util.ArrayList;
import android.util.Log;

public class VoiceRecognitionTest extends Activity implements OnClickListener
{
    private TextView mText;
    private SpeechRecognizer sr;
    private static final String TAG = "MyStt3Activity";

    @Override
    public void onCreate(Bundle savedInstanceState)
    {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.main);
        Button speakButton = (Button) findViewById(R.id.btn_speak);
        mText = (TextView) findViewById(R.id.textView1);
        speakButton.setOnClickListener(this);
        sr = SpeechRecognizer.createSpeechRecognizer(this);
        sr.setRecognitionListener(new listener());
    }

    class listener implements RecognitionListener
    {
        public void onReadyForSpeech(Bundle params)
        {
            Log.d(TAG, "onReadyForSpeech");
        }
        public void onBeginningOfSpeech()
        {
            Log.d(TAG, "onBeginningOfSpeech");
        }
        public void onRmsChanged(float rmsdB)
        {
            Log.d(TAG, "onRmsChanged");
        }
        public void onBufferReceived(byte[] buffer)
        {
            Log.d(TAG, "onBufferReceived");
        }
        public void onEndOfSpeech()
        {
            Log.d(TAG, "onEndOfSpeech");
        }
        public void onError(int error)
        {
            Log.d(TAG, "error " + error);
            mText.setText("error " + error);
        }
        public void onResults(Bundle results)
        {
            String str = "";
            Log.d(TAG, "onResults " + results);
            ArrayList<String> data = results.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION);
            for (int i = 0; i < data.size(); i++)
            {
                Log.d(TAG, "result " + data.get(i));
                str += data.get(i);
            }
            // Note: this displays the number of results, not the recognized
            // text itself; see Answer 2 for displaying the text.
            mText.setText("results: " + String.valueOf(data.size()));
        }
        public void onPartialResults(Bundle partialResults)
        {
            Log.d(TAG, "onPartialResults");
        }
        public void onEvent(int eventType, Bundle params)
        {
            Log.d(TAG, "onEvent " + eventType);
        }
    }

    public void onClick(View v) {
        if (v.getId() == R.id.btn_speak)
        {
            Intent intent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
            intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, RecognizerIntent.LANGUAGE_MODEL_FREE_FORM);
            intent.putExtra(RecognizerIntent.EXTRA_CALLING_PACKAGE, "voice.recognition.test");
            intent.putExtra(RecognizerIntent.EXTRA_MAX_RESULTS, 5);
            sr.startListening(intent);
            Log.i(TAG, "startListening called");
        }
    }
}
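One detail the answer does not mention: SpeechRecognizer holds a connection to the recognition service, so it is worth releasing it when the activity goes away. A small sketch:

@Override
protected void onDestroy() {
    super.onDestroy();
    if (sr != null) {
        sr.destroy(); // release the connection to the recognition service
    }
}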
Define main.xml with a button and give the RECORD_AUDIO permission in the manifest.
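For completeness, the manifest line and a minimal main.xml might look like the following (the IDs match the code above; the exact layout is an assumption):

<uses-permission android:name="android.permission.RECORD_AUDIO" />

<!-- res/layout/main.xml -->
<LinearLayout xmlns:android="http://schemas.android.com/apk/res/android"
    android:orientation="vertical"
    android:layout_width="match_parent"
    android:layout_height="match_parent">

    <Button
        android:id="@+id/btn_speak"
        android:layout_width="wrap_content"
        android:layout_height="wrap_content"
        android:text="Speak" />

    <TextView
        android:id="@+id/textView1"
        android:layout_width="wrap_content"
        android:layout_height="wrap_content" />
</LinearLayout>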
Answer 2:
Also make sure to request the proper permissions from the user. I got stuck with an error 9 return value (INSUFFICIENT_PERMISSIONS), even though I had the RECORD_AUDIO permission listed in the manifest.
By following the sample code here I was able to request the permission from the user, after which the speech recognizer returned good responses.
For example, I put this block in my activity's onCreate(), though it could go elsewhere in the UI flow, as long as it runs before the SpeechRecognizer methods are called:
protected void onCreate(Bundle savedInstanceState) {
    ...
    if (ContextCompat.checkSelfPermission(this,
            Manifest.permission.RECORD_AUDIO)
            != PackageManager.PERMISSION_GRANTED) {
        // Should we show an explanation?
        if (ActivityCompat.shouldShowRequestPermissionRationale(this,
                Manifest.permission.RECORD_AUDIO)) {
            // Show an explanation to the user *asynchronously* -- don't block
            // this thread waiting for the user's response! After the user
            // sees the explanation, try again to request the permission.
        } else {
            // No explanation needed, we can request the permission.
            ActivityCompat.requestPermissions(this,
                    new String[]{Manifest.permission.RECORD_AUDIO},
                    527);
            // The request code (527 here) is an app-defined int constant;
            // the callback method below receives the result of the request.
        }
    }
    ...
}
Then provide a callback method in the activity for the permissions request:
@Override
public void onRequestPermissionsResult(int requestCode,
        String permissions[], int[] grantResults) {
    switch (requestCode) {
        case 527: {
            // If the request is cancelled, the result arrays are empty.
            if (grantResults.length > 0
                    && grantResults[0] == PackageManager.PERMISSION_GRANTED) {
                // Permission was granted; proceed with the
                // speech-recognition task.
            } else {
                // Permission denied; disable the functionality
                // that depends on this permission.
            }
            return;
        }
        // Other 'case' lines to check for other
        // permissions this app might request.
    }
}
One other thing I had to change in preetha's example code above is how the resulting text is retrieved in the onResults() method. To get the actual text of the recognized speech (rather than the number of results, which the original code prints), either display the value of the constructed string str or take one of the entries in the ArrayList data. For instance:

mText.setText(data.get(0));
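Putting it together, the onResults() method from Answer 1 could be written like this (a sketch showing just the top-ranked result):

public void onResults(Bundle results) {
    ArrayList<String> data =
            results.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION);
    if (data != null && !data.isEmpty()) {
        mText.setText(data.get(0)); // display the most likely transcription
    }
}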
Answer 3:
You can use SpeechRecognizer, though I am not aware of any sample code for it beyond this previous SO question. However, the class is new to API level 8 (Android 2.2), and therefore was not widely usable at the time of this writing.
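If you need to support older devices, you can guard the feature at runtime; a minimal sketch using the framework's own availability check (assuming a Context in scope):

// Only use SpeechRecognizer on API 8+ devices that actually have a
// recognition service installed.
if (Build.VERSION.SDK_INT >= 8
        && SpeechRecognizer.isRecognitionAvailable(context)) {
    SpeechRecognizer sr = SpeechRecognizer.createSpeechRecognizer(context);
    // ... set a RecognitionListener and call startListening()
} else {
    // Fall back to RecognizerIntent.ACTION_RECOGNIZE_SPEECH, or disable the feature.
}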
Answer 4:
You can do it like this:
import android.app.Activity
import androidx.appcompat.app.AppCompatActivity
import android.os.Bundle
import kotlinx.android.synthetic.main.activity_main.*
import android.widget.Toast
import android.content.ActivityNotFoundException
import android.speech.RecognizerIntent
import android.content.Intent

class MainActivity : AppCompatActivity() {

    private val REQ_CODE = 100

    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)
        setContentView(R.layout.activity_main)
        speak.setOnClickListener {
            val intent = Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH)
            intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL,
                    RecognizerIntent.LANGUAGE_MODEL_FREE_FORM)
            intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE, "ar-JO") // or Locale.getDefault()
            intent.putExtra(RecognizerIntent.EXTRA_PROMPT, "Need to speak")
            try {
                startActivityForResult(intent, REQ_CODE)
            } catch (a: ActivityNotFoundException) {
                Toast.makeText(applicationContext,
                        "Sorry, your device is not supported",
                        Toast.LENGTH_SHORT).show()
            }
        }
    }

    override fun onActivityResult(requestCode: Int, resultCode: Int, data: Intent?) {
        super.onActivityResult(requestCode, resultCode, data)
        when (requestCode) {
            REQ_CODE -> {
                if (resultCode == Activity.RESULT_OK && data != null) {
                    val result = data
                            .getStringArrayListExtra(RecognizerIntent.EXTRA_RESULTS)
                    println("result: $result")
                    text.text = result?.get(0)
                }
            }
        }
    }
}
The layout could simply be:
<?xml version="1.0" encoding="utf-8"?>
<RelativeLayout xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:app="http://schemas.android.com/apk/res-auto"
    xmlns:tools="http://schemas.android.com/tools"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    tools:context=".MainActivity">

    <LinearLayout
        android:layout_width="match_parent"
        android:gravity="center"
        android:layout_height="match_parent">

        <TextView
            android:id="@+id/text"
            android:textSize="30sp"
            android:layout_width="wrap_content"
            android:layout_height="wrap_content" />
    </LinearLayout>

    <LinearLayout
        android:layout_width="wrap_content"
        android:layout_alignParentBottom="true"
        android:layout_centerInParent="true"
        android:orientation="vertical"
        android:layout_height="wrap_content">

        <ImageView
            android:id="@+id/speak"
            android:layout_width="wrap_content"
            android:layout_height="wrap_content"
            android:background="?selectableItemBackground"
            android:src="@android:drawable/ic_btn_speak_now" />
    </LinearLayout>
</RelativeLayout>
The other way, which you are asking about, is a little longer but gives you more control and does not annoy you with the Google Assistant dialog:

1- First you need to grant permissions in the Manifest file:
<uses-permission android:name="android.permission.INTERNET" />
<uses-permission android:name="android.permission.RECORD_AUDIO"/>
2- Consolidating all the answers above:

- Create a RecognitionListener class:
private val TAG = "Driver-Assistant"

class Listener(context: Context) : RecognitionListener {
    private var ctx = context

    override fun onReadyForSpeech(params: Bundle?) {
        Log.d(TAG, "onReadyForSpeech")
    }
    override fun onRmsChanged(rmsdB: Float) {
        Log.d(TAG, "onRmsChanged")
    }
    override fun onBufferReceived(buffer: ByteArray?) {
        Log.d(TAG, "onBufferReceived")
    }
    override fun onPartialResults(partialResults: Bundle?) {
        Log.d(TAG, "onPartialResults")
    }
    override fun onEvent(eventType: Int, params: Bundle?) {
        Log.d(TAG, "onEvent")
    }
    override fun onBeginningOfSpeech() {
        Toast.makeText(ctx, "Speech started", Toast.LENGTH_LONG).show()
    }
    override fun onEndOfSpeech() {
        Toast.makeText(ctx, "Speech finished", Toast.LENGTH_LONG).show()
    }
    override fun onError(error: Int) {
        // Map the error codes to readable messages using the framework's
        // named constants instead of magic numbers.
        val string = when (error) {
            SpeechRecognizer.ERROR_SPEECH_TIMEOUT -> "No speech input"
            SpeechRecognizer.ERROR_SERVER -> "Server sends error status"
            SpeechRecognizer.ERROR_RECOGNIZER_BUSY -> "RecognitionService busy"
            SpeechRecognizer.ERROR_NO_MATCH -> "No recognition result matched"
            SpeechRecognizer.ERROR_NETWORK_TIMEOUT -> "Network operation timed out"
            SpeechRecognizer.ERROR_NETWORK -> "Other network related errors"
            SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS -> "Insufficient permissions"
            SpeechRecognizer.ERROR_CLIENT -> "Other client side errors"
            SpeechRecognizer.ERROR_AUDIO -> "Audio recording error"
            else -> "unknown!!"
        }
        Toast.makeText(ctx, "Sorry, an error occurred: $string", Toast.LENGTH_LONG).show()
    }
    override fun onResults(results: Bundle?) {
        Log.d(TAG, "onResults $results")
        val data = results!!.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION)
        display.text = data!![0]
    }
}
- In the main file you need to define the SpeechRecognizer, attach the Listener above to it, and remember to request the runtime permission; all together:
lateinit var sr: SpeechRecognizer
lateinit var display: TextView

class MainActivity : AppCompatActivity() {

    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)
        setContentView(R.layout.activity_main)
        display = text

        if (ContextCompat.checkSelfPermission(this,
                Manifest.permission.RECORD_AUDIO)
                != PackageManager.PERMISSION_GRANTED) {
            if (ActivityCompat.shouldShowRequestPermissionRationale(this,
                    Manifest.permission.RECORD_AUDIO)) {
                // Show an explanation to the user, then request again.
            } else {
                ActivityCompat.requestPermissions(this,
                        arrayOf(Manifest.permission.RECORD_AUDIO),
                        527)
            }
        }

        sr = SpeechRecognizer.createSpeechRecognizer(this)
        sr.setRecognitionListener(Listener(this))

        speak.setOnClickListener {
            val intent = Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH)
            intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL,
                    RecognizerIntent.LANGUAGE_MODEL_FREE_FORM)
            intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE, "ar-JO") // or Locale.getDefault()
            sr.startListening(intent)
        }
    }

    override fun onRequestPermissionsResult(requestCode: Int, permissions: Array<out String>, grantResults: IntArray) {
        super.onRequestPermissionsResult(requestCode, permissions, grantResults)
        when (requestCode) {
            527 -> if (grantResults.isNotEmpty()
                    && grantResults[0] == PackageManager.PERMISSION_GRANTED) {
                Toast.makeText(this, "Permission granted", Toast.LENGTH_SHORT).show()
            } else {
                Toast.makeText(this, "Permission not granted", Toast.LENGTH_SHORT).show()
            }
        }
    }
}
Source: https://stackoverflow.com/questions/4975443/is-there-a-way-to-use-the-speechrecognizer-api-directly-for-speech-input