Is there a way to use the SpeechRecognizer API directly for speech input?

Asked 2020-11-30 01:42

The Android Dev website provides an example of doing speech input using the built-in Google Speech Input Activity. The activity displays a pre-configured pop-up with the mic and passes its results using onActivityResult(). Is there a way to use the SpeechRecognizer class directly to do speech input, without launching the canned pop-up activity?
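
For reference, the built-in activity approach being described looks roughly like this (a minimal Java sketch; the request code 1234 is arbitrary):

    // Launch the canned Google speech-input pop-up
    Intent intent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
    intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL,
            RecognizerIntent.LANGUAGE_MODEL_FREE_FORM);
    startActivityForResult(intent, 1234);
    // The recognized strings come back in onActivityResult()
    // via RecognizerIntent.EXTRA_RESULTS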

5 Answers
  • 2020-11-30 01:42

    Here is the code using the SpeechRecognizer class (sourced from here and here):

    import android.app.Activity;
    import android.content.Intent;
    import android.os.Bundle;
    import android.view.View;
    import android.view.View.OnClickListener;
    import android.speech.RecognitionListener;
    import android.speech.RecognizerIntent;
    import android.speech.SpeechRecognizer;
    import android.widget.Button;
    import android.widget.TextView;
    import java.util.ArrayList;
    import android.util.Log;
    
    
    
    public class VoiceRecognitionTest extends Activity implements OnClickListener 
    {
    
       private TextView mText;
       private SpeechRecognizer sr;
       private static final String TAG = "MyStt3Activity";
       @Override
       public void onCreate(Bundle savedInstanceState) 
       {
                super.onCreate(savedInstanceState);
                setContentView(R.layout.main);
                Button speakButton = (Button) findViewById(R.id.btn_speak);     
                mText = (TextView) findViewById(R.id.textView1);     
                speakButton.setOnClickListener(this);
                sr = SpeechRecognizer.createSpeechRecognizer(this);       
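                // Note: on API 23+ the RECORD_AUDIO permission must also be
                // granted at runtime (see the last answer below).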
                sr.setRecognitionListener(new Listener());
       }
    
       class Listener implements RecognitionListener
       {
                public void onReadyForSpeech(Bundle params)
                {
                         Log.d(TAG, "onReadyForSpeech");
                }
                public void onBeginningOfSpeech()
                {
                         Log.d(TAG, "onBeginningOfSpeech");
                }
                public void onRmsChanged(float rmsdB)
                {
                         Log.d(TAG, "onRmsChanged");
                }
                public void onBufferReceived(byte[] buffer)
                {
                         Log.d(TAG, "onBufferReceived");
                }
                public void onEndOfSpeech()
                {
                         Log.d(TAG, "onEndofSpeech");
                }
                public void onError(int error)
                {
                         Log.d(TAG,  "error " +  error);
                         mText.setText("error " + error);
                }
                public void onResults(Bundle results)                   
                {
                         String str = new String();
                         Log.d(TAG, "onResults " + results);
                         ArrayList<String> data = results.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION);
                         for (int i = 0; i < data.size(); i++)
                         {
                                   Log.d(TAG, "result " + data.get(i));
                                   str += data.get(i);
                         }
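                         // Note: this displays only the number of results;
                         // the last answer below shows how to display the recognized text itself.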
                         mText.setText("results: "+String.valueOf(data.size()));        
                }
                public void onPartialResults(Bundle partialResults)
                {
                         Log.d(TAG, "onPartialResults");
                }
                public void onEvent(int eventType, Bundle params)
                {
                         Log.d(TAG, "onEvent " + eventType);
                }
       }
       public void onClick(View v) {
                if (v.getId() == R.id.btn_speak) 
                {
                    Intent intent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);        
                    intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL,RecognizerIntent.LANGUAGE_MODEL_FREE_FORM);
                    intent.putExtra(RecognizerIntent.EXTRA_CALLING_PACKAGE,"voice.recognition.test");
    
                    intent.putExtra(RecognizerIntent.EXTRA_MAX_RESULTS,5); 
                    sr.startListening(intent);
                    Log.i(TAG, "startListening");
                }
       }
    }
    

    Define main.xml with a Button (btn_speak) and a TextView (textView1), and declare the RECORD_AUDIO permission in the manifest.
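
    A minimal sketch of those two pieces (the view IDs match the code above; the other attribute values are illustrative):

        <!-- AndroidManifest.xml -->
        <uses-permission android:name="android.permission.RECORD_AUDIO" />

        <!-- res/layout/main.xml -->
        <LinearLayout xmlns:android="http://schemas.android.com/apk/res/android"
            android:layout_width="match_parent"
            android:layout_height="match_parent"
            android:orientation="vertical">

            <Button
                android:id="@+id/btn_speak"
                android:layout_width="wrap_content"
                android:layout_height="wrap_content"
                android:text="Speak" />

            <TextView
                android:id="@+id/textView1"
                android:layout_width="wrap_content"
                android:layout_height="wrap_content" />
        </LinearLayout>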

  • 2020-11-30 01:43
    package com.android.example.speechtxt;
    
    import androidx.appcompat.app.AppCompatActivity;
    import androidx.core.content.ContextCompat;
    
    import android.Manifest;
    import android.content.Intent;
    import android.content.pm.PackageManager;
    import android.net.Uri;
    import android.os.Build;
    import android.os.Bundle;
    import android.provider.Settings;
    import android.speech.RecognitionListener;
    import android.speech.RecognizerIntent;
    import android.speech.SpeechRecognizer;
    import android.view.MotionEvent;
    import android.view.View;
    import android.widget.RelativeLayout;
    import android.widget.Toast;
    
    import java.util.ArrayList;
    import java.util.Locale;
    
    public class MainActivity extends AppCompatActivity {
    
        private RelativeLayout relativeLayout;
        private SpeechRecognizer speechRecognizer;
        private Intent speechintent;
        String keeper="";
    
        @Override
        protected void onCreate(Bundle savedInstanceState) {
            super.onCreate(savedInstanceState);
            setContentView(R.layout.activity_main);
    
            checkVoiceCommandPermission();
            relativeLayout = findViewById(R.id.touchscr);
    
            speechRecognizer = SpeechRecognizer.createSpeechRecognizer(getApplicationContext());
            speechintent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
            speechintent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL,RecognizerIntent.LANGUAGE_MODEL_FREE_FORM);
            speechintent.putExtra(RecognizerIntent.EXTRA_LANGUAGE, Locale.getDefault());
    
    
            speechRecognizer.setRecognitionListener(new RecognitionListener() {
                @Override
                public void onReadyForSpeech(Bundle params) {
    
                }
    
                @Override
                public void onBeginningOfSpeech() {
    
                }
    
                @Override
                public void onRmsChanged(float rmsdB) {
    
                }
    
                @Override
                public void onBufferReceived(byte[] buffer) {
    
                }
    
                @Override
                public void onEndOfSpeech() {
    
                }
    
                @Override
                public void onError(int error) {
    
                }
    
                @Override
                public void onResults(Bundle results)
                {
                    ArrayList<String> speakedStringArray = results.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION);
                    if(speakedStringArray!=null)
                    {
                        keeper = speakedStringArray.get(0);
    
                        Toast.makeText(getApplicationContext(),""+keeper,Toast.LENGTH_SHORT).show();
                    }
                }
    
                @Override
                public void onPartialResults(Bundle partialResults) {
    
                }
    
                @Override
                public void onEvent(int eventType, Bundle params) {
    
                }
            });
    
            relativeLayout.setOnTouchListener(new View.OnTouchListener() {
                @Override
                public boolean onTouch(View v, MotionEvent event) {
                    switch (event.getAction())
                    {
                        case MotionEvent.ACTION_DOWN:
                            speechRecognizer.startListening(speechintent);
                            keeper="";
                            break;
                        case MotionEvent.ACTION_UP:
                            speechRecognizer.stopListening();
                            break;
                    }
                    return true; // consume the event so ACTION_UP is also delivered here
                }
            });
        }
    
    
        private void checkVoiceCommandPermission()
        {
            if(Build.VERSION.SDK_INT>=Build.VERSION_CODES.M)
            {
                if (!(ContextCompat.checkSelfPermission(MainActivity.this, Manifest.permission.RECORD_AUDIO)== PackageManager.PERMISSION_GRANTED))
                {
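                // Sends the user to the app's settings screen to grant the
                // permission manually, instead of showing the runtime permission dialog.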
                    Intent intent = new Intent(Settings.ACTION_APPLICATION_DETAILS_SETTINGS, Uri.parse("package:" +getPackageName()));
                    startActivity(intent);
                    finish();
                }
    
            }
        }
    }
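
    One addition worth making (my suggestion, not part of the original answer): a SpeechRecognizer holds a connection to the recognition service, so release it when the activity goes away, e.g. inside MainActivity:

        @Override
        protected void onDestroy() {
            super.onDestroy();
            if (speechRecognizer != null) {
                speechRecognizer.destroy(); // releases the connection to the recognition service
            }
        }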
    
  • 2020-11-30 01:58

    You can do it as:

    import android.app.Activity
    import androidx.appcompat.app.AppCompatActivity
    import android.os.Bundle
    import kotlinx.android.synthetic.main.activity_main.*
    import android.widget.Toast
    import android.content.ActivityNotFoundException
    import android.speech.RecognizerIntent
    import android.content.Intent
    
    class MainActivity : AppCompatActivity() {
        private val REQ_CODE = 100
    
        override fun onCreate(savedInstanceState: Bundle?) {
            super.onCreate(savedInstanceState)
            setContentView(R.layout.activity_main)
    
            speak.setOnClickListener {
                val intent = Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH)
                intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL,
                        RecognizerIntent.LANGUAGE_MODEL_FREE_FORM)
                intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE,  "ar-JO") //  Locale.getDefault()
                intent.putExtra(RecognizerIntent.EXTRA_PROMPT, "Need to speak")
                try {
                    startActivityForResult(intent, REQ_CODE)
                } catch (a: ActivityNotFoundException) {
                    Toast.makeText(applicationContext,
                            "Sorry your device not supported",
                            Toast.LENGTH_SHORT).show()
                }
            }
        }
    
        override fun onActivityResult(requestCode: Int, resultCode: Int, data: Intent?) {
            super.onActivityResult(requestCode, resultCode, data)
    
            when (requestCode) {
                REQ_CODE -> {
                    if (resultCode == Activity.RESULT_OK && data != null) {
                        val result = data
                                .getStringArrayListExtra(RecognizerIntent.EXTRA_RESULTS)
                        println("result: $result")
                        text.text = result[0]
                    }
                }
            }
        }
    }
    

    The layout could be simply:

    <?xml version = "1.0" encoding = "utf-8"?>
    <RelativeLayout xmlns:android = "http://schemas.android.com/apk/res/android"
        xmlns:app = "http://schemas.android.com/apk/res-auto"
        xmlns:tools = "http://schemas.android.com/tools"
        android:layout_width = "match_parent"
        android:layout_height = "match_parent"
        tools:context = ".MainActivity">
        <LinearLayout
            android:layout_width = "match_parent"
            android:gravity = "center"
            android:layout_height = "match_parent">
            <TextView
                android:id = "@+id/text"
                android:textSize = "30sp"
                android:layout_width = "wrap_content"
                android:layout_height = "wrap_content"/>
        </LinearLayout>
        <LinearLayout
            android:layout_width = "wrap_content"
            android:layout_alignParentBottom = "true"
            android:layout_centerInParent = "true"
            android:orientation = "vertical"
            android:layout_height = "wrap_content">
            <ImageView
                android:id = "@+id/speak"
                android:layout_width = "wrap_content"
                android:layout_height = "wrap_content"
                android:background = "?selectableItemBackground"
                android:src = "@android:drawable/ic_btn_speak_now"/>
        </LinearLayout>
    </RelativeLayout>
    

    The other way, which you are asking about, is a little longer but gives you more control, and it does not bother you with the Google Assistant dialog:

    1- First you need to grant the permissions in the manifest file:

        <uses-permission android:name="android.permission.INTERNET" />
        <uses-permission android:name="android.permission.RECORD_AUDIO"/>
    

    2- I'm consolidating all the answers above, as follows:

    • Create a RecognitionListener class:
    private val TAG = "Driver-Assistant"
    
    class Listener(context: Context): RecognitionListener {
        private var ctx = context
    
        override fun onReadyForSpeech(params: Bundle?) {
            Log.d(TAG, "onReadyForSpeech")
        }
    
        override fun onRmsChanged(rmsdB: Float) {
            Log.d(TAG, "onRmsChanged")
        }
    
        override fun onBufferReceived(buffer: ByteArray?) {
            Log.d(TAG, "onBufferReceived")
        }
    
        override fun onPartialResults(partialResults: Bundle?) {
            Log.d(TAG, "onPartialResults")
        }
    
        override fun onEvent(eventType: Int, params: Bundle?) {
            Log.d(TAG, "onEvent")
        }
    
        override fun onBeginningOfSpeech() {
            Toast.makeText(ctx, "Speech started", Toast.LENGTH_LONG).show()
        }
    
        override fun onEndOfSpeech() {
            Toast.makeText(ctx, "Speech finished", Toast.LENGTH_LONG).show()
        }
    
        override fun onError(error: Int) {
            val string = when (error) {
                SpeechRecognizer.ERROR_SPEECH_TIMEOUT -> "No speech input"
                SpeechRecognizer.ERROR_SERVER -> "Server sends error status"
                SpeechRecognizer.ERROR_RECOGNIZER_BUSY -> "RecognitionService busy."
                SpeechRecognizer.ERROR_NO_MATCH -> "No recognition result matched."
                SpeechRecognizer.ERROR_NETWORK_TIMEOUT -> "Network operation timed out."
                SpeechRecognizer.ERROR_NETWORK -> "Other network related errors."
                SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS -> "Insufficient permissions"
                SpeechRecognizer.ERROR_CLIENT -> "Other client side errors."
                SpeechRecognizer.ERROR_AUDIO -> "Audio recording error."
                else -> "unknown!!"
            }
            Toast.makeText(ctx, "Sorry, an error occurred: $string", Toast.LENGTH_LONG).show()
        }
    
        override fun onResults(results: Bundle?) {
            Log.d(TAG, "onResults $results")
            val data = results!!.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION)
            display.text = data!![0]
        }
    }
    
    • In the main file you need to define the SpeechRecognizer, attach the above listener to it, and remember to ask for the runtime permission; all together below:
    lateinit var sr: SpeechRecognizer
    lateinit var display: TextView
    
    class MainActivity : AppCompatActivity() {
    
        override fun onCreate(savedInstanceState: Bundle?) {
            super.onCreate(savedInstanceState)
            setContentView(R.layout.activity_main)
    
            display = text
    
            if (ContextCompat.checkSelfPermission(this,
                            Manifest.permission.RECORD_AUDIO)
                    != PackageManager.PERMISSION_GRANTED) {
                if (ActivityCompat.shouldShowRequestPermissionRationale(this,
                                Manifest.permission.RECORD_AUDIO)) {
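                    // TODO: show an explanation to the user, then request the
                    // permission again; as written, this branch does nothing.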
                } else {
                    ActivityCompat.requestPermissions(this,
                            arrayOf(Manifest.permission.RECORD_AUDIO),
                            527)
                }
            }
    
            sr = SpeechRecognizer.createSpeechRecognizer(this)
            sr.setRecognitionListener(Listener(this))
    
            speak.setOnClickListener {
                val intent = Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH)
                intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL,
                        RecognizerIntent.LANGUAGE_MODEL_FREE_FORM)
                intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE,  "ar-JO") //  Locale.getDefault()
                sr.startListening(intent)
            }
    
        }
    
        override fun onRequestPermissionsResult(requestCode: Int, permissions: Array<out String>, grantResults: IntArray) {
            super.onRequestPermissionsResult(requestCode, permissions, grantResults)
            when (requestCode) {
                527  -> if (grantResults.isNotEmpty()
                        && grantResults[0] == PackageManager.PERMISSION_GRANTED) {
    
                    Toast.makeText(this, "Permission granted", Toast.LENGTH_SHORT).show()
                } else {
                    Toast.makeText(this, "Permission not granted", Toast.LENGTH_SHORT).show()
                }
            }
        }
    }
    
  • 2020-11-30 02:01

    You can use SpeechRecognizer, though I am not aware of any sample code for it beyond this previous SO question. The class was introduced in API level 8 (Android 2.2), so it was not widely usable at the time of this writing.

  • 2020-11-30 02:03

    Also make sure to request the proper permissions from the user at runtime. I got stuck with an error 9 return value (ERROR_INSUFFICIENT_PERMISSIONS), even though I had the RECORD_AUDIO permission listed in the manifest.

    In following the sample code here I was able to get the permissions from the user and then the speech recognizer returned good responses.

    E.g., I put this block in my onCreate() for the activity, though it could go elsewhere in the UI flow, as long as it runs before the SpeechRecognizer methods are called:

        protected void onCreate(Bundle savedInstanceState) {
            ...
            if (ContextCompat.checkSelfPermission(this,
                    Manifest.permission.RECORD_AUDIO)
                    != PackageManager.PERMISSION_GRANTED) {

                // Should we show an explanation?
                if (ActivityCompat.shouldShowRequestPermissionRationale(this,
                        Manifest.permission.RECORD_AUDIO)) {

                    // Show an explanation to the user *asynchronously* -- don't block
                    // this thread waiting for the user's response! After the user
                    // sees the explanation, try again to request the permission.

                } else {

                    // No explanation needed, we can request the permission.

                    ActivityCompat.requestPermissions(this,
                            new String[]{Manifest.permission.RECORD_AUDIO},
                            527);

                    // The request code (527 here) is an app-defined int constant;
                    // the onRequestPermissionsResult() callback receives the result.
                }
            }
            ...
        }
    

    Then provide a callback method in the activity for the permissions request:

    @Override
    public void onRequestPermissionsResult(int requestCode,
                                           String permissions[], int[] grantResults) {
        switch (requestCode) {
            case 527: {
                // If request is cancelled, the result arrays are empty.
                if (grantResults.length > 0
                        && grantResults[0] == PackageManager.PERMISSION_GRANTED) {
    
                    // permission was granted, yay! We can now go
                    // ahead and use the speech recognizer.
    
                } else {
    
                    // permission denied, boo! Disable the
                    // functionality that depends on this permission.
                }
                return;
            }
    
            // other 'case' lines to check for other
            // permissions this app might request
        }
    }
    

    One other thing I had to change in preetha's example code above is how the resulting text is retrieved in the onResults() method. To get the actual text of the recognized speech (rather than its size, which is what the original code prints), either print the value of the constructed string str or read one of the return values in the ArrayList (data). For instance:

    mText.setText(data.get(0));
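
    Putting that together, a corrected onResults() for the first answer's listener might look like this (a sketch; mText is the TextView from that answer):

        public void onResults(Bundle results) {
            ArrayList<String> data = results.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION);
            if (data != null && !data.isEmpty()) {
                mText.setText(data.get(0)); // show the top recognition hypothesis
            }
        }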
    