Question
I have an ExampleStreaming class, which I took from the IBM Watson SDK GitHub repo (the speech-to-text service demo). Here it is:
// usings as in the Watson Unity SDK example (adjust namespaces to your SDK version)
using UnityEngine;
using System.Collections;
using IBM.Watson.DeveloperCloud.Logging;
using IBM.Watson.DeveloperCloud.Services.SpeechToText.v1;
using IBM.Watson.DeveloperCloud.Utilities;
using IBM.Watson.DeveloperCloud.DataTypes;

public class ExampleStreaming : MonoBehaviour
{
    private int m_RecordingRoutine = 0;
    private string m_MicrophoneID = null;
    private AudioClip m_Recording = null;
    private int m_RecordingBufferSize = 5;  // seconds
    private int m_RecordingHZ = 22050;      // sample rate
    private SpeechToText m_SpeechToText = new SpeechToText();
    void Start()
    {
        LogSystem.InstallDefaultReactors();
        Log.Debug("ExampleStreaming", "Start();");
        Active = true;
        Debug.Log("start");
        StartRecording();
    }

    public void Update()
    {
        Debug.Log(m_SpeechToText.IsListening);
    }
    public bool Active
    {
        get { return m_SpeechToText.IsListening; }
        set
        {
            if (value && !m_SpeechToText.IsListening)
            {
                m_SpeechToText.DetectSilence = true;
                m_SpeechToText.EnableWordConfidence = false;
                m_SpeechToText.EnableTimestamps = false;
                m_SpeechToText.SilenceThreshold = 0.03f;
                m_SpeechToText.MaxAlternatives = 1;
                m_SpeechToText.EnableContinousRecognition = true;
                m_SpeechToText.EnableInterimResults = true;
                m_SpeechToText.OnError = OnError;
                m_SpeechToText.StartListening(OnRecognize);
            }
            else if (!value && m_SpeechToText.IsListening)
            {
                m_SpeechToText.StopListening();
            }
        }
    }
    private void StartRecording()
    {
        if (m_RecordingRoutine == 0)
        {
            Debug.Log("m_RecordingRoutine");
            UnityObjectUtil.StartDestroyQueue();
            m_RecordingRoutine = Runnable.Run(RecordingHandler());
        }
    }

    private void StopRecording()
    {
        if (m_RecordingRoutine != 0)
        {
            Microphone.End(m_MicrophoneID);
            Runnable.Stop(m_RecordingRoutine);
            m_RecordingRoutine = 0;
        }
    }

    private void OnError(string error)
    {
        Active = false;
        Log.Debug("ExampleStreaming", "Error! {0}", error);
    }
    private IEnumerator RecordingHandler()
    {
        Log.Debug("ExampleStreaming", "devices: {0}", Microphone.devices);
        m_MicrophoneID = Microphone.devices[0];
        Debug.Log("m_MicrophoneID : " + m_MicrophoneID);
        m_Recording = Microphone.Start(m_MicrophoneID, true, m_RecordingBufferSize, m_RecordingHZ);
        yield return null; // let m_RecordingRoutine get set..

        if (m_Recording == null)
        {
            Debug.Log("m_Recording is null");
            StopRecording();
            yield break;
        }
        Debug.Log("m_Recording : " + m_Recording.length); // log only after the null check above

        bool bFirstBlock = true;
        int midPoint = m_Recording.samples / 2;
        float[] samples = null;

        while (m_RecordingRoutine != 0 && m_Recording != null)
        {
            int writePos = Microphone.GetPosition(m_MicrophoneID);
            if (writePos > m_Recording.samples || !Microphone.IsRecording(m_MicrophoneID))
            {
                Log.Error("MicrophoneWidget", "Microphone disconnected.");
                StopRecording();
                yield break;
            }

            if ((bFirstBlock && writePos >= midPoint)
                || (!bFirstBlock && writePos < midPoint))
            {
                // front block is recorded, make a RecordClip and pass it onto our callback.
                samples = new float[midPoint];
                m_Recording.GetData(samples, bFirstBlock ? 0 : midPoint);

                AudioData record = new AudioData();
                record.MaxLevel = Mathf.Max(samples);
                record.Clip = AudioClip.Create("Recording", midPoint, m_Recording.channels, m_RecordingHZ, false);
                record.Clip.SetData(samples, 0);

                m_SpeechToText.OnListen(record);
                bFirstBlock = !bFirstBlock;
            }
            else
            {
                // calculate the number of samples remaining until we are ready for a block of audio,
                // and wait the amount of time it will take to record that much.
                int remaining = bFirstBlock ? (midPoint - writePos) : (m_Recording.samples - writePos);
                float timeRemaining = (float)remaining / (float)m_RecordingHZ;
                yield return new WaitForSeconds(timeRemaining);
            }
        }

        yield break;
    }
    private void OnRecognize(SpeechRecognitionEvent result)
    {
        Debug.Log("OnRecognize");
        if (result != null && result.results.Length > 0)
        {
            foreach (var res in result.results)
            {
                foreach (var alt in res.alternatives)
                {
                    string text = alt.transcript;
                    Debug.Log(text);
                    Log.Debug("ExampleStreaming", string.Format("{0} ({1}, {2:0.00})\n", text, res.final ? "Final" : "Interim", alt.confidence));
                }
            }
        }
    }
}
And this is the line I added to get the microphone, in the function RecordingHandler. I edited it to use the microphone device at index zero, because m_MicrophoneID was originally null (I don't know whether that was left that way intentionally or is an error):
m_MicrophoneID = Microphone.devices[0];
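As a side note, Microphone.devices can be empty when no microphone is attached, so a guard before that lookup seems sensible. A minimal sketch (my own addition, not part of the SDK sample):

// inside RecordingHandler(), before selecting a device
if (Microphone.devices.Length == 0)
{
    Log.Error("ExampleStreaming", "No microphone detected.");
    yield break;
}
m_MicrophoneID = Microphone.devices[0]; // index 0 is the first available device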
But unfortunately it is not showing any output in OnRecognize, which I think should execute.
Instead it displays these logs after a few seconds (matching the 5-second recording length I gave). What am I doing wrong? I cannot understand how the speech-to-text part is supposed to work.
[DEBUG] OnListenClosed(), State = DISCONNECTED
[DEBUG] KeepAlive exited.
I have also tried the IBM Watson Speech To Text example scene, and it also shows nothing.
Answer 1:
I am not yet able to stream real-time output, but I did manage to convert an audio clip into text through the Watson service. Here is the simple code (which took three days):
using UnityEngine;
using System.Collections;
using IBM.Watson.DeveloperCloud.Services.SpeechToText.v1;

public class AudioClipToTextWatson : MonoBehaviour
{
    // Non-streaming
    SpeechToText m_SpeechToText = new SpeechToText();
    public AudioClip m_AudioClip;   // assign in the inspector, or record into it below
    public bool on = false;

    void Start()
    {
        // record 4 seconds from the default microphone, then send the clip for recognition
        m_AudioClip = Microphone.Start(Microphone.devices[0], false, 4, 44100);
        m_SpeechToText.Recognize(m_AudioClip, OnRecognize);

        // Streaming alternative (use instead of Recognize above):
        // m_SpeechToText.StartListening(OnRecognize);
        // Stop listening:
        // m_SpeechToText.StopListening();
    }
    private void OnRecognize(SpeechRecognitionEvent result)
    {
        Debug.Log("result : " + result);
        if (result != null && result.results.Length > 0)
        {
            foreach (var res in result.results)
            {
                foreach (var alt in res.alternatives)
                {
                    string text = alt.transcript;
                    Debug.Log(text);
                    Debug.Log(res.final);
                }
            }
        }
    }
}
Note: You can record an audio clip with your microphone and convert it to text. If you already have an audio clip, drop it onto the field in the inspector and comment out the first line in Start().
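One caveat worth noting: Microphone.Start returns immediately while Unity is still filling the clip, so calling Recognize right away may submit incomplete audio. A minimal sketch of the safer ordering (my own variant, assuming the same SpeechToText.Recognize(clip, callback) usage as above):

using UnityEngine;
using System.Collections;
using IBM.Watson.DeveloperCloud.Services.SpeechToText.v1;

public class RecordThenRecognize : MonoBehaviour
{
    SpeechToText m_SpeechToText = new SpeechToText();

    void Start()
    {
        StartCoroutine(RecordAndRecognize(4));
    }

    private IEnumerator RecordAndRecognize(int seconds)
    {
        string device = Microphone.devices[0];
        AudioClip clip = Microphone.Start(device, false, seconds, 44100);
        // wait until the fixed-length recording has actually finished
        yield return new WaitForSeconds(seconds);
        Microphone.End(device);
        m_SpeechToText.Recognize(clip, OnRecognize);
    }

    private void OnRecognize(SpeechRecognitionEvent result)
    {
        if (result == null || result.results.Length == 0)
            return;
        foreach (var res in result.results)
            foreach (var alt in res.alternatives)
                Debug.Log(alt.transcript);
    }
}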
Source: https://stackoverflow.com/questions/46095944/ibm-watson-speech-to-text-service-is-not-giving-response-in-unity3d