问题
I'm fresh in speech recognition topic
I'm working on project in which I can find some usecases for Pepper bot in the workspace. Playing with Pepper we figured out some issues with it's speech recognition ability. I've found that we can try to connect it with external engines for that and I picked up DialogFlow from GCP. Also because I've found some integrations with this service. I've used code from this project
import traceback
from naoqi import qi
# [START dialogflow_detect_intent_streaming]
def detect_intent_stream(project_id, session_id, audio_file_path,
language_code,ip):
"""Returns the result of detect intent with streaming audio as input.
Using the same `session_id` between requests allows continuation
of the conversation."""
import dialogflow_v2 as dialogflow
session_client = dialogflow.SessionsClient()
# Note: hard coding audio_encoding and sample_rate_hertz for simplicity.
audio_encoding = dialogflow.enums.AudioEncoding.AUDIO_ENCODING_LINEAR_16
sample_rate_hertz = 44100
session_path = session_client.session_path(project_id, session_id)
print('Session path: {}\n'.format(session_path))
def request_generator(audio_config, audio_file_path):
query_input = dialogflow.types.QueryInput(audio_config=audio_config)
# The first request contains the configuration.
yield dialogflow.types.StreamingDetectIntentRequest(
session=session_path, query_input=query_input)
# Here we are reading small chunks of audio data from a local
# audio file. In practice these chunks should come from
# an audio input device.
try:
with open(audio_file_path, 'rb') as audio_file:
while True:
chunk = audio_file.read(4096)
print(chunk)
if not chunk:
break
except:
traceback.print_exc()
# The later requests contains audio data.
yield dialogflow.types.StreamingDetectIntentRequest(input_audio=chunk)
audio_config = dialogflow.types.InputAudioConfig(audio_encoding=audio_encoding, language_code=language_code, sample_rate_hertz=sample_rate_hertz)
requests = request_generator(audio_config, audio_file_path)
responses = session_client.streaming_detect_intent(requests)
print('=' * 20)
for response in responses:
print('Intermediate transcript: "{}".'.format(response.recognition_result.transcript))
# Note: The result from the last response is the final transcript along
# with the detected content.
query_result = response.query_result
session = qi.Session()
try:
tts = session.service("ALTextToSpeech")
tts.say(query_result.fulfillment_text)
except:
traceback.print_exc()
raise Exception("session.connect failed.")
finally:
session.close()
print('=' * 20)
print('Query text: {}'.format(query_result.query_text))
print('Detected intent: {} (confidence: {})\n'.format(
query_result.intent.display_name,
query_result.intent_detection_confidence))
print('Fulfillment text: {}\n'.format(
query_result.fulfillment_text))
# [END dialogflow_detect_intent_streaming]
EDITED: (adding pepper_recorder.py code) Code below is taking any sound from Pepper and send only with desire peak to dialog flow detect_intent_stream
import StringIO
from Queue import Queue
from naoqi import ALModule, ALProxy
import numpy as np
import time
import logging
import uuid
import traceback
from detect_intent_stream import detect_intent_stream
LISTEN_RETRIES = 10
DIALOG_FLOW_GCP_PROJECT_ID = "XXXXXXXXXXXXXXXXXXXX"
class SoundProcessingModule(ALModule):
def __init__( self, name, ip, stop_recognition):
try:
ALModule.__init__( self, name );
except Exception as e:
logging.error(str(e))
pass
print("connected")
self.ip = ip
self.BIND_PYTHON( name, "processRemote")
self.ALAudioDevice = ALProxy("ALAudioDevice", self.ip, 9559)
self.framesCount=0
self.count = LISTEN_RETRIES
self.recordingInProgress = False
self.stopRecognition = stop_recognition
self.uuid = uuid.uuid4()
self.previous_sound_data = None
def startProcessing(self):
"""init sound processing, set microphone and stream rate"""
print("startProcessing")
self.ALAudioDevice.setClientPreferences(self.getName(), 16000, 4, 0)
self.ALAudioDevice.subscribe(self.getName())
while not self.stopRecognition.is_set():
time.sleep(1)
self.ALAudioDevice.unsubscribe(self.getName())
def processRemote(self, nbOfChannels, nbOfSamplesByChannel, timeStamp, inputBuffer):
"""audio stream callback method with simple silence detection"""
self.framesCount = self.framesCount + 1
sound_data_interlaced = np.fromstring(str(inputBuffer), dtype=np.int16)
sound_data = np.reshape(sound_data_interlaced, (nbOfChannels, nbOfSamplesByChannel), 'F')
peak_value = np.max(sound_data)
# detect sound
if peak_value > 10000:
print("Peak:", peak_value)
self.count = LISTEN_RETRIES
if not self.recordingInProgress:
self.startRecording(self.previous_sound_data)
# if there is no sound for a few seconds we end the current recording and start audio processing
if self.count <= 0 and self.recordingInProgress:
self.stopRecording()
# if recording is in progress we save the sound to an in-memory file
if self.recordingInProgress:
self.count -= 1
self.previous_data = sound_data
self.procssingQueue.put(sound_data[0].tostring())
self.outfile.write(sound_data[0].tostring())
def startRecording(self, previous_sound_data):
"""init a in memory file object and save the last raw sound buffer to it."""
self.outfile = StringIO.StringIO()
self.procssingQueue = Queue()
self.recordingInProgress = True
if not previous_sound_data is None:
self.procssingQueue.put(previous_sound_data[0].tostring())
self.outfile.write(previous_sound_data[0].tostring())
print("start recording")
def stopRecording(self):
"""saves the recording to memory"""
print("stopped recording")
self.previous_sound_data = None
self.outfile.seek(0)
try:
detect_intent_stream(DIALOG_FLOW_GCP_PROJECT_ID, self.uuid,
self.outfile, "en-US", self.ip)
except:
traceback.print_exc()
self.recordingInProgress = False
I'm connected to the robot and it is listening but each time it starts to record I have something like this in console
('Peak:', 14023)
start recording
stopped recording
Session path: XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
====================
And then error...
Traceback (most recent call last):
> File
> "C:\Users\marwloda\PycharmProjects\Pepper\scripts\pepper_recorder.py",
> line 83, in stopRecording self.outfile, "en-US", self.ip) File
> "C:\Users\marwloda\PycharmProjects\Pepper\scripts\detect_intent_stream.py",
> line 76, in detect_intent_stream for response in responses: File
> "C:\Users\marwloda\PycharmProjects\Pepper\venv\lib\site-packages\google\api_core\grpc_helpers.py",
> line 81, in next
> six.raise_from(exceptions.from_grpc_error(exc), exc) File "C:\Users\marwloda\PycharmProjects\Pepper\venv\lib\site-packages\six.py",
> line 737, in raise_from
> raise value Unknown: None Exception iterating requests!
If like audio_file recorded from robot was empty. But I've done some prints of speech reco data and produce some raw unreadable string
To proof that I have some connection to API - when I open GCP console API & services I see this view
What might cause this error? Where should I look for causes ?
回答1:
So apparently you are passing StringIO
to detect_intent_stream
as audio_file_path
where at:
with open(audio_file_path, 'rb') as audio_file:
you try to open the StringIO
as a File
But:
A StringIO instance is an open file already. The open command, on the other hand, only takes filenames, to return an open file. A StringIO instance is not suitable as a filename. link
So the request_generator
function in detect_intent_stream
needs to look like:
def request_generator(audio_config, audio_file_path):
query_input = dialogflow.types.QueryInput(audio_config=audio_config)
yield dialogflow.types.StreamingDetectIntentRequest(
session=session_path, query_input=query_input)
while True:
chunk = audio_file_path.read(4096)
if not chunk:
break
yield dialogflow.types.StreamingDetectIntentRequest(
input_audio=chunk)
回答2:
Hi there might be an issue with your audio file or the network connection. Have you tried to send a simple text or a prerecorded wav file as input to dialogflow?
This script works well on my pepper: (my wav file is Mono, 44100Hz, 32Bit)
import os
import time
import sys
import uuid
import google
def printResponse(response):
print('=' * 20)
print('Query text: {}'.format(response.query_result.query_text.encode('utf-8')))
print('Detected intent: {} (confidence: {})'.format(
response.query_result.intent.display_name.encode('utf-8'),
response.query_result.intent_detection_confidence))
print('Fulfillment text: {}'.format(
response.query_result.fulfillment_text.encode('utf-8')))
def detect_intent_audio(project_id, session_id, audio_file_path,
language_code, sample_rate_hertz):
import dialogflow_v2 as dialogflow
session_client = dialogflow.SessionsClient()
# Note: hard coding audio_encoding and sample_rate_hertz for simplicity.
audio_encoding = dialogflow.enums.AudioEncoding.AUDIO_ENCODING_LINEAR_16
session = session_client.session_path(project_id, session_id)
with open(audio_file_path, 'rb') as audio_file:
input_audio = audio_file.read()
audio_config = dialogflow.types.InputAudioConfig(
audio_encoding=audio_encoding, language_code=language_code,
sample_rate_hertz=sample_rate_hertz)
query_input = dialogflow.types.QueryInput(audio_config=audio_config)
response = session_client.detect_intent(
session=session, query_input=query_input,
input_audio=input_audio)
printResponse(response)
def detect_intent_stream(project_id, session_id, audio_file_path,
language_code, sample_rate_hertz):
import dialogflow_v2 as dialogflow
session_client = dialogflow.SessionsClient()
# Note: hard coding audio_encoding and sample_rate_hertz for simplicity.
audio_encoding = dialogflow.enums.AudioEncoding.AUDIO_ENCODING_LINEAR_16
session_path = session_client.session_path(project_id, session_id)
def request_generator(audio_config, audio_file_path):
query_input = dialogflow.types.QueryInput(audio_config=audio_config)
# The first request contains the configuration.
yield dialogflow.types.StreamingDetectIntentRequest(
session=session_path, query_input=query_input)
# Here we are reading small chunks of audio data from a local
# audio file. In practice these chunks should come from
# an audio input device.
with open(audio_file_path, 'rb') as audio_file:
while True:
chunk = audio_file.read(4096)
if not chunk:
break
# The later requests contains audio data.
yield dialogflow.types.StreamingDetectIntentRequest(
input_audio=chunk)
audio_config = dialogflow.types.InputAudioConfig(
audio_encoding=audio_encoding, language_code=language_code,
sample_rate_hertz=sample_rate_hertz)
requests = request_generator(audio_config, audio_file_path)
responses = session_client.streaming_detect_intent(requests)
print('=' * 20)
for response in responses:
print('Intermediate transcript: "{}".'.format(
response.recognition_result.transcript.encode('utf-8')))
# Note: The result from the last response is the final transcript along
# with the detected content.
printResponse(response)
def detect_intent_texts(project_id, session_id, texts, language_code):
import dialogflow_v2 as dialogflow
session_client = dialogflow.SessionsClient()
session = session_client.session_path(project_id, session_id)
for text in texts:
text_input = dialogflow.types.TextInput(
text=text, language_code=language_code)
query_input = dialogflow.types.QueryInput(text=text_input)
response = session_client.detect_intent(
session=session, query_input=query_input)
printResponse(response)
project_id = 'my_project_id'
session_id = str(uuid.uuid4())
language_code = 'de'
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = os.path.join('/home/me/', 'xyz.json')
wav_path = os.path.join('/home/me/', 'audio.wav')
try:
print ("detect_intent_texts:")
detect_intent_texts(project_id, session_id, ["Hallo"], language_code)
print('=' * 20)
print ("detect_intent_audio:")
detect_intent_audio(project_id, session_id, wav_path, language_code, 44100)
print('=' * 20)
print ("detect_intent_stream:")
detect_intent_stream(project_id, session_id, wav_path, language_code, 44100)
print('=' * 20)
except google.api_core.exceptions.ServiceUnavailable:
print("503 Connect Failed")
来源:https://stackoverflow.com/questions/58375010/how-to-fix-none-exception-iterating-requests-issue