Google speech to text node-record-lpcm16 stream error

问题

I am setting up google's speech-to-text in a node/express environment on Google App Engine. I have an Angular app that communicates to the server via websockets.

This all works perfectly on localhost, but it does not work when my Angular app points to the App Engine instance.

It can connect fine - sends connection msg back/forth. And it runs my google speech connection fine.

However, I get an error in the part where I try to access the mic stream. The error message isn't much use: ERROR with recorder.record sox has exited with error code 2. (full code below)

The error seems to come from recorder.record (provided by const recorder = require('node-record-lpcm16');). Or could it be that my GOOGLE_APPLICATION_CREDENTIALS auth isn't set (though I'd expect an explicit error in that case)? Or is it something else I am missing? I have attached my package.json and my Dockerfile at the bottom for reference.

I have SoX installed both as an npm package and on the machine itself (using the Flexible environment and a Dockerfile to set it up).

Any idea what could be causing this error / how to debug / how to fix?

Here's my code to set the connection; you can see the error handler on recorder.record which is called.

var constants = require('../conf/consts');

/**
 * Starts a streaming Google Cloud Speech recognition session fed by a local
 * `sox` recording and forwards every transcript to the given socket.
 *
 * NOTE(review): `node-record-lpcm16` spawns the `sox` binary on the machine
 * running this process, so audio is captured from that host's input device,
 * not from the connected client. Confirm the host actually has an audio
 * input device; otherwise `sox` exits with an error.
 *
 * @param {object} socket - Connection exposing `emit(event, payload)`
 *   (presumably a socket.io socket; verify against the caller).
 * @returns {void}
 */
function GCPSpeechToTextService(socket) {

  console.log('GCPSpeechToTextService() called');
  const greeting = 'Hello from server. GCP Speech Started!';
  socket.emit(constants.SOCKET_NEW_MSG_STRING, greeting);

  const recorder = require('node-record-lpcm16');
  const speech = require('@google-cloud/speech');
  const encoding = 'LINEAR16';
  const sampleRateHertz = 16000;
  const languageCode = 'en-GB'; // en-US etc. This really improves recognition!

  const request = {
    config: {
      encoding,
      sampleRateHertz,
      languageCode,
    },
    // `singleUtterance` is a streaming-level option (a sibling of
    // `interimResults`), not part of the recognition `config`, and the Node
    // client expects camelCase. Sends voice immediately after speaking, ideal
    // for clear/not clear prompts.
    singleUtterance: true,
    interimResults: true,
  };

  // Create a recognize stream
  const client = new speech.SpeechClient();
  const recognizeStream = client
    .streamingRecognize(request)
    .on('error', (error) => console.log('ERROR with streamingRecognize', error))
    .on('data', (data) => {
      console.log('>> data: ', data);

      // Guard each level: a response may carry no results / alternatives.
      const firstResult = data.results && data.results[0];
      const firstAlternative =
        firstResult && firstResult.alternatives && firstResult.alternatives[0];

      // With `singleUtterance` enabled the API also sends an end-of-utterance
      // event that carries no transcript; do not report it as a time-limit
      // error. Presumably the enum arrives as its string name - verify; if it
      // does not, this check is simply skipped and the fallback below applies.
      if (!firstAlternative && data.speechEventType === 'END_OF_SINGLE_UTTERANCE') {
        return;
      }

      const transcript = firstAlternative
        ? firstAlternative.transcript
        : 'ERROR:TIMELIMIT Reached transcription time limit';
      process.stdout.write(transcript);

      // Send it to the client
      socket.emit(constants.SOCKET_NEW_MSG_STRING, transcript);
    });

  // Start recording and send the microphone input to the Speech API.
  console.log('>set recorder...');
  const recording = recorder.record({
    // Option names follow node-record-lpcm16 1.x (`sampleRate`, `recorder`);
    // the older `sampleRateHertz` / `recordProgram` keys are not read by it.
    sampleRate: sampleRateHertz,
    threshold: 0.1,
    recorder: 'sox',
    silence: '10.0',
  });

  // Once recognition is over (finished or failed), stop the recorder child
  // process so it does not keep running with nothing consuming its output.
  let isRecordingStopped = false;
  const stopRecording = () => {
    if (isRecordingStopped) {
      return;
    }
    isRecordingStopped = true;
    recording.stop();
  };
  recognizeStream.on('end', stopRecording).on('error', stopRecording);

  recording
    .stream()
    .on('error', (error) => console.log('ERROR with recorder.record', error))
    .on('end', () => console.log('> END recorder.record'))
    .pipe(recognizeStream);
}

module.exports = GCPSpeechToTextService;

package.json dependencies:

"dependencies": {
    "@google-cloud/speech": "^4.1.4",
    "cookie-parser": "~1.4.4",
    "cpx": "^1.5.0",
    "debug": "~2.6.9",
    "express": "^4.16.4",
    "express-ws": "^4.0.0",
    "http-errors": "~1.6.3",
    "jade": "^1.11.0",
    "morgan": "~1.9.1",
    "node-record-lpcm16": "^1.0.1",
    "pug": "^3.0.0",
    "socket.io": "^3.0.3",
    "sox": "^0.1.0"
  }

and my Dockerfile:

# Image build steps for the Node app (base image line not shown in this excerpt).

# All following instructions run relative to /app.
WORKDIR /app
# Copy only the manifest first so the dependency install below can be cached
# independently of application source changes.
COPY package.json /app/package.json
# Install the SoX binary and its format libraries at the OS level.
RUN apt-get update \
 && apt-get install -y sox libsox-fmt-all
# Install the Node dependencies declared in package.json.
RUN npm install
# Copy the rest of the application source into the image.
COPY . /app
# Port 8080 is declared as the port the container listens on.
EXPOSE 8080
# Start the app through the package.json "start" script.
CMD ["npm", "start"]

来源：https://stackoverflow.com/questions/65100476/google-speech-to-text-node-record-lpcm16-stream-error

标签

google-app-engine

google-cloud-platform

speech-to-text

sox