How to save a text to speech audio file client side?

一笑奈何 提交于 2021-01-29 12:39:36

问题


desired behaviour

Allow the user to download a text-to-speech audio file by clicking a button, as in this official demo:

https://text-to-speech-starter-kit.ng.bluemix.net

what i've tried

i am using:

https://github.com/watson-developer-cloud/node-sdk

I can generate an audio file server side, but I can't figure out how to send that file back to the client so they can save it - so I am trying to generate it client side instead.

attempt 01: generate audio file server side

server.js (works)

const fs = require('fs');
const TextToSpeechV1 = require('ibm-watson/text-to-speech/v1');

// Watson Text to Speech client, authenticated with an IAM API key.
const textToSpeech = new TextToSpeechV1({ iam_apikey: '{apikey}' });

// What to say, the audio format to return and the voice to use.
const synthesizeParams = {
    text: 'Hello world',
    accept: 'audio/wav',
    voice: 'en-US_AllisonVoice',
};

// Pipes the synthesized audio stream into hello_world.wav.
function saveAudio(audio) {
    audio.pipe(fs.createWriteStream('hello_world.wav'));
}

// Logs a rejected synthesis request (or a failure while starting the pipe).
function reportFailure(err) {
    console.log('error:', err);
}

textToSpeech.synthesize(synthesizeParams)
    .then(saveAudio)
    .catch(reportFailure);

For reference, according to the docs, the response type of the .synthesize() method is:

NodeJS.ReadableStream|FileObject|Buffer   

attempt 02: generate audio file client side

server.js - required to get token (works)

var AuthorizationV1 = require('ibm-watson/authorization/v1');

// API key and service URL are read from the local settings object.
var {
    TEXT_TO_SPEECH_IAM_APIKEY: iam_apikey,
    TEXT_TO_SPEECH_URL: url,
} = local_settings;

// Authorization client that the token endpoint uses to obtain tokens.
var authorization = new AuthorizationV1({ iam_apikey, url });

/**
 * Express handler that requests a token from the authorization client and
 * returns it to the caller together with the service URL.
 *
 * Replies with `{ token, url }` on success. When the token request fails (an
 * error is reported or no token comes back) the error is logged and a 500
 * response is sent, so the client request does not hang.
 *
 * @param {Object} req - Express request (not read).
 * @param {Object} res - Express response.
 * @returns {Promise<void>}
 */
const api_tts_token_get = async (req, res) => {

    authorization.getToken((err, token) => {
        if (err || !token) {
            console.log('error:', err);
            // always answer the request, otherwise the client waits until it times out
            res.status(500).json({ error: 'unable to obtain text-to-speech token' });
            return;
        }

        res.json({ token: token, url: url });
    });

};

// Serve the token handler for GET requests on the versioned token path.
const tts_token_route = app.route("/api/:api_version/text-to-speech/token");
tts_token_route.get(api_tts_token_get);

client.js (doesn’t work)

var TextToSpeechV1 = require('ibm-watson/text-to-speech/v1');

/**
 * Requests a Text to Speech token from the server and, once it arrives,
 * attempts a synthesis request straight from the browser, logging whatever
 * comes back.
 *
 * @param {Object} parameters - Data sent along with the token request.
 */
const get_token = (parameters) => {

    // Logs the synthesis error when there is one, otherwise the result.
    const log_outcome = (err, result) => {
        console.log(err || result);
    };

    // Builds a client from the token response and asks it for the audio.
    const on_token = ({ token, url }) => {
        const tts_client = new TextToSpeechV1({ token, url });

        tts_client.synthesize({
            text: 'hello world!',
            accept: 'audio/wav',
            voice: 'en-US_AllisonV3Voice'
        }, log_outcome);
    };

    // Runs when the token endpoint answers with HTTP 500.
    const on_server_error = () => {
        console.log("that didn't work");
    };

    $.ajax({
        url: "/api/v1/text-to-speech/token",
        data: parameters,
        dataType: 'json',
        cache: false,
        headers: headers,
        success: on_token,
        statusCode: { 500: on_server_error }
    });

};

webpack.config.js

added per instructions at:

https://github.com/watson-developer-cloud/node-sdk/tree/master/examples/webpack#important-notes

// webpack `node` option, added per the watson node-sdk webpack example notes.
// Each Node core module listed here is set to 'empty' for the browser bundle
// (presumably replaced with an empty stub - confirm against the webpack docs linked below).
node: {
    // see http://webpack.github.io/docs/configuration.html#node
    // and https://webpack.js.org/configuration/node/
    fs: 'empty',
    net: 'empty',
    tls: 'empty'
},

chrome dev tools errors:

xhr.js:108 Refused to set unsafe header "User-Agent"

The provided value 'stream' is not a valid enum value of type XMLHttpRequestResponseType.

Access to XMLHttpRequest at 'https://***.watsonplatform.net/text-to-speech/api/v1/synthesize?voice=en-US_AllisonV3Voice'
from origin 'http://localhost:3000' has been blocked by CORS policy: 
Request header field x-ibmcloud-sdk-analytics is not allowed by 
Access-Control-Allow-Headers in preflight response.

Error: Response not received. Body of error is HTTP ClientRequest object
at RequestWrapper.formatError (requestwrapper.js:218)
at eval (requestwrapper.js:206)

回答1:


Here is one solution I have figured out.

It generates the audio file server side and sends it back via res.download().

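The only caveat is that you can't trigger the download with $.ajax(), because an AJAX response is handed to your script instead of being saved by the browser; navigate to the endpoint instead, with something like: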

// Navigate to the audio endpoint (rather than calling it via $.ajax()) so the browser receives the download response itself.
window.open("/api/v1/audio?file_id=12345");

server.js

var TextToSpeechV1 = require('ibm-watson/text-to-speech/v1');

/**
 * Express handler: synthesizes a WAV file with Watson Text to Speech, stores
 * it as a temporary file named after the `file_id` query parameter, sends it
 * to the client with res.download() and then deletes the temporary file.
 *
 * Replies with 400 when `file_id` is missing or contains anything other than
 * letters, digits, "_" or "-", and with 500 when synthesis, writing or sending
 * fails before any response headers were sent.
 *
 * NOTE(review): two concurrent requests with the same file_id would share one
 * temporary file - confirm that callers always pass unique ids.
 *
 * @param {Object} req - Express request; reads `req.query.file_id`.
 * @param {Object} res - Express response used for the download or the error reply.
 * @returns {Promise<void>}
 */
const api_audio_get = async (req, res) => {

    var file_id = req.query.file_id;

    // file_id becomes part of a path that is written and later deleted, so
    // reject anything that could point outside the my_files directory
    if (typeof file_id !== "string" || !/^[A-Za-z0-9_-]+$/.test(file_id)) {
        res.status(400).json({ error: "invalid file_id" });
        return;
    }

    var textToSpeech = new TextToSpeechV1({
        iam_apikey: local_settings.TEXT_TO_SPEECH_IAM_APIKEY,
        url: local_settings.TEXT_TO_SPEECH_URL
    });

    const synthesizeParams = {
        text: 'here is test voice',
        accept: 'audio/wav',
        voice: 'en-US_AllisonV3Voice',
    };

    // see:  https://stackoverflow.com/a/46413467
    // this allows you to create temp file on server, send it, then delete it
    var filename = file_id + ".wav";
    // one absolute path is used for writing, sending and deleting, so all three
    // agree even when the process was not started from this directory
    var absPath = path.join(__dirname, "/my_files/", filename);

    // deletes the temporary file; only reports success when unlink succeeded
    const remove_temp_file = () => {
        fs.unlink(absPath, (err) => {
            if (err) {
                console.log(err);
                return;
            }
            console.log("FILE [" + filename + "] REMOVED!");
        });
    };

    // answers with a 500 unless a response has already been started
    const send_server_error = () => {
        if (!res.headersSent) {
            res.status(500).json({ error: "unable to generate audio" });
        }
    };

    textToSpeech.synthesize(
        synthesizeParams,
        function(err, audio) {
            if (err) {
                console.log(err);
                // answer the request instead of leaving the client waiting
                send_server_error();
                return;
            }

            // see: https://nodejs.org/en/knowledge/advanced/streams/how-to-use-fs-create-write-stream/
            var write_stream = fs.createWriteStream(absPath);
            // set once the transfer either failed or finished, so only one path runs
            var settled = false;

            // shared failure path for errors on either stream
            const abort = (stream_err) => {
                if (settled) {
                    return;
                }
                settled = true;
                console.log(stream_err);
                write_stream.destroy();
                remove_temp_file();
                send_server_error();
            };

            audio.on('error', abort);
            write_stream.on('error', abort);

            // see: https://stackoverflow.com/questions/19829379/detecting-the-end-of-a-writestream-in-node
            write_stream.on('finish', function() {
                if (settled) {
                    return;
                }
                settled = true;

                res.download(absPath, (err) => {
                    if (err) {
                        console.log(err);
                        // the response may be partially sent; only reply if it has not started
                        send_server_error();
                    }
                    remove_temp_file();
                });
            });

            // audio is written to the writestream
            audio.pipe(write_stream);
        }
    );

};

// route handler: serve the audio download for GET requests on the versioned audio path
const audio_route = app.route("/api/:api_version/audio");
audio_route.get(api_audio_get);

client.js

// Opens the audio endpoint so the browser receives the generated file.
const open_audio_download = () => {
    window.open("/api/v1/audio?file_id=12345");
};

// Delegated handler: fires for clicks on any .download_audio element.
$(document).on("click", ".download_audio", open_audio_download);


来源:https://stackoverflow.com/questions/57020422/how-to-save-a-text-to-speech-audio-file-client-side

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!