Chrome Speech Synthesis with longer texts

寵の児 提交于 2019-11-27 10:36:58

I've had this issue for a while now with Google Chrome Speech Synthesis. After some investigation, I discovered the following:

  • The breaking of the utterances only happens when the voice is not a native voice,
  • The cutting out usually occurs between 200-300 characters,
  • When it does break you can un-freeze it by doing speechSynthesis.cancel();
  • The 'onend' event sometimes decides not to fire. A quirky work-around to this is to console.log() out the utterance object before speaking it. Also I found wrapping the speak invocation in a setTimeout callback helps smooth these issues out.

In response to these problems, I have written a function that overcomes the character limit, by chunking the text up into smaller utterances, and playing them one after another. Obviously you'll get some odd sounds sometimes as sentences might be chunked into two separate utterances with a small time delay in between each, however the code will try and split these points at punctuation marks as to make the breaks in sound less obvious.

Update

I've made this work-around publicly available at https://gist.github.com/woollsta/2d146f13878a301b36d7#file-chunkify-js. Many thanks to Brett Zamir for his contributions.

The function:

/**
 * Speaks an utterance while working around Chrome cutting off long texts.
 *
 * If the utterance's voice reports voiceURI 'native', the text is spoken in one piece.
 * Otherwise the text is spoken as a chain of short utterances: the next chunk is only
 * queued from the previous chunk's 'end' event, and chunks preferably end at
 * punctuation or at a space.
 *
 * Setting speechUtteranceChunker.cancel = true stops the chain at the next 'end'
 * event; the flag is reset when it is observed.
 *
 * @param {SpeechSynthesisUtterance} utt - Utterance carrying the full text and voice settings.
 * @param {Object} [settings]
 * @param {number} [settings.chunkLength=160] - Maximum number of characters per chunk.
 * @param {number} [settings.offset] - Start index into utt.text; advanced between chunks.
 * @param {function(SpeechSynthesisUtterance)} [settings.modifier] - Called with every
 *     utterance right before it is spoken.
 * @param {function()} [callback] - Called once the whole text has been spoken.
 */
var speechUtteranceChunker = function (utt, settings, callback) {
    settings = settings || {};
    var newUtt;
    var txt = (settings.offset !== undefined ? utt.text.substring(settings.offset) : utt.text);
    if (utt.voice && utt.voice.voiceURI === 'native') { // Not part of the spec
        newUtt = utt;
        newUtt.text = txt;
        newUtt.addEventListener('end', function () {
            if (speechUtteranceChunker.cancel) {
                speechUtteranceChunker.cancel = false;
            }
            if (callback !== undefined) {
                callback();
            }
        });
    }
    else {
        // Nothing (or only whitespace) left to say: the whole text has been spoken.
        if (txt.trim().length === 0) {
            if (callback !== undefined) {
                callback();
            }
            return;
        }
        var chunkLength = settings.chunkLength || 160;
        // Preference order: end at punctuation in the second half of the window,
        // else take the whole remainder if it fits, else end at the last space.
        var pattRegex = new RegExp('^[\\s\\S]{' + Math.floor(chunkLength / 2) + ',' + chunkLength + '}[.!?,]{1}|^[\\s\\S]{1,' + chunkLength + '}$|^[\\s\\S]{1,' + chunkLength + '} ');
        var chunkArr = txt.match(pattRegex);
        // match() returns null when no alternative applies (e.g. a long run without
        // spaces or punctuation); fall back to a hard cut so progress is always made.
        var chunk = chunkArr ? chunkArr[0] : txt.substring(0, chunkLength);
        newUtt = new SpeechSynthesisUtterance(chunk);
        var x;
        for (x in utt) {
            if (utt.hasOwnProperty(x) && x !== 'text') {
                newUtt[x] = utt[x];
            }
        }
        // The speech parameters may be accessors inherited from the prototype rather
        // than own properties, in which case the loop above skips them; copy them explicitly.
        ['voice', 'lang', 'rate', 'pitch', 'volume'].forEach(function (prop) {
            if (utt[prop] !== undefined && utt[prop] !== null) {
                newUtt[prop] = utt[prop];
            }
        });
        newUtt.addEventListener('end', function () {
            if (speechUtteranceChunker.cancel) {
                speechUtteranceChunker.cancel = false;
                return;
            }
            // Advance by the full chunk so its last character is not repeated
            // at the start of the next chunk.
            settings.offset = (settings.offset || 0) + chunk.length;
            speechUtteranceChunker(utt, settings, callback);
        });
    }

    if (settings.modifier) {
        settings.modifier(newUtt);
    }
    console.log(newUtt); //IMPORTANT!! Do not remove: Logging the object out fixes some onend firing issues.
    //placing the speak invocation inside a callback fixes ordering and onend issues.
    setTimeout(function () {
        speechSynthesis.speak(newUtt);
    }, 0);
};

How to use it...

// Usage example: create an utterance as you normally would...
var myLongText = "This is some long text, oh my goodness look how long I'm getting, wooooohooo!";

var utterance = new SpeechSynthesisUtterance(myLongText);

// ...and configure it as you normally would.
// NOTE(review): voiceArr[2] is undefined when fewer than three voices are returned;
// presumably getVoices() can be empty until the browser has loaded its voices - confirm.
var voiceArr = speechSynthesis.getVoices();
utterance.voice = voiceArr[2];

// Pass it to the chunking function to have it played out.
// chunkLength sets the maximum number of characters per chunk.
// The optional callback fires once the entire text has been spoken.
speechUtteranceChunker(utterance, {
    chunkLength: 120
}, function () {
    // Code to execute when done.
    console.log('done');
});

I hope people find this useful.

The problem with Peter's answer is it doesn't work when you have a queue of speech synthesis set up. The script will put the new chunk at the end of the queue, and thus out of order. Example: https://jsfiddle.net/1gzkja90/

<script type='text/javascript' src='http://code.jquery.com/jquery-2.1.0.js'></script>
<script type='text/javascript'>    
    // `u` is assigned without a declaration, exactly as in the demo, so it stays a global.
    u = new SpeechSynthesisUtterance();
    $(document).ready(function () {
        // Shared completion logger for every paragraph.
        var reportEnd = function () {
            console.log('end');
        };

        // Hand each ".t" element's text to the chunker; every call gets its own
        // settings object.
        $('.t').each(function () {
            var paragraphText = $(this).text();
            u = new SpeechSynthesisUtterance(paragraphText);
            speechUtteranceChunker(u, { chunkLength: 120 }, reportEnd);
        });
    });
     /**
     * Chunkify
     * Google Chrome Speech Synthesis Chunking Pattern
     * Fixes inconsistencies with speaking long texts in speechUtterance objects 
     * Licensed under the MIT License
     *
     * Peter Woolley and Brett Zamir
     */
    /**
     * Speaks `utt`, splitting its text into chunks of at most settings.chunkLength
     * characters (default 160) unless the voice reports voiceURI 'native'.
     * Each chunk is queued from the previous chunk's 'end' event; `callback` is
     * called once the whole text has been spoken. settings.modifier, when given,
     * is called with every utterance before it is spoken. Setting
     * speechUtteranceChunker.cancel = true stops the chain at the next 'end' event.
     */
    var speechUtteranceChunker = function (utt, settings, callback) {
        settings = settings || {};
        var newUtt;
        var txt = (settings.offset !== undefined ? utt.text.substring(settings.offset) : utt.text);
        if (utt.voice && utt.voice.voiceURI === 'native') { // Not part of the spec
            newUtt = utt;
            newUtt.text = txt;
            newUtt.addEventListener('end', function () {
                if (speechUtteranceChunker.cancel) {
                    speechUtteranceChunker.cancel = false;
                }
                if (callback !== undefined) {
                    callback();
                }
            });
        }
        else {
            // Nothing (or only whitespace) left to say: the whole text has been spoken.
            if (txt.trim().length === 0) {
                if (callback !== undefined) {
                    callback();
                }
                return;
            }
            var chunkLength = settings.chunkLength || 160;
            // Preference order: end at punctuation in the second half of the window,
            // else take the whole remainder if it fits, else end at the last space.
            var pattRegex = new RegExp('^[\\s\\S]{' + Math.floor(chunkLength / 2) + ',' + chunkLength + '}[.!?,]{1}|^[\\s\\S]{1,' + chunkLength + '}$|^[\\s\\S]{1,' + chunkLength + '} ');
            var chunkArr = txt.match(pattRegex);
            // match() returns null when no alternative applies (e.g. a long run without
            // spaces or punctuation); fall back to a hard cut so progress is always made.
            var chunk = chunkArr ? chunkArr[0] : txt.substring(0, chunkLength);
            newUtt = new SpeechSynthesisUtterance(chunk);
            var x;
            for (x in utt) {
                if (utt.hasOwnProperty(x) && x !== 'text') {
                    newUtt[x] = utt[x];
                }
            }
            // The speech parameters may be accessors inherited from the prototype rather
            // than own properties, in which case the loop above skips them; copy them explicitly.
            ['voice', 'lang', 'rate', 'pitch', 'volume'].forEach(function (prop) {
                if (utt[prop] !== undefined && utt[prop] !== null) {
                    newUtt[prop] = utt[prop];
                }
            });
            newUtt.addEventListener('end', function () {
                if (speechUtteranceChunker.cancel) {
                    speechUtteranceChunker.cancel = false;
                    return;
                }
                // Advance by the full chunk so its last character is not repeated
                // at the start of the next chunk.
                settings.offset = (settings.offset || 0) + chunk.length;
                speechUtteranceChunker(utt, settings, callback);
            });
        }

        if (settings.modifier) {
            settings.modifier(newUtt);
        }
        console.log(newUtt); //IMPORTANT!! Do not remove: Logging the object out fixes some onend firing issues.
        //placing the speak invocation inside a callback fixes ordering and onend issues.
        setTimeout(function () {
            speechSynthesis.speak(newUtt);
        }, 0);
    };
</script>
<p class="t">MLA format follows the author-page method of in-text citation. This means that the author's last name and the page number(s) from which the quotation or paraphrase is taken must appear in the text, and a complete reference should appear on your Works Cited page. The author's name may appear either in the sentence itself or in parentheses following the quotation or paraphrase, but the page number(s) should always appear in the parentheses, not in the text of your sentence.</p>
<p class="t">Joe waited for the train.</p>
<p class="t">The train was late.</p>
<p class="t">Mary and Samantha took the bus.</p>

In my case, the answer was to "chunk" the string before adding them to the queue. See here: http://jsfiddle.net/vqvyjzq4/

Many props to Peter for the idea as well as the regex (which I still have yet to conquer.) I'm sure the javascript can be cleaned up, this is more of a proof of concept.

<script type='text/javascript' src='http://code.jquery.com/jquery-2.1.0.js'></script>
<script type='text/javascript'>    
    // Maximum number of characters per spoken chunk.
    var chunkLength = 120;
    // Preference order: end at punctuation in the second half of the window,
    // else take the whole remainder if it fits, else end at the last space.
    var pattRegex = new RegExp('^[\\s\\S]{' + Math.floor(chunkLength / 2) + ',' + chunkLength + '}[.!?,]{1}|^[\\s\\S]{1,' + chunkLength + '}$|^[\\s\\S]{1,' + chunkLength + '} ');

    // Split the text into chunks first, then queue every chunk up front so the
    // chunks stay in order inside the speech queue.
    $(document).ready(function () {
        // NOTE(review): inside the ready handler `this` is not one of the ".t"
        // paragraphs, and replaceBlank() is not defined in this snippet -
        // presumably both come from the surrounding fiddle; confirm.
        var element = this;
        var arr = [];
        var txt = replaceBlank($(element).text());
        while (txt.length > 0) {
            var found = txt.match(pattRegex);
            // match() returns null when there is no split point (a run longer than
            // chunkLength without spaces or punctuation); cut hard instead of throwing.
            var piece = found ? found[0] : txt.substring(0, chunkLength);
            arr.push(piece);
            txt = txt.substring(piece.length);
        }
        $.each(arr, function () {
            var toSpeak = this.trim();
            // Do not queue blank utterances.
            if (toSpeak.length === 0) {
                return;
            }
            var u = new SpeechSynthesisUtterance(toSpeak);
            window.speechSynthesis.speak(u);
        });
    });
</script>
<p class="t">MLA format follows the author-page method of in-text citation. This means that the author's last name and the page number(s) from which the quotation or paraphrase is taken must appear in the text, and a complete reference should appear on your Works Cited page. The author's name may appear either in the sentence itself or in parentheses following the quotation or paraphrase, but the page number(s) should always appear in the parentheses, not in the text of your sentence.</p>
<p class="t">Joe waited for the train.</p>
<p class="t">The train was late.</p>
<p class="t">Mary and Samantha took the bus.</p>

A simple and effective solution is to resume periodically.

// Handle of the pending resume timer. Declared explicitly: assigning to an
// undeclared name throws in strict mode and in ES modules.
var timeoutResumeInfinity;

/**
 * Calls speechSynthesis.resume() and re-schedules itself every second.
 * Stop the loop with clearTimeout(timeoutResumeInfinity).
 */
function resumeInfinity() {
    window.speechSynthesis.resume();
    timeoutResumeInfinity = setTimeout(resumeInfinity, 1000);
}

You can associate this with the onend and onstart events, so you will only be invoking the resume if necessary. Something like:

var utterance = new SpeechSynthesisUtterance();

// Start the periodic resume loop when speech starts...
utterance.onstart = function startResuming() {
    resumeInfinity();
};

// ...and cancel the pending resume timer when speech ends.
utterance.onend = function stopResuming() {
    clearTimeout(timeoutResumeInfinity);
};

I discovered this by chance!

Hope this helps!

I ended up chunking up the text and adding some intelligence around the handling of various punctuation marks like periods, commas, etc. For example, you don't want to break the text up on a comma if it's part of a number (i.e., $10,000).

I have tested it and it seems to work on arbitrarily large sets of input and it also appears to work not just on the desktop but on android phones and iphones.

Set up a github page for the synthesizer at: https://github.com/unk1911/speech

You can see it live at: http://edeliverables.com/tts/

Here is what i ended up with, it simply splits my sentences on the period "."

// Voice list fetched once at load time; sayit() below picks voices[10] from it.
var voices = window.speechSynthesis.getVoices();

/**
 * Builds a pre-configured utterance (voice, volume, rate, pitch, language)
 * with logging event handlers attached. The caller sets `.text` and speaks it.
 *
 * @returns {SpeechSynthesisUtterance} The configured utterance, without text.
 */
var sayit = function ()
{
    // The list captured at load time may still be empty; look it up again at
    // call time so a voice that has become available since then is used.
    if (!voices || voices.length === 0) {
        voices = window.speechSynthesis.getVoices();
    }

    var msg = new SpeechSynthesisUtterance();

    // Only assign a voice that exists; otherwise keep the browser default.
    if (voices[10]) {
        msg.voice = voices[10]; // Note: some voices don't support altering params
    }
    msg.voiceURI = 'native';
    msg.volume = 1; // 0 to 1
    msg.rate = 1; // 0.1 to 10
    msg.pitch = 2; //0 to 2
    msg.lang = 'en-GB';
    msg.onstart = function (event) {

        console.log("started");
    };
    msg.onend = function(event) {
        console.log('Finished in ' + event.elapsedTime + ' seconds.');
    };
    msg.onerror = function(event)
    {
        // Log the error code; concatenating the event itself only prints "[object ...]".
        console.log('Errored ' + event.error);
    }
    msg.onpause = function (event)
    {
        console.log('paused ' + event);

    }
    msg.onboundary = function (event)
    {
        console.log('onboundary ' + event);
    }

    return msg;
}


/**
 * Speaks `text` one sentence at a time, splitting on the period character.
 * Any speech still queued is cancelled first.
 *
 * @param {string} text - Text to speak.
 */
var speekResponse = function (text)
{
    speechSynthesis.cancel(); // if it errors, this clears out the error.

    var sentences = text.split(".");
    for (var i = 0; i < sentences.length; i++)
    {
        // split(".") leaves an empty fragment after a trailing period and
        // leading spaces on the others; never queue a blank utterance.
        var sentence = sentences[i].trim();
        if (sentence.length === 0)
        {
            continue;
        }
        var toSay = sayit();
        toSay.text = sentence;
        speechSynthesis.speak(toSay);
    }
}

2017 and this bug is still around. I happen to understand this problem quite well, being the developer of the award-winning Chrome extension Read Aloud. OK, just kidding about the award winning part.

  1. Your speech will get stuck if it's longer than 15 seconds.
  2. I discovered that Chrome uses a 15 second idle timer to decide when to deactivate an extension's event/background page. I believe this is the culprit.

The workaround I've used is a fairly complicated chunking algorithm that respects punctuation. For Latin languages, I set max chunk size at 36 words. The code is open-source, if you're inclined: https://github.com/ken107/read-aloud/blob/master/js/speech.js (line 144)

The 36-word limit works well most of the time, staying within 15 seconds. But there'll be cases where it still gets stuck. To recover from that, I use a 16 second timer.

I solved the problem with a timer function that calls pause() and resume() and then sets the timer again. In the onend event I clear the timer.

    // Handle of the pending pause/resume timer, kept so it can be cleared later.
    var myTimeout;

    /**
     * Pauses and immediately resumes speech synthesis, then schedules the
     * next run of itself in 10 seconds.
     */
    function myTimer() {
        var synth = window.speechSynthesis;
        synth.pause();
        synth.resume();
        myTimeout = setTimeout(myTimer, 10000);
    }
    ...
        // Drop anything still queued, then schedule the first pause/resume run.
        window.speechSynthesis.cancel();
        myTimeout = setTimeout(myTimer, 10000);
        var toSpeak = "some text";
        var utt = new SpeechSynthesisUtterance(toSpeak);
        ...
        // Cancel the pending pause/resume run once the utterance has finished.
        utt.onend =  function() { clearTimeout(myTimeout); }
        window.speechSynthesis.speak(utt);
    ...

This seems to work well.

As Michael pointed out, Peter's solution is really great except when your text is on different lines. Michael created a demo to better illustrate the problem with it - https://jsfiddle.net/1gzkja90/ - and proposed another solution.

Another, maybe simpler, way to solve this is to remove the line breaks from the textarea text in Peter's solution; then it works just great.

//javascript
// Read the textarea's text through .value; the element itself has no replace() method.
var noLineBreaks = document.getElementById('mytextarea').value.replace(/\n/g,'');

//jquery
var noLineBreaks = $('#mytextarea').val().replace(/\n/g,'');

So in Peter's solution it might look the following way :

// Set the utterance text to the textarea value with every line-feed character removed.
utterance.text = $('#mytextarea').val().replace(/\n/g,'');

But still there's problem with canceling the speech. It just goes to another sequence and won't stop.

Other suggestions do weird things with the dot (or say "DOT") and do not respect speech intonation at the end of a sentence.

// Maximum number of characters per utterance; speak() splits longer text into parts.
var CHARACTER_LIMIT = 200;
// Language tag assigned to every utterance.
var lang = "en";

// Sample text, longer than CHARACTER_LIMIT, used to exercise the splitting.
var text = "MLA format follows the author-page method of in-text citation. This means that the author's last name and the page number(s) from which the quotation or paraphrase is taken must appear in the text, and a complete reference should appear on your Works Cited page. The author's name may appear either in the sentence itself or in parentheses following the quotation or paraphrase, but the page number(s) should always appear in the parentheses, not in the text of your sentence. Joe waited for the train. The train was late. Mary and Samantha took the bus.";

    // Works before the definition because speak is a hoisted function declaration.
    speak(text, lang)

    /**
     * Splits text into trimmed, non-empty parts of at most `limit` characters.
     * Split points are tried in this order: the first run of phrase delimiters
     * (: ! ? . ;), the first run of commas, the last space within the limit,
     * and finally a hard cut at the limit.
     *
     * @param {string} text - Text to split.
     * @param {number} limit - Maximum number of characters per part.
     * @returns {string[]} The parts, in speaking order.
     */
    function splitSpeechText(text, limit) {
      var maxLength = Math.max(1, limit);
      var parts = [];
      var rest = text;

      function addPart(part) {
        var trimmed = part.trim();
        if (trimmed.length > 0) {
          parts.push(trimmed);
        }
      }

      // End index (exclusive) of the first run matched by `pattern`, or -1 when
      // there is no match or the part would exceed the limit. Taking the whole
      // run keeps "..." together instead of producing lone "." parts.
      function splitPoint(pattern) {
        var found = pattern.exec(rest);
        if (found === null) {
          return -1;
        }
        var end = found.index + found[0].length;
        return end <= maxLength ? end : -1;
      }

      while (rest.length > maxLength) {
        //Split by common phrase delimiters
        var end = splitPoint(/[:!?.;]+/);

        //Couldn't split by priority characters, try commas
        if (end === -1) {
          end = splitPoint(/[,]+/);
        }

        //Couldn't split by punctuation, then we use spaces
        if (end === -1) {
          end = rest.lastIndexOf(' ', maxLength);
        }

        // No usable space either (e.g. one very long word): cut hard so the
        // loop always consumes text and cannot spin forever.
        if (end <= 0) {
          end = maxLength;
        }

        addPart(rest.substring(0, end));
        rest = rest.substring(end);
      }

      //Add the remaining text
      addPart(rest);
      return parts;
    }

    /**
     * Speaks text of any length by queueing it as several utterances of at
     * most CHARACTER_LIMIT characters each.
     *
     * @param {string} text - Text to speak.
     * @param {string} lang - Language tag assigned to every utterance.
     */
    function speak(text, lang) {

      //Support for multipart text (there is a limit on characters)
      var multipartText = splitSpeechText(text, CHARACTER_LIMIT);

      //Play multipart text
      multipartText.forEach(function (part) {

        //Create msg object
        var msg = new SpeechSynthesisUtterance();
        msg.volume = 1; // 0 to 1
        msg.rate = 1; // 0.1 to 10
        msg.pitch = 1; //0 to 2
        msg.text = part;
        // NOTE(review): custom property holding the full parts list, kept from
        // the original; presumably read by an external handler - confirm.
        msg.speak = multipartText;
        msg.lang = lang;
        // NOTE(review): OnFinishedPlaying is not defined in this snippet.
        msg.onend = self.OnFinishedPlaying;
        msg.onerror = function (e) {
          console.log('Error');
          console.log(e);
        };
        msg.onstart = function (e) {
          var curenttxt = e.currentTarget.text;
          console.log(curenttxt);
        };
        speechSynthesis.speak(msg);

      });

    }

https://jsfiddle.net/onigetoc/9r27Ltqz/

Yes, the google synthesis api will stop at some point during speaking a long text.

We can see that the onend, onpause and onerror events of SpeechSynthesisUtterance are not fired when the sudden stop happens, and neither is the speechSynthesis onerror event.

After several trials, found speechSynthesis.paused is working, and speechSynthesis.resume() can help resume the speaking.

Hence we just need to have a timer to check the pause status during the speaking, and calling speechSynthesis.resume() to continue. The interval should be small enough to prevent glitch when continuing the speak.

// Handle of the interval that polls for a paused synthesizer.
let timer = null;
// True while an utterance started by readText() is still being spoken.
let reading = false;

/**
 * Speaks `text` and, while it is being spoken, checks every 100 ms whether
 * the synthesizer reports being paused, resuming it if so.
 * Calls made while a previous text is still being read are ignored.
 */
let readText = function(text) {
    if (reading) {
        return;
    }

    // Shared teardown for the end and error handlers.
    const stopReading = () => {
        reading = false;
        clearInterval(timer);
    };

    speechSynthesis.cancel();
    if (timer) {
        clearInterval(timer);
    }

    const msg = new SpeechSynthesisUtterance();
    const availableVoices = window.speechSynthesis.getVoices();
    msg.voice = availableVoices[82];
    msg.voiceURI = 'native';
    msg.volume = 1; // 0 to 1
    msg.rate = 1.0; // 0.1 to 10
    msg.pitch = 1; //0 to 2
    msg.text = text;
    msg.lang = 'zh-TW';

    msg.onerror = () => {
        speechSynthesis.cancel();
        stopReading();
    };

    msg.onpause = (e) => {
        console.log('onpause in ' + e.elapsedTime + ' seconds.');
    };

    msg.onend = (e) => {
        console.log('onend in ' + e.elapsedTime + ' seconds.');
        stopReading();
    };

    speechSynthesis.onerror = (e) => {
        console.log('speechSynthesis onerror in ' + e.elapsedTime + ' seconds.');
        speechSynthesis.cancel();
        stopReading();
    };

    speechSynthesis.speak(msg);

    // Poll the paused flag and resume as soon as it is set.
    timer = setInterval(() => {
        if (!speechSynthesis.paused) {
            return;
        }
        console.log("#continue")
        speechSynthesis.resume();
    }, 100);

    reading = true;
}

I want to say that through Chrome Extensions and Applications, I solved this quite irritating issue through using chrome.tts, since chrome.tts allows you to speak through the browser, instead of the window which stops the talk when you close the window.

Using the below code, you can fix the above issue with large speakings:

// Speak the whole string through the extension TTS API.
chrome.tts.speak("Abnormally large string, over 250 characters, etc...");
// Call resume() every 100 ms. The interval handle is not kept, so nothing in
// this snippet ever stops the loop.
setInterval(() => { chrome.tts.resume(); }, 100);

I'm sure that will work, but I did this just to be safe:

// Text to speak.
var largeData = "";
// Cut the text into pieces of at most 250 characters. [\s\S] is used instead
// of "." so line breaks stay inside the chunks, and the "|| []" fallback
// covers match() returning null when largeData is empty.
var smallChunks = largeData.match(/[\s\S]{1,250}/g) || [];
for (var chunk of smallChunks) {
  // enqueue: true appends each chunk to the speech queue.
  chrome.tts.speak(chunk, {'enqueue': true});
}

Hope this helps someone! It helped make my application work more functionally, and epicly.

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!