SAPI: Speech to Text example

后端 未结 1 1502
孤城傲影
孤城傲影 2020-12-16 06:45

I am new to SAPI, and I would really appreciate if any of you can provide me a speech to text Hello World example in SAPI. I know MS got some examples like \"Dictation\" etc

相关标签:
1条回答
  • 2020-12-16 07:18

    I played a bit with Windows Voice Recognition using SAPI, it really isn't user friendly. Here is an example of code I wrote (in C++) :

    #include <sphelper.h>
    #include <sapi.h>
    #include <iostream>
    #include <string>
    
    const ULONGLONG grammarId = 0;
    const wchar_t* ruleName1 = L"ruleName1";
    
    int start_listening(const std::string& word);
    ISpRecoGrammar* init_grammar(ISpRecoContext* recoContext, const std::string& command);
    void get_text(ISpRecoContext* reco_context);
    void check_result(const HRESULT& result);
    
    int main(int argc, char** argv)
    {
        start_listening("Hello");
        return EXIT_SUCCESS;
    }
    
    // This function exits when the word passed as parameter is said by the user
    int start_listening(const std::string& word)
    {
        // Initialize COM library
        if (FAILED(::CoInitialize(nullptr))) {
            return EXIT_FAILURE;
        }
    
        std::cout << "You should start Windows Recognition" << std::endl;
        std::cout << "Just say \""<< word << "\"" << std::endl;
    
        HRESULT hr;
    
        ISpRecognizer* recognizer;
        hr = CoCreateInstance(CLSID_SpSharedRecognizer,
            nullptr, CLSCTX_ALL, IID_ISpRecognizer,
            reinterpret_cast<void**>(&recognizer));
        check_result(hr);
    
        ISpRecoContext* recoContext;
        hr = recognizer->CreateRecoContext(&recoContext);
        check_result(hr);
    
        // Disable context
        hr = recoContext->Pause(0);
        check_result(hr);
    
        ISpRecoGrammar* recoGrammar = init_grammar(recoContext, word);
    
        hr = recoContext->SetNotifyWin32Event();
        check_result(hr);
    
        HANDLE handleEvent;
        handleEvent = recoContext->GetNotifyEventHandle();
        if(handleEvent == INVALID_HANDLE_VALUE) {
            check_result(E_FAIL);
        }
    
        ULONGLONG interest;
        interest = SPFEI(SPEI_RECOGNITION);
        hr = recoContext->SetInterest(interest, interest);
        check_result(hr);
    
        // Activate Grammar
        hr = recoGrammar->SetRuleState(ruleName1, 0, SPRS_ACTIVE);
        check_result(hr);
    
        // Enable context
        hr = recoContext->Resume(0);
        check_result(hr);
    
        // Wait for reco
        HANDLE handles[1];
        handles[0] = handleEvent;
        WaitForMultipleObjects(1, handles, FALSE, INFINITE);
        get_text(recoContext);
    
        std::cout << "Hello user" << std::endl;
    
        recoGrammar->Release();
        ::CoUninitialize();
    
        system("PAUSE");
        return EXIT_SUCCESS;
    }
    
    /**
    * Create and initialize the Grammar.
    * Create a rule for the grammar.
    * Add word to the grammar.
    */
    ISpRecoGrammar* init_grammar(ISpRecoContext* recoContext, const std::string& command)
    {
        HRESULT hr;
        SPSTATEHANDLE sate;
    
        ISpRecoGrammar* recoGrammar;
        hr = recoContext->CreateGrammar(grammarId, &recoGrammar);
        check_result(hr);
    
        WORD langId = MAKELANGID(LANG_FRENCH, SUBLANG_FRENCH);
        hr = recoGrammar->ResetGrammar(langId);
        check_result(hr);
        // TODO: Catch error and use default langId => GetUserDefaultUILanguage()
    
        // Create rules
        hr = recoGrammar->GetRule(ruleName1, 0, SPRAF_TopLevel | SPRAF_Active, true, &sate);
        check_result(hr);
    
        // Add a word
        const std::wstring commandWstr = std::wstring(command.begin(), command.end());
        hr = recoGrammar->AddWordTransition(sate, NULL, commandWstr.c_str(), L" ", SPWT_LEXICAL, 1, nullptr);
        check_result(hr);
    
        // Commit changes
        hr = recoGrammar->Commit(0);
        check_result(hr);
    
        return recoGrammar;
    }
    
    void get_text(ISpRecoContext* reco_context)
    {
        const ULONG maxEvents = 10;
        SPEVENT events[maxEvents];
    
        ULONG eventCount;
        HRESULT hr;
        hr = reco_context->GetEvents(maxEvents, events, &eventCount);
    
        // Warning hr equal S_FALSE if everything is OK 
        // but eventCount < requestedEventCount
        if(!(hr == S_OK || hr == S_FALSE)) {
            check_result(hr);
        }
    
        ISpRecoResult* recoResult;
        recoResult = reinterpret_cast<ISpRecoResult*>(events[0].lParam);
    
        wchar_t* text;
        hr = recoResult->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, FALSE, &text, NULL);
        check_result(hr);
    
        CoTaskMemFree(text);
    }
    
    void check_result(const HRESULT& result)
    {
        if (result == S_OK) {
            return;
        }
    
        std::string message;
    
        switch(result) {
    
        case E_INVALIDARG:
            message = "One or more arguments are invalids.";
    
        case E_ACCESSDENIED:
            message = "Acces Denied.";
    
        case E_NOINTERFACE:
            message = "Interface does not exist.";
    
        case E_NOTIMPL:
            message = "Not implemented method.";
    
        case E_OUTOFMEMORY:
            message = "Out of memory.";
    
        case E_POINTER:
            message = "Invalid pointer.";
    
        case E_UNEXPECTED:
            message = "Unexpecter error.";
    
        case E_FAIL:
            message = "Failure";
    
        default:
            message = "Unknown : " + std::to_string(result);
        }
    
        throw std::exception(message.c_str());
    }
    

    As I said, it's a bit complicated. I think you should wrap all that code into a library to make it easier to use.

    0 讨论(0)
提交回复
热议问题