Streaming input to System.Speech.Recognition.SpeechRecognitionEngine

前端未结

关注

 5  1367

I am trying to do \"streaming\" speech recognition in C# from a TCP socket. The problem I am having is that SpeechRecognitionEngine.SetInputToAudioStream() seems to require

相关标签:

5条回答

鱼传尺愫

2020-11-30 06:37

I got live speech recognition working by overriding the stream class:

class SpeechStreamer : Stream
{
    private AutoResetEvent _writeEvent;
    private List<byte> _buffer;
    private int _buffersize;
    private int _readposition;
    private int _writeposition;
    private bool _reset;

    public SpeechStreamer(int bufferSize)
    {
        _writeEvent = new AutoResetEvent(false);
         _buffersize = bufferSize;
         _buffer = new List<byte>(_buffersize);
         for (int i = 0; i < _buffersize;i++ )
             _buffer.Add(new byte());
        _readposition = 0;
        _writeposition = 0;
    }

    public override bool CanRead
    {
        get { return true; }
    }

    public override bool CanSeek
    {
        get { return false; }
    }

    public override bool CanWrite
    {
        get { return true; }
    }

    public override long Length
    {
        get { return -1L; }
    }

    public override long Position
    {
        get { return 0L; }
        set {  }
    }

    public override long Seek(long offset, SeekOrigin origin)
    {
        return 0L;
    }

    public override void SetLength(long value)
    {

    }

    public override int Read(byte[] buffer, int offset, int count)
    {
        int i = 0;
        while (i<count && _writeEvent!=null)
        {
            if (!_reset && _readposition >= _writeposition)
            {
                _writeEvent.WaitOne(100, true);
                continue;
            }
            buffer[i] = _buffer[_readposition+offset];
            _readposition++;
            if (_readposition == _buffersize)
            {
                _readposition = 0;
                _reset = false;
            }
            i++;
        }

        return count;
    }

    public override void Write(byte[] buffer, int offset, int count)
    {
        for (int i = offset; i < offset+count; i++)
        {
            _buffer[_writeposition] = buffer[i];
            _writeposition++;
            if (_writeposition == _buffersize)
            {
                _writeposition = 0;
                _reset = true;
            }
        }
        _writeEvent.Set();

    }

    public override void Close()
    {
        _writeEvent.Close();
        _writeEvent = null;
        base.Close();
    }

    public override void Flush()
    {

    }
}

... and using an instance of that as the stream input to the SetInputToAudioStream method. As soon as the stream returns a length or the returned count is less than that requested the recognition engine thinks the input has finished. This sets up a circular buffer that never finishes.

0 讨论(0)

栀梦

2020-11-30 06:38

Apparently it can't be done ("By design"!). See http://social.msdn.microsoft.com/Forums/en/netfxbcl/thread/fcf62d6d-19df-4ca9-9f1f-17724441f84e

0 讨论(0)
发布评论:

提交评论
- 加载中...

失恋的感觉

2020-11-30 06:51

Have you tried wrapping the network stream in a System.IO.BufferedStream?

NetworkStream netStream = new NetworkStream(socket,true);
BufferedStream buffStream = new BufferedStream(netStream, 8000*16*1); // buffers 1 second worth of data
appRecognizer.SetInputToAudioStream(buffStream, formatInfo);

0 讨论(0)

盖世英雄少女心

2020-11-30 06:58

I ended up buffering the input and then sending it to the speech recognition engine in successively larger chunks. For instance, I might send at first the first 0.25 seconds, then the first 0.5 seconds, then the first 0.75 seconds, and so on until I get a result. I am not sure if this is the most efficient way of going about this, but it yields satisfactory results for me.

Best of luck, Sean

0 讨论(0)
发布评论:

提交评论
- 加载中...

半阙折子戏

2020-11-30 07:04

This is my solution.

class FakeStreamer : Stream
{
    public bool bExit = false;
    Stream stream;
    TcpClient client;
    public FakeStreamer(TcpClient client)
    {
        this.client = client;
        this.stream = client.GetStream();
        this.stream.ReadTimeout = 100; //100ms
    }
    public override bool CanRead
    {
        get { return stream.CanRead; }
    }

    public override bool CanSeek
    {
        get { return false; }
    }

    public override bool CanWrite
    {
        get { return stream.CanWrite; }
    }

    public override long Length
    {
        get { return -1L; }
    }

    public override long Position
    {
        get { return 0L; }
        set { }
    }
    public override long Seek(long offset, SeekOrigin origin)
    {
        return 0L;
    }

    public override void SetLength(long value)
    {
        stream.SetLength(value);
    }
    public override int Read(byte[] buffer, int offset, int count)
    {
        int len = 0, c = count;
        while (c > 0 && !bExit)
        {
            try
            {
                len = stream.Read(buffer, offset, c);
            }
            catch (Exception e)
            {
                if (e.HResult == -2146232800) // Timeout
                {
                    continue;
                }
                else
                {
                    //Exit read loop
                    break;
                }
            }
            if (!client.Connected || len == 0)
            {
                //Exit read loop
                return 0;
            }
            offset += len;
            c -= len;
        }
        return count;
    }

    public override void Write(byte[] buffer, int offset, int count)
    {
        stream.Write(buffer,offset,count);
    }

    public override void Close()
    {
        stream.Close();
        base.Close();
    }

    public override void Flush()
    {
        stream.Flush();
    }
}

How to Use:

//client connect in
TcpClient clientSocket = ServerSocket.AcceptTcpClient();
FakeStreamer buffStream = new FakeStreamer(clientSocket);
...
//recognizer init
m_recognizer.SetInputToAudioStream(buffStream , audioFormat);
...
//recognizer end
if (buffStream != null)
    buffStream.bExit = true;

0 讨论(0)