Streaming input to System.Speech.Recognition.SpeechRecognitionEngine

前端 未结 5 1367
失恋的感觉
失恋的感觉 2020-11-30 06:03

I am trying to do "streaming" speech recognition in C# from a TCP socket. The problem I am having is that SpeechRecognitionEngine.SetInputToAudioStream() seems to require…

相关标签:
5条回答
  • 2020-11-30 06:37

    I got live speech recognition working by overriding the stream class:

    /// <summary>
    /// An in-memory stream backed by a fixed-size circular buffer, intended as a
    /// never-ending audio source: one thread calls <see cref="Write"/> with incoming
    /// data while another thread calls <see cref="Read"/>.
    /// </summary>
    /// <remarks>
    /// While the stream is open, <see cref="Read"/> blocks until it can return exactly
    /// the number of bytes requested; it returns fewer bytes only after the stream has
    /// been closed. <see cref="Write"/> never waits for the reader, so if the writer
    /// gets more than one buffer length ahead, unread bytes are overwritten. Choose a
    /// buffer size large enough for the expected latency.
    /// </remarks>
    class SpeechStreamer : Stream
    {
        // Guards every mutable field below. Read waits on it; Write and Dispose pulse it.
        private readonly object _sync = new object();
        private readonly byte[] _buffer;
        private readonly int _buffersize;
        private int _readposition;
        private int _writeposition;
        // True while the writer has wrapped past the end of the buffer and the reader has not.
        private bool _reset;
        private bool _closed;

        /// <summary>Creates a streamer whose circular buffer holds <paramref name="bufferSize"/> bytes.</summary>
        /// <param name="bufferSize">Capacity of the circular buffer in bytes; must be positive.</param>
        /// <exception cref="System.ArgumentOutOfRangeException"><paramref name="bufferSize"/> is zero or negative.</exception>
        public SpeechStreamer(int bufferSize)
        {
            if (bufferSize <= 0)
            {
                throw new System.ArgumentOutOfRangeException("bufferSize", "Buffer size must be positive.");
            }

            _buffersize = bufferSize;
            _buffer = new byte[_buffersize];
            _readposition = 0;
            _writeposition = 0;
        }

        public override bool CanRead
        {
            get { return true; }
        }

        public override bool CanSeek
        {
            get { return false; }
        }

        public override bool CanWrite
        {
            get { return true; }
        }

        /// <summary>Always -1: the stream deliberately reports no meaningful length.</summary>
        public override long Length
        {
            get { return -1L; }
        }

        /// <summary>Always 0; assignments are ignored because the stream is not seekable.</summary>
        public override long Position
        {
            get { return 0L; }
            set { }
        }

        /// <summary>Does nothing and returns 0; the stream is not seekable.</summary>
        public override long Seek(long offset, SeekOrigin origin)
        {
            return 0L;
        }

        /// <summary>Does nothing; the buffer size is fixed at construction.</summary>
        public override void SetLength(long value)
        {
        }

        /// <summary>
        /// Copies <paramref name="count"/> bytes from the circular buffer into
        /// <paramref name="buffer"/> starting at <paramref name="offset"/>, blocking
        /// until enough data has been written or the stream is closed.
        /// </summary>
        /// <returns>
        /// The number of bytes copied: <paramref name="count"/> while the stream is open,
        /// possibly fewer (including 0) once it has been closed.
        /// </returns>
        public override int Read(byte[] buffer, int offset, int count)
        {
            CheckRange(buffer, offset, count);

            int copied = 0;
            lock (_sync)
            {
                while (copied < count && !_closed)
                {
                    if (!_reset && _readposition >= _writeposition)
                    {
                        // Nothing unread. Monitor.Wait releases the lock until Write or
                        // Dispose pulses it; the timeout is only a safety net.
                        Monitor.Wait(_sync, 100);
                        continue;
                    }

                    // offset applies to the caller's array, never to the ring buffer.
                    buffer[offset + copied] = _buffer[_readposition];
                    _readposition++;
                    if (_readposition == _buffersize)
                    {
                        _readposition = 0;
                        _reset = false;
                    }
                    copied++;
                }
            }

            return copied;
        }

        /// <summary>
        /// Appends <paramref name="count"/> bytes of <paramref name="buffer"/>, starting at
        /// <paramref name="offset"/>, to the circular buffer and wakes any waiting reader.
        /// </summary>
        /// <exception cref="System.ObjectDisposedException">The stream has been closed.</exception>
        public override void Write(byte[] buffer, int offset, int count)
        {
            CheckRange(buffer, offset, count);

            lock (_sync)
            {
                if (_closed)
                {
                    throw new System.ObjectDisposedException("SpeechStreamer");
                }

                for (int i = offset; i < offset + count; i++)
                {
                    _buffer[_writeposition] = buffer[i];
                    _writeposition++;
                    if (_writeposition == _buffersize)
                    {
                        _writeposition = 0;
                        _reset = true;
                    }
                }

                Monitor.PulseAll(_sync);
            }
        }

        /// <summary>
        /// Marks the stream as closed and releases any thread blocked in <see cref="Read"/>.
        /// Invoked by <c>Close()</c> and <c>Dispose()</c>.
        /// </summary>
        protected override void Dispose(bool disposing)
        {
            lock (_sync)
            {
                _closed = true;
                Monitor.PulseAll(_sync);
            }

            base.Dispose(disposing);
        }

        /// <summary>Does nothing; written bytes are visible to the reader immediately.</summary>
        public override void Flush()
        {
        }

        // Validates the (buffer, offset, count) triple shared by Read and Write.
        private static void CheckRange(byte[] buffer, int offset, int count)
        {
            if (buffer == null)
            {
                throw new System.ArgumentNullException("buffer");
            }
            if (offset < 0)
            {
                throw new System.ArgumentOutOfRangeException("offset");
            }
            if (count < 0)
            {
                throw new System.ArgumentOutOfRangeException("count");
            }
            if (buffer.Length - offset < count)
            {
                throw new System.ArgumentException("offset and count exceed the length of buffer.");
            }
        }
    }
    

    ... and using an instance of that as the stream input to the SetInputToAudioStream method. As soon as the stream reports a length, or a read returns fewer bytes than were requested, the recognition engine assumes the input has finished. This class sets up a circular buffer that never finishes.

    0 讨论(0)
  • 2020-11-30 06:38

    Apparently it can't be done ("By design"!). See http://social.msdn.microsoft.com/Forums/en/netfxbcl/thread/fcf62d6d-19df-4ca9-9f1f-17724441f84e

    0 讨论(0)
  • 2020-11-30 06:51

    Have you tried wrapping the network stream in a System.IO.BufferedStream?

    // Wrap the socket's network stream in a BufferedStream and use that as the recognizer input.
    // The buffer size passed below is 8000 * 16 * 1 = 128000.
    // NOTE(review): this was described as "1 second worth of data"; that holds only if the
    // audio arrives at 128000 size units per second — confirm against formatInfo (bits vs. bytes).
    const int bufferSize = 8000 * 16 * 1;
    NetworkStream netStream = new NetworkStream(socket, true);
    BufferedStream buffStream = new BufferedStream(netStream, bufferSize);
    appRecognizer.SetInputToAudioStream(buffStream, formatInfo);
    
    0 讨论(0)
  • 2020-11-30 06:58

    I ended up buffering the input and then sending it to the speech recognition engine in successively larger chunks. For instance, I might send at first the first 0.25 seconds, then the first 0.5 seconds, then the first 0.75 seconds, and so on until I get a result. I am not sure if this is the most efficient way of going about this, but it yields satisfactory results for me.

    Best of luck, Sean

    0 讨论(0)
  • 2020-11-30 07:04

    This is my solution.

    /// <summary>
    /// Wraps the network stream of a <see cref="TcpClient"/> so that <see cref="Read"/>
    /// keeps waiting through read timeouts until the requested number of bytes has
    /// arrived, the remote side closes the connection, an I/O error occurs, or
    /// <see cref="bExit"/> is set.
    /// </summary>
    class FakeStreamer : Stream
    {
        /// <summary>
        /// Set to true to make <see cref="Read"/> leave its loop. Declared volatile
        /// because it is set by one thread while another may be inside Read.
        /// </summary>
        public volatile bool bExit = false;
        private readonly Stream stream;
        private readonly TcpClient client;

        /// <summary>Creates a streamer over the stream of <paramref name="client"/>.</summary>
        /// <param name="client">A connected client; its stream's read timeout is set to 100 ms.</param>
        /// <exception cref="ArgumentNullException"><paramref name="client"/> is null.</exception>
        public FakeStreamer(TcpClient client)
        {
            if (client == null)
            {
                throw new ArgumentNullException("client");
            }

            this.client = client;
            this.stream = client.GetStream();
            // Short timeout so Read regularly gets a chance to observe bExit.
            this.stream.ReadTimeout = 100; //100ms
        }

        public override bool CanRead
        {
            get { return stream.CanRead; }
        }

        public override bool CanSeek
        {
            get { return false; }
        }

        public override bool CanWrite
        {
            get { return stream.CanWrite; }
        }

        /// <summary>Always -1: the stream deliberately reports no meaningful length.</summary>
        public override long Length
        {
            get { return -1L; }
        }

        /// <summary>Always 0; assignments are ignored because the stream is not seekable.</summary>
        public override long Position
        {
            get { return 0L; }
            set { }
        }

        /// <summary>Does nothing and returns 0; the stream is not seekable.</summary>
        public override long Seek(long offset, SeekOrigin origin)
        {
            return 0L;
        }

        /// <summary>Forwards to the wrapped stream's SetLength.</summary>
        public override void SetLength(long value)
        {
            stream.SetLength(value);
        }

        /// <summary>
        /// Reads from the wrapped stream until <paramref name="count"/> bytes have been
        /// stored in <paramref name="buffer"/>, retrying after each read timeout.
        /// </summary>
        /// <returns>
        /// The number of bytes actually stored. This is <paramref name="count"/> unless
        /// the loop ended early because <see cref="bExit"/> was set, the remote side
        /// closed the connection, or a non-timeout I/O error occurred; 0 therefore
        /// signals end of stream.
        /// </returns>
        public override int Read(byte[] buffer, int offset, int count)
        {
            int remaining = count;
            while (remaining > 0 && !bExit)
            {
                int len;
                try
                {
                    len = stream.Read(buffer, offset, remaining);
                }
                catch (IOException e)
                {
                    // Only a genuine socket timeout is retried. Any other I/O failure
                    // (e.g. a reset connection) ends the read instead of spinning forever.
                    SocketException socketError = e.InnerException as SocketException;
                    if (socketError != null && socketError.SocketErrorCode == SocketError.TimedOut)
                    {
                        continue;
                    }
                    break;
                }
                catch (ObjectDisposedException)
                {
                    // The wrapped stream was closed while we were reading.
                    break;
                }

                if (len == 0)
                {
                    // Remote side closed the connection; report what was read so far.
                    break;
                }

                offset += len;
                remaining -= len;

                if (!client.Connected)
                {
                    break;
                }
            }

            return count - remaining;
        }

        /// <summary>Forwards to the wrapped stream's Write.</summary>
        public override void Write(byte[] buffer, int offset, int count)
        {
            stream.Write(buffer, offset, count);
        }

        /// <summary>
        /// Closes the wrapped stream. Invoked by <c>Close()</c> and <c>Dispose()</c>.
        /// </summary>
        protected override void Dispose(bool disposing)
        {
            if (disposing)
            {
                stream.Close();
            }

            base.Dispose(disposing);
        }

        /// <summary>Forwards to the wrapped stream's Flush.</summary>
        public override void Flush()
        {
            stream.Flush();
        }
    }
    

    How to Use:

    // Accept the incoming client connection and wrap it in a FakeStreamer.
    TcpClient clientSocket = ServerSocket.AcceptTcpClient();
    FakeStreamer buffStream = new FakeStreamer(clientSocket);
    ...
    // Recognizer initialization: use the socket-backed stream as the audio input.
    m_recognizer.SetInputToAudioStream(buffStream , audioFormat);
    ...
    // Recognizer shutdown: setting bExit makes FakeStreamer.Read leave its read loop.
    if (buffStream != null)
        buffStream.bExit = true;
    
    0 讨论(0)
提交回复
热议问题