I am trying to do \"streaming\" speech recognition in C# from a TCP socket. The problem I am having is that SpeechRecognitionEngine.SetInputToAudioStream() seems to require
I got live speech recognition working by overriding the stream class:
class SpeechStreamer : Stream
{
private AutoResetEvent _writeEvent;
private List<byte> _buffer;
private int _buffersize;
private int _readposition;
private int _writeposition;
private bool _reset;
public SpeechStreamer(int bufferSize)
{
_writeEvent = new AutoResetEvent(false);
_buffersize = bufferSize;
_buffer = new List<byte>(_buffersize);
for (int i = 0; i < _buffersize;i++ )
_buffer.Add(new byte());
_readposition = 0;
_writeposition = 0;
}
public override bool CanRead
{
get { return true; }
}
public override bool CanSeek
{
get { return false; }
}
public override bool CanWrite
{
get { return true; }
}
public override long Length
{
get { return -1L; }
}
public override long Position
{
get { return 0L; }
set { }
}
public override long Seek(long offset, SeekOrigin origin)
{
return 0L;
}
public override void SetLength(long value)
{
}
public override int Read(byte[] buffer, int offset, int count)
{
int i = 0;
while (i<count && _writeEvent!=null)
{
if (!_reset && _readposition >= _writeposition)
{
_writeEvent.WaitOne(100, true);
continue;
}
buffer[i] = _buffer[_readposition+offset];
_readposition++;
if (_readposition == _buffersize)
{
_readposition = 0;
_reset = false;
}
i++;
}
return count;
}
public override void Write(byte[] buffer, int offset, int count)
{
for (int i = offset; i < offset+count; i++)
{
_buffer[_writeposition] = buffer[i];
_writeposition++;
if (_writeposition == _buffersize)
{
_writeposition = 0;
_reset = true;
}
}
_writeEvent.Set();
}
public override void Close()
{
_writeEvent.Close();
_writeEvent = null;
base.Close();
}
public override void Flush()
{
}
}
... and using an instance of that as the stream input to the SetInputToAudioStream method. As soon as the stream returns a length or the returned count is less than that requested the recognition engine thinks the input has finished. This sets up a circular buffer that never finishes.
Apparently it can't be done ("By design"!). See http://social.msdn.microsoft.com/Forums/en/netfxbcl/thread/fcf62d6d-19df-4ca9-9f1f-17724441f84e
Have you tried wrapping the network stream in a System.IO.BufferedStream?
NetworkStream netStream = new NetworkStream(socket,true);
BufferedStream buffStream = new BufferedStream(netStream, 8000*16*1); // buffers 1 second worth of data
appRecognizer.SetInputToAudioStream(buffStream, formatInfo);
I ended up buffering the input and then sending it to the speech recognition engine in successively larger chunks. For instance, I might send at first the first 0.25 seconds, then the first 0.5 seconds, then the first 0.75 seconds, and so on until I get a result. I am not sure if this is the most efficient way of going about this, but it yields satisfactory results for me.
Best of luck, Sean
This is my solution.
class FakeStreamer : Stream
{
public bool bExit = false;
Stream stream;
TcpClient client;
public FakeStreamer(TcpClient client)
{
this.client = client;
this.stream = client.GetStream();
this.stream.ReadTimeout = 100; //100ms
}
public override bool CanRead
{
get { return stream.CanRead; }
}
public override bool CanSeek
{
get { return false; }
}
public override bool CanWrite
{
get { return stream.CanWrite; }
}
public override long Length
{
get { return -1L; }
}
public override long Position
{
get { return 0L; }
set { }
}
public override long Seek(long offset, SeekOrigin origin)
{
return 0L;
}
public override void SetLength(long value)
{
stream.SetLength(value);
}
public override int Read(byte[] buffer, int offset, int count)
{
int len = 0, c = count;
while (c > 0 && !bExit)
{
try
{
len = stream.Read(buffer, offset, c);
}
catch (Exception e)
{
if (e.HResult == -2146232800) // Timeout
{
continue;
}
else
{
//Exit read loop
break;
}
}
if (!client.Connected || len == 0)
{
//Exit read loop
return 0;
}
offset += len;
c -= len;
}
return count;
}
public override void Write(byte[] buffer, int offset, int count)
{
stream.Write(buffer,offset,count);
}
public override void Close()
{
stream.Close();
base.Close();
}
public override void Flush()
{
stream.Flush();
}
}
How to Use:
//client connect in
TcpClient clientSocket = ServerSocket.AcceptTcpClient();
FakeStreamer buffStream = new FakeStreamer(clientSocket);
...
//recognizer init
m_recognizer.SetInputToAudioStream(buffStream , audioFormat);
...
//recognizer end
if (buffStream != null)
buffStream.bExit = true;