Imports NAudio.Wave
Imports MathNet.Numerics.IntegralTransforms
Imports System.Numerics
Imports TensorFlow
Imports System.IO
' Main form of the piano-note recogniser.
'
' Button1 starts live microphone capture and classifies each captured window.
' Button7 exports MFCC features and one-hot labels of the training WAV files.
' Button8 runs the voice-activity detector over a test file.
' Button9 classifies a test file window by window and prints the result.
'
' The controls (Button1, Button7, Button8, Button9, Label1) are referenced
' through Handles clauses / by name and are declared outside this file section.
Public Class Form1
    ' Samples per analysed window. With the capture settings used below
    ' (16000 Hz * 128 ms * 2 bytes / 1000 = 4096 bytes) one capture buffer
    ' holds exactly this many 16-bit samples.
    Private Const SamplesPerWindow As Integer = 2048
    ' Framing used for feature extraction: frame length 512, frame shift 256.
    Private Const FrameLength As Integer = 512
    Private Const FrameShift As Integer = 256
    ' Values per frame returned by the MFCC extractor (its default MFCCNum).
    Private Const CoefficientsPerFrame As Integer = 26
    ' Width of the one-hot label written by Button7 (matches the 8 model outputs).
    Private Const ClassCount As Integer = 8
    ' Minimum winning score required before the live result is displayed.
    Private Const ConfidenceThreshold As Double = 0.9

    ' Recording device.
    Dim wav As New WaveInEvent
    ' Most recent window of captured samples.
    Dim WavData16(SamplesPerWindow - 1) As Int16
    Dim mfcc As New MFCC
    Dim piano As New TensorflowPiano
    ' True once recording has been started successfully.
    Private isRecording As Boolean = False

    ' Starts microphone capture. Repeated clicks are ignored so the callback is
    ' not registered twice and StartRecording is not called on a running device.
    Private Sub Button1_Click(sender As Object, e As EventArgs) Handles Button1.Click
        If isRecording Then
            Return
        End If

        ' Buffer size in bytes = sample rate * milliseconds * bytes per sample / 1000.
        wav.BufferMilliseconds = 128
        ' Originally 12; the author reduced the buffer count so recording is not interrupted.
        wav.NumberOfBuffers = 6
        wav.WaveFormat = New WaveFormat(16000, 16, 1)

        ' Remove first so a previous failed start cannot leave a duplicate handler.
        RemoveHandler wav.DataAvailable, AddressOf waveIn_DataAvailable
        AddHandler wav.DataAvailable, AddressOf waveIn_DataAvailable
        wav.StartRecording()
        isRecording = True
    End Sub

    ' Capture callback: classifies the delivered buffer and shows the winning
    ' class index when its score exceeds ConfidenceThreshold.
    Private Sub waveIn_DataAvailable(sender As Object, e As WaveInEventArgs)
        ' Never copy more than the window can hold; clear the tail of a short
        ' buffer so samples from the previous callback are not classified again.
        Dim capacityBytes As Integer = WavData16.Length * 2
        Dim byteCount As Integer = Math.Min(e.BytesRecorded, capacityBytes)
        Buffer.BlockCopy(e.Buffer, 0, WavData16, 0, byteCount)
        Dim filledSamples As Integer = (byteCount + 1) \ 2
        If filledSamples < WavData16.Length Then
            Array.Clear(WavData16, filledSamples, WavData16.Length - filledSamples)
        End If

        Dim confidence As Single
        Dim note As Integer = Classify(WavData16, confidence)
        If note >= 0 AndAlso confidence > ConfidenceThreshold Then
            ' Label1 is updated through Invoke (presumably because this callback
            ' does not run on the UI thread - confirm against NAudio).
            If Me.IsHandleCreated AndAlso Not Me.IsDisposed Then
                Me.Invoke(New ShowText(AddressOf ShowTxt), note.ToString)
            End If
        End If
    End Sub

    ' Extracts the features of one window, runs the model and returns the index
    ' of the highest score (-1 when there is nothing to classify).
    ' confidence receives the winning score. All model outputs are compared.
    Private Function Classify(samples() As Int16, ByRef confidence As Single) As Integer
        confidence = 0
        Dim features As Single() = WavTMfcc(samples)
        If features.Length = 0 Then
            Return -1
        End If

        ' The model input is a 1 x N matrix (N = 182 for a 2048-sample window).
        Dim input(0, features.Length - 1) As Single
        For i As Integer = 0 To features.Length - 1
            input(0, i) = features(i)
        Next

        Dim scores() As Single = piano.Detect(input)
        If scores Is Nothing OrElse scores.Length = 0 Then
            Return -1
        End If

        Dim best As Integer = 0
        For i As Integer = 1 To scores.Length - 1
            If scores(i) > scores(best) Then
                best = i
            End If
        Next
        confidence = scores(best)
        Return best
    End Function

    Public Delegate Sub ShowText(txt As String)

    ' Writes txt into Label1.
    Public Sub ShowTxt(txt As String)
        Label1.Text = txt
    End Sub

    ' Splits data into frames of 512 samples with a shift of 256 and
    ' concatenates the 26 values the MFCC extractor returns for each frame.
    ' A 2048-sample window yields 7 frames, i.e. 182 values.
    ' Returns an empty array when data is shorter than one frame.
    Public Function WavTMfcc(data() As Int16) As Single()
        If data Is Nothing Then
            Throw New ArgumentNullException("data")
        End If
        If data.Length < FrameLength Then
            Return New Single() {}
        End If

        ' Integer division: only frames that fit completely are processed.
        Dim frameCount As Integer = (data.Length - FrameLength) \ FrameShift + 1
        Dim features(frameCount * CoefficientsPerFrame - 1) As Single
        Dim frame(FrameLength - 1) As Single

        For i As Integer = 0 To frameCount - 1
            ' Array.Copy performs the widening Int16 -> Single conversion.
            Array.Copy(data, i * FrameShift, frame, 0, FrameLength)
            mfcc.Hamming_window(frame)
            Dim spectrum As Complex() = mfcc.FFT(frame)
            Dim coefficients As Single() = mfcc.MFCC(spectrum)
            Array.Copy(coefficients, 0, features, i * CoefficientsPerFrame, CoefficientsPerFrame)
        Next
        Return features
    End Function

    ' Reads up to maxSamples 16-bit samples from the start of a WAV file
    ' (the whole file when maxSamples is negative). When the file is shorter
    ' than requested the remaining samples stay zero. The reader is always closed.
    Private Shared Function ReadSamples(path As String, Optional maxSamples As Integer = -1) As Int16()
        Using reader As New WaveFileReader(path)
            Dim sampleCount As Integer = maxSamples
            If sampleCount < 0 Then
                sampleCount = CInt(reader.Length \ 2)
            End If

            Dim bytes(sampleCount * 2 - 1) As Byte
            Dim total As Integer = 0
            While total < bytes.Length
                Dim got As Integer = reader.Read(bytes, total, bytes.Length - total)
                If got <= 0 Then
                    Exit While
                End If
                total += got
            End While

            Dim samples(sampleCount - 1) As Int16
            Buffer.BlockCopy(bytes, 0, samples, 0, bytes.Length)
            Return samples
        End Using
    End Function

    ' Exports the training set: for every *.wav below d:\piano writes one line of
    ' comma-separated features and one line with the one-hot label to d:\mfcc.txt.
    ' The label is the first character of the file name (Val semantics: a
    ' non-digit yields 0). Files whose label is outside 0..7 are skipped.
    Private Sub Button7_Click(sender As Object, e As EventArgs) Handles Button7.Click
        Dim invariant As System.Globalization.CultureInfo = System.Globalization.CultureInfo.InvariantCulture
        Dim sourceDir As New DirectoryInfo("d:\piano")

        Using writer As New StreamWriter("d:\mfcc.txt", False)
            For Each wavFile As FileInfo In sourceDir.GetFiles("*.wav", SearchOption.AllDirectories)
                Dim label As Integer = CInt(Val(Mid(wavFile.Name, 1, 1)))
                If label < 0 OrElse label >= ClassCount Then
                    Debug.Print("skipped (label out of range): " & wavFile.FullName)
                    Continue For
                End If

                ' Only the first window of each file is used for training.
                Dim samples As Int16() = ReadSamples(wavFile.FullName, SamplesPerWindow)
                Dim features As Single() = WavTMfcc(samples)

                ' Invariant culture keeps "." as decimal separator so the
                ' comma-separated line stays parseable on every machine.
                Dim featureText As String() = Array.ConvertAll(features, Function(v) v.ToString(invariant))

                Dim oneHot(ClassCount - 1) As Integer
                oneHot(label) = 1

                writer.WriteLine(String.Join(",", featureText))
                writer.WriteLine(String.Join(",", oneHot))
            Next
        End Using
    End Sub

    ' Voice-activity detection using an exponential moving average of the
    ' squared samples: v = 0.1 * x^2 + (1 - 0.1) * v_previous.
    ' A region starts when the average rises above 1024^2 and ends when it falls
    ' below 100^2. Each region is returned as Point(start index, end index).
    ' A region still open at the end of data is closed at the last sample.
    Public Function VAD(data As Int16()) As List(Of Point)
        Const smoothing As Double = 0.1
        Const onsetEnergy As Double = 1024.0 * 1024.0
        Const releaseEnergy As Double = 100.0 * 100.0

        Dim regions As New List(Of Point)
        If data Is Nothing Then
            Return regions
        End If

        Dim energy As Double = 0
        Dim active As Boolean = False
        Dim startIndex As Integer = 0

        For i As Integer = 0 To data.Length - 1
            Dim sample As Double = data(i)
            energy = smoothing * sample * sample + (1 - smoothing) * energy

            If Not active Then
                If energy > onsetEnergy Then
                    active = True
                    startIndex = i
                End If
            ElseIf energy < releaseEnergy Then
                regions.Add(New Point(startIndex, i))
                active = False
            End If
        Next

        If active Then
            regions.Add(New Point(startIndex, data.Length - 1))
        End If
        Return regions
    End Function

    ' Runs the voice-activity detector over d:\d00.wav and prints the regions.
    Private Sub Button8_Click(sender As Object, e As EventArgs) Handles Button8.Click
        Dim samples As Int16() = ReadSamples("d:\d00.wav")
        For Each region As Point In VAD(samples)
            Debug.Print(region.X & "-" & region.Y)
        Next
    End Sub

    ' Classifies d:\testpiano.wav in consecutive 2048-sample windows and prints
    ' "class:score" for each complete window.
    Private Sub Button9_Click(sender As Object, e As EventArgs) Handles Button9.Click
        Dim samples As Int16() = ReadSamples("d:\testpiano.wav")
        Dim window(SamplesPerWindow - 1) As Int16

        For k As Integer = 0 To samples.Length \ SamplesPerWindow - 1
            Array.Copy(samples, k * SamplesPerWindow, window, 0, SamplesPerWindow)
            Dim confidence As Single
            Dim note As Integer = Classify(window, confidence)
            Debug.Print(note & ":" & confidence)
        Next
    End Sub

    ' Intentionally empty; kept because it is bound to the form's Load event.
    Private Sub Form1_Load(sender As Object, e As EventArgs) Handles MyBase.Load
    End Sub
End Class
在 VB 中提取训练数据的 MFCC 特征并保存为 TXT 文件,然后在 Python 脚本中进行训练。
训练脚本(.py):
import tensorflow as tf
import librosa
import numpy as np
from tensorflow.python.framework.graph_util import convert_variables_to_constants
# Trains a single-layer softmax classifier on the feature file exported by the
# VB program and saves the frozen graph as d:/piano.pb.
#
# NOTE(review): the pasted original had lost all indentation. The block
# structure below is reconstructed; in particular the final dumps are placed
# after the training loop - confirm against the author's script.

FEATURE_DIM = 182   # values per sample (7 frames x 26 values)
NUM_CLASSES = 8     # width of the one-hot label line


def load_dataset(path):
    """Read alternating feature / label lines from ``path``.

    Each feature line is followed by its one-hot label line; both are
    comma-separated numbers. Blank lines are ignored.

    Returns:
        (features, labels) as two lists of 1-D numpy arrays.

    Raises:
        ValueError: if a feature line has no label line, or a line does not
            have the expected number of values.
    """
    # The file is closed even if parsing fails.
    with open(path, "r") as handle:
        rows = [line.strip() for line in handle if line.strip()]
    if len(rows) % 2 != 0:
        raise ValueError("%s: feature line without a matching label line" % path)

    features, labels = [], []
    for feature_row, label_row in zip(rows[0::2], rows[1::2]):
        # The original used eval() on each line; the values are parsed as
        # plain numbers instead so file contents are never executed as code.
        xd = np.array([float(tok) for tok in feature_row.split(",")])
        yd = np.array([float(tok) for tok in label_row.split(",")])
        if xd.shape[0] != FEATURE_DIM or yd.shape[0] != NUM_CLASSES:
            raise ValueError("%s: expected %d features and %d labels, got %d and %d"
                             % (path, FEATURE_DIM, NUM_CLASSES, xd.shape[0], yd.shape[0]))
        features.append(xd)
        labels.append(yd)
    return features, labels


xdata, ydata = load_dataset("d:/mfcc.txt")

# The node names 'input' and 'out' are looked up by the VB inference code.
x = tf.placeholder("float32", [None, FEATURE_DIM], name='input')
w = tf.Variable(tf.truncated_normal([FEATURE_DIM, NUM_CLASSES], stddev=0.1))
b = tf.Variable(tf.truncated_normal(shape=[NUM_CLASSES], stddev=0.1, dtype=tf.float32))
y = tf.nn.softmax(tf.matmul(x, w) + b, name='out')
y_ = tf.placeholder("float32", [None, NUM_CLASSES])

# Cross-entropy. The softmax output is clipped away from 0 so that a saturated
# prediction gives a large finite loss instead of log(0) -> NaN.
loss = -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)))
train = tf.train.GradientDescentOptimizer(1e-5).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    feed = {x: xdata, y_: ydata}
    for i in range(20000):
        sess.run(train, feed_dict=feed)
        if i % 100 == 0:
            # Report the current loss (the original printed the step index twice).
            print("step %d,%g" % (i, sess.run(loss, feed_dict=feed)))

    # Final dump: labels, loss and predictions on the training set.
    print(sess.run(y_, feed_dict={y_: ydata}))
    print(sess.run(loss, feed_dict=feed))
    print(sess.run(y, feed_dict={x: xdata}))

    # Save as .pb: freeze the trained variables into constants and write the graph.
    frozen = convert_variables_to_constants(sess, sess.graph_def, ['out'])
    tf.train.write_graph(frozen, 'd:/', 'piano.pb', as_text=False)
在 VB 中加载并使用训练好的模型(piano.pb):
Imports System.IO
Imports System.Numerics
Imports TensorFlow
'Install-Package TensorFlowSharp
' Wraps a frozen TensorFlow graph: loads it once and runs it on feature matrices.
' The graph must contain an input node named "input" and an output node named "out".
' Call Dispose when the model is no longer needed to release the session and graph.
Public Class TensorflowPiano
    Implements IDisposable

    Private ReadOnly graph As TFGraph
    Private ReadOnly session As TFSession

    ' Loads the model.
    ' modelPath: path of the frozen graph file; defaults to the location the
    ' training script writes to.
    Public Sub New(Optional modelPath As String = "d:\piano.pb")
        Dim model As Byte() = File.ReadAllBytes(modelPath)
        graph = New TFGraph()
        Try
            graph.Import(model, "")
            session = New TFSession(graph)
        Catch
            ' Do not keep the graph alive when loading fails.
            graph.Dispose()
            Throw
        End Try
    End Sub

    ' Runs the model.
    ' Data: feature matrix, one sample per row.
    ' Returns the "out" values of the first row.
    ' All per-call state is local, so the method keeps no state between calls.
    Public Function Detect(Data(,) As Single) As Single()
        If Data Is Nothing Then
            Throw New ArgumentNullException("Data")
        End If

        Using input As New TFTensor(Data)
            Dim runner As TFSession.Runner = session.GetRunner()
            runner.AddInput(graph("input")(0), input).Fetch(graph("out")(0))
            Dim outputs As TFTensor() = runner.Run()
            Try
                ' GetValue(True) returns the tensor as a jagged array; element 0
                ' holds the scores of the first input row.
                Dim rows As Array = DirectCast(outputs(0).GetValue(True), Array)
                Return DirectCast(rows.GetValue(0), Single())
            Finally
                ' Release the result tensors of this call.
                For Each tensor As TFTensor In outputs
                    tensor.Dispose()
                Next
            End Try
        End Using
    End Function

    ' Releases the session and the graph.
    Public Sub Dispose() Implements IDisposable.Dispose
        If session IsNot Nothing Then
            session.Dispose()
        End If
        If graph IsNot Nothing Then
            graph.Dispose()
        End If
    End Sub
End Class
MFCC特征提取:
Imports System.Numerics
Imports MathNet.Numerics.IntegralTransforms
' Mel filter-bank feature extraction for fixed-size frames.
'
' Typical use per frame: Hamming_window(frame), FFT(frame), MFCC(spectrum).
' By default MFCC() returns the log mel filter-bank energies (the values the
' existing model was trained on); pass applyDct:=True to get cepstral coefficients.
Public Class MFCC
    ' Triangular mel filter weights: H(filter index, FFT bin).
    ' Size is (MFCCNum + 1) x (FrameSize \ 2 + 1).
    Public H As Double(,)
    Private MFCCNum As Integer
    ' Frame length in samples (512 by default).
    Private FrameSize As Integer

    ' Number of triangular filters the mel axis is divided into.
    Private Const filterNum As Integer = 40

    ' Builds the mel filter bank.
    ' framesize:  frame length in samples.
    ' MFCCNum:    number of values returned per frame (0 .. 39, because filter
    '             row MFCCNum is also built and needs filter point MFCCNum + 2).
    ' sampleRate: sampling rate in Hz.
    ' Note: the lowest filter frequency (freMin) is fixed at 0 Hz.
    Public Sub New(Optional framesize As Integer = 512, Optional MFCCNum As Integer = 26, Optional sampleRate As Integer = 16000)
        If MFCCNum < 0 OrElse MFCCNum > filterNum - 1 Then
            Throw New ArgumentOutOfRangeException("MFCCNum", "MFCCNum must be between 0 and " & (filterNum - 1).ToString())
        End If
        If sampleRate <= 0 Then
            Throw New ArgumentOutOfRangeException("sampleRate", "sampleRate must be positive")
        End If

        Me.MFCCNum = MFCCNum
        Me.FrameSize = framesize
        Dim halfSize As Integer = Me.FrameSize \ 2
        H = New Double(MFCCNum, halfSize) {}

        ' Convert the frequency range to the mel scale and split it evenly.
        Dim freMax As Double = sampleRate / 2
        Dim freMin As Double = 0
        Dim melFremax As Double = 1125 * Math.Log(1 + freMax / 700)
        Dim melFremin As Double = 1125 * Math.Log(1 + freMin / 700)
        Dim melStep As Double = (melFremax - melFremin) / (filterNum + 1)

        ' filterNum filters need filterNum + 2 boundary points.
        Dim filter_points(filterNum + 1) As Integer
        For i As Integer = 0 To filterNum + 1
            Dim mel As Double = melFremin + melStep * i
            ' Convert the mel value back to Hz, then to an FFT bin index.
            Dim hz As Double = 700 * (Math.Exp(mel / 1125) - 1)
            filter_points(i) = CInt(Math.Floor((Me.FrameSize + 1) * hz / sampleRate))
        Next

        ' Build the triangular filters: rising from lower to peak, falling from peak to upper.
        For i As Integer = 0 To MFCCNum
            Dim lower As Integer = filter_points(i)
            Dim peak As Integer = filter_points(i + 1)
            Dim upper As Integer = filter_points(i + 2)
            For j As Integer = 0 To halfSize - 1
                If j < lower OrElse j > upper Then
                    H(i, j) = 0
                ElseIf j >= peak Then
                    ' When peak and upper share a bin the filter has no falling
                    ' edge; use the peak weight instead of 0/0.
                    H(i, j) = If(upper = peak, 1.0, CDbl(upper - j) / (upper - peak))
                Else
                    ' lower <= j < peak, so the denominator is never zero.
                    H(i, j) = CDbl(j - lower) / (peak - lower)
                End If
            Next
        Next
    End Sub

    ' Applies a Hamming window to WaveData in place:
    ' w(j) = 0.54 - 0.46 * cos(2 * pi * j / length).
    Public Sub Hamming_window(WaveData() As Single)
        If WaveData Is Nothing Then
            Throw New ArgumentNullException("WaveData")
        End If
        Dim len As Integer = WaveData.Length
        ' Kept in single precision so results match previously exported features.
        Dim omega As Single = CSng(2.0 * Math.PI / len)
        For j As Integer = 0 To len - 1
            WaveData(j) = CSng((0.54 - 0.46 * Math.Cos(omega * j)) * WaveData(j))
        Next
    End Sub

    ' Returns the forward Fourier transform of WaveData
    ' (MathNet Fourier.Forward with FourierOptions.Matlab).
    Public Function FFT(WaveData() As Single) As Complex()
        Dim spectrum(WaveData.Length - 1) As Complex
        For i As Integer = 0 To WaveData.Length - 1
            spectrum(i) = WaveData(i)
        Next
        Fourier.Forward(spectrum, FourierOptions.Matlab)
        Return spectrum
    End Function

    ' Computes MFCCNum features from a frame spectrum.
    ' fft:      spectrum of one frame; at least FrameSize \ 2 bins are read.
    ' applyDct: False (default) returns the natural log of each mel filter
    '           energy, exactly as before. True additionally applies an
    '           unnormalised DCT-II and returns the cepstral coefficients.
    '           A model trained on the default output must be retrained
    '           before switching this on.
    Public Function MFCC(fft() As Complex, Optional applyDct As Boolean = False) As Single()
        If fft Is Nothing Then
            Throw New ArgumentNullException("fft")
        End If
        Dim halfSize As Integer = Me.FrameSize \ 2
        If fft.Length < halfSize Then
            Throw New ArgumentException("fft must contain at least " & halfSize.ToString() & " bins", "fft")
        End If

        ' Log mel filter-bank energies.
        Dim S As Single() = New Single(MFCCNum - 1) {}
        For i As Integer = 0 To MFCCNum - 1
            For j As Integer = 0 To halfSize - 1
                S(i) = CSng(S(i) + Math.Pow(fft(j).Magnitude, 2) * H(i, j))
            Next
            ' An energy of exactly 0 is left at 0 rather than becoming -Infinity.
            If S(i) <> 0 Then
                S(i) = CSng(Math.Log(S(i)))
            End If
        Next

        If Not applyDct Then
            Return S
        End If

        ' DCT-II: c(l) = sum over i of S(i) * cos(pi * l * (i + 0.5) / MFCCNum).
        Dim cepstrum As Single() = New Single(MFCCNum - 1) {}
        For l As Integer = 0 To MFCCNum - 1
            Dim acc As Double = 0
            For i As Integer = 0 To MFCCNum - 1
                acc += S(i) * Math.Cos(Math.PI * l * (i + 0.5) / MFCCNum)
            Next
            cepstrum(l) = CSng(acc)
        Next
        Return cepstrum
    End Function
End Class
来源:CSDN
作者:qq_39239990
链接:https://blog.csdn.net/qq_39239990/article/details/104040727