摘要:
UniMRCP 的 VAD(voice activity detector,语音活动检测)模块,其算法一直被认为比较粗暴,而且 UniMRCP 的社区也很久没有更新了。如果只是用来做 Demo 演示,原始的 UniMRCP 通过手动调整参数还是可以用的;但是距离生产环境,还有很远的一段路。
这篇文章介绍如何使用 WebRTC 的 VAD 模块替换原来的算法。
【题外话:昨天开了题目,因为有事,没有更新,今天补上】
UniMRCP 的 VAD 模块位于 libs/mpf/src/mpf_activity_detector.c 文件中,主要算法函数如下:
1 static apr_size_t mpf_activity_detector_level_calculate(const mpf_frame_t *frame)
2 {
3 apr_size_t sum = 0;
4 apr_size_t count = frame->codec_frame.size/2;
5 const apr_int16_t *cur = frame->codec_frame.buffer;
6 const apr_int16_t *end = cur + count;
7
8 for(; cur < end; cur++) {
9 if(*cur < 0) {
10 sum -= *cur;
11 }
12 else {
13 sum += *cur;
14 }
15 }
16
17 return sum / count;
18 }
大家看这个算法,非常简单粗暴:对帧内各采样的绝对值累加后求平均值,如果不小于阈值,表示有声音,否则表示静音。其中并没有噪音检测,所以在生产环境中基本上不可用。
上一篇文章介绍了 WebRTC 的 VAD 算法,今天就使用 WebRTC 的 VAD 算法替换该算法,步骤和上一篇介绍的一致。
1 static apr_size_t mpf_activity_detector_level_calculate(const mpf_frame_t *frame)
2 {
3 //calculate samplesCount
4 apr_size_t samplesCount = frame->codec_frame.size/2;
5 //default 10
6 int per_ms_frames = 10;
7 //calculate samples
8 apr_size_t sampleRate = 16000;
9 //
10 size_t samples = sampleRate * per_ms_frames / 1000;
11 if (samples == 0) return -1;
12 //
13 size_t nTotal = (samplesCount / samples);
14 //buffer
15 int16_t *input = frame->codec_frame.buffer;
16 //init vad
17 VadInst * vadInst = WebRtcVad_Create();
18 if (vadInst == NULL) {
19 return -1;
20 }
21 int status = WebRtcVad_Init(vadInst);
22 if (status != 0) {
23 WebRtcVad_Free(vadInst);
24 return -1;
25 }
26 //default 1
27 int16_t vad_mode = 1;
28 status = WebRtcVad_set_mode(vadInst, vad_mode);
29 if (status != 0) {
30 WebRtcVad_Free(vadInst);
31 return -1;
32 }
33 int cnt = 0;
34 int i = 0;
35 for (i = 0; i < nTotal; i++) {
36 int keep_weight = 0;
37 int nVadRet = WebRtcVad_Process(vadInst, sampleRate, input, samples, keep_weight);
38 if (nVadRet == -1) {
39 WebRtcVad_Free(vadInst);
40 return -1;
41 } else {
42 if (nVadRet >= 1) {
43 cnt++;
44 }
45 printf(" %d \t", nVadRet);
46 }
47 input += samples;
48 }
49 //if hunman voice < nTotal/10, as silent sample。maybe ... //FIXME
50 if (cnt < nTotal/10) {
51 return 0;
52 }
53 else {
54 return 1;
55 }
56 } WebRtcVad_Free(vadInst)
下面要更新主处理函数,保留它原有的 TRANSITION 中间状态逻辑:
1 /** Process current frame */
2 MPF_DECLARE(mpf_detector_event_e) mpf_activity_detector_process(mpf_activity_detector_t *detector, const mpf_frame_t *frame)
3 {
4 mpf_detector_event_e det_event = MPF_DETECTOR_EVENT_NONE;
5 apr_size_t level = 0;
6 if((frame->type & MEDIA_FRAME_TYPE_AUDIO) == MEDIA_FRAME_TYPE_AUDIO) {
7 /* first, calculate current activity level of processed frame */
8 level = mpf_activity_detector_level_calculate(frame);
9 #if 0
10 apt_log(APT_LOG_MARK,APT_PRIO_INFO,"Activity Detector --------------------- [%"APR_SIZE_T_FMT"]",level);
11 #endif
12 }
13
14 if(detector->state == DETECTOR_STATE_INACTIVITY) {
15 //if(level >= detector->level_threshold) {
16 if(level >= 1) {
17 /* start to detect activity */
18 mpf_activity_detector_state_change(detector,DETECTOR_STATE_ACTIVITY_TRANSITION);
19 }
20 else {
21 detector->duration += CODEC_FRAME_TIME_BASE;
22 if(detector->duration >= detector->noinput_timeout) {
23 /* detected noinput */
24 det_event = MPF_DETECTOR_EVENT_NOINPUT;
25 }
26 }
27 }
28 else if(detector->state == DETECTOR_STATE_ACTIVITY_TRANSITION) {
29 //if(level >= detector->level_threshold) {
30 if(level >= 1) {
31 detector->duration += CODEC_FRAME_TIME_BASE;
32 if(detector->duration >= detector->speech_timeout) {
33 /* finally detected activity */
34 det_event = MPF_DETECTOR_EVENT_ACTIVITY;
35 mpf_activity_detector_state_change(detector,DETECTOR_STATE_ACTIVITY);
36 }
37 }
38 else {
39 /* fallback to inactivity */
40 mpf_activity_detector_state_change(detector,DETECTOR_STATE_INACTIVITY);
41 }
42 }
43 else if(detector->state == DETECTOR_STATE_ACTIVITY) {
44 //if(level >= detector->level_threshold) {
45 if(level >= 1) {
46 detector->duration += CODEC_FRAME_TIME_BASE;
47 }
48 else {
49 /* start to detect inactivity */
50 mpf_activity_detector_state_change(detector,DETECTOR_STATE_INACTIVITY_TRANSITION);
51 }
52 }
53 else if(detector->state == DETECTOR_STATE_INACTIVITY_TRANSITION) {
54 //if(level >= detector->level_threshold) {
55 if(level >= 1) {
56 /* fallback to activity */
57 mpf_activity_detector_state_change(detector,DETECTOR_STATE_ACTIVITY);
58 }
59 else {
60 detector->duration += CODEC_FRAME_TIME_BASE;
61 if(detector->duration >= detector->silence_timeout) {
62 /* detected inactivity */
63 det_event = MPF_DETECTOR_EVENT_INACTIVITY;
64 mpf_activity_detector_state_change(detector,DETECTOR_STATE_INACTIVITY);
65 }
66 }
67 }
68
69 return det_event;
70 }
如此替换后,就完成了算法的更新。当然还需要调整一下 CMake 的相关配置文件,以加载相应的 WebRTC VAD 文件。
/**
 * Classify a frame as speech or silence with the WebRTC VAD.
 *
 * The frame buffer is read as 16-bit samples and split into 10 ms
 * sub-frames; each sub-frame is passed to WebRtcVad_Process(). The frame
 * counts as speech when at least one tenth of its sub-frames are voiced.
 *
 * The return type is unsigned, so failures are reported as 0 (silence)
 * rather than -1: a -1 would wrap to a huge value and satisfy the caller's
 * "level >= 1" test.
 *
 * @param frame frame whose codec_frame buffer is analysed
 * @return 1 when the frame is classified as speech; 0 for silence, for a
 *         frame shorter than one sub-frame, or when the VAD fails
 */
static apr_size_t mpf_activity_detector_level_calculate(const mpf_frame_t *frame)
{
	/* number of 16-bit samples carried by the frame */
	apr_size_t samplesCount = frame->codec_frame.size/2;
	/* length of one VAD sub-frame, in milliseconds */
	int per_ms_frames = 10;
	/* NOTE(review): the sample rate is hard-coded; confirm the stream
	 * really is 16 kHz, otherwise the sub-frame size below is wrong */
	apr_size_t sampleRate = 16000;
	/* samples per sub-frame */
	size_t samples = sampleRate * per_ms_frames / 1000;
	size_t nTotal;
	size_t cnt = 0;
	size_t i;
	int16_t *input = frame->codec_frame.buffer;
	/* default aggressiveness mode */
	int16_t vad_mode = 1;
	apr_size_t result = 0;
	VadInst *vadInst;

	if (samples == 0) {
		return 0;
	}

	/* number of complete sub-frames; without one there is nothing to classify */
	nTotal = samplesCount / samples;
	if (nTotal == 0) {
		return 0;
	}

	/* NOTE(review): a VAD instance is created and destroyed for every frame;
	 * keeping one per detector would avoid the cost — TODO confirm feasibility */
	vadInst = WebRtcVad_Create();
	if (vadInst == NULL) {
		return 0;
	}

	if (WebRtcVad_Init(vadInst) != 0) {
		goto cleanup;
	}
	if (WebRtcVad_set_mode(vadInst, vad_mode) != 0) {
		goto cleanup;
	}

	for (i = 0; i < nTotal; i++) {
		int keep_weight = 0;
		/* NOTE(review): five-argument call kept from the article; the stock
		 * WebRTC header declares four parameters — verify against the VAD
		 * sources actually linked */
		int nVadRet = WebRtcVad_Process(vadInst, sampleRate, input, samples, keep_weight);
		if (nVadRet == -1) {
			/* result is still 0: a failed analysis is reported as silence */
			goto cleanup;
		}
		if (nVadRet >= 1) {
			cnt++;
		}
		input += samples;
	}

	/* speech when at least 10% of the sub-frames are voiced; multiplying
	 * instead of dividing keeps the test meaningful when nTotal < 10 */
	if (cnt * 10 >= nTotal) {
		result = 1;
	}

cleanup:
	/* single release point, so the instance is freed on every path */
	WebRtcVad_Free(vadInst);
	return result;
}
来源:https://www.cnblogs.com/damizhou/p/11323394.html
