【Python + Flask + Web录音 + baidu-aip + 图灵机器人 = 人机对话】

原文: http://blog.gqylpy.com/gqy/351

安装baidu-aip：pip install baidu-aip

百度云网址：https://login.bce.baidu.com
百度语音合成文档：https://ai.baidu.com/docs#/TTS-Online-Python-SDK/top
百度语音识别文档：https://ai.baidu.com/docs#/ASR-Online-Python-SDK/top
百度自然语言处理基础技术文档：https://ai.baidu.com/docs#/NLP-Python-SDK/6dfe1b04
Mac系统安装ffmpeg文档：https://blog.csdn.net/stonenotes/article/details/68958332
图灵机器人官网：http://www.tuling123.com/
图灵机器人接口说明：https://www.kancloud.cn/turing/www-tuling123-com/718227

后端代码

# ⚠️这是在macOS系统上（版本10.14）写的

# pip install baidu-aip

# 百度云：https://login.bce.baidu.com
# 百度语音合成文档：https://ai.baidu.com/docs#/TTS-Online-Python-SDK/top
# 百度语音识别文档：https://ai.baidu.com/docs#/ASR-Online-Python-SDK/top
# 百度自然语言处理基础技术文档：https://ai.baidu.com/docs#/NLP-Python-SDK/6dfe1b04
# Mac系统安装ffmpeg文档：https://blog.csdn.net/stonenotes/article/details/68958332
# 图灵机器人官网：http://www.tuling123.com/
# 图灵机器人接口说明：https://www.kancloud.cn/turing/www-tuling123-com/718227


import os
import time
import uuid
import requests
import subprocess
from aip import AipSpeech, AipNlp  # baidu-aip
from flask import Flask, request, render_template, send_file, jsonify


# =======================下面是百度语音合成/识别=======================

class VoiceTextConversion(object):
    """Wrapper around the Baidu AIP speech client (TTS / ASR) plus short-text similarity.

    Messages are printed to the console in colour unless the instance was
    created with ``is_external_called=True``, in which case printing is
    suppressed and ``voice_to_text`` returns the recognised text instead.
    """

    # NOTE(review): these are the original author's Baidu application credentials,
    # hard-coded in source. Pass your own app_id/api_key/secret_key to the constructor.
    __APP_ID = '15225447'
    __API_KEY = 's5m43BMMEGGPaFGxeX3SsY7m'
    __SECRET_KEY = 'Lca9FEGpWNZW6yd8WWAHAyCyLovmi6rb'
    # Messages shown for the ``err_no`` codes returned by a failed synthesis call.
    text_to_voice_error_info = {500: '不支持的输入', 501: '输入的参数不正确', 502: 'token验证失败', 503: '合成后端错误'}

    def __init__(self, app_id=None, api_key=None, secret_key=None,
                 connect_timeout=None, socket_timeout=None, is_external_called=False):
        """
        Check connectivity and build the speech client.

        :param app_id: Baidu application id; the built-in one is used when omitted
        :param api_key: Baidu API key (used together with ``app_id``)
        :param secret_key: Baidu secret key (used together with ``app_id``)
        :param connect_timeout: connection timeout in milliseconds, forwarded to the client when given
        :param socket_timeout: data-transfer timeout in milliseconds, forwarded to the client when given
        :param is_external_called: True when another application drives this object (silences printing)
        :raises SystemExit: when http://www.baidu.com cannot be reached
        """
        self.__is_external_called = is_external_called
        # A failed connection raises instead of returning a status code, so both
        # cases must be treated as "offline"; the timeout keeps this from hanging.
        try:
            online = requests.get('http://www.baidu.com', timeout=10).status_code == 200
        except requests.RequestException:
            online = False
        if not online:
            self.__custom_print("没网你玩个锤子啊！")
            raise SystemExit(1)
        # Remember which credentials are in use so that short_text_compare talks
        # to the same Baidu application as the speech client.
        if app_id:
            self.__credentials = (app_id, api_key, secret_key)
        else:
            self.__credentials = (self.__APP_ID, self.__API_KEY, self.__SECRET_KEY)
        self.client = AipSpeech(*self.__credentials)
        if connect_timeout:
            self.client.setConnectionTimeoutInMillis(connect_timeout)
        if socket_timeout:
            self.client.setSocketTimeoutInMillis(socket_timeout)

    def text_to_voice(self, text, filepath='语音文件', lang='zh', ctp=1, per=3, pid=5, spd=5, vol=5):
        """
        Text -> speech. On success the audio is written to ``{filepath}.mp3``.

        :param text: text to synthesise
        :param filepath: output path; the ``.mp3`` suffix is appended automatically
        :param lang: language, Chinese by default
        :param ctp: client type value passed straight through to the API
        :param per: voice; 0 female, 1 male, 3 and 4 are the "emotional" voices
        :param pid: pitch, 0-9 (sent to the API as its ``pit`` option)
        :param spd: speed, 0-9
        :param vol: volume, 0-15
        :return: None; the outcome is reported through the console message
        """
        # The API's pitch option is named "pit"; the public parameter keeps its
        # historical name ``pid`` for backward compatibility.
        voice_format = {'per': per, 'pit': pid, 'spd': spd, 'vol': vol}
        # The client returns raw audio bytes on success and an error dict on failure.
        result = self.client.synthesis(text, lang, ctp, voice_format)
        if isinstance(result, bytes):
            with open(f'{filepath}.mp3', 'wb') as f:
                f.write(result)
            self.__custom_print(f'合成语音成功：{filepath}', color=32, is_error=False)
            return
        err_code = result.get('err_no') if isinstance(result, dict) else None
        # Codes outside the known table must not crash the caller with KeyError.
        err_info = self.text_to_voice_error_info.get(err_code, f'未知错误，错误代码：{err_code}')
        self.__custom_print(err_info)

    def voice_to_text(self, filepath=None, format='pcm', dev_pid=1536):
        """
        Speech -> text. Requires the ``ffmpeg`` command, which converts the input
        to 16 kHz mono 16-bit PCM in a temporary ``{filepath}.{format}`` file.

        :param filepath: path of the audio file to recognise
        :param format: format name sent to the recogniser and used as the temp-file suffix
        :param dev_pid: language model; 1536 Mandarin (with simple English), 1537 Mandarin,
                        1737 English, 1637 Cantonese, 1837 Sichuanese, 1936 far-field Mandarin
        :return: the recognised text when ``is_external_called`` is True; otherwise None
                 (the text is printed). None is also returned on any failure.
        """
        if not filepath or not os.path.isfile(filepath):
            return self.__custom_print('语音文件路径错误')
        converted_path = f'{filepath}.{format}'
        # An argument list (no shell) keeps paths with spaces or shell
        # metacharacters from being interpreted as extra commands.
        cmd = ['ffmpeg', '-y', '-i', str(filepath), '-acodec', 'pcm_s16le', '-f', 's16le',
               '-ac', '1', '-ar', '16000', converted_path]
        try:
            try:
                # run() waits for ffmpeg to finish, so the output file is
                # complete before it is read (no polling needed).
                proc = subprocess.run(cmd, stdin=subprocess.DEVNULL,
                                      stdout=subprocess.DEVNULL, stderr=subprocess.PIPE)
            except OSError as exc:
                return self.__custom_print(f'ffmpeg执行失败：{exc}')
            if proc.returncode != 0 or not os.path.isfile(converted_path):
                return self.__custom_print(f'ffmpeg转换失败，返回码：{proc.returncode}')
            with open(converted_path, 'rb') as fp:
                voice_content = fp.read()
        finally:
            # The converted file is only a temporary artefact; remove it on every path.
            if os.path.isfile(converted_path):
                os.remove(converted_path)
        result = self.client.asr(voice_content, format, 16000, {'dev_pid': dev_pid})
        err_code = result.get('err_no')
        if err_code:
            self.__custom_print(f'错误代码：{err_code}，详见顶部：百度语音识别文档')
            return None
        candidates = result.get('result') or []
        if not candidates:
            return self.__custom_print('未识别到任何内容')
        content = candidates[0]
        if self.__is_external_called:
            return content
        self.__custom_print(content, 34, 1, is_error=False)

    def short_text_compare(self, text1, text2):
        """
        Short-text similarity via the Baidu NLP ``simnet`` call.

        Rough reading of the score according to the original author:
        above 72% very similar, above 60% fairly similar, below 50% not similar.

        :param text1: first short text
        :param text2: second short text
        :return: similarity as a percentage string such as ``'87%'``, or None when
                 the response carries no score
        """
        nlp_client = AipNlp(*self.__credentials)
        result = nlp_client.simnet(text1, text2)
        score = result.get('score')
        if score is None:
            # Error responses carry no score; report instead of failing on None * 100.
            self.__custom_print(f'短文本相似度计算失败：{result}')
            return None
        score = f'{round(score * 100)}%'
        self.__custom_print(f'相似度：{score}', 35, 1, is_error=False)
        return score

    def __custom_print(self, data=None, color=31, font=0, is_error=True):
        """
        Print ``data`` wrapped in ANSI colour codes; does nothing when the
        instance is externally driven.

        :param data: message to print
        :param color: ANSI colour code; 31 red (default), 32 green, 34 blue, 35 magenta
        :param font: 0 normal (default), 1 bold
        :param is_error: when True, a bold error banner is printed before the message
        """
        if self.__is_external_called:
            return
        if is_error:
            self.__custom_print('错误！！！', font=1, is_error=False)
        print_info = f'\033[{font};{color};0m{data}\033[0m'
        print(print_info)


# ---------开始使用---------

# if __name__ == '__main__':
#     # 我们先实例化一个对象
#     obj = VoiceTextConversion()
#
#     # 文字 -> 语音
#     text = '人活着就要有梦想，不然跟咸鱼又有什么区别呢？'
#     obj.text_to_voice(text, filepath='语音文件')
#
#     # 语音 -> 文字
#     obj.voice_to_text('语音文件.mp3')
#
#     # 短文本相似度
#     text1, text2 = '你叫什么名字', '你的名字叫什么'
#     obj.short_text_compare(text1, text2)


# =======================下面是图灵机器人=======================


class TuringRobot(object):
    """Minimal client for the Turing robot (tuling123) v2 chat API."""

    # Robot key; use your own robot if you have one (free tier: 100 calls/day).
    __API_KEY = '8c512c52ac324a3eb4c40639d43bd507'
    # API endpoint.
    URL = 'http://openapi.tuling123.com/openapi/api/v2'
    # Request template. It is copied for every call and never modified, so
    # concurrent requests cannot overwrite each other's text or user id.
    POST_DATA = {
        "reqType": 0,  # input type: 0 text (default), 1 image, 2 audio
        "perception": {  # input payload
            "inputText": {  # text input
                "text": ""  # the text itself, 1-128 characters
            },
        },
        "userInfo": {  # user parameters
            "apiKey": "",  # robot key
            "userId": "",  # unique user id, used by the service to keep conversation context
        }
    }

    @classmethod
    def run(cls, text='你的名字叫什么', user_id='user01', api_key=None, *args, **kwargs):
        """
        Send one utterance to the robot and return its text reply.

        :param text: what you say to the robot
        :param user_id: unique user id; must differ between users sharing one robot
        :param api_key: robot key; the built-in one is used when omitted
        :return: the first text reply found in the response
        :raises RuntimeError: when the response contains no text reply
        """
        template = cls.POST_DATA
        perception = template.get('perception') or {}
        payload = {
            **template,
            'perception': {
                **perception,
                'inputText': {**(perception.get('inputText') or {}), 'text': text},
            },
            'userInfo': {
                **(template.get('userInfo') or {}),
                'apiKey': api_key or cls.__API_KEY,
                'userId': user_id,
            },
        }
        # The timeout keeps a stalled API call from blocking the caller forever.
        result = requests.post(cls.URL, json=payload, timeout=10).json()
        # Replies may be missing or of a non-text kind; pick the first text one.
        for item in result.get('results') or []:
            reply = (item.get('values') or {}).get('text')
            if reply:
                return reply
        raise RuntimeError(f'Turing robot returned no text reply: {result!r}')


# =======================Flask App=======================


# Flask application object; the route handlers below are registered on it.
app = Flask(__name__)


@app.route('/', methods=['GET', 'POST'])
def index():
    """Render and return the ``web.html`` template (the recorder page)."""
    page = render_template('web.html')
    return page


@app.route('/toy_uploader', methods=['GET', 'POST'])
def toy_uploader():
    """
    Run one round of the voice conversation.

    Expects the recording as the multipart file field ``record``. The audio is
    transcribed, the text is sent to the Turing robot, and the reply is
    synthesised to ``<uuid>.mp3`` in the working directory.

    :return: JSON ``{"filename": "<uuid>.mp3"}`` on success, or JSON
             ``{"error": ...}`` with a 4xx/5xx status when a step fails
    """
    record = request.files.get('record')
    if record is None:
        return jsonify({'error': 'missing "record" file upload'}), 400
    uuid4 = uuid.uuid4()
    vct = VoiceTextConversion(is_external_called=True)
    # A UUID keeps concurrent uploads from clobbering each other; the browser records .wav.
    filename = f'{uuid4}.wav'
    record.save(filename)
    try:
        content = vct.voice_to_text(filename)
    finally:
        # The upload is only needed for recognition; remove it even if that raises.
        os.remove(filename)
    if not content:
        # voice_to_text returns None on failure; do not forward that to the robot.
        return jsonify({'error': 'speech recognition failed'}), 422
    result = TuringRobot.run(text=content, user_id='user01')
    vct.text_to_voice(text=result, filepath=uuid4)
    audio_name = f'{uuid4}.mp3'
    # text_to_voice reports failure only by not writing the file.
    if not os.path.isfile(audio_name):
        return jsonify({'error': 'speech synthesis failed'}), 502
    return jsonify({'filename': audio_name})


@app.route('/get_audio/<filename>')
def get_audio(filename):
    """
    Return a synthesised reply once and delete it afterwards.

    Only names of the form ``<canonical uuid>.mp3`` are accepted. Without this
    check any file in the working directory could be downloaded and then
    deleted by whoever requests it.

    :param filename: file name previously returned by ``/toy_uploader``
    :return: the MP3 bytes, or a JSON error with status 404
    """
    stem, ext = os.path.splitext(filename)
    try:
        is_generated = ext == '.mp3' and str(uuid.UUID(stem)) == stem
    except ValueError:
        is_generated = False
    if not is_generated or not os.path.isfile(filename):
        return jsonify({'error': 'audio not found'}), 404
    # Read the file fully before deleting it, from the same working-directory
    # relative path that it was written to.
    with open(filename, 'rb') as audio_file:
        audio_bytes = audio_file.read()
    os.remove(filename)
    return app.response_class(audio_bytes, mimetype='audio/mpeg')


if __name__ == '__main__':
    # The interactive debugger lets anyone who can reach the server execute
    # code, so it must not be on by default while listening on every interface.
    # Opt in for local development with FLASK_DEBUG=1.
    app.run('0.0.0.0', 5000, debug=os.environ.get('FLASK_DEBUG') == '1')

前端代码

<!DOCTYPE html>
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <title>人机对话</title>
</head>
<body>
<!-- Plays the synthesised reply fetched from the backend. -->
<audio src="" controls autoplay id="player"></audio>
<div>
    <button onclick="start_reco()" style="background-color: red">录制语音</button>
</div>
<div>
    <button onclick="stop_reco()" style="background-color: green">发送语音</button>
</div>
<!-- Scripts belong inside <body>; content after </body> is invalid markup. -->
<script type="text/javascript" src="https://cdn.bootcss.com/jquery/3.3.1/jquery.min.js"></script>
<script type="text/javascript" src="/static/Recorder.js"></script>
<script type="text/javascript">
    // Base URL of the Flask backend; every request below is built from it.
    var serv = "http://127.0.0.1:5000";
    // Recorder instance; stays null until microphone access has been granted.
    var reco = null;

    var AudioContextClass = window.AudioContext || window.webkitAudioContext;
    var audio_context = new AudioContextClass();

    function on_media_error(err) {
        console.log(err);
    }

    // Prefer the promise-based API; fall back to the prefixed callback variants
    // only where it is unavailable.
    if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
        navigator.mediaDevices.getUserMedia({audio: true})
            .then(create_stream)
            .catch(on_media_error);
    } else {
        navigator.getUserMedia = (navigator.getUserMedia ||
            navigator.webkitGetUserMedia ||
            navigator.mozGetUserMedia ||
            navigator.msGetUserMedia);
        if (navigator.getUserMedia) {
            navigator.getUserMedia({audio: true}, create_stream, on_media_error);
        } else {
            on_media_error(new Error("getUserMedia is not supported by this browser"));
        }
    }

    // Wrap the microphone stream in a Recorder once access is granted.
    function create_stream(user_media) {
        var stream_input = audio_context.createMediaStreamSource(user_media);
        reco = new Recorder(stream_input);
    }

    // Start recording (bound to the red button).
    function start_reco() {
        if (!reco) {
            console.log("Recorder is not ready: microphone access has not been granted");
            return;
        }
        // A context created before any user gesture may start out suspended.
        if (audio_context.state === "suspended" && audio_context.resume) {
            audio_context.resume();
        }
        reco.record();
    }

    // Stop recording, upload the take, then reset the recorder (green button).
    function stop_reco() {
        if (!reco) {
            return;
        }
        reco.stop();
        get_audio();
        reco.clear();
    }

    // Export the recording as WAV, upload it, and play the reply that comes back.
    function get_audio() {
        reco.exportWAV(function (wav_file) {
            // wav_file is a Blob; the backend reads it from the "record" field.
            var formdata = new FormData();
            formdata.append("record", wav_file);
            $.ajax({
                url: serv + "/toy_uploader",
                type: 'post',
                processData: false,  // send the FormData as-is
                contentType: false,  // let the browser set the multipart boundary
                data: formdata,
                dataType: 'json',
                success: function (data) {
                    if (!data || !data.filename) {
                        console.log(data);
                        return;
                    }
                    document.getElementById("player").src =
                        serv + "/get_audio/" + encodeURIComponent(data.filename);
                },
                error: function (xhr, status, err) {
                    console.log(status, err, xhr.responseText);
                }
            })
        })
    }
</script>
</body>
</html>

前端依赖插件：
Recorder.js：https://download.csdn.net/download/qq_41964425/10867911
uuid.js：https://download.csdn.net/download/qq_41964425/10867907

使用火狐浏览器录音.