三分钟把声优带回家

点击▲关注 “爪哇笔记” 给公众号标星置顶

更多精彩第一时间直达

Java版本

语音合成是将计算机自己产生的或外部输入的文字信息，转变为人可以听得懂的、流利的口语输出的技术，适用于手机APP、儿童故事机、智能机器人等多种应用场景。

整合

这里我们使用百度云的人工智能语音合成 API，个人用户拥有一定的免费额度，基本已经够用了，在线语音合成支持丰富的 SDK，基本上主流的语言都可以轻松接入。这里使用世界上最流行的语言 Java。

pom.xml引入：


   
   
   
    
    
    
   <dependency>
       <groupId>com.baidu.aip</groupId>
       <artifactId>java-sdk</artifactId>
       <version>4.12.0</version>
   </dependency>

application.properties引入：


   
   
   # ===================================
   # 百度人工智能，申请百度云账号并创建应用
   # ===================================
   bai-du.appId = **********
   bai-du.apiKey = **********
   bai-du.accessKeySecret = **********

BaiDuProperties.java 配置实体：


   
   
   /**
    * Credentials for the Baidu AI application, bound from configuration
    * properties that start with the {@code bai-du} prefix.
    *
    * <p>Accessors are not written by hand; the class relies on the
    * {@code @Data} annotation for them.
    */
   @Data
   @ConfigurationProperties(prefix = "bai-du")
   public class BaiDuProperties {

       /** Bound from {@code bai-du.appId}. */
       private String appId;

       /** Bound from {@code bai-du.apiKey}. */
       private String apiKey;

       /** Bound from {@code bai-du.accessKeySecret}. */
       private String accessKeySecret;

   }

撸一个工具类：


   
   
   /**
    * Baidu AI helper: converts text to speech through {@link AipSpeech} and
    * stores the resulting MP3 in the {@code voice} directory below
    * {@code ${file.path}}.
    */
   @Component
   @Configuration
   @EnableConfigurationProperties({BaiDuProperties.class})
   public class BaiDuUtils {

       /** Name of the sub-directory (below {@code filePath}) that receives the audio files. */
       private static final String VOICE_DIR = "voice";

       private final BaiDuProperties baiDu;

       /** SDK client; assigned in {@link #init()} and stays {@code null} if that method fails. */
       private AipSpeech instance;

       @Value("${file.path}")
       private String filePath;

       public BaiDuUtils(BaiDuProperties baiDu) {
           this.baiDu = baiDu;
       }

       /**
        * Creates the {@code voice} output directory and the SDK client once
        * dependency injection has finished.
        */
       @PostConstruct
       public void init() {
           try {
               FileUtil.mkdir(filePath + SystemConstant.SF_FILE_SEPARATOR + VOICE_DIR);
               instance = new AipSpeech(baiDu.getAppId(), baiDu.getApiKey(), baiDu.getAccessKeySecret());
               // Optional: network connection parameters.
               instance.setConnectionTimeoutInMillis(2000);
               instance.setSocketTimeoutInMillis(60000);
           } catch (Exception e) {
               // NOTE(review): the failure is only printed, so `instance` may remain null
               // and text2Voice will then fail with a NullPointerException.
               e.printStackTrace();
           }
       }

       /**
        * Speech synthesis.
        *
        * <p>Local testing may run into HTTPS certificate problems; calling
        * {@code ignoreSsl} works around them.
        *
        * @param text text to synthesize, UTF-8 encoded; note that its length must be
        *             less than 1024 bytes
        * @param per  voice selector: {@code true} sends {@code "per" = "3"}; {@code false}
        *             or {@code null} sends {@code "per" = "4"} (presumably two different
        *             Baidu speaker ids; confirm against the Baidu TTS documentation)
        * @return the stored file's path relative to {@code filePath}
        *         ({@code voice/<uuid>.mp3}), or {@code null} when the response carries no
        *         audio data or the file cannot be written
        */
       public String text2Voice(String text, Boolean per) {
           // NOTE(review): judging by its name and the note above, this presumably turns off
           // TLS certificate checks on every call; confirm before using it in production.
           SslUtils.ignoreSsl();

           HashMap<String, Object> options = new HashMap<>();
           // Boolean.TRUE.equals avoids the NullPointerException that unboxing a null `per` caused.
           options.put("per", Boolean.TRUE.equals(per) ? "3" : "4");

           TtsResponse res = instance.synthesis(text, "zh", 1, options);
           byte[] data = res.getData();
           if (data == null) {
               return null;
           }
           try {
               String file = VOICE_DIR + SystemConstant.SF_FILE_SEPARATOR + UUID.randomUUID() + ".mp3";
               Util.writeBytesToFileSystem(data, filePath + SystemConstant.SF_FILE_SEPARATOR + file);
               return file;
           } catch (IOException e) {
               e.printStackTrace();
               return null;
           }
       }
   }

注意

请注意：文本长度必须小于 1024 字节。
本地测试可能会出现 https 认证的问题，调用一下 ignoreSsl 方法即可。

截图

Python版本

语音合成技术能将用户输入的文字，转换成流畅自然的语音输出，并且可以支持语速、音调、音量设置，打破传统文字式人机交互的方式，让人机沟通更自然。

应用场景

将游戏场景中的公告、任务或派单信息通过语音播报，让玩家玩游戏或配送员送货的同时，也可接听新任务。

文学小说类软件，可以利用百度语音合成技术将文学小说作品进行高质量的朗读，流畅清晰，解放双眼，畅听世界。

项目架构

Python3.7.2、Django2.1.7、baidu-aip

案例

这里只展示部分代码，有兴趣的同学可以自行下载源码安装调试。

  
  
  import os
   
   
   import time
   
   
   import codecs
   
   
   from aip import AipSpeech
   
   
   from django.shortcuts import render
   
   
   from django.http import HttpResponse
   
   
   

   
   
   

   
   
   '''
   
   
   pip install --upgrade pip
   
   
   pip install django
   
   
   pip install baidu-aip
   
   
   '''
   
   
   

   
   
   

   
   
   def main(request):
       """Render the ``index.html`` template for ``request``."""
       return render(request, 'index.html')
   
   
   

   
   
   

   
   
   def m_main(request):
       """Render the ``m_index.html`` template for ``request``.

       NOTE(review): presumably the mobile variant of the page; confirm.
       """
       return render(request, 'm_index.html')
   
   
   

   
   
   

   
   
   def convert(request):
       """Synthesize the posted text and answer with the result of ``du_say``.

       Reads the ``message`` and ``switch`` fields from the POST data, passes
       them to ``du_say`` and wraps whatever it returns in an ``HttpResponse``.
       """
       message = request.POST.get("message")
       switch = request.POST.get("switch")
       mp3 = du_say(message, switch)
       return HttpResponse(mp3)
   
   
   

   
   
   

   
   
   def du_say(message, switch):
       """Synthesize ``message`` to an MP3 file and return the file's name.

       The text is first archived with ``write_txt``. The audio is written to
       ``static/audio/<epoch-millis>.mp3`` below the current working directory.

       :param message: text to synthesize.
       :param switch: voice toggle; the string ``"true"`` selects ``per=3``,
           anything else selects ``per=4``.
       :return: the bare file name (e.g. ``"1550000000000.mp3"``) on success,
           or an empty string when the SDK reports an error.
       """
       write_txt(message)
       app_id = '*****'
       api_key = '*****'
       secret_key = '*****'
       client = AipSpeech(app_id, api_key, secret_key)
       # The toggle is compared as a string, so only the exact text "true" picks voice 3.
       voice = 3 if switch == "true" else 4
       result = client.synthesis(message, 'zh', 1, {
           'vol': 5, 'per': voice,
       })
       # On success the SDK returns the audio as bytes; on error it returns a dict
       # (see the SDK's error codes).
       if isinstance(result, dict):
           # NOTE(review): the original listing lost its indentation, so what it returned
           # on failure cannot be recovered. An empty string avoids handing the caller
           # the name of a file that was never written.
           return ''
       file_name = str(int(round(time.time() * 1000))) + '.mp3'
       audio_dir = os.path.join(os.getcwd(), "static", "audio")
       # Create the target directory on first use instead of failing in open().
       os.makedirs(audio_dir, exist_ok=True)
       with open(os.path.join(audio_dir, file_name), 'wb') as f:
           f.write(result)
       return file_name
   
   
   

   
   
   

   
   
   def write_txt(message):
       """Store ``message`` as UTF-8 text below the current working directory.

       The file is ``static/text/<epoch-millis>.txt``. It is opened in append
       mode, so two calls within the same millisecond share one file.

       :param message: text to write.
       """
       file_name = str(int(round(time.time() * 1000))) + '.txt'
       text_dir = os.path.join(os.getcwd(), "static", "text")
       # Create the target directory on first use instead of failing in open().
       os.makedirs(text_dir, exist_ok=True)
       with codecs.open(os.path.join(text_dir, file_name), 'a', encoding='utf8') as f:
           f.write(message)

本地部署

从码云拉取项目到本地：

  
  
  https://gitee.com/52itstyle/baidu-speech.git

配置百度语音API：

  
  
  # 自行注册申请
   
   
   https://console.bce.baidu.com/ai/#/ai/speech/app/list

启动项目：

  
  
  # 切换到项目根目录，执行
   
   
   manage.py runserver

外网部署

这里以Linux为例，代理使用 openresty。

安装 Python3

  
  
  wget https://www.python.org/ftp/python/3.7.1/Python-3.7.1.tar.xz

事先安装依赖，否则后期安装会报错：

  
  
  yum -y install zlib*
   
   
   yum -y install libffi-devel

下面开始正式安装：

  
  
  # 解压
   
   
   tar -xvf Python-3.7.1.tar.xz
   
   
   # 切换到目录
   
   
   cd Python-3.7.1
   
   
   # 配置编译
   
   
   ./configure
   
   
   # 编译安装
   
   
   make && make install

安装 Django

  
  
  pip install Django

安装成功以后需要重新配置并编译安装 Python3：

  
  
  # 配置编译
   
   
   ./configure
   
   
   # 编译安装
   
   
   make && make install

安装服务器 uwsgi

  
  
  pip3 install uwsgi

上传项目到服务器，并切换到 speech 目录：

  
  
  # 目录下新建文件夹
   
   
   mkdir script

在 script 下新增 uwsgi.ini (项目中已经配置好，自行修改路径即可)：

  
  
  # uwsgi使用配置文件启动
   
   
   [uwsgi]
   
   
   # 项目目录
   
   
   chdir=/www/speech/
   
   
   # 指定项目的application
   
   
   module=speech.wsgi:application
   
   
   # 指定sock的文件路径
   
   
   socket=/www/speech/script/uwsgi.sock
   
   
   # 进程个数
   
   
   workers=5
   
   
   pidfile=/www/speech/script/uwsgi.pid
   
   
   # 指定IP端口
   
   
   http=127.0.0.1:8001
   
   
   # 指定静态文件
   
   
   static-map=/static=/www/speech/static
   
   
   # 启动uwsgi的用户名和用户组
   
   
   uid=root
   
   
   gid=root
   
   
   # 启用主进程
   
   
   master=true
   
   
   # 自动移除unix Socket和pid文件当服务停止的时候
   
   
   vacuum=true
   
   
   # 序列化接受的内容，如果可能的话
   
   
   thunder-lock=true
   
   
   # 启用线程
   
   
   enable-threads=true
   
   
   # 设置自中断时间
   
   
   harakiri=30
   
   
   # 设置缓冲
   
   
   post-buffering=4096
   
   
   # 设置日志目录
   
   
   daemonize=/www/speech/script/uwsgi.log

然后使用以下命令启动：

  
  
  uwsgi  --ini uwsgi.ini

执行命令，查看是否启动成功：

  
  
  [root@AY140216131049Z script]# ps -ef|grep uwsgi
   root      3040     1  0 Nov21 ?        00:00:03 uwsgi --ini uwsgi.ini
   root      3041  3040  0 Nov21 ?        00:00:00 uwsgi --ini uwsgi.ini
   root      3042  3040  0 Nov21 ?        00:00:00 uwsgi --ini uwsgi.ini
   root      3043  3040  0 Nov21 ?        00:00:00 uwsgi --ini uwsgi.ini
   root      3044  3040  0 Nov21 ?        00:00:00 uwsgi --ini uwsgi.ini
   root      3045  3040  0 Nov21 ?        00:00:00 uwsgi --ini uwsgi.ini
   root      3046  3040  0 Nov21 ?        00:00:00 uwsgi --ini uwsgi.ini
   root      6606  6580  0 18:13 pts/0    00:00:00 grep --color=auto uwsgi

重启：

  
  
  uwsgi --reload uwsgi.pid

配置Nginx代理：

  
  
  server {
   
   
           listen  80;
   
   
           server_name  speech.52itstyle.vip;
   
   
           charset utf-8;
   
   
           location / {
   
   
              include uwsgi_params; # 导入一个Nginx模块他是用来和uWSGI进行通讯的
   
   
              uwsgi_connect_timeout 30; # 设置连接uWSGI超时时间
   
   
              uwsgi_pass unix:/www/speech/script/uwsgi.sock; # 指定uwsgi的sock文件所有动态请求就会直接丢给他
   
   
   }
   
   
   # 动静分离 Nginx 处理静态请求
   
   
           location /static{
   
   
               root /www/speech/;
   
   
   }
   
   
   }

如果启动HTTPS：

  
  
  server {
   
   
            listen 80;
   
   
            listen 443 ssl;
   
   
            server_name  speech.52itstyle.vip;
   
   
   #ssl on;
   
   
   #证书路径
   
   
            ssl_certificate    /usr/local/openresty/nginx/cert/1901523_speech.52itstyle.vip.pem;
   
   
   #私钥路径
   
   
            ssl_certificate_key   /usr/local/openresty/nginx/cert/1901523_speech.52itstyle.vip.key;
   
   
   #缓存有效期
   
   
            ssl_session_timeout 5m;
   
   
   #可选的加密算法,顺序很重要,越靠前的优先级越高.
   
   
            ssl_ciphers ECDHE-RSA-AES128-GCM-SHA256:ECDHE:ECDH:AES:HIGH:!NULL:!aNULL:!MD5:!ADH:!RC4;
   
   
   #安全链接可选的加密协议
   
   
            ssl_protocols TLSv1 TLSv1.1 TLSv1.2;
   
   
            ssl_prefer_server_ciphers on;
   
   
   

   
   
            location / {
   
   
               include uwsgi_params; # 导入一个Nginx模块他是用来和uWSGI进行通讯的
   
   
               uwsgi_connect_timeout 30; # 设置连接uWSGI超时时间
   
   
               uwsgi_pass unix:/www/speech/script/uwsgi.sock; # 指定uwsgi的sock文件所有动态请求就会直接丢给他
   
   
   }
   
   
   # 动静分离 Nginx 处理静态请求
   
   
            location /static{
   
   
               root /www/speech/;
   
   
   }
   
   
   }