Unity AIUI
Recap of the previous post
https://blog.csdn.net/weixin_42208093/article/details/106277629
In the previous post we covered iFlytek's speech recognition and synthesis, plus Baidu's UNIT dialogue system.
However, Baidu UNIT offers too few semantic skills to meet our needs; the music and story skills, for example, return no actual content. So I went back and took a closer look at iFlytek's AIUI. Since we cannot use the Windows version of the AIUI DLL, I chose the WebAPI version of AIUI instead; the results are the same.
WebAPI AIUI
I won't go over creating the WebAPI project. Once the WebAPI AIUI application is created, the project's Development Tools page provides C# sample code.
It only needs minor changes, so I'll paste the modified code directly below.
Notes:
1. The APPID and API_KEY are the WebAPI application's credentials; they are not the same as the earlier SDK version's.
2. AUTH_ID is defined by you; if you are not sure how to define it, just copy the "authId" from the [Development Tools] page.
3. In the sample code, change private const string SCENE = "main"; to "main_box", or questions will go unanswered. [Official note: configuration changes on this page take effect only in the test environment; to experience them on a device, either append "_box" to the scene name passed via the SDK, or publish the changes to the production environment.]
4. Only questions covered by the skills you have added from the skill store will be answered correctly.
using System;
using System.Collections.Generic;
using System.IO;
using System.Net;
using System.Security.Cryptography;
using System.Text;
using UnityEngine;
public class AIUI : MonoBehaviour
{
private const string URL = "http://openapi.xfyun.cn/v2/aiui";
private const string APPID = "";
private const string API_KEY = "";
private const string AUE = "raw";
private const string AUTH_ID = "2049a1b2fdedae553bd03ce6f4820ac5";
private const string DATA_TYPE = "audio";
private const string SAMPLE_RATE = "16000";
private const string SCENE = "main_box"; // "_box" suffix required, see note 3 above
private const string RESULT_LEVEL = "plain";
private string FILE_PATH = "";
void Start()
{
// Quick test: send a prerecorded question audio file to AIUI
FILE_PATH = Application.streamingAssetsPath + "/Baidu-TTS.pcm";
byte[] dataByteArray = readFile(FILE_PATH);
print(aiui(dataByteArray));
}
public static string aiui(byte[] body)
{
Dictionary<string, string> header = buildHeader();
string result = HttpPost(URL, header, body);
return result;
}
/// <summary>
/// Sends an HTTP POST request
/// </summary>
/// <param name="url">request url</param>
/// <param name="headerDic">header dictionary</param>
/// <param name="body">request body</param>
/// <returns></returns>
private static string HttpPost(string url, Dictionary<string, string> headerDic, byte[] body)
{
HttpWebRequest httpWebRequest = null;
HttpWebResponse httpWebResponse = null;
string result = "";
try
{
httpWebRequest = (HttpWebRequest)WebRequest.Create(url);
httpWebRequest.Method = "POST";
httpWebRequest.Headers.Add("X-Param", headerDic["X-Param"]);
httpWebRequest.Headers.Add("X-CurTime", headerDic["X-CurTime"]);
httpWebRequest.Headers.Add("X-CheckSum", headerDic["X-CheckSum"]);
httpWebRequest.Headers.Add("X-Appid", headerDic["X-Appid"]);
httpWebRequest.ContentLength = body.Length;
httpWebRequest.GetRequestStream().Write(body, 0, body.Length);
httpWebResponse = (HttpWebResponse)httpWebRequest.GetResponse();
Stream responseStream = httpWebResponse.GetResponseStream();
result = new StreamReader(responseStream, Encoding.UTF8).ReadToEnd();
responseStream.Close();
httpWebRequest.Abort();
httpWebResponse.Close();
}
catch (Exception ex)
{
Debug.LogError("Error: " + ex.ToString());
}
return result;
}
/// <summary>
/// Builds the request headers (X-Param, X-CurTime, X-CheckSum, X-Appid)
/// </summary>
/// <returns></returns>
private static Dictionary<string, string> buildHeader()
{
TimeSpan ts = DateTime.UtcNow - new DateTime(1970, 1, 1, 0, 0, 0, 0);
string curTime = Convert.ToInt64(ts.TotalSeconds).ToString();
// Request parameters, sent Base64-encoded in the X-Param header
string param = "{\"aue\":\"" + AUE + "\",\"result_level\":\"" + RESULT_LEVEL + "\",\"sample_rate\":\"" + SAMPLE_RATE + "\",\"auth_id\":\"" + AUTH_ID + "\",\"data_type\":\"" + DATA_TYPE + "\",\"scene\":\"" + SCENE + "\"}";
string paramBase64 = Convert.ToBase64String(Encoding.UTF8.GetBytes(param));
string checkSum = EncryptWithMD5(API_KEY + curTime + paramBase64);
Dictionary<string, string> header = new Dictionary<string, string>();
header.Add("X-Param", paramBase64);
header.Add("X-CurTime", curTime);
header.Add("X-CheckSum", checkSum);
header.Add("X-Appid", APPID);
return header;
}
/// <summary>
/// MD5 hash, used to build the X-CheckSum header
/// </summary>
/// <param name="source">input string</param>
/// <returns></returns>
private static string EncryptWithMD5(string source)
{
byte[] sor = Encoding.UTF8.GetBytes(source);
MD5 md5 = MD5.Create();
byte[] result = md5.ComputeHash(sor);
StringBuilder strbul = new StringBuilder(40);
for (int i = 0; i < result.Length; i++)
{
//"x2" formats each byte as two hex digits, so the digest is 32 characters ("x3" would give 48, "x4" 64)
strbul.Append(result[i].ToString("x2"));
}
return strbul.ToString();
}
// Reads the whole audio file into a byte array
private byte[] readFile(string filePath)
{
FileStream fs = new FileStream(filePath, FileMode.Open);
byte[] data = new byte[fs.Length];
fs.Read(data, 0, data.Length);
fs.Close();
return data;
}
}
Testing
Let's first run a quick test to confirm that AIUI works end to end. We need an audio clip asking a question (about the weather, for example); you can record one yourself or generate one with speech synthesis. I used Baidu's online TTS and downloaded the result directly.
https://ai.baidu.com/tech/speech/tts_online
The download is in MP3 format, so we still need to convert it into .pcm or .wav, which AIUI can recognize.
I used Cool Edit Pro 2.1 for this; you can find a copy yourself via Baidu.
Saving as "Windows PCM" format is all it takes. A programmatic alternative is sketched below.
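If you would rather convert in code, here is a minimal NAudio sketch as an untested alternative to Cool Edit; the Mp3ToPcm class name and file paths are hypothetical, and it assumes a recent NAudio build with the sample-provider APIs. It resamples to the 16 kHz mono PCM matching the SAMPLE_RATE used above.
using NAudio.Wave;
using NAudio.Wave.SampleProviders;

public static class Mp3ToPcm
{
    // Converts an MP3 file to a 16 kHz mono 16-bit WAV that AIUI accepts.
    public static void Convert(string mp3Path, string wavPath)
    {
        using (var reader = new Mp3FileReader(mp3Path))
        {
            ISampleProvider samples = reader.ToSampleProvider();
            // Mix down to mono if the TTS output is stereo.
            if (samples.WaveFormat.Channels == 2)
                samples = new StereoToMonoSampleProvider(samples);
            // Resample to 16 kHz to match the SAMPLE_RATE sent in X-Param.
            var resampled = new WdlResamplingSampleProvider(samples, 16000);
            WaveFileWriter.CreateWaveFile16(wavPath, resampled);
        }
    }
}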
Add the file to Unity, update the audio path in the script, and run.
The answer usually comes back in answer.text; a few skills return a URL instead, in which case it won't be there. I won't walk through the JSON parsing itself, but a minimal sketch follows.
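This sketch pulls answer.text out of the response, assuming Newtonsoft.Json (Json.NET) is available in the project; the field path ("data" > "sub" > "intent" > "answer" > "text") reflects the responses I saw and is not a guaranteed schema.
using Newtonsoft.Json.Linq;

public static class AiuiResult
{
    // Extracts answer.text from the AIUI response JSON, or null if the
    // skill returned something else (e.g. a media url).
    public static string GetAnswerText(string json)
    {
        JObject root = JObject.Parse(json);
        JArray data = root["data"] as JArray;
        if (data == null) return null;
        foreach (JToken item in data)
        {
            // "sub" distinguishes the result type: "iat" is the recognized
            // text, "nlp" carries the semantic result with the answer.
            if ((string)item["sub"] == "nlp")
                return (string)item["intent"]?["answer"]?["text"];
        }
        return null;
    }
}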
Optimization
iFlytek's speech synthesis is not particularly strong, at least the Windows online version; I haven't tried the others. Synthesis time grows with the length of the text: a passage of 100-plus characters takes roughly 5-6 seconds to process, which really hurts the user experience. A quick search turned up Microsoft's own C# speech synthesis library, Interop.SpeechLib. It synthesizes far faster than iFlytek and is very simple to use. It does need to run on its own thread, though, or Unity will appear to freeze; a minimal sketch is below.
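This assumes Interop.SpeechLib.dll has been copied into Assets/Plugins (Windows only); the LocalTts wrapper and SpeakAsync name are my own, not part of the library.
using System.Threading;
using SpeechLib; // Interop.SpeechLib.dll, the SAPI COM wrapper

public static class LocalTts
{
    // SAPI blocks the calling thread while it speaks, so run it on a
    // worker thread; calling it on Unity's main thread looks like a hang.
    public static void SpeakAsync(string text)
    {
        Thread t = new Thread(() =>
        {
            SpVoice voice = new SpVoice();
            voice.Speak(text, SpeechVoiceSpeakFlags.SVSFDefault);
        });
        t.IsBackground = true;
        t.Start();
    }
}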
MP3 to WAV
The AIUI skills for stories, Chinese classics, music, crosstalk, and so on all return URL links to MP3 files, and Unity's WWW and UnityWebRequest cannot load MP3. So we need to convert the MP3 into WAV or another usable format.
I used NAudio.dll here, referenced with using NAudio.Wave;
// Requires: using System.Collections; using UnityEngine.Networking; using NAudio.Wave;
/// <summary>
/// Loads an MP3 with UnityWebRequest and plays it
/// </summary>
/// <param name="url">MP3 link returned by the skill</param>
/// <param name="audio">target AudioSource</param>
/// <returns></returns>
public IEnumerator OnMP3LoadAndPlay(string url, AudioSource audio)
{
UnityWebRequest www = UnityWebRequest.Get(url);
yield return www.SendWebRequest();
if (www.isNetworkError || www.isHttpError)
{
Debug.LogError("Error: " + www.error);
yield break;
}
audio.clip = FromMp3Data(www.downloadHandler.data);
audio.Play();
}
/// <summary>
/// MP3 to WAV / AudioClip
/// </summary>
/// <param name="data">raw MP3 bytes</param>
/// <returns></returns>
AudioClip FromMp3Data(byte[] data)
{
// Load the MP3 bytes into a memory stream
MemoryStream mp3stream = new MemoryStream(data);
// Decode the MP3 stream to PCM
Mp3FileReader mp3audio = new Mp3FileReader(mp3stream);
WaveStream waveStream = WaveFormatConversionStream.CreatePcmStream(mp3audio);
// Parse the WAV data with the helper class below
WAV wav = new WAV(AudioMemStream(waveStream).ToArray());
// Note: only the left channel is used, so playback is mono
AudioClip audioClip = AudioClip.Create("testSound", wav.SampleCount, 1, wav.Frequency, false);
audioClip.SetData(wav.LeftChannel, 0);
return audioClip;
}
/// <summary>
/// Writes the decoded PCM stream into an in-memory WAV file
/// </summary>
/// <param name="waveStream">decoded PCM stream</param>
/// <returns></returns>
MemoryStream AudioMemStream(WaveStream waveStream)
{
MemoryStream outputStream = new MemoryStream();
using (WaveFileWriter waveFileWriter = new WaveFileWriter(outputStream, waveStream.WaveFormat))
{
byte[] bytes = new byte[waveStream.Length];
waveStream.Position = 0;
waveStream.Read(bytes, 0, Convert.ToInt32(waveStream.Length));
waveFileWriter.Write(bytes, 0, bytes.Length);
waveFileWriter.Flush();
}
return outputStream;
}
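For a skill that returned a media URL, playback is then just a coroutine call; url here stands in for the link parsed out of the AIUI response.
// url: the MP3 link parsed from the AIUI result
StartCoroutine(OnMP3LoadAndPlay(url, GetComponent<AudioSource>()));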
WAV.cs
/* From http://answers.unity3d.com/questions/737002/wav-byte-to-audioclip.html */
public class WAV
{
// convert two bytes to one float in the range -1 to 1
static float bytesToFloat(byte firstByte, byte secondByte)
{
// convert two bytes to one short (little endian)
short s = (short)((secondByte << 8) | firstByte);
// convert to range from -1 to (just below) 1
return s / 32768.0F;
}
static int bytesToInt(byte[] bytes, int offset = 0)
{
int value = 0;
for (int i = 0; i < 4; i++)
{
value |= ((int)bytes[offset + i]) << (i * 8);
}
return value;
}
// properties
public float[] LeftChannel { get; internal set; }
public float[] RightChannel { get; internal set; }
public int ChannelCount { get; internal set; }
public int SampleCount { get; internal set; }
public int Frequency { get; internal set; }
public WAV(byte[] wav)
{
// Determine if mono or stereo
ChannelCount = wav[22]; // Forget byte 23 as 99.999% of WAVs are 1 or 2 channels
// Get the frequency
Frequency = bytesToInt(wav, 24);
// Get past all the other sub chunks to get to the data subchunk:
int pos = 12; // First Subchunk ID from 12 to 16
// Keep iterating until we find the data chunk (i.e. 64 61 74 61 ...... (i.e. 100 97 116 97 in decimal))
while (!(wav[pos] == 100 && wav[pos + 1] == 97 && wav[pos + 2] == 116 && wav[pos + 3] == 97))
{
pos += 4;
int chunkSize = wav[pos] + wav[pos + 1] * 256 + wav[pos + 2] * 65536 + wav[pos + 3] * 16777216;
pos += 4 + chunkSize;
}
pos += 8;
// Pos is now positioned to start of actual sound data.
SampleCount = (wav.Length - pos) / 2; // 2 bytes per sample (16 bit sound mono)
if (ChannelCount == 2) SampleCount /= 2; // 4 bytes per sample (16 bit stereo)
// Allocate memory (right will be null if only mono sound)
LeftChannel = new float[SampleCount];
if (ChannelCount == 2) RightChannel = new float[SampleCount];
else RightChannel = null;
// Write to double array/s:
int i = 0;
int maxInput = wav.Length - (RightChannel == null ? 1 : 3);
// while (pos < wav.Length)
while ((i < SampleCount) && (pos < maxInput))
{
LeftChannel[i] = bytesToFloat(wav[pos], wav[pos + 1]);
pos += 2;
if (ChannelCount == 2)
{
RightChannel[i] = bytesToFloat(wav[pos], wav[pos + 1]);
pos += 2;
}
i++;
}
}
public override string ToString()
{
return string.Format("[WAV: LeftChannel={0}, RightChannel={1}, ChannelCount={2}, SampleCount={3}, Frequency={4}]", LeftChannel, RightChannel, ChannelCount, SampleCount, Frequency);
}
}