Unity中使用百度中文语音识别功能
下面是API类
Asr.cs
using System;
using System.Collections;
using System.Collections.Generic;
using UnityEngine;

/// <summary>
/// Shape of the JSON returned by the token endpoint. Only the field that
/// <see cref="Asr"/> reads is declared; other fields in the response are ignored.
/// </summary>
[Serializable]
class TokenResponse
{
    public string access_token = null;
}

/// <summary>
/// Thin client for the Baidu speech recognition REST endpoints used below:
/// fetches an OAuth access token and posts raw 16 kHz PCM audio for recognition.
/// Both network operations are exposed as Unity coroutines.
/// </summary>
public class Asr
{
    private const string TokenUrlFormat =
        "https://openapi.baidu.com/oauth/2.0/token?grant_type=client_credentials&client_id={0}&client_secret={1}";

    // NOTE(review): this endpoint is plain http, so the token travels in clear text.
    // Confirm whether the service accepts https before switching the scheme.
    private const string RecognizeUrlFormat =
        "http://vop.baidu.com/server_api?lan=zh&cuid={0}&token={1}";

    private const int BytesPerSample = 2;

    public string SecretKey { get; private set; }
    public string APIKey { get; private set; }

    /// <summary>
    /// Access token obtained by <see cref="GetAccessToken"/>; null until that coroutine succeeds.
    /// </summary>
    public string Token { get; private set; }

    /// <param name="apiKey">Value sent as <c>client_id</c> to the token endpoint.</param>
    /// <param name="secretKey">Value sent as <c>client_secret</c> to the token endpoint.</param>
    public Asr(string apiKey, string secretKey)
    {
        APIKey = apiKey;
        SecretKey = secretKey;
    }

    /// <summary>
    /// Coroutine that requests an access token and stores it in <see cref="Token"/>.
    /// On any failure the error is logged and <see cref="Token"/> is left unchanged.
    /// </summary>
    public IEnumerator GetAccessToken()
    {
        // Escape the credentials so reserved characters cannot corrupt the query string.
        var uri = string.Format(
            TokenUrlFormat,
            Uri.EscapeDataString(APIKey ?? string.Empty),
            Uri.EscapeDataString(SecretKey ?? string.Empty));

        using (var www = new WWW(uri))
        {
            yield return www;

            if (!string.IsNullOrEmpty(www.error))
            {
                Debug.LogError(www.error);
                yield break;
            }

            var token = ParseAccessToken(www.text);
            if (string.IsNullOrEmpty(token))
            {
                Debug.LogError("Token response did not contain an access_token: " + www.text);
                yield break;
            }

            Token = token;
            Debug.Log("Get access_token successfully");
        }
    }

    /// <summary>
    /// Coroutine that posts PCM audio to the recognition endpoint and hands the raw
    /// response text to <paramref name="callback"/>.
    /// </summary>
    /// <param name="data">Audio bytes, sent with content type <c>audio/pcm;rate=16000</c>.</param>
    /// <param name="callback">
    /// Invoked with the unparsed response body when the request succeeds. It is NOT invoked
    /// when the request fails or is rejected up front; the coroutine just logs and ends.
    /// May be null.
    /// </param>
    public IEnumerator Recognize(byte[] data, Action<string> callback)
    {
        if (data == null || data.Length == 0)
        {
            Debug.LogError("Recognize: no audio data to send");
            yield break;
        }

        // Without a token the request can only fail; GetAccessToken may still be in flight.
        if (string.IsNullOrEmpty(Token))
        {
            Debug.LogError("Recognize: access token is not available yet; run GetAccessToken first");
            yield break;
        }

        var uri = string.Format(
            RecognizeUrlFormat,
            Uri.EscapeDataString(SystemInfo.deviceUniqueIdentifier ?? string.Empty),
            Uri.EscapeDataString(Token));

        var headers = new Dictionary<string, string>
        {
            { "Content-Type", "audio/pcm;rate=16000" }
        };

        using (var www = new WWW(uri, data, headers))
        {
            yield return www;

            if (!string.IsNullOrEmpty(www.error))
            {
                Debug.LogError(www.error);
                yield break;
            }

            var text = www.text;
            Debug.Log(text);
            if (callback != null)
            {
                callback(text);
            }
        }
    }

    /// <summary>
    /// Converts a Unity AudioClip into interleaved 16-bit PCM bytes (low byte first).
    /// </summary>
    /// <param name="clip">Clip whose entire sample buffer is converted.</param>
    /// <returns>Two bytes per sample, <c>clip.samples * clip.channels</c> samples in total.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="clip"/> is null.</exception>
    public static byte[] ConvertAudioClipToPCM16(AudioClip clip)
    {
        if (clip == null)
        {
            throw new ArgumentNullException("clip");
        }

        var samples = new float[clip.samples * clip.channels];
        clip.GetData(samples, 0);
        return ConvertSamplesToPCM16(samples);
    }

    // Scales float samples to 16-bit integers and serializes them low byte first.
    private static byte[] ConvertSamplesToPCM16(float[] samples)
    {
        var bytes = new byte[samples.Length * BytesPerSample];

        for (var i = 0; i < samples.Length; i++)
        {
            var sample = samples[i];
            if (float.IsNaN(sample))
            {
                sample = 0f;
            }

            // Clamp first: casting an out-of-range float to short gives an unspecified value.
            var value = (short)(Mathf.Clamp(sample, -1f, 1f) * short.MaxValue);

            // Written byte by byte so the output does not depend on host byte order.
            // NOTE(review): assumes the service expects little-endian PCM - confirm.
            bytes[i * BytesPerSample] = (byte)(value & 0xFF);
            bytes[i * BytesPerSample + 1] = (byte)((value >> 8) & 0xFF);
        }

        return bytes;
    }

    // Extracts access_token from the token endpoint's JSON; returns null when absent or unparsable.
    private static string ParseAccessToken(string json)
    {
        if (string.IsNullOrEmpty(json))
        {
            return null;
        }

        try
        {
            var response = JsonUtility.FromJson<TokenResponse>(json);
            return response != null ? response.access_token : null;
        }
        catch (ArgumentException e)
        {
            // JsonUtility reports malformed JSON with an ArgumentException.
            Debug.LogError("Failed to parse token response: " + e.Message);
            return null;
        }
    }
}
下面是测试类
main.cs
using UnityEngine;
using System.Collections;
using UnityEngine.UI;

/// <summary>
/// Demo behaviour: records from the default microphone between the Start and Stop
/// buttons, sends the audio to <see cref="Asr"/> and shows the response text.
/// </summary>
public class main : MonoBehaviour
{
    private const int MaxRecordSeconds = 30;
    private const int SampleRate = 16000;
    private const int BytesPerSample = 2;

    public string APIKey = "";
    public string SecretKey = "";
    public Button StartButton;
    public Button StopButton;
    public Text DescriptionText;

    // Assigned by Microphone.Start; deliberately not created in a field initializer.
    private AudioClip _clipRecord;
    private Asr _asr;

    void Start()
    {
        _asr = new Asr(APIKey, SecretKey);
        StartCoroutine(_asr.GetAccessToken());

        StartButton.gameObject.SetActive(true);
        StopButton.gameObject.SetActive(false);
        DescriptionText.text = "";

        StartButton.onClick.AddListener(OnClickStartButton);
        StopButton.onClick.AddListener(OnClickStopButton);
    }

    void OnDestroy()
    {
        // Remove the listeners added in Start so the buttons do not keep calling a destroyed component.
        if (StartButton != null)
        {
            StartButton.onClick.RemoveListener(OnClickStartButton);
        }

        if (StopButton != null)
        {
            StopButton.onClick.RemoveListener(OnClickStopButton);
        }
    }

    // Starts a non-looping recording on the default device and switches the UI to "listening".
    private void OnClickStartButton()
    {
        var clip = Microphone.Start(null, false, MaxRecordSeconds, SampleRate);
        if (clip == null)
        {
            // Microphone.Start returns null when recording could not be started.
            DescriptionText.text = "Microphone is not available";
            return;
        }

        _clipRecord = clip;
        StartButton.gameObject.SetActive(false);
        StopButton.gameObject.SetActive(true);
        DescriptionText.text = "Listening...";
    }

    // Stops recording, converts what was captured to PCM and kicks off recognition.
    private void OnClickStopButton()
    {
        StartButton.gameObject.SetActive(false);
        StopButton.gameObject.SetActive(false);
        DescriptionText.text = "Recognizing...";

        // The write position must be read before Microphone.End.
        var wasRecording = Microphone.IsRecording(null);
        var recordedSamples = wasRecording ? Microphone.GetPosition(null) : 0;

        Microphone.End(null);
        Debug.Log("end record");

        if (_clipRecord == null || (wasRecording && recordedSamples <= 0))
        {
            DescriptionText.text = "No audio captured";
            StartButton.gameObject.SetActive(true);
            return;
        }

        var data = Asr.ConvertAudioClipToPCM16(_clipRecord);
        if (wasRecording)
        {
            // The clip buffer always spans MaxRecordSeconds; keep only the part actually recorded.
            data = TrimPcm(data, recordedSamples * _clipRecord.channels * BytesPerSample);
        }

        StartCoroutine(RecognizeAndRestoreUi(data));
    }

    // Runs recognition and guarantees the Start button comes back even when no result arrives.
    private IEnumerator RecognizeAndRestoreUi(byte[] data)
    {
        var gotResult = false;

        yield return StartCoroutine(_asr.Recognize(data, s =>
        {
            gotResult = true;
            DescriptionText.text = s;
            StartButton.gameObject.SetActive(true);
        }));

        if (!gotResult)
        {
            // The callback did not run, so recognition produced no result.
            DescriptionText.text = "Recognition failed";
            StartButton.gameObject.SetActive(true);
        }
    }

    // Returns the first byteCount bytes of pcm, or pcm itself when no trimming is needed.
    private static byte[] TrimPcm(byte[] pcm, int byteCount)
    {
        if (byteCount <= 0 || byteCount >= pcm.Length)
        {
            return pcm;
        }

        var trimmed = new byte[byteCount];
        System.Buffer.BlockCopy(pcm, 0, trimmed, 0, byteCount);
        return trimmed;
    }
}
资源来源于关尔Manic的技术园
http://blog.csdn.net/zhenghongzhi6/article/details/78688571#comments