百度AI开放平台:语音识别、语音合成以及短文本相似度
"""Baidu AIP speech-synthesis demo: turn one sentence into ``audio.mp3``."""
from aip import AipSpeech

# Credentials issued for the application.
# NOTE(review): real keys should not be committed; consider loading them
# from the environment instead.
APP_ID = "15420964"  # your App ID
API_KEY = "6bPrLnkguN5ltxvfxRYP96Hk"  # your Api Key
SECRET_KEY = "ckSFGccmaGr0b2EPGE3dueb1PkfW5IsW"  # your Secret Key

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

result = client.synthesis(
    "快乐的池塘里面有只小青蛙,呱呱呱儿 ",
    "zh",
    1,
    {
        "vol": 5,  # volume
        "spd": 4,  # speed
        "pit": 7,  # pitch
        "per": 1,  # voice (0, 1, 3, 4)
    },
)

# A dict result is treated as an error description; anything else is the
# audio payload.  Only the error is printed -- dumping the binary audio to
# the console is just noise.
if isinstance(result, dict):
    print(result)
else:
    with open("audio.mp3", "wb") as f:
        f.write(result)
语音识别
"""Baidu AIP speech-recognition demo: transcribe a local audio file."""
from aip import AipSpeech
import os

# Your APPID / AK / SK.
# NOTE(review): real keys should not be committed; consider loading them
# from the environment instead.
APP_ID = "15420964"
API_KEY = "6bPrLnkguN5ltxvfxRYP96Hk"
SECRET_KEY = "ckSFGccmaGr0b2EPGE3dueb1PkfW5IsW"

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)


def get_file_content(filePath):
    """Return the raw bytes of the file at *filePath*."""
    with open(filePath, 'rb') as fp:
        return fp.read()


# Recognise a local file.
# NOTE(review): the payload is declared as 16 kHz 'pcm' but 'wb.m4a' is sent
# without conversion -- confirm the service accepts it, or convert first.
res = client.asr(get_file_content('wb.m4a'), 'pcm', 16000, {
    'dev_pid': 1536,
})

# An error response carries no "result" list; indexing it blindly would
# raise TypeError, so show the whole response instead.
candidates = res.get("result")
if candidates:
    print(candidates[0])
else:
    print(res)
学说话
"""Parrot demo: recognise the speech in a recording and speak it back."""
from aip import AipSpeech
import os
import subprocess
import time

# Your APPID / AK / SK.
# NOTE(review): real keys should not be committed; consider loading them
# from the environment instead.
APP_ID = "15420964"
API_KEY = "6bPrLnkguN5ltxvfxRYP96Hk"
SECRET_KEY = "ckSFGccmaGr0b2EPGE3dueb1PkfW5IsW"

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)


# ---------------------------------------------------------------- recognition
def get_file_content(filePath):
    """Convert *filePath* to 16 kHz mono 16-bit PCM and return the bytes.

    The converted audio is written next to the input as ``<filePath>.pcm``.

    Raises:
        subprocess.CalledProcessError: if ffmpeg exits with a non-zero status.
    """
    pcm_path = f"{filePath}.pcm"
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters from breaking, or being injected into, the command.
    subprocess.run(
        [
            "ffmpeg", "-y", "-i", filePath,
            "-acodec", "pcm_s16le", "-f", "s16le",
            "-ac", "1", "-ar", "16000",
            pcm_path,
        ],
        check=True,
    )
    with open(pcm_path, 'rb') as fp:
        return fp.read()


def audio2text(filename):
    """Return the first recognition candidate for the audio file *filename*.

    Raises:
        RuntimeError: if the response contains no "result" entries.
    """
    res = client.asr(get_file_content(filename), 'pcm', 16000, {
        'dev_pid': 1536,
    })
    candidates = res.get("result")
    if not candidates:
        raise RuntimeError(f"speech recognition failed: {res}")
    return candidates[0]


# ------------------------------------------------------------------ synthesis
def text2audio(getedtext):
    """Synthesize *getedtext* into a timestamp-named MP3 and return its name.

    Raises:
        RuntimeError: if the service answers with an error dict instead of
            audio data, so callers never receive the name of a missing file.
    """
    filename = f"{time.time()}.mp3"
    result = client.synthesis(getedtext, "zh", 1, {
        "vol": 5,
        "spd": 4,
        "pit": 7,
        "per": 0,
    })
    if isinstance(result, dict):
        raise RuntimeError(f"speech synthesis failed: {result}")
    with open(filename, "wb") as f:
        f.write(result)
    return filename


getedtext = audio2text("wb.m4a")
res = text2audio(getedtext)
# Hands the file name to the shell to play it.
# NOTE(review): presumably relies on the OS opening .mp3 files by association.
os.system(res)
语音回答问题
"""Voice demo: recognise a recording, synthesize the text, play the result."""
from aip import AipSpeech
import os
import time

# Your APPID / AK / SK.
# NOTE(review): real keys should not be committed; consider loading them
# from the environment instead.
APP_ID = "15420964"
API_KEY = "6bPrLnkguN5ltxvfxRYP96Hk"
SECRET_KEY = "ckSFGccmaGr0b2EPGE3dueb1PkfW5IsW"

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)


# ---------------------------------------------------------------- recognition
def get_file_content(filePath):
    """Return the raw bytes of the file at *filePath*."""
    with open(filePath, 'rb') as fp:
        return fp.read()


def audio2text(filepath):
    """Return the first recognition candidate for the audio file *filepath*.

    Raises:
        RuntimeError: if the response contains no "result" entries.
    """
    # NOTE(review): the payload is declared as 16 kHz 'pcm' but the file is
    # sent without conversion -- confirm, or convert it first.
    res = client.asr(get_file_content(filepath), 'pcm', 16000, {
        'dev_pid': 1536,
    })
    candidates = res.get("result")
    if not candidates:
        raise RuntimeError(f"speech recognition failed: {res}")
    return candidates[0]


# Output file shared by text2audio() and the playback step below.
filename = f"{time.time()}.mp3"


# ------------------------------------------------------------------ synthesis
def text2audio(getedtext):
    """Synthesize *getedtext* and write the audio to the module-level *filename*.

    Raises:
        RuntimeError: if the service answers with an error dict instead of
            audio data, so playback is never attempted on a missing file.
    """
    result = client.synthesis(getedtext, "zh", 1, {
        "vol": 5,
        "spd": 4,
        "pit": 7,
        "per": 1,
    })
    if isinstance(result, dict):
        raise RuntimeError(f"speech synthesis failed: {result}")
    with open(filename, "wb") as f:
        f.write(result)


getedtext = audio2text("wb.m4a")
text2audio(getedtext)
# Hands the file name to the shell to play it.
# NOTE(review): presumably relies on the OS opening .mp3 files by association.
os.system(filename)
短文本相似度
"""Q&A demo: recognise a question, answer it locally or via Tuling, speak it."""
from aip import AipSpeech, AipNlp
import os
import subprocess
import time

# Your APPID / AK / SK.
# NOTE(review): real keys should not be committed; consider loading them
# from the environment instead.
APP_ID = "15420964"
API_KEY = "6bPrLnkguN5ltxvfxRYP96Hk"
SECRET_KEY = "ckSFGccmaGr0b2EPGE3dueb1PkfW5IsW"

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
nlp = AipNlp(APP_ID, API_KEY, SECRET_KEY)


# ---------------------------------------------------------------- recognition
def get_file_content(filePath):
    """Convert *filePath* to 16 kHz mono 16-bit PCM and return the bytes.

    The converted audio is written next to the input as ``<filePath>.pcm``.

    Raises:
        subprocess.CalledProcessError: if ffmpeg exits with a non-zero status.
    """
    pcm_path = f"{filePath}.pcm"
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters from breaking, or being injected into, the command.
    subprocess.run(
        [
            "ffmpeg", "-y", "-i", filePath,
            "-acodec", "pcm_s16le", "-f", "s16le",
            "-ac", "1", "-ar", "16000",
            pcm_path,
        ],
        check=True,
    )
    with open(pcm_path, 'rb') as fp:
        return fp.read()


def audio2text(filename):
    """Return the first recognition candidate for the audio file *filename*.

    Raises:
        RuntimeError: if the response contains no "result" entries.
    """
    res = client.asr(get_file_content(filename), 'pcm', 16000, {
        'dev_pid': 1536,
    })
    candidates = res.get("result")
    if not candidates:
        raise RuntimeError(f"speech recognition failed: {res}")
    return candidates[0]


# ------------------------------------------------------------------ synthesis
def text2audio(getedtext):
    """Synthesize *getedtext* into a timestamp-named MP3 and return its name.

    Raises:
        RuntimeError: if the service answers with an error dict instead of
            audio data, so callers never receive the name of a missing file.
    """
    filename = f"{time.time()}.mp3"
    result = client.synthesis(getedtext, "zh", 1, {
        "vol": 5,
        "spd": 4,
        "pit": 7,
        "per": 0,
    })
    if isinstance(result, dict):
        raise RuntimeError(f"speech synthesis failed: {result}")
    with open(filename, "wb") as f:
        f.write(result)
    return filename


# --------------------------------------------------------------------- Tuling
def to_tuling(text):
    """Send *text* to the Tuling chatbot API and return its text answer.

    Raises:
        RuntimeError: if the response contains no "results" entries.
    """
    import requests

    args = {
        "reqType": 0,
        "perception": {
            "inputText": {
                "text": text
            }
        },
        "userInfo": {
            "apiKey": "e963f65c4c7a466a80e5aaa3510da2fa",
            "userId": "1111"
        }
    }
    url = "http://openapi.tuling123.com/openapi/api/v2"
    # A timeout keeps a stalled connection from hanging the script forever.
    res = requests.post(url, json=args, timeout=10)
    results = res.json().get("results")
    if not results:
        raise RuntimeError(f"Tuling returned no results: {res.text}")
    text = results[0].get("values").get("text")
    print("图灵答案", text)
    return text


# ----------------------------------------------------------------------- main
getedtext = audio2text("wb.m4a")
# A missing score (error response) is treated as "not similar" instead of
# crashing on a None >= float comparison.
score = nlp.simnet("你叫什么名字", getedtext).get("score")
if score is not None and score >= 0.68:
    getedtext = "我才不告诉你呢,你个糟老头子坏得很"
else:
    getedtext = to_tuling(getedtext)
res = text2audio(getedtext)
# Hands the file name to the shell to play it.
# NOTE(review): presumably relies on the OS opening .mp3 files by association.
os.system(res)
对话机器人玩具
应用结构:templates/index.html(前端页面)、static/jQuery3.1.1.js、app.py(Flask 服务)、baidu_ai.py(百度语音与图灵接口封装)
在index.html中
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
<audio controls autoplay id="player"></audio>
<p>
    <button onclick="start_reco()" style="background-color: yellow">录制语音指令</button>
</p>
<p>
    <button onclick="stop_reco_audio()" style="background-color: blue">发送语音指令</button>
</p>

<!-- Scripts are kept inside <body>; markup after </body> is not valid HTML. -->
<!--<script type="application/javascript" src="/static/Recorder.js"></script>-->
<script type="application/javascript" src="https://cdn.bootcss.com/recorderjs/0.1.0/recorder.js"></script>
<script type="text/javascript" src="/static/jQuery3.1.1.js"></script>
<script type="text/javascript">
    // Recorder instance; stays null until microphone access has been granted.
    var reco = null;
    var AudioContextClass = window.AudioContext || window.webkitAudioContext;
    var audio_context = new AudioContextClass();

    function on_media_error(err) {
        console.log(err);
    }

    // Wrap the microphone stream in a Recorder once the user grants access.
    function create_stream(user_media) {
        var stream_input = audio_context.createMediaStreamSource(user_media);
        reco = new Recorder(stream_input);
    }

    // Prefer the promise-based API; fall back to the prefixed legacy
    // callbacks on browsers that only provide those.
    if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
        navigator.mediaDevices.getUserMedia({audio: true})
            .then(create_stream)
            .catch(on_media_error);
    } else {
        navigator.getUserMedia = (navigator.getUserMedia ||
            navigator.webkitGetUserMedia ||
            navigator.mozGetUserMedia ||
            navigator.msGetUserMedia);
        if (navigator.getUserMedia) {
            navigator.getUserMedia({audio: true}, create_stream, on_media_error);
        } else {
            on_media_error(new Error("getUserMedia is not supported by this browser"));
        }
    }

    // Start capturing audio.
    function start_reco() {
        if (!reco) {
            console.log("recorder is not ready: microphone access has not been granted yet");
            return;
        }
        // A context created before any user gesture may start suspended.
        if (audio_context.state === "suspended") {
            audio_context.resume();
        }
        reco.record();
    }

    // Stop capturing, upload the recording, then reset the buffer.
    function stop_reco_audio() {
        if (!reco) {
            console.log("recorder is not ready: nothing to send");
            return;
        }
        reco.stop();
        send_audio();
        reco.clear();
    }

    // Export the recording as WAV, post it to the backend and play the reply.
    function send_audio() {
        reco.exportWAV(function (wav_file) {
            var formdata = new FormData();
            formdata.append("record", wav_file);
            console.log(formdata);
            $.ajax({
                // Relative URLs: the page talks to whichever host served it
                // instead of one hard-coded LAN address.
                url: "/ai",
                type: 'post',
                processData: false,
                contentType: false,
                data: formdata,
                dataType: 'json',
                success: function (data) {
                    document.getElementById("player").src = "/get_audio/" + data.filename
                },
                error: function (xhr) {
                    console.log("request failed", xhr.status);
                }
            });
        })
    }
</script>
</body>
</html>
在app.py中
"""Flask backend for the voice-robot toy.

Receives a recorded question, has it recognised, fetches an answer,
has the answer synthesized, and serves the resulting audio file.
"""
import os
from flask import Flask, render_template, request, jsonify, send_file, abort
from uuid import uuid4
import baidu_ai

app = Flask(__name__)


@app.route("/")
def index():
    """Serve the recorder page."""
    return render_template("index.html")


@app.route("/ai", methods=["POST"])
def ai():
    """Answer an uploaded recording.

    Expects a multipart form field named "record" holding the audio.
    Returns JSON ``{"filename": <reply audio file>}``, or a 400 response
    when the upload is missing.
    """
    # 1. Save the uploaded recording under a unique name.
    audio = request.files.get("record")
    if audio is None:
        return jsonify({"error": "missing 'record' file"}), 400
    filename = f"{uuid4()}.wav"
    audio.save(filename)

    # 2. Convert the recording and have it recognised.
    q_text = baidu_ai.audio2text(filename)
    print(q_text)

    # 3. Get an answer for the recognised question.
    a_text = baidu_ai.to_tuling(q_text)
    print(a_text)

    # 4. Synthesize the answer into an audio file.
    a_file = baidu_ai.text2audio(a_text)
    print(a_file)

    # 5. Tell the front end which file to play.
    return jsonify({"filename": a_file})


@app.route("/get_audio/<filename>")
def get_audio(filename):
    """Serve a synthesized reply.

    Only bare ``*.mp3`` names are accepted; anything else gets a 404 so the
    route cannot be used to download other files (source code, API keys).
    """
    is_bare_name = os.path.basename(filename) == filename and "\\" not in filename
    if not is_bare_name or not filename.endswith(".mp3"):
        abort(404)
    print(filename)
    return send_file(filename)


if __name__ == '__main__':
    # NOTE(review): debug=True on 0.0.0.0 exposes the interactive debugger to
    # the whole network -- acceptable for a local demo only.
    app.run("0.0.0.0", 9527, debug=True)
在baidu_ai.py中
"""Helpers wrapping Baidu speech recognition/synthesis and the Tuling chatbot."""
from aip import AipSpeech, AipNlp
import time, os
import subprocess

# Your APPID / AK / SK.
# NOTE(review): real keys should not be committed; consider loading them
# from the environment instead.
APP_ID = "15420964"
API_KEY = "6bPrLnkguN5ltxvfxRYP96Hk"
SECRET_KEY = "ckSFGccmaGr0b2EPGE3dueb1PkfW5IsW"

nlp = AipNlp(APP_ID, API_KEY, SECRET_KEY)
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)


def get_file_content(filePath):
    """Convert *filePath* to 16 kHz mono 16-bit PCM and return the bytes.

    The converted audio is written next to the input as ``<filePath>.pcm``.

    Raises:
        subprocess.CalledProcessError: if ffmpeg exits with a non-zero status.
    """
    pcm_path = f"{filePath}.pcm"
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters from breaking, or being injected into, the command.
    subprocess.run(
        [
            "ffmpeg", "-y", "-i", filePath,
            "-acodec", "pcm_s16le", "-f", "s16le",
            "-ac", "1", "-ar", "16000",
            pcm_path,
        ],
        check=True,
    )
    with open(pcm_path, 'rb') as fp:
        return fp.read()


def audio2text(filepath):
    """Return the first recognition candidate for the audio file *filepath*.

    Raises:
        RuntimeError: if the response contains no "result" entries.
    """
    res = client.asr(get_file_content(filepath), 'pcm', 16000, {
        'dev_pid': 1536,
    })
    print('----------res', res)
    candidates = res.get("result")
    if not candidates:
        raise RuntimeError(f"speech recognition failed: {res}")
    return candidates[0]


def text2audio(text):
    """Synthesize *text* into a timestamp-named MP3 and return its name.

    Raises:
        RuntimeError: if the service answers with an error dict instead of
            audio data, so callers never receive the name of a missing file.
    """
    filename = f"{time.time()}.mp3"
    result = client.synthesis(text, 'zh', 1, {
        'vol': 5,
        "spd": 3,
        "pit": 7,
        "per": 4,
    })
    # Success yields the binary audio; failure yields a dict with error info.
    if isinstance(result, dict):
        raise RuntimeError(f"speech synthesis failed: {result}")
    with open(filename, 'wb') as f:
        f.write(result)
    return filename


def to_tuling(text):
    """Send *text* to the Tuling chatbot API and return its text answer.

    Raises:
        RuntimeError: if the response contains no "results" entries.
    """
    import requests

    args = {
        "reqType": 0,
        "perception": {
            "inputText": {
                "text": text
            }
        },
        "userInfo": {
            "apiKey": "e963f65c4c7a466a80e5aaa3510da2fa",
            "userId": "1111"
        }
    }
    url = "http://openapi.tuling123.com/openapi/api/v2"
    # A timeout keeps a stalled connection from blocking the caller forever.
    res = requests.post(url, json=args, timeout=10)
    results = res.json().get("results")
    if not results:
        raise RuntimeError(f"Tuling returned no results: {res.text}")
    text = results[0].get("values").get("text")
    print("图灵答案", text)
    return text
改变世界,改变自己!