Artificial Intelligence - Speech Recognition and Automatic Voice Replies in the Firefox Browser
I. Required Tools
- Firefox browser (download and install): Firefox can capture microphone audio through navigator.getUserMedia without any extra plugins.
- Recorder.js: an easy-to-use audio recorder built around Matt Diamond's Recorder.js.
- MongoDB (as the database), accessed through pymongo; any other database would work just as well.
- jQuery
- The Flask framework

A quick check that these dependencies are in place is sketched right after this list.
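Before moving on to the code, it helps to confirm that the Python packages behind these tools (Flask, flask-cors, pymongo, baidu-aip which provides the aip module, and requests) are installed, and that the ffmpeg binary used later for audio conversion is on the PATH. A minimal smoke-test sketch; the package list is simply inferred from the imports used further down in this article:

import shutil
from importlib import import_module

# Packages imported by the code in section II (install the missing ones with pip if this raises ImportError)
for mod in ("flask", "flask_cors", "pymongo", "aip", "requests"):
    import_module(mod)

# adiou.py shells out to ffmpeg to convert the uploaded wav into pcm
assert shutil.which("ffmpeg"), "ffmpeg must be on the PATH"
print("environment looks OK")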
II. Enough Talk, Here's the Code
1. index.html
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>我是玩具</title>
</head>
<body>
    <p><audio id="player" controls autoplay></audio></p>
    <button onclick="start_reco()">录音</button>
    <button onclick="stop_reco()">发送语音</button>
    <div id="content"></div>
</body>
<script type="text/javascript" src="/static/Recorder.js"></script>
<script type="text/javascript" src="/static/jquery-3.3.1.min.js"></script>
<script type="text/javascript">
    var serv = "http://192.168.11.206:9527";
    var reco = null;
    var audio_context = new AudioContext();  // audio context object

    navigator.getUserMedia = (navigator.getUserMedia ||
        navigator.webkitGetUserMedia ||
        navigator.mozGetUserMedia ||
        navigator.msGetUserMedia);
    navigator.getUserMedia({audio: true}, create_stream, function (err) {
        console.log(err);
    });

    // Wrap the microphone stream in a Recorder instance
    function create_stream(user_media) {
        var stream_input = audio_context.createMediaStreamSource(user_media);
        reco = new Recorder(stream_input);
    }

    function start_reco() {
        reco.record();
    }

    // Stop recording, export the buffer as a WAV blob and POST it to /upload
    function stop_reco() {
        reco.stop();
        reco.exportWAV(function (wav_file) {
            console.log(wav_file);
            var formdata = new FormData();      // form data {key: value}
            formdata.append("reco", wav_file);  // like <input type="file" name="reco">
            formdata.append("key", "value");    // like <input type="text" name="key"> with value "value"
            $.ajax({
                url: serv + "/upload",
                type: 'post',
                processData: false,
                contentType: false,
                data: formdata,
                dataType: 'json',
                success: function (data) {
                    console.log(data);
                    if (data.code == 0) {
                        // Play the synthesized reply and show its text
                        document.getElementById("player").src = serv + "/get_file/" + data.filename;
                        document.getElementById("content").innerText = data.content;
                    }
                }
            });
        });
        reco.clear();
    }
</script>
</html>
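The page does all of its work through the /upload and /get_file endpoints defined in app.py below, so the backend can also be exercised without a browser. A minimal sketch using requests; the server address comes from the code above, while the local file name test.wav is just an assumption:

import requests

serv = "http://192.168.11.206:9527"

# Post a local recording the same way the page does (multipart field "reco")
with open("test.wav", "rb") as f:
    resp = requests.post(serv + "/upload", files={"reco": ("test.wav", f, "audio/wav")})
data = resp.json()
print(data["code"], data["content"], data["filename"])

# Fetch the synthesized reply, just like the <audio> element does
audio = requests.get(f"{serv}/get_file/{data['filename']}")
with open("reply.mp3", "wb") as f:
    f.write(audio.content)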
2. adiou.py (wrappers around the Baidu AI and Turing robot APIs)
from aip import AipSpeech
from aip import AipNlp
import os
import requests
from uuid import uuid4

""" Your APPID, AK (API key) and SK (secret key) """
APP_ID = '15837844'
API_KEY = '411VNGbuZVbDNZU78LqTzfsV'
SECRET_KEY = '84AnwR2NARGMqnC6WFnzqQL9WWdWh5bW'

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)    # Baidu speech client (ASR + TTS)
nlp_client = AipNlp(APP_ID, API_KEY, SECRET_KEY)   # Baidu NLP client (short-text similarity)


def get_file_content(filePath):
    # Convert the uploaded wav to 16 kHz / 16 bit / mono pcm, the format Baidu ASR expects
    os.system(f"ffmpeg -y -i {filePath} -acodec pcm_s16le -f s16le -ac 1 -ar 16000 {filePath}.pcm")
    with open(f"{filePath}.pcm", 'rb') as fp:
        return fp.read()


def audio2text(filePath):
    # Speech recognition: dev_pid 1536 is Mandarin with simple English
    res = client.asr(get_file_content(filePath), 'pcm', 16000, {
        'dev_pid': 1536,
    })
    text = res.get("result")[0]
    print(text)
    return text


def to_tuling(text, uid):
    # Turing robot v2 API: send the recognized text and return the bot's reply
    data = {
        "perception": {
            "inputText": {
                "text": "北京"
            }
        },
        "userInfo": {
            "apiKey": "a4c4a668c9f94d0c928544f95a3c44fb",
            "userId": "123"
        }
    }
    data["perception"]["inputText"]["text"] = text
    data["userInfo"]["userId"] = uid
    res = requests.post("http://openapi.tuling123.com/openapi/api/v2", json=data)
    # print(res.content)
    res_json = res.json()
    text = res_json.get("results")[0].get("values").get("text")
    print(text)
    return text


def my_nlp(text):
    # Answer a couple of fixed questions locally (simnet short-text similarity),
    # fall back to the Turing robot for everything else
    if nlp_client.simnet(text, "你叫什么名字").get("score") >= 0.75:
        A = "我叫银王八"
        return A
    if nlp_client.simnet(text, "你今年几岁了").get("score") >= 0.75:
        A = "我今年999岁了"
        return A
    A = to_tuling(text, "open123")
    return A


def text2audio(text):
    # Speech synthesis: vol = volume, per = voice, spd = speed, pit = pitch
    result = client.synthesis(text, 'zh', 1, {
        'vol': 5,
        'per': 4,
        'spd': 4,
        'pit': 7,
    })
    filename = f"{uuid4()}.mp3"
    # On success synthesis returns the raw audio bytes; on error it returns a dict (see Baidu's error codes)
    if not isinstance(result, dict):
        # print(result)
        with open(filename, 'wb') as f:
            f.write(result)
    return filename
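For reference, the three helpers above can also be chained by hand outside of Flask. A small sketch; demo.wav is an assumed local recording:

from adiou import audio2text, my_nlp, text2audio

text = audio2text("demo.wav")   # Baidu ASR: wav -> recognized text
reply = my_nlp(text)            # fixed answers via simnet, otherwise the Turing robot
mp3 = text2audio(reply)         # Baidu TTS: reply -> mp3 file on disk
print(text, "->", reply, "->", mp3)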
3. app.py (routes and view logic)
from flask import Flask, render_template, request, jsonify, send_file
from uuid import uuid4
from adiou import audio2text, text2audio, my_nlp
from mongodb import MONGODB
from flask_cors import CORS

app = Flask(__name__)
# Allow cross-origin requests from the page, wherever it is served from
CORS(app, resources={r"/*": {"origins": "*"}}, supports_credentials=True)


@app.route('/')
def hello_world():
    return render_template("index.html")


@app.route("/upload", methods=["POST"])
def upload():
    # Save the uploaded recording under a unique name
    fi = request.files.get("reco")
    fi_name = f"{uuid4()}.wav"
    fi.save(fi_name)
    # Speech -> text, then store the user's sentence
    text = audio2text(fi_name)
    MONGODB.users.insert_one({"kong": text})
    # Generate a reply and store it as well
    new_text = my_nlp(text)
    MONGODB.users.insert_one({"机器人": new_text})
    # Text -> speech; return the mp3 file name so the page can fetch it
    filename = text2audio(new_text)
    ret = {
        "filename": filename,
        "content": new_text,
        "code": 0
    }
    return jsonify(ret)


@app.route("/get_file/<filename>")
def get_file(filename):
    # The mp3 files are written to the working directory by text2audio
    return send_file(filename)
4. mongodb.py
from pymongo import MongoClient

# Connect to the local MongoDB server; MONGODB is the database handle used by app.py
conn = MongoClient("127.0.0.1", 27017)
MONGODB = conn["db3"]
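Every request handled by app.py inserts two documents into the users collection: one with the recognized text under the key "kong" and one with the reply under the key "机器人". A sketch for inspecting the stored conversation:

from mongodb import MONGODB

# Print the ten most recently inserted documents
for doc in MONGODB.users.find().sort("_id", -1).limit(10):
    print(doc)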
5. run.py (starts the Flask app)
from app import app
if __name__ == '__main__':
app.run("0.0.0.0", 9527, debug=True)