语音交互前端实现方案
1、语音交互主要使用了 Recorder 插件,依赖 wav.min.js 这个文件进行 wav 转换编码
<template>
  <a-row :class="['voice-control', {'extend-w': isShow}]">
    <div class="voice-l" @click="activeRecordVoice">
      <img :class="['robot-img', {'robot-img-extend': isShow}]" :src="robotImg">
      <img
        v-if="!isShow" class="pos"
        :src="soundWaveImg"
      >
    </div>
    <div :class="['voice-r', {'anim-in': isShow}, {'anim-out': !isShow}]">
      <img
        :class="['img-con', {'anim-in': isShow}, {'anim-out': !isShow}]"
        :src="recording ? soundWaveGif : soundBgImg"
      >
      <p>{{ voiceText }}</p>
    </div>
    <i
      v-show="isShow" class="close-info-window"
      @click="clickCloseBtn"
    >×</i>
  </a-row>
</template>
<script>
import {reactive, onMounted, toRefs, onBeforeUnmount, getCurrentInstance, watch} from 'vue';

import robotImg from '@/assets/img/robot.gif';
import soundBgImg from '@/assets/img/voice-bg.png';
import soundWaveImg from '@/assets/img/voice.png';
import soundWaveGif from '@/assets/img/wave.gif';
import Recorder from 'recorder-core';
import ReconnectingWebSocket from 'reconnecting-websocket';

/**
 * VoiceControl
 *
 * Voice-interaction widget: records microphone audio with recorder-core,
 * encodes it to wav in near real time, and streams the bytes to a speech
 * backend over a self-reconnecting WebSocket. Recognized text is shown in
 * the panel; recognized intents are re-emitted to the parent.
 *
 * Props:
 *   url - WebSocket endpoint of the speech backend.
 * Emits:
 *   commandOpe   - an intent message parsed from the backend.
 *   clickHandler - fired with the new visibility whenever the panel opens/closes.
 */
export default {
  name: 'VoiceControl',
  props: {
    url: {
      type: String,
      default: 'ws://10.138.65.153:8887/client/speech'
    }
  },
  setup(props, context) {
    const state = reactive({
      voiceText: '', // latest recognized text pushed back by the server
      isShow: false, // whether the expanded panel is visible
      recording: false, // whether the microphone is currently capturing
      robotImg,
      soundWaveImg,
      soundWaveGif,
      soundBgImg,
      ws: null, // ReconnectingWebSocket instance (null when closed)
      // Fix: recorderInstance is now declared up front instead of being
      // attached to the reactive state ad hoc inside activeRecordVoice.
      recorderInstance: null,
      realTimeSendTryType: 'wav', // frame/encoding type for real-time chunks
      realTimeSendTryEncBusy: 0, // number of chunks currently in the encoder
      realTimeSendTryTime: 0, // timestamp (ms) of the last transmitted chunk
      realTimeSendTryNumber: 0, // running sequence number of chunks
      transferUploadNumberMax: 0, // highest chunk number handed to upload
      realTimeSendTryChunk: null, // last SampleData chunk (carries .index offset)
      sendInterval: 100, // minimum ms between two chunk transmissions
      sampleRate: 16000, // sample rate (Hz)
      bitRate: 16, // bit depth
      encStartTime: Date.now(),
      scriptNode: null // <script> element that loads the wav encoder plugin
    });

    // Open the panel and connect to the backend (no-op if already open).
    const clickStartBtn = () => {
      if (!state.isShow) {
        state.isShow = true;
        createSocket();
      }
    };

    // Close the panel and tear down recorder + socket.
    const clickCloseBtn = () => {
      state.isShow = false;
      stopRecord();
    };

    // Inject the wav encoder plugin (recorder-core loads it lazily from a
    // plain <script> tag; it is not an ES module).
    const addPlugin = () => {
      const wavUrl = '/wav.min.js';
      const scriptNode = document.createElement('script');
      scriptNode.setAttribute('type', 'text/javascript');
      scriptNode.setAttribute('src', wavUrl);
      document.body.appendChild(scriptNode);
      state.scriptNode = scriptNode;
    };

    // Remove the encoder plugin tag added by addPlugin.
    const removePlugin = () => {
      if (state.scriptNode) {
        document.body.removeChild(state.scriptNode);
        state.scriptNode = null;
      }
    };

    // Toggle recording: first click creates a Recorder and starts capturing,
    // second click stops and cleans up.
    const activeRecordVoice = () => {
      if (!state.recording) {
        state.encStartTime = Date.now();
        state.recorderInstance = Recorder({
          type: 'wav',
          sampleRate: state.sampleRate,
          bitRate: state.bitRate,
          // Called by recorder-core as audio buffers arrive; we hand each
          // batch to the real-time transfer pipeline.
          onProcess: (buffers, powerLevel, bufferDuration,
            bufferSampleRate, newBufferIdx, asyncEnd) => {
            realTimeSendTry(state.recorderInstance, false);
          }
        });
        startRecord();
      } else {
        stopRecord();
      }
    };

    const startRecord = () => {
      state.recording = true;
      clickStartBtn();
      state.recorderInstance.open(
        () => {
          state.recorderInstance.start();
        },
        (msg, isUserNotAllow) => {
          console.log((isUserNotAllow ? 'UserNotAllow,' : '') + '无法录音:' + msg);
          state.recording = false;
        }
      );
    };

    // Stop capturing, reset the real-time pipeline state and close the socket.
    const stopRecord = () => {
      if (state.recorderInstance) {
        state.recorderInstance.close(() => {
          state.realTimeSendTryTime = 0;
          state.realTimeSendTryEncBusy = 0;
          state.realTimeSendTryNumber = 0;
          state.transferUploadNumberMax = 0;
          state.realTimeSendTryChunk = null;
          state.recording = false;
          // Fix: reset to null (was `false`), matching the declared type.
          state.recorderInstance = null;
          console.log('%c录音完成,关闭录音', 'color:#fff;background:green;');
        });
      }
      closeWs();
    };

    // Real-time transfer attempt: throttle to sendInterval, slice the new
    // samples out of the recorder's buffers, free the consumed buffers, then
    // mock-encode the chunk to wav and push it to the uploader.
    const realTimeSendTry = (rec, isClose) => {
      const t1 = Date.now();
      if (state.realTimeSendTryTime === 0) {
        state.realTimeSendTryTime = t1;
        state.realTimeSendTryEncBusy = 0;
        state.realTimeSendTryNumber = 0;
        state.transferUploadNumberMax = 0;
        state.realTimeSendTryChunk = null;
      }
      if (!isClose && t1 - state.realTimeSendTryTime < state.sendInterval) {
        console.log('%c控制缓冲达到指定间隔才进行传输', 'color: #fff;background: red;');
        return;
      }
      state.realTimeSendTryTime = t1;
      const number = ++state.realTimeSendTryNumber;
      const chunk = Recorder.SampleData(
        rec.buffers,
        rec.srcSampleRate,
        state.sampleRate,
        state.realTimeSendTryChunk,
        {
          frameType: isClose ? '' : state.realTimeSendTryType
        }
      );
      // Release consumed buffers to keep memory flat during long recordings.
      // Because the data is cleared here, rec.stop() must NOT be called at
      // the end of the recording.
      for (let i = state.realTimeSendTryChunk ? state.realTimeSendTryChunk.index : 0; i < chunk.index; i++) {
        rec.buffers[i] = null;
      }
      state.realTimeSendTryChunk = chunk;
      if (chunk.data.length === 0 || (isClose && chunk.data.length < 2000)) {
        transferUpload(number, null, 0, null, isClose);
        console.log('%c没有新数据,或结束时的数据量太小,不能进行mock转码', 'color: #fff;background: red;');
        return;
      }
      // Back-pressure: if two chunks are already being encoded, drop this frame.
      if (!isClose && state.realTimeSendTryEncBusy >= 2) {
        console.log('%c编码队列阻塞,已丢弃一帧', 'color: #fff;background: red;');
        return;
      }
      state.realTimeSendTryEncBusy++;
      const encStartTime = Date.now();
      const recMock = Recorder({
        type: state.realTimeSendTryType,
        sampleRate: state.sampleRate,
        bitRate: state.bitRate
      });
      recMock.mock(chunk.data, chunk.sampleRate);
      recMock.stop((blob, duration) => {
        state.realTimeSendTryEncBusy && (state.realTimeSendTryEncBusy--);
        blob.encTime = Date.now() - encStartTime;
        // Encoding done — hand the blob to the transport.
        transferUpload(number, blob, duration, recMock, isClose);
      }, (msg) => {
        state.realTimeSendTryEncBusy && (state.realTimeSendTryEncBusy--);
        // Encoding error.
        console.log('%c出现的错误:' + msg, 'color: #fff;background: red;');
      });
    };

    // Read the encoded blob as signed bytes and send them over the socket.
    // `duration` and `blobRec` are part of the recorder-core callback
    // contract and are currently unused.
    const transferUpload = (number, blobOrNull, duration, blobRec, isClose) => {
      state.transferUploadNumberMax = Math.max(state.transferUploadNumberMax, number);
      if (blobOrNull) {
        const reader = new FileReader();
        reader.readAsArrayBuffer(blobOrNull);
        reader.onload = () => {
          const byteArray = Array.from(new Int8Array(reader.result));
          sendWs(byteArray);
        };
      }
    };

    // Create the backend connection (ReconnectingWebSocket retries itself).
    const createSocket = () => {
      if (state.ws) {
        closeWs();
      }
      state.ws = new ReconnectingWebSocket(props.url, [], {
        connectionTimeout: 3000,
        maxRetries: 10
      });
      // Fix: register 'message' ONCE here. It used to be registered inside
      // the 'open' handler, so every automatic reconnect stacked another
      // duplicate handler and intents were emitted multiple times.
      state.ws.addEventListener('message', (e) => {
        let info;
        try {
          info = JSON.parse(e?.data)?.data;
        } catch (err) {
          // Fix: a malformed frame no longer throws out of the handler.
          console.log('%c出现的错误:' + err, 'color: #fff;background: red;');
          return;
        }
        if (info?.type === 'text') {
          state.voiceText = info?.data?.text || '';
        }
        if (info?.type === 'intent') {
          context.emit('commandOpe', info);
        }
      });
      state.ws.addEventListener('open', () => {
        state.ws?.send(JSON.stringify({type: 'start'}));
      });
      state.ws.addEventListener('close', () => {
        state.ws = null;
        state.voiceText = '';
      });
    };

    // Wrap the byte array in the protocol envelope and send when OPEN.
    const sendWs = (message) => {
      if (state.ws) {
        const mes = {
          'type': 'data', 'data': {'speechBytes': message}
        };
        if (state.ws?.readyState === 1) {
          state.ws?.send(JSON.stringify(mes));
        }
      }
    };

    // Tell the backend we are done, then close the socket.
    const closeWs = () => {
      if (state.ws && state.ws?.readyState === 1) {
        state.ws?.send(JSON.stringify({'type': 'stop'}));
        state.ws?.close();
      }
    };

    onMounted(() => {
      // Let the parent react to the panel expanding/collapsing.
      watch(() => state.isShow, isShow => {
        context.emit('clickHandler', isShow);
      });
      addPlugin();
    });

    onBeforeUnmount(() => {
      stopRecord();
      removePlugin();
    });

    return {
      ...toRefs(state),
      activeRecordVoice,
      clickCloseBtn,
      clickStartBtn
    };
  }
};
</script>
<style lang="less" scoped>
@import '~@/assets/less/variables.less';

.voice-control {
  position: relative;
  display: flex;
  flex-wrap: nowrap;
  max-height: 216px;
  margin: 0 @margin-base-lg * 3 0 0;
  font-size: @--font-size-md;
  font-weight: 500;
  color: @--color-white;
  background-image: linear-gradient(179deg, rgba(13, 45, 80, 0.14) 0%, rgba(0, 25, 64, 0.70) 100%);

  .time {
    animation-duration: 0.5s;
  }

  .voice-l {
    position: relative;
    width: 216px;
    min-width: 216px;
    overflow: hidden;

    .robot-img {
      position: relative;
      top: -38%;
      left: 50%;
      width: 150%;
      transform: translate(-50%, 0);
    }

    .robot-img-extend {
      top: 50%;
      transform: translate(-50%, -50%);
    }

    .pos {
      position: absolute;
      // Fix: dropped the duplicate `bottom: @margin-base-lg;` declaration —
      // this later value always won.
      bottom: @margin-base-md * 3;
      left: 50%;
      width: 90%;
      transform: translate(-50%, 0);
    }
  }

  .voice-r {
    display: none;
    flex-wrap: wrap;
    align-content: center;
    justify-content: center;
    width: 0;
    cursor: pointer;

    > p {
      width: 100%;
      text-align: center;
    }
  }

  .img-con {
    width: 0;
    max-width: 408px;
    height: 50%;
    max-height: 77px;
    background: url('~@/assets/img/voice.png') no-repeat 54% center;
    background-size: 60% 70%;
    animation-name: anim-in-name;
    .time();
  }

  .img-h {
    height: auto;
  }

  .anim-in {
    display: flex;
    animation-name: anim-in-name;
    .extend-w();
    .time();
  }

  .anim-out {
    width: 0;
    animation-name: anim-out-name;
    .time();
  }

  @keyframes anim-in-name {
    from {
      width: 0;
    }

    to {
      .extend-s-w();
    }
  }

  @keyframes anim-out-name {
    from {
      .extend-s-w();
    }

    to {
      width: 0;
    }
  }

  .close-info-window {
    position: absolute;
    top: -10px;
    right: -10px;
    box-sizing: border-box;
    display: inline-block;
    width: 40px;
    height: 40px;
    font-size: 20px;
    font-style: normal;
    line-height: 40px;
    color: #fff;
    text-align: center;
    cursor: pointer;
    border: 1px solid #47bef7;
    box-shadow: inset 0 1px 45px 0 rgba(66, 212, 254, 0.8);
  }
}

.extend-w {
  width: 100%;
}

.extend-s-w {
  width: auto;
}
</style>
参考:https://xiangyuecn.gitee.io/recorder/assets/%E5%B7%A5%E5%85%B7-%E4%BB%A3%E7%A0%81%E8%BF%90%E8%A1%8C%E5%92%8C%E9%9D%99%E6%80%81%E5%88%86%E5%8F%91Runtime.html?jsname=teach.realtime.encode_transfer