VOSK实现语音识别

以下代码读取麦克风输入,并输出识别到的中文文本:

#encoding:utf-8
import wave
import time
import json
import threading
from vosk import Model,KaldiRecognizer,SetLogLevel

# Location of the Vosk model directory, given as a relative path.
model_path = "models/vosk-model-cn-0.1"
# Turn off Vosk log output before the model is loaded.
SetLogLevel(-1)
model = Model(model_path)

def resume_microphone(rec):
    """Un-pause a capture device if it is still paused.

    Prints the device's current state next to the PAUSED constant, then
    calls ``rec.pause(False)`` only when the two are equal.

    Args:
        rec: Object exposing ``state()`` and ``pause()``. The script passes
            its ALSA capture device here, not the Vosk recognizer.
    """
    paused_state = alsaaudio.PCM_STATE_PAUSED
    print(rec.state(), paused_state)
    if rec.state() != paused_state:
        # Already running (or in some other state): nothing to resume.
        return
    rec.pause(False)

# pyalsaaudio is only needed by this microphone example. resume_microphone()
# looks the module up when it is called, which happens after this import.
import alsaaudio

# Recognizer configured for 16 kHz audio, matching the capture rate below.
rec = KaldiRecognizer(model, 16000)
rec.SetWords(True)
rec.SetPartialWords(True)

# Non-blocking mono capture of 16-bit little-endian samples at 16 kHz.
inp = alsaaudio.PCM(
    alsaaudio.PCM_CAPTURE,
    alsaaudio.PCM_NONBLOCK,
    channels=1,
    rate=16000,
    format=alsaaudio.PCM_FORMAT_S16_LE,
    periodsize=4096,
)

# Pending auto-resume timer started by the "停止" command, if any.
resume_timer = None
try:
    while True:
        length, data = inp.read()
        if length <= 0:
            # In non-blocking mode read() reports 0 when no period is ready
            # and a negative value on overrun. Skip decoding so the previous
            # partial result is not printed again on every poll.
            time.sleep(0.01)
            continue
        if rec.AcceptWaveform(data):
            # A complete utterance was recognized.
            result = json.loads(rec.Result())
            text = result["text"]
            print("You say:{}".format(text))
            if "停止" in text:
                print("program will be pause")
                # Only one resume timer should be pending at a time.
                if resume_timer is not None:
                    resume_timer.cancel()
                inp.pause(True)
                resume_timer = threading.Timer(5, resume_microphone, args=(inp,))
                resume_timer.start()
            if "退出" in text:
                print("program will be exist.")
                break
            if "恢复" in text:
                inp.pause(False)
                print("program will be resume")
        else:
            # Utterance still in progress: show the partial hypothesis.
            res = json.loads(rec.PartialResult())
            partial = res["partial"]
            if partial:
                print("Say:{}".format(partial))
                if "退出" in partial:
                    print("program will be exist.")
                    break
        time.sleep(0.01)
except KeyboardInterrupt:
    print("KeyboardInterrupt...")
finally:
    # Cancel a pending resume so it cannot touch the device after close()
    # and does not keep the interpreter alive until the timer expires.
    if resume_timer is not None:
        resume_timer.cancel()
    inp.close()

对已存在的音频文件进行识别,文件需为WAV格式:16 kHz采样率、单声道、16位PCM:

#encoding:utf-8
import wave
import json
from vosk import Model,KaldiRecognizer,SetLogLevel


def recognize_speech_from_file(filename):
    """Transcribe a WAV file with Vosk and print the recognized text.

    The file must be mono, 16-bit, uncompressed PCM; otherwise a message is
    printed and nothing is decoded. The recognizer is created with the
    file's own frame rate. A module-level ``model`` (a loaded Vosk model)
    must exist before this function is called.

    Args:
        filename: Path of the WAV file to read.

    Returns:
        None. Each recognized utterance is printed to stdout.
    """
    SetLogLevel(-1)
    # The context manager closes the file on every exit path, including the
    # early return taken for unsupported formats.
    with wave.open(filename, "rb") as wf:
        if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype() != "NONE":
            print("Audio file must be WAV format mono PCM")
            return
        rec = KaldiRecognizer(model, wf.getframerate())
        rec.SetWords(True)
        rec.SetPartialWords(True)
        while True:
            data = wf.readframes(2048)
            if not data:
                break
            if rec.AcceptWaveform(data):
                jres = json.loads(rec.Result())
                print(jres["text"])
        # Fetch whatever the recognizer still holds after the last chunk.
        final_result = rec.FinalResult()
    if final_result:
        final_result = json.loads(final_result)
        text = final_result.get("text")
        if text:
            print(text)

# recognize_speech_from_file() reads the module-level `model`, so load it
# before the call. Logging is silenced first so model loading stays quiet.
SetLogLevel(-1)
model = Model("models/vosk-model-cn-0.1")
recognize_speech_from_file("output.wav")

使用pyaudio的情况:

#encoding:utf-8
import wave
import json
from pyaudio import PyAudio, paInt16

# The import lines above do not provide these names, so bring them in here
# to keep this example runnable on its own.
import time
from vosk import Model, KaldiRecognizer, SetLogLevel

# Turn off Vosk log output before the model is loaded.
SetLogLevel(-1)
model_path = "models/vosk-model-cn-0.1"
model = Model(model_path)

# Recognizer configured for 16 kHz audio, matching the stream opened below.
rec = KaldiRecognizer(model, 16000)
rec.SetWords(True)
rec.SetPartialWords(True)

# Mono 16-bit input stream at 16 kHz, read in 4096-frame chunks.
p = PyAudio()
stream = p.open(format=paInt16, channels=1, rate=16000, input=True, frames_per_buffer=4096)
stream.start_stream()
try:
    while stream.is_active():
        data = stream.read(4096)
        if rec.AcceptWaveform(data):
            # A complete utterance was recognized.
            result = json.loads(rec.Result())
            text = result["text"]
            print("You say:{}".format(text))
            if "退出" in text:
                print("program will be exist.")
                break
        else:
            # Utterance still in progress: show the partial hypothesis.
            res = json.loads(rec.PartialResult())
            partial = res["partial"]
            if partial:
                print("Say:{}".format(partial))
                if "退出" in partial:
                    print("program will be exist.")
                    break
        time.sleep(0.01)
except KeyboardInterrupt:
    print("KeyboardInterrupt...")
finally:
    # Release the stream and the PyAudio instance on every exit path.
    stream.stop_stream()
    stream.close()
    p.terminate()

参考文章:

https://blog.csdn.net/weixin_48967543/article/details/142338862

posted @   月薪几千的牛马  阅读(84)  评论(0)  编辑  收藏  举报
相关博文:
阅读排行:
· 全程不用写代码,我用AI程序员写了一个飞机大战
· DeepSeek 开源周回顾「GitHub 热点速览」
· 记一次.NET内存居高不下排查解决与启示
· 物流快递公司核心技术能力-地址解析分单基础技术分享
· .NET 10首个预览版发布:重大改进与新特性概览!
点击右上角即可分享
微信分享提示