# 支持被压缩的 WAV 文件；缺点是识别准确率较低。

 

import json
import wave
from vosk import Model, KaldiRecognizer
from pydub import AudioSegment

from pydub.utils import make_chunks

def recognize_wave(model, file_path, chunk_frames=4000):
    """Transcribe a WAV file with Vosk and return the text without whitespace.

    The audio is streamed to the recognizer in fixed-size chunks instead of
    being loaded into memory and submitted in a single call. Text is collected
    every time the recognizer reports a finished utterance, and once more from
    the final result after the last chunk.

    Args:
        model: The Vosk model object handed to ``KaldiRecognizer``.
        file_path: Path of a WAV file that the ``wave`` module can open.
            NOTE(review): Vosk's own examples expect mono 16-bit PCM input;
            this function does not verify that - confirm the caller does.
        chunk_frames: Number of audio frames read per recognizer call.
            Must be a positive integer.

    Returns:
        All recognized text joined together with every whitespace character
        removed.

    Raises:
        ValueError: If ``chunk_frames`` is not positive.
    """
    if chunk_frames <= 0:
        raise ValueError("chunk_frames must be a positive integer")

    pieces = []
    with wave.open(file_path, 'rb') as wf:
        # The recognizer is configured with the file's own sample rate.
        rec = KaldiRecognizer(model, wf.getframerate())
        # Kept from the original: asks for word-level details, although only
        # the 'text' field of each result is consumed below.
        rec.SetWords(True)

        while True:
            frames = wf.readframes(chunk_frames)
            if not frames:
                break
            # A truthy return means an utterance was completed and its result
            # can be fetched now.
            if rec.AcceptWaveform(frames):
                pieces.append(json.loads(rec.Result()).get('text', ''))

    # Collect whatever is still buffered after the last chunk.
    pieces.append(json.loads(rec.FinalResult()).get('text', ''))

    # The result text may contain separators between tokens; strip all
    # whitespace, exactly as the original implementation did.
    return "".join("".join(pieces).split())


if __name__ == "__main__":
    # Input recording, the converted file handed to the recognizer, and the
    # sample rate to convert to (44.1 kHz).
    source_path = '87622-1.wav'
    converted_path = 'output1234.wav'
    new_sampling_rate = 44100

    # Load the audio; pydub can decode compressed WAV variants that the
    # stdlib ``wave`` module cannot open directly.
    audio = AudioSegment.from_file(source_path)

    # AudioSegment is immutable: every setter returns a NEW segment, so the
    # result must be assigned (the original discarded it and the resample
    # never took effect).
    audio = audio.set_frame_rate(new_sampling_rate)
    # Vosk's examples require mono 16-bit PCM input, so normalise the channel
    # count and sample width before exporting.
    audio = audio.set_channels(1).set_sample_width(2)

    # Write the converted audio as a plain WAV file.
    audio.export(converted_path, format='wav')

    model = Model("vosk-model-cn-0.22")

    res = recognize_wave(model, converted_path)
    print(res)

# posted on 2024-12-17 08:47  仔仔爱学习  阅读(3)  评论(0)  编辑  收藏  举报