# Supports compressed WAV input; the downside is lower recognition accuracy.
import json
import wave
from vosk import Model, KaldiRecognizer
from pydub import AudioSegment
from pydub.utils import make_chunks
def recognize_wave(model, file_path, chunk_frames=4000):
    """Transcribe a WAV file with Vosk and return the text without whitespace.

    The audio is streamed to the recognizer in fixed-size chunks instead of
    being loaded into memory in one piece, so long recordings stay cheap and
    every utterance the recognizer finalizes along the way is collected.

    Args:
        model: A loaded ``vosk.Model``; passed straight to ``KaldiRecognizer``.
        file_path: Path of the WAV file to transcribe.
        chunk_frames: Number of audio frames handed to the recognizer per
            call. Defaults to 4000.

    Returns:
        The recognized text with all whitespace removed (an empty string
        when nothing was recognized).

    Note:
        The sample format is not validated here. Vosk's own examples require
        mono 16-bit PCM input, so callers should convert the audio first.
    """
    pieces = []
    # Open the WAV audio file.
    with wave.open(file_path, 'rb') as wf:
        # The recognizer must be told the sample rate of the incoming audio.
        rec = KaldiRecognizer(model, wf.getframerate())
        rec.SetWords(True)
        # Feed the audio chunk by chunk; readframes() returns b'' at EOF.
        while True:
            data = wf.readframes(chunk_frames)
            if not data:
                break
            # True means an utterance boundary was reached and its result
            # is ready to be fetched.
            if rec.AcceptWaveform(data):
                pieces.append(json.loads(rec.Result()).get('text', ''))
    # Flush whatever is still buffered after the last chunk.
    pieces.append(json.loads(rec.FinalResult()).get('text', ''))
    # Vosk separates tokens with spaces; strip every whitespace character.
    return "".join("".join(pieces).split())
if __name__ == "__main__":
    # Path of the original audio file and the sampling rate to convert to.
    file_path = '87622-1.wav'
    new_sampling_rate = 44100  # target rate: 44.1 kHz
    # Load the audio.
    audio = AudioSegment.from_file(file_path)
    # AudioSegment is immutable: the set_* methods return a NEW segment, so
    # the result has to be kept. Discarding it left the audio unchanged.
    audio = audio.set_frame_rate(new_sampling_rate)
    # Vosk consumes raw mono 16-bit PCM samples; normalise the layout so
    # stereo or non-16-bit input is not misinterpreted by the recognizer.
    audio = audio.set_channels(1).set_sample_width(2)
    # Save the converted audio as a new WAV file.
    audio.export('output1234.wav', format='wav')
    model = Model("vosk-model-cn-0.22")
    file_path = 'output1234.wav'  # make sure the file name and path are correct
    res = recognize_wave(model, file_path)
    print(res)