Python实现语音转文字功能
import os
import requests
import urllib
import calendar
import time
import datetime

from moviepy.editor import AudioFileClip
from pydub import AudioSegment
from aip import AipSpeech


class DouYin:
    """Extract the audio track of a video and transcribe it to a text file.

    The pipeline is: video -> WAV (moviepy) -> fixed-length WAV segments
    (pydub) -> text per segment (Baidu ``AipSpeech.asr``) -> one line per
    segment appended to a timestamped ``.txt`` file in the working directory.
    """

    def __init__(self, task_id='', audio_list=None):
        """Create a transcriber.

        Args:
            task_id: Label printed by ``run()``; stored as ``self.id``.
            audio_list: Iterable of WAV file paths for ``run()`` to process.
                Defaults to an empty list.

        Both parameters are optional so ``DouYin()`` keeps working. The
        original ``__init__`` never set ``self.id`` / ``self.audio_list``,
        which made ``run()`` fail with ``AttributeError``.
        """
        # Browser-like request headers; not used by any method in this class.
        self._headers = {
            'Accept': '*/*',
            'Accept-Encoding': 'gzip,deflate,sdch',
            'Accept-Language': 'zh-CN,zh;q=0.8,gl;q=0.6,zh-TW;q=0.4',
            'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0;'
                          ' Nexus 5 Build/MRA58N)'
                          'AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/66.0.3359.181 Mobile Safari/537.36',
        }
        self.id = task_id
        self.audio_list = list(audio_list) if audio_list is not None else []

    def transform(self, videoName):
        """Write the audio track of ``videoName`` to a WAV file and transcribe it.

        Args:
            videoName: Video file name, resolved against the current working
                directory.

        The WAV file is named after the current time (``%m%d%H%M%S``) and is
        created in the current working directory.
        """
        audioName = datetime.datetime.now().strftime('%m%d%H%M%S') + '.wav'
        audio = AudioFileClip(os.path.join(os.getcwd(), videoName))
        try:
            audio.write_audiofile(audioName)
        finally:
            # Release the reader held by the clip even if writing fails.
            audio.close()
        # Split the extracted audio and transcribe each segment.
        self.speech_seg(audioName)

    def speech_seg(self, filename):
        """Split a WAV file into 59-second segments and transcribe each one.

        Each segment is resampled to 16 kHz mono, written to a temporary
        ``<index>.wav`` in the working directory, passed to
        ``speech_recognize``, and its text is appended as one line to a
        timestamped ``.txt`` file. The temporary segment file is always
        removed afterwards.

        Args:
            filename: Path of the WAV file to transcribe.
        """
        txt_name = datetime.datetime.now().strftime('%m%d%H%M%S') + '.txt'
        txt_path = os.path.join(os.getcwd(), txt_name)
        if os.path.exists(txt_path):
            os.remove(txt_path)  # start from an empty transcript

        sound = AudioSegment.from_wav(filename)
        # Segment length in seconds.
        # NOTE(review): presumably chosen to stay under the ASR service's
        # per-request duration limit - confirm against the Baidu docs.
        seconds_per_split_file = 59

        # Number of segments = ceil(duration / segment length).
        whole, remainder = divmod(sound.duration_seconds, seconds_per_split_file)
        times = int(whole) + (1 if remainder else 0)
        print(f'{filename}可切割 {times} 次')

        interval_ms = seconds_per_split_file * 1000  # pydub slices in ms
        for i in range(times):
            start_ms = i * interval_ms
            if i + 1 == times:
                # Last segment: take everything that is left.
                part = sound[start_ms:]
            else:
                part = sound[start_ms:start_ms + interval_ms]

            data_split_filename = f'{i}.wav'
            try:
                # 16 kHz mono 16-bit PCM matches the rate passed to asr().
                mono = part.set_frame_rate(16000).set_channels(1)
                # export() returns an open file handle; close it explicitly.
                mono.export(data_split_filename, format='wav',
                            codec='pcm_s16le').close()

                text = self.speech_recognize(data_split_filename)
                # UTF-8 so the transcript does not depend on the platform's
                # default encoding.
                with open(txt_path, 'a', encoding='utf-8') as ff:
                    ff.write(text)
                    ff.write('\n')
                print(f' {str(i)}.wav语音转换成功,开始删除')
            finally:
                # Never leave temporary segment files behind, even on error.
                if os.path.exists(data_split_filename):
                    os.remove(data_split_filename)
            # Short pause between requests.
            # NOTE(review): presumably API rate limiting - confirm.
            time.sleep(0.5)

    def speech_recognize(self, seg_filename):
        """Send one audio segment to Baidu speech recognition.

        Credentials are read from the environment variables ``BAIDU_APP_ID``,
        ``BAIDU_API_KEY`` and ``BAIDU_SECRET_KEY`` when set, otherwise the
        in-file defaults are used.

        Args:
            seg_filename: Path of a 16 kHz WAV segment.

        Returns:
            The first recognition result, or the marker string
            ``'=====识别失败====='`` when the response does not report success
            or carries no result.
        """
        APP_ID = os.environ.get('BAIDU_APP_ID', '57997766')
        API_KEY = os.environ.get('BAIDU_API_KEY', '百度语音识别APP_KEY')
        SECRET_KEY = os.environ.get('BAIDU_SECRET_KEY', '百度语音识别SECRET_KEY')
        client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

        with open(seg_filename, 'rb') as fp:
            audio_bytes = fp.read()

        # NOTE(review): dev_pid 1537 selects the recognition model
        # (Mandarin per Baidu's docs) - confirm.
        response = client.asr(audio_bytes, 'wav', 16000, {'dev_pid': 1537})

        # Guard every lookup: an error response may lack 'result' (or even
        # 'err_msg'), which previously raised instead of reporting failure.
        if isinstance(response, dict):
            results = response.get('result')
            if 'success' in str(response.get('err_msg', '')) and results:
                return results[0]

        print('识别失败!')
        return '=====识别失败====='

    def run(self):
        """Transcribe every file in ``self.audio_list``.

        A failure on one file is reported and does not stop the remaining
        files from being processed.
        """
        print(f'============{self.id}共{len(self.audio_list)}个文件==============')
        for i in self.audio_list:
            try:
                self.speech_seg(i)
            except Exception as exc:
                # Best-effort batch: report the cause instead of hiding it,
                # and let KeyboardInterrupt/SystemExit propagate.
                print(f'-----{i}-----分析出现问题: {exc!r}')
            else:
                print(f'-----{i}-----分析完成')


if __name__ == "__main__":
    DY = DouYin()
    DY.transform("1.mp4")
作者:Aleen Pan
出处:http://panxiangfu.cnblogs.com/
如果您觉得本文对您的学习有所帮助,可通过“微信”或“支付宝”打赏博主,或者点击页面右下角【好文要顶】支持博主。