Python实现语音转文字功能
import os
import requests
import urllib
import calendar
import time
import datetime
import math
import tempfile
import traceback

from moviepy.editor import AudioFileClip
from pydub import AudioSegment
from aip import AipSpeech


class DouYin:
    """Extract the audio track of a video and transcribe it to a text file.

    The audio is cut into short slices, each slice is sent to the Baidu
    speech-recognition client (``AipSpeech``), and the recognised text is
    appended line by line to a timestamped ``.txt`` file in the current
    working directory.
    """

    def __init__(self):
        self._headers = {
            'Accept': '*/*',
            'Accept-Encoding': 'gzip,deflate,sdch',
            'Accept-Language': 'zh-CN,zh;q=0.8,gl;q=0.6,zh-TW;q=0.4',
            'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0;'
                          ' Nexus 5 Build/MRA58N)'
                          'AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/66.0.3359.181 Mobile Safari/537.36',
        }
        # run() reads these two attributes, but nothing in this script ever
        # assigned them; give them empty defaults so run() cannot fail with
        # AttributeError. Callers are expected to fill them in before run().
        self.id = ''
        self.audio_list = []

    def transform(self, videoName):
        """Write the audio track of *videoName* to a WAV file and transcribe it.

        Args:
            videoName: Video file path. A relative path is resolved against
                the current working directory; an absolute path is used as is.

        Returns:
            Path of the transcript file produced by :meth:`speech_seg`.
        """
        audio_name = datetime.datetime.now().strftime('%m%d%H%M%S') + '.wav'
        audio = AudioFileClip(os.path.join(os.getcwd(), videoName))
        try:
            audio.write_audiofile(audio_name)
        finally:
            # Release the reader even if writing the WAV file fails.
            audio.close()
        return self.speech_seg(audio_name)

    def speech_seg(self, filename, seconds_per_split_file=59):
        """Split a WAV file into slices and transcribe each slice in order.

        Args:
            filename: Path of the WAV file to transcribe.
            seconds_per_split_file: Length of each slice in seconds
                (default 59, the value the original script used).

        Returns:
            Path of the transcript file. One line is appended per slice, so
            the file is only created once the first slice has been recognised.

        Raises:
            ValueError: If *seconds_per_split_file* is not positive.
        """
        if seconds_per_split_file <= 0:
            raise ValueError('seconds_per_split_file must be positive')

        stamp = datetime.datetime.now().strftime('%m%d%H%M%S')
        txt_path = os.path.join(os.getcwd(), stamp + '.txt')
        if os.path.exists(txt_path):
            os.remove(txt_path)  # start from a clean transcript file

        sound = AudioSegment.from_wav(filename)
        # Number of slices, rounding up so a trailing partial slice is kept.
        times = math.ceil(sound.duration_seconds / seconds_per_split_file)
        print(f'{filename}可切割 {times} 次')

        interval = int(seconds_per_split_file * 1000)  # slice length in ms

        # Slices live in a private temporary directory so they cannot clobber
        # files in the working directory and are cleaned up even when
        # recognition raises part-way through.
        with tempfile.TemporaryDirectory() as tmp_dir:
            for i in range(times):
                start = i * interval
                if i + 1 == times:
                    part = sound[start:]  # last slice: take whatever is left
                else:
                    part = sound[start:start + interval]

                seg_path = os.path.join(tmp_dir, f'{i}.wav')
                # Convert to 16 kHz mono 16-bit PCM, matching the sample rate
                # passed to the recogniser in speech_recognize().
                mono = part.set_frame_rate(16000).set_channels(1)
                # export() returns the output file handle; close it so the
                # slice can be deleted right after use.
                mono.export(seg_path, format='wav', codec='pcm_s16le').close()

                text = self.speech_recognize(seg_path)
                with open(txt_path, 'a', encoding='utf-8') as ff:
                    ff.write(text)
                    ff.write('\n')

                print(f' {str(i)}.wav语音转换成功,开始删除')
                os.remove(seg_path)
                time.sleep(0.5)  # short pause between recognition requests

        return txt_path

    def speech_recognize(self, seg_filename):
        """Send one audio slice to the recogniser and return its text.

        Credentials are read from the environment variables
        ``BAIDU_APP_ID``, ``BAIDU_API_KEY`` and ``BAIDU_SECRET_KEY``; when a
        variable is unset the value hard-coded in the original script is used.

        Args:
            seg_filename: Path of the WAV slice to recognise.

        Returns:
            The first recognition result, or the marker string
            ``'=====识别失败====='`` when the response does not report success.
        """
        app_id = os.environ.get('BAIDU_APP_ID', '57997766')
        api_key = os.environ.get('BAIDU_API_KEY', '百度语音识别APP_KEY')
        secret_key = os.environ.get('BAIDU_SECRET_KEY', '百度语音识别SECRET_KEY')
        client = AipSpeech(app_id, api_key, secret_key)

        with open(seg_filename, 'rb') as fp:
            audio_bytes = fp.read()

        response = client.asr(audio_bytes, 'wav', 16000, {'dev_pid': 1537})
        # Read the fields defensively: a failed request may omit either key.
        results = response.get('result') or []
        if 'success' in response.get('err_msg', '') and results:
            return results[0]

        print('识别失败!')
        return '=====识别失败====='

    def run(self):
        """Transcribe every file in ``self.audio_list``, reporting each outcome.

        A failure on one file is printed with its traceback and does not stop
        the remaining files from being processed.
        """
        print(f'============{self.id}共{len(self.audio_list)}个文件==============')
        for i in self.audio_list:
            try:
                self.speech_seg(i)
            except Exception:
                print(f'-----{i}-----分析出现问题')
                traceback.print_exc()
            else:
                print(f'-----{i}-----分析完成')


if __name__ == "__main__":
    DY = DouYin()
    DY.transform("1.mp4")
作者:Aleen Pan
出处:http://panxiangfu.cnblogs.com/
如果您觉得本文对您的学习有所帮助,可通过“微信”或“支付宝”打赏博主,或者点击页面右下角【好文要顶】支持博主。



【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· Manus重磅发布:全球首款通用AI代理技术深度解析与实战指南
· 被坑几百块钱后,我竟然真的恢复了删除的微信聊天记录!
· 没有Manus邀请码?试试免邀请码的MGX或者开源的OpenManus吧
· 园子的第一款AI主题卫衣上架——"HELLO! HOW CAN I ASSIST YOU TODAY"
· 【自荐】一款简洁、开源的在线白板工具 Drawnix