Python实现语音转文字功能

import calendar
import datetime
import os
import tempfile
import time
import urllib

import requests
from aip import AipSpeech
from moviepy.editor import AudioFileClip
from pydub import AudioSegment


class DouYin:
    """Turn the audio track of a video (or a WAV file) into a text transcript.

    The pipeline is: extract the audio with moviepy (``transform``), cut it
    into short segments with pydub (``speech_seg``) and send every segment to
    the Baidu ``AipSpeech`` client (``speech_recognize``).  ``run`` applies
    ``speech_seg`` to every file in ``audio_list``.
    """

    # Credentials handed to ``AipSpeech``.  API_KEY / SECRET_KEY are
    # placeholders and must be replaced with real values before use.
    APP_ID = '57997766'
    API_KEY = '百度语音识别APP_KEY'
    SECRET_KEY = '百度语音识别SECRET_KEY'

    def __init__(self, user_id='', audio_list=None):
        """Create a converter.

        Args:
            user_id: Label printed by ``run`` in its header line.
            audio_list: Optional iterable of WAV file paths for ``run``.
                Defaults to an empty list.
        """
        # ``run`` reads both attributes, so they must always exist.
        self.id = user_id
        self.audio_list = list(audio_list) if audio_list is not None else []
        # Speech client, created on first use and then reused for every segment.
        self._client = None
        # Mobile-browser request headers.  Not referenced by any method
        # visible in this class.
        self._headers = {
            'Accept': '*/*',
            'Accept-Encoding': 'gzip,deflate,sdch',
            'Accept-Language': 'zh-CN,zh;q=0.8,gl;q=0.6,zh-TW;q=0.4',
            # Note the space before "AppleWebKit": the original literal
            # concatenation produced "MRA58N)AppleWebKit".
            'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0;'
                          ' Nexus 5 Build/MRA58N) '
                          'AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/66.0.3359.181 Mobile Safari/537.36',
        }

    def transform(self, videoName):
        """Extract the audio of ``videoName`` to a WAV file and transcribe it.

        Args:
            videoName: Path of the video; relative paths are resolved against
                the current working directory.

        Returns:
            The path of the transcript file written by ``speech_seg``.
        """
        audioName = datetime.datetime.now().strftime('%m%d%H%M%S') + '.wav'

        # os.path.join keeps an absolute ``videoName`` intact.
        audio = AudioFileClip(os.path.join(os.getcwd(), videoName))
        try:
            audio.write_audiofile(audioName)
        finally:
            # Release the reader held by the clip even if writing fails.
            audio.close()

        return self.speech_seg(audioName)

    def speech_seg(self, filename, seconds_per_split_file=59):
        """Split a WAV file into segments and write their transcript.

        Every segment is converted to 16 kHz mono 16-bit PCM, passed to
        ``speech_recognize`` and the returned text is written as one line of
        a ``<MMDDHHMMSS>.txt`` file in the current working directory.

        Args:
            filename: Path of the WAV file to transcribe.
            seconds_per_split_file: Segment length in seconds (default 59,
                presumably to stay under the recognition API's per-request
                length limit -- confirm against the API documentation).

        Returns:
            The path of the transcript file.

        Raises:
            ValueError: If the segment length is not at least one millisecond.
        """
        chunk_ms = int(seconds_per_split_file * 1000)
        if chunk_ms <= 0:
            raise ValueError('seconds_per_split_file must be positive')

        txt_name = datetime.datetime.now().strftime('%m%d%H%M%S') + '.txt'
        txt_path = os.path.join(os.getcwd(), txt_name)

        sound = AudioSegment.from_wav(filename)
        # len(AudioSegment) is the duration in whole milliseconds, so the
        # segment boundaries can be computed with integer arithmetic.
        starts = range(0, len(sound), chunk_ms)
        times = len(starts)
        print(f'{filename}可切割 {times} 次')

        # Mode 'w' truncates a stale transcript; UTF-8 keeps the recognised
        # text independent of the platform's default encoding.
        with open(txt_path, 'w', encoding='utf-8') as ff:
            # Segments live in a private temporary directory so they can
            # never clobber a user file such as "0.wav" and are cleaned up
            # even when recognition raises.
            with tempfile.TemporaryDirectory() as tmp_dir:
                for i, start_time in enumerate(starts):
                    # Slicing past the end is clamped, so the last segment
                    # simply contains whatever audio remains.
                    part = sound[start_time:start_time + chunk_ms]
                    data_split_filename = os.path.join(tmp_dir, f'{i}.wav')

                    mono = part.set_frame_rate(16000).set_channels(1)
                    # export() returns the open output handle; close it so
                    # the file can be read back and removed right away.
                    mono.export(data_split_filename, format='wav',
                                codec='pcm_s16le').close()

                    text = self.speech_recognize(data_split_filename)
                    ff.write(text)
                    ff.write('\n')
                    # Keep finished segments on disk if a later one fails.
                    ff.flush()

                    print(f'    {str(i)}.wav语音转换成功,开始删除')
                    os.remove(data_split_filename)
                    # Short pause between consecutive API requests.
                    time.sleep(0.5)

        return txt_path

    def speech_recognize(self, seg_filename):
        """Send one audio segment to the speech API and return its text.

        Args:
            seg_filename: Path of a 16 kHz WAV segment.

        Returns:
            The first recognition result, or the marker string
            ``'=====识别失败====='`` when the response does not report success.
        """
        # Reuse one client instead of constructing a new one per segment.
        if getattr(self, '_client', None) is None:
            self._client = AipSpeech(self.APP_ID, self.API_KEY, self.SECRET_KEY)

        with open(seg_filename, 'rb') as fp:
            audio_bytes = fp.read()

        # NOTE(review): dev_pid 1537 selects the recognition model
        # (Mandarin according to Baidu's docs -- confirm).
        response = self._client.asr(audio_bytes, 'wav', 16000, {'dev_pid': 1537})

        # Error responses may lack 'err_msg' / 'result', so never index them
        # directly.
        results = response.get('result') or []
        if 'success' in response.get('err_msg', '') and results:
            return results[0]

        print('识别失败!', response)
        return '=====识别失败====='

    def run(self):
        """Transcribe every file in ``self.audio_list``.

        A failure on one file is reported and does not stop the remaining
        files from being processed.
        """
        print(f'============{self.id}共{len(self.audio_list)}个文件==============')
        for audio_path in self.audio_list:
            try:
                self.speech_seg(audio_path)
            except Exception as exc:  # best effort: report and continue
                print(f'-----{audio_path}-----分析出现问题: {exc!r}')
            else:
                print(f'-----{audio_path}-----分析完成')

if __name__ == "__main__":
    # Script entry point: transcribe the sample video in the working directory.
    DouYin().transform("1.mp4")

  

posted @ 2024-06-23 21:34  潘向福  阅读(6)  评论(0)  编辑  收藏  举报