Python实现语音转文字功能

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import os
import requests
import urllib
import calendar
import time
import datetime
from moviepy.editor import AudioFileClip
from pydub import AudioSegment
from aip import AipSpeech
 
 
class DouYin:
    """Transcribe the audio track of a video file with Baidu speech recognition.

    Pipeline: ``transform`` extracts the audio track to a WAV file,
    ``speech_seg`` cuts it into 59-second pieces and appends each piece's
    transcript to a timestamped ``.txt`` file in the current working
    directory, and ``speech_recognize`` sends one piece to the recognition
    service.  ``run`` applies ``speech_seg`` to every entry of
    ``self.audio_list``.
    """

    # Baidu speech credentials.  API_KEY / SECRET_KEY are placeholders and
    # must be replaced (or overridden on a subclass/instance) before use.
    APP_ID = '57997766'
    API_KEY = '百度语音识别APP_KEY'
    SECRET_KEY = '百度语音识别SECRET_KEY'

    def __init__(self):
        self._headers = {
            'Accept': '*/*',
            'Accept-Encoding': 'gzip,deflate,sdch',
            'Accept-Language': 'zh-CN,zh;q=0.8,gl;q=0.6,zh-TW;q=0.4',
            'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0;'
                          ' Nexus 5 Build/MRA58N)'
                          'AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/66.0.3359.181 Mobile Safari/537.36',
        }
        # ``run`` reads both attributes; give them defaults so it does not
        # raise AttributeError when the caller has not set them.
        self.id = None          # label printed by ``run``
        self.audio_list = []    # WAV file paths processed by ``run``

    def transform(self, videoName):
        """Extract the audio of *videoName* to a WAV file and transcribe it.

        *videoName* is resolved against the current working directory.  The
        WAV file is named after the current time (``%m%d%H%M%S.wav``) and is
        left on disk after transcription.
        """
        audioName = datetime.datetime.now().strftime('%m%d%H%M%S') + '.wav'

        audio = AudioFileClip(os.path.join(os.getcwd(), videoName))
        try:
            audio.write_audiofile(audioName)
        finally:
            # Release the underlying reader even if writing fails.
            audio.close()

        self.speech_seg(audioName)

    def speech_seg(self, filename):
        """Split the WAV file *filename* into 59 s pieces and transcribe each.

        Every piece is resampled to 16 kHz mono, written to ``<index>.wav``
        in the current working directory, recognised, and then deleted.  One
        line per piece is appended to a timestamped ``.txt`` transcript.
        """
        txt_name = datetime.datetime.now().strftime('%m%d%H%M%S') + '.txt'
        txt_path = os.path.join(os.getcwd(), txt_name)
        try:
            os.remove(txt_path)  # start from an empty transcript
        except FileNotFoundError:
            pass

        sound = AudioSegment.from_wav(filename)
        seconds_of_file = sound.duration_seconds
        seconds_per_split_file = 59
        # Number of pieces = ceil(duration / 59).
        whole, remainder = divmod(seconds_of_file, seconds_per_split_file)
        times = int(whole) + (1 if remainder else 0)
        print(f'{filename}可切割 {times} 次')

        interval = seconds_per_split_file * 1000  # AudioSegment slices in ms
        for i in range(times):
            start_time = i * interval
            if i + 1 == times:
                part = sound[start_time:]  # last piece takes the remainder
            else:
                part = sound[start_time:start_time + interval]

            # NOTE(review): pieces are written as "0.wav", "1.wav", ... in
            # the working directory and deleted afterwards, so an unrelated
            # file with the same name would be overwritten and removed.
            data_split_filename = str(i) + '.wav'
            try:
                mono = part.set_frame_rate(16000).set_channels(1)
                mono.export(data_split_filename, format='wav',
                            codec='pcm_s16le')
                text = self.speech_recognize(data_split_filename)
                with open(txt_path, 'a', encoding='utf-8') as ff:
                    ff.write(text)
                    ff.write('\n')
                print(f'    {str(i)}.wav语音转换成功,开始删除')
            finally:
                # Remove the temporary piece even when recognition raised.
                try:
                    os.remove(data_split_filename)
                except FileNotFoundError:
                    pass
            time.sleep(0.5)  # pause between successive API requests

    def speech_recognize(self, seg_filename):
        """Return the recognised text for the WAV file *seg_filename*.

        Returns the first recognition result, or the marker string
        ``'=====识别失败====='`` when the service does not report success.
        """
        client = AipSpeech(self.APP_ID, self.API_KEY, self.SECRET_KEY)
        with open(seg_filename, 'rb') as fp:
            audio_bytes = fp.read()
        # dev_pid selects the recognition model (1537 is presumably the
        # Mandarin model -- confirm against the Baidu documentation).
        response = client.asr(audio_bytes, 'wav', 16000, {'dev_pid': 1537})

        results = response.get('result') or []
        if 'success' in response.get('err_msg', '') and results:
            return results[0]
        print('识别失败!')
        return '=====识别失败====='

    def run(self):
        """Transcribe every file in ``self.audio_list``, continuing on errors."""
        print(f'============{self.id}共{len(self.audio_list)}个文件==============')
        for i in self.audio_list:
            try:
                self.speech_seg(i)
            except Exception as exc:
                # Best effort: report the cause and move on to the next file.
                print(f'-----{i}-----分析出现问题: {exc!r}')
            else:
                print(f'-----{i}-----分析完成')
 
if __name__ == "__main__":
    # Script entry point: transcribe the audio track of "1.mp4", looked up
    # in the current working directory.
    DouYin().transform("1.mp4")

  

posted @   潘向福  阅读(149)  评论(0)  编辑  收藏  举报
相关博文:
阅读排行:
· Manus重磅发布:全球首款通用AI代理技术深度解析与实战指南
· 被坑几百块钱后,我竟然真的恢复了删除的微信聊天记录!
· 没有Manus邀请码?试试免邀请码的MGX或者开源的OpenManus吧
· 园子的第一款AI主题卫衣上架——"HELLO! HOW CAN I ASSIST YOU TODAY"
· 【自荐】一款简洁、开源的在线白板工具 Drawnix
点击右上角即可分享
微信分享提示