import os
import re
import subprocess

import requests
from aip import AipSpeech
from pydub import AudioSegment
# Credentials handed to the AipSpeech client in automatic_speech_recognition().
# They are intentionally blank here; fill them in before using recognition.
APP_ID: str = ''      # application id
API_KEY: str = ''     # API key belonging to that application
SECRET_KEY: str = ''  # secret key paired with API_KEY
def save_mp4(filename, url, timeout=30):
    """Download the media referenced by a share page and save it to disk.

    The page at ``url`` is fetched and searched for the first
    ``"playurl":"..."`` entry in its body. The address found there is then
    downloaded and written to ``filename`` in binary mode.

    Args:
        filename: Destination path for the downloaded media.
        url: Address of the page whose body embeds the ``playurl`` field.
        timeout: Seconds to wait on each HTTP request before giving up.
            Without a timeout ``requests`` may block indefinitely.

    Raises:
        requests.RequestException: On a network failure, a timeout, or an
            HTTP error status from either request.
        ValueError: If the page does not contain a non-empty ``playurl``.
    """
    page = requests.get(url, timeout=timeout)
    # Fail loudly instead of scanning an error page for the media address.
    page.raise_for_status()

    match = re.search(r'"playurl":"(.*?)"', page.text)
    if match is None or not match.group(1):
        raise ValueError(f'no "playurl" entry found in the page at {url!r}')

    media = requests.get(match.group(1), timeout=timeout)
    # Avoid saving an HTML/JSON error body under a media file name.
    media.raise_for_status()

    with open(filename, 'wb') as fw:
        fw.write(media.content)
def _to_milliseconds(timestamp):
    """Convert a ``"minutes:seconds"`` string into whole milliseconds."""
    parts = timestamp.split(':')
    return (int(parts[0]) * 60 + int(parts[1])) * 1000


def cut_speech(filename, start_time="0:00", stop_time="0:42", save_name=None):
    """Cut a time window out of an audio file and export it as MP3.

    Args:
        filename: Path of the audio to cut. A path ending in ``.pcm`` is
            read as headerless PCM (signed 16-bit, mono, 16 kHz), which is
            the format produced by the ffmpeg commands in this file
            (``-acodec pcm_s16le -ac 1 -ar 16000``). Any other path is
            read as a WAV file.
        start_time: Start of the window as ``"minutes:seconds"``.
        stop_time: End of the window as ``"minutes:seconds"``.
        save_name: Output path. When omitted it is derived as
            ``"word" + filename[6:]``.

    Raises:
        IndexError: If a time string has no ``:`` separator.
        ValueError: If a time component is not an integer.
    """
    if filename.lower().endswith('.pcm'):
        # Raw PCM carries no header, so from_wav cannot decode it; the
        # sample layout has to be supplied explicitly.
        sound = AudioSegment.from_raw(
            filename, sample_width=2, frame_rate=16000, channels=1
        )
    else:
        sound = AudioSegment.from_wav(filename)

    print("time:", start_time, "~", stop_time)
    start_ms = _to_milliseconds(start_time)
    stop_ms = _to_milliseconds(stop_time)
    print("ms:", start_ms, "~", stop_ms)

    # AudioSegment slices are indexed in milliseconds.
    word = sound[start_ms:stop_ms]

    if save_name is None:
        # NOTE(review): dropping the first six characters looks tailored to
        # one particular input naming scheme - confirm it is still wanted.
        save_name = "word" + filename[6:]
    print(save_name)

    # The album tag is the output name minus its last four characters.
    word.export(
        save_name,
        format="mp3",
        tags={'artist': 'AppLeU0', 'album': save_name[:-4]},
    )
def automatic_speech_recognition(filename):
    """Run speech recognition on an audio file via the AipSpeech client.

    The input is first converted with ffmpeg to headerless PCM (signed
    16-bit little-endian, mono, 16 kHz) at ``<filename>.pcm``. The bytes of
    that file are then submitted to ``AipSpeech.asr``.

    Args:
        filename: Path of the audio file to transcribe.

    Returns:
        ``(asr_result, True)`` when the response contains a ``'result'``
        entry, where ``asr_result`` is the full response; otherwise
        ``('', False)``.

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero
            status (previously the failure was ignored).
    """
    # Client for the speech recognition API.
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

    # Convert the input into a raw PCM byte stream. The argument list
    # (instead of a shell string) keeps paths containing spaces or shell
    # metacharacters intact.
    pcm_path = f'{filename}.pcm'
    subprocess.run(
        [
            'ffmpeg', '-y', '-i', filename,
            '-acodec', 'pcm_s16le', '-f', 's16le',
            '-ac', '1', '-ar', '16000',
            pcm_path,
        ],
        check=True,
    )

    # Read the converted PCM bytes.
    with open(pcm_path, 'rb') as f:
        data = f.read()

    # Arguments: audio bytes, audio format, sample rate, options.
    # Per the original author's note: the sample rate may be 8000-16000,
    # and dev_pid selects the language model (1536 can recognise simple
    # English, 1537 cannot).
    asr_result = client.asr(data, 'pcm', 16000, {'dev_pid': 1536})
    print('\nasr_result:{}'.format(asr_result))

    if asr_result.get('result') is None:
        return '', False
    return asr_result, True
if __name__ == '__main__':
    # Demo run: download one shared recording, convert it to raw PCM with
    # ffmpeg and cut a clip out of the converted audio.
    name = 'test'
    url = 'https://kg2.qq.com/node/play?s=GJH9sHGzgexrKGh-&shareuid=6a9a958c252a378836&topsource=a0_pn201001006_z1_u878907032_l1_t1558015859__'
    source = f'{name}.m4a'
    save_mp4(source, url)

    # Signed 16-bit little-endian, mono, 16 kHz raw PCM. check=True stops
    # the script when ffmpeg fails instead of carrying on with a missing or
    # stale output file.
    filename = f'{source}.pcm'
    subprocess.run(
        [
            'ffmpeg', '-y', '-i', source,
            '-acodec', 'pcm_s16le', '-f', 's16le',
            '-ac', '1', '-ar', '16000',
            filename,
        ],
        check=True,
    )
    cut_speech(filename)
    # print('done...')
    # asr_result = automatic_speech_recognition(filename)
    # print(asr_result)