非线性
# Narrated-slideshow generator.
#
# Everything below runs at import time, exactly like the original script:
#   1. read ``html.parp.txt`` and split it into sentences on punctuation;
#   2. map every sentence to the MP3 file expected to hold its speech audio;
#   3. collect the background images and the two logo images from ``mypng``;
#   4. render an MJPG ``.avi`` that shows each sentence over a background
#      image, retrying until the video length matches the audio length.
import glob
import math
import os
import sys
import time
from collections import Counter

import cv2
import mutagen.id3
import numpy as np
from aip import AipSpeech
from mutagen.easyid3 import EasyID3
from mutagen.mp3 import MP3
from PIL import Image, ImageDraw, ImageFont

import imageio

# Order kept from the original: the ffmpeg download runs before moviepy is
# imported.
# NOTE(review): newer imageio releases are reported to deprecate this call in
# favour of the ``imageio-ffmpeg`` package - confirm against the installed
# version.
imageio.plugins.ffmpeg.download()
from moviepy.editor import VideoFileClip  # noqa: E402

# NOTE(security): these Baidu speech credentials are hard-coded in the source.
# Consider loading them from the environment or a local, untracked config.
bd_k_l = ['11059852', '5Kk01GtG2fjCwpzEkwdn0mjw', 'bp6Wyx377Elq7RsCQZzTBgGUFzLm8G2A']
APP_ID, API_KEY, SECRET_KEY = bd_k_l

os_sep = os.sep
this_file_abspath = os.path.abspath(__file__)
this_file_dirname = os.path.dirname(this_file_abspath)
this_file_name = os.path.basename(this_file_abspath)

# Directory that holds one MP3 per sentence.
mp3_dir = 'D:\\mymp3\\'


def _mp3_path(uid_sen, mp3_dir=mp3_dir):
    """Return the MP3 file path used for the sentence id *uid_sen*."""
    return '{}{}{}{}'.format(mp3_dir, 'oneSen', uid_sen, '.mp3')


def _read_image(path):
    """Load *path* with OpenCV, raising instead of returning ``None``."""
    img = cv2.imread(path)
    if img is None:
        # cv2.imread signals failure with None; fail here with the path
        # rather than later with an unrelated AttributeError.
        raise IOError('cannot read image: {}'.format(path))
    return img


def gen_bd_mp3(uid, str_, mp3_dir=mp3_dir):
    """Synthesize *str_* with Baidu speech and save it as an MP3.

    Args:
        uid: Sentence id; it becomes part of the output file name.
        str_: Text to synthesize.
        mp3_dir: Output directory prefix (must end with a path separator).

    Returns:
        The path of the written MP3, or ``None`` when the service returned
        an error dict instead of audio bytes.
    """
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
    result = client.synthesis(str_, 'zh', 1, {
        'vol': 5,
    })
    # Per the original note: success yields the audio bytes, failure yields
    # a dict carrying an error code.
    if isinstance(result, dict):
        print(result)
        return None
    f_w = _mp3_path(uid, mp3_dir)
    with open(f_w, 'wb') as f:
        f.write(result)
    return f_w


# ---------------------------------------------------------------------------
# 1. Read the text and split it into sentences.
# ---------------------------------------------------------------------------
f = 'html.parp.txt'
with open(f, 'r', encoding='utf-8') as fr:
    whole_s = fr.read()

# Characters that end a sentence; adjust as needed.
# NOTE: the text is scanned one character at a time, so the multi-character
# entries ('...' and '、、、') can never match.
cutlist = ['\n', '\t', '。', ';', '?', '.', '?', '...', '、、、', '!', '!']


def FindToken(cutlist, char):
    """Return True when *char* is one of the sentence terminators."""
    return char in cutlist


def Cut(cutlist, lines, keep_tail=False):
    """Split *lines* into sentences, each ending with its terminator.

    Args:
        cutlist: Collection of terminator characters.
        lines: Iterable of single characters (a str or a list of chars).
        keep_tail: When True, text after the last terminator is returned as
            a final sentence. The default (False) keeps the original
            behaviour, which silently drops that unterminated tail.

    Returns:
        A list of sentence strings.
    """
    sentences = []
    current = []
    for char in lines:
        current.append(char)
        if FindToken(cutlist, char):
            sentences.append(''.join(current))
            current = []
    if keep_tail and current:
        sentences.append(''.join(current))
    return sentences


sentence_l = Cut(cutlist, whole_s)
# Drop sentences that are only a line break and strip line breaks elsewhere.
sentence_l_noblank = [s.replace('\n', '') for s in sentence_l if s != '\n']

# ---------------------------------------------------------------------------
# 2. One audio file per sentence (sentences are split on punctuation above).
# ---------------------------------------------------------------------------
uid = 43020157
uid_d = {uid: {'sen_d': {}, 'img_l': []}}
for uid_n, sentence in enumerate(sentence_l_noblank):
    uid_sen = '{}{}{}'.format(uid, '_', uid_n)
    # The synthesis call is disabled in the original script; the MP3 files
    # are expected to exist already. Enable to (re)generate them:
    # gen_bd_mp3(uid_sen, sentence)
    uid_d[uid]['sen_d'][_mp3_path(uid_sen)] = sentence

# ---------------------------------------------------------------------------
# 3. Collect background images and the two logos.
# ---------------------------------------------------------------------------
f_img_d = '{}{}{}{}{}'.format(this_file_dirname, os_sep, 'mypng', os_sep, '*.jpg')
imgs = glob.glob(f_img_d)
uid_d[uid]['img_l'] = []
for img_path in imgs:
    # Match on the file name only, so that a directory containing 'logo' or
    # 'uid' (e.g. '...guide...') cannot misclassify every image.
    img_name = os.path.basename(img_path)
    if 'logo' in img_name:
        if 'uid' in img_name:
            uid_d[uid]['img_logo_uid'] = img_path
        else:
            uid_d[uid]['img_logo_our'] = img_path
    else:
        uid_d[uid]['img_l'].append(img_path)
for key in uid_d[uid]:
    print(key)

logo_f = uid_d[uid]['img_logo_our']
logo_f_uid = uid_d[uid]['img_logo_uid']
imgs = uid_d[uid]['img_l']
if not imgs:
    raise FileNotFoundError('no background images match {}'.format(f_img_d))

# Count how often each "width,height" occurs among the background images.
img_size_d = Counter(
    '{},{}'.format(img.shape[1], img.shape[0]) for img in map(_read_image, imgs)
)
# Most common size (the mode) ...
mode_img_size_wh = [int(v) for v in img_size_d.most_common(1)[0][0].split(',')]
# ... which the original script then overrides with a fixed output size.
# NOTE(review): confirm the override is intentional.
mode_img_size_wh = [1208, 720]


def resize_rescale_pilimg(img_f, w_h_tuple=(mode_img_size_wh[0], mode_img_size_wh[1]), mid_factor=1):
    """Resize *img_f* to the target size and save the result as a PNG.

    The PNG is written to ``<source dir>resize_rescale_pilimg/<stem>.PNG``.
    The directory name is appended to the source directory without a
    separator, which is where the original code wrote its output.

    Args:
        img_f: Path of the source image.
        w_h_tuple: Target (width, height) in pixels.
        mid_factor: Scale factor applied to both target dimensions.

    Returns:
        The path of the resized PNG.
    """
    src_dir, src_file = os.path.split(img_f)
    # splitext only strips the real extension, so dots elsewhere in the path
    # no longer corrupt the stem.
    stem = os.path.splitext(src_file)[0]
    out_dir = '{}{}'.format(src_dir, 'resize_rescale_pilimg')
    # Image.save does not create missing directories.
    os.makedirs(out_dir, exist_ok=True)
    img_f_new = os.path.join(out_dir, '{}{}'.format(stem, '.PNG'))
    # Image.resize needs integer sizes even if mid_factor is a float.
    target_size = (int(w_h_tuple[0] * mid_factor), int(w_h_tuple[1] * mid_factor))
    with Image.open(img_f) as src:
        # LANCZOS is the same filter as ANTIALIAS, which Pillow 10 removed.
        src.resize(target_size, Image.LANCZOS).save(img_f_new, 'PNG')
    return img_f_new


myfont = ImageFont.truetype("simhei.ttf", encoding="utf-8")

# Maximum number of characters per subtitle row.
br_step = math.floor((mode_img_size_wh[0]) * 0.0185)


def _overlay_logo(canvas, logo, left):
    """Stamp *logo* onto the top edge of *canvas*, starting at column *left*.

    Logo pixels brighter than the threshold (175) are treated as transparent
    and keep the canvas content; darker pixels are copied from the logo.
    *canvas* is modified in place.
    """
    rows, cols = logo.shape[:2]
    roi = canvas[0:rows, left:left + cols]
    gray = cv2.cvtColor(logo, cv2.COLOR_BGR2GRAY)
    _, mask = cv2.threshold(gray, 175, 255, cv2.THRESH_BINARY)
    mask_inv = cv2.bitwise_not(mask)
    background = cv2.bitwise_and(roi, roi, mask=mask)
    foreground = cv2.bitwise_and(logo, logo, mask=mask_inv)
    canvas[0:rows, left:left + cols] = cv2.add(background, foreground)


def _wrap_text(text, width):
    """Break *text* into rows of *width* chars; return (wrapped, row count)."""
    row_count = math.ceil(len(text) / width)
    rows = [text[k * width:(k + 1) * width] for k in range(row_count)]
    return '\n'.join(rows), row_count


def _render_once(os_delay_factor):
    """Render one video attempt; return (video path, total audio seconds)."""
    sen_d = uid_d[uid]['sen_d']
    img_l = uid_d[uid]['img_l']
    width, height = mode_img_size_wh[0], mode_img_size_wh[1]

    f_v = '{}{}{}{}'.format('D:\\myv\\', uid, int(time.time()), '.avi')
    fps, fourcc = 15, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
    videoWriter = cv2.VideoWriter(f_v, fourcc, fps, (width, height))

    # Loop-invariant resources: load them once per attempt.
    logo_our = _read_image(logo_f)
    logo_uid = _read_image(logo_f_uid)
    font_size = math.floor(width * 0.040)
    font = ImageFont.truetype("simhei.ttf", font_size, encoding="utf-8")
    f_x = math.floor(width * 0.06)
    EasyID3.valid_keys["comment"] = "COMM::'XXX'"

    audio_spend = 0
    try:
        for img_seq, (f_mp3, sentence) in enumerate(sen_d.items()):
            screen_str, br_step_times = _wrap_text(sentence, br_step)

            # Cycle through the background images.
            imgname = img_l[img_seq % len(img_l)]
            frame = _read_image(imgname)
            if (frame.shape[1], frame.shape[0]) != (width, height):
                frame = _read_image(resize_rescale_pilimg(imgname))

            # Our logo in the top-left corner, the uid logo in the top-right.
            _overlay_logo(frame, logo_our, 0)
            _overlay_logo(frame, logo_uid, frame.shape[1] - logo_uid.shape[1])

            # Draw the subtitle with PIL, then convert back to BGR for
            # OpenCV. More rows push the text block further up.
            frame_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            f_y = math.floor(height * 0.85) - br_step_times * font_size
            ImageDraw.Draw(frame_pil).text((f_x, f_y), screen_str, (255, 0, 0), font)
            img = cv2.cvtColor(np.array(frame_pil), cv2.COLOR_RGB2BGR)

            t_spend = MP3(f_mp3, ID3=EasyID3).info.length
            audio_spend += t_spend
            print(audio_spend)

            # NOTE: the number of frames written depends on how many writes
            # fit into a wall-clock window of t_spend * os_delay_factor
            # seconds, i.e. on machine speed. gen_video compensates by
            # re-scaling os_delay_factor and rendering again.
            myinterval = t_spend * os_delay_factor
            print(myinterval, '---------------', screen_str)
            this_time = time.time()
            while time.time() - this_time < myinterval:
                videoWriter.write(img)
    finally:
        # Always finalize the file, even if a sentence fails midway.
        videoWriter.release()
    return f_v, audio_spend


def gen_video(os_delay_factor=0.046, bear_error_second=0.05):
    """Render the video, retrying until its length matches the audio.

    After each attempt the video duration is compared with the summed MP3
    durations. If they differ by more than *bear_error_second*, the delay
    factor is scaled by ``audio / video`` and a new file is rendered.

    Args:
        os_delay_factor: Wall-clock seconds spent writing frames per second
            of audio.
        bear_error_second: Accepted absolute difference, in seconds, between
            video and audio length.

    This function does not return: once the tolerance is met it terminates
    the process with ``os._exit(123)``, as the original script did.
    """
    while True:
        f_v, audio_spend = _render_once(os_delay_factor)
        # Pause kept from the original; presumably lets the file settle
        # before it is reopened.
        time.sleep(1)
        print(f_v)
        video_playtime = VideoFileClip(f_v).duration
        error_second = math.fabs(video_playtime - audio_spend)
        print(error_second)
        if error_second > bear_error_second:
            os_delay_factor *= audio_spend / video_playtime
            continue
        # os._exit skips interpreter cleanup, so flush buffered output first.
        sys.stdout.flush()
        os._exit(123)


gen_video(os_delay_factor=0.001, bear_error_second=0.05)
# Never reached: gen_video ends the process itself.
ddd = 9
以上代码成立的前提是:系统是线性的,即生成的视频时长与 os_delay_factor 成正比,这样按 audio_spend / video_playtime 缩放 os_delay_factor 后,视频时长才会逼近音频时长。