语音问答助手和生成熟肉视频
语音问答助手
from multiprocessing import Process
from threading import Thread
import os
import json
import whisper
import zhconv
from pyaudio import PyAudio,paInt16
import wave
from pydub import AudioSegment
from pydub.playback import play
import sys
import time
import numpy as np
import subprocess
import openai
script_dir=os.path.dirname(os.path.realpath(sys.argv[0]))
def transcribe(file):
    """Transcribe a Chinese audio file with Whisper and save the result as JSON.

    The JSON is written next to the input file, with the same stem.
    """
    print(f'transcribing {file}')
    model = whisper.load_model('small')
    print('whisper model loaded')
    result = model.transcribe(file, language='Chinese')
    print(result)
    json_path = f'{file.rsplit(".", 1)[0]}.json'
    with open(json_path, 'w', encoding='utf8') as f:
        json.dump(result, f, ensure_ascii=False, indent=4)
class recorder:
    """Blocking microphone recorder.

    start() runs in a worker thread and appends raw 16-bit mono PCM chunks to
    voice_string until finish() clears the `does` flag; finish() then writes
    the captured audio to a dated wav file (yy/mm/dd/HHMMSS.wav).
    """
    NUM_SAMPLES = 2000      # frames per stream read
    SAMPLING_RATE = 16000   # Hz, mono 16-bit
    voice_string = []       # captured PCM chunks (reset per recording)
    does = False            # True while a recording is in progress

    def start(self):
        """Capture audio from the default input device until self.does is cleared."""
        print('recording audio...')
        self.does = True
        self.voice_string = []
        pa = PyAudio()
        stream = pa.open(format=paInt16, channels=1, rate=self.SAMPLING_RATE,
                         input=True, frames_per_buffer=self.NUM_SAMPLES)
        try:
            while self.does:
                self.voice_string.append(stream.read(self.NUM_SAMPLES))
        finally:
            # FIX: the original leaked the stream and the PyAudio instance on
            # every recording; release them when the capture loop ends.
            stream.stop_stream()
            stream.close()
            pa.terminate()

    def finish(self):
        """Stop recording, write the wav file and return (wav_id, duration_seconds).

        NOTE(review): the capture thread may still be appending its final chunk
        when this runs — the original had the same race; confirm acceptable.
        """
        print('recording audio end')
        self.does = False
        wav_id = time.strftime('%y_%m_%d_%H%M%S')
        day_dir = wav_id[:8].replace('_', '/')  # yy/mm/dd
        if not os.path.exists(day_dir):
            os.makedirs(day_dir)
        wav_path = f'{wav_id.replace("_", "/")}.wav'
        wf = wave.open(wav_path, 'wb')
        wf.setnchannels(1)
        wf.setsampwidth(2)  # 16-bit samples
        wf.setframerate(self.SAMPLING_RATE)
        # FIX: join the byte chunks directly instead of round-tripping through
        # numpy (np.array(...).tobytes()), which only worked because all chunks
        # happened to be the same length.
        wf.writeframes(b''.join(self.voice_string))
        wf.close()
        wav = wave.open(wav_path, 'rb')
        wav_duration = wav.getnframes() / self.SAMPLING_RATE
        wav.close()
        print('wav_id', wav_id)
        return wav_id, wav_duration
# Module-level recorder shared by the interactive __main__ loop below.
rec=recorder()
def start_minicpm_service():
    """Launch the local MiniCPM chat service; blocks until the server exits.

    Intended to run inside a dedicated multiprocessing.Process, so the
    os.chdir() only affects that child process.
    """
    service_dir = 'C:/Users/tellw/apps/python'
    os.chdir(service_dir)
    subprocess.run('python run_minicpm_service.py', shell=True)
def answer2(result, client):
    """Send the transcribed question to the local OpenAI-compatible endpoint.

    Returns the model's reply text.
    """
    messages = [
        {'role': 'system', 'content': 'You are a helpful assistant.'},
        {'role': 'user', 'content': result},
    ]
    completion = client.chat.completions.create(
        model='Model-7.6B-Q4_0_openbmb_MiniCPM-o-2_6-gguf',
        messages=messages,
        frequency_penalty=0.2,
    )
    return completion.choices[0].message.content
def synthesize_answer_and_play(answer, wav_id):
    """Synthesize *answer* to `<yy/mm/dd/HHMMSS>_answer.wav` via VITS, then play it.

    Runs the external synthesizer from its own directory, returns to this
    script's directory, and plays the produced wav with pydub.
    """
    os.chdir('D:/asr-service/VITS-Paimon')
    print('synthesizing audios...')
    out_stem = f'{script_dir}/{wav_id.replace("_", "/")}_answer'
    # FIX: pass argv as a list so quotes/metacharacters in the model's answer
    # cannot split into extra arguments (the original interpolated the answer
    # into a single command string).
    subprocess.run(['python', 'custom_synthesize_shell.py', answer.replace(' ', ''), out_stem])
    os.chdir(script_dir)
    print('start to play')
    song = AudioSegment.from_wav(f'{wav_id.replace("_", "/")}_answer.wav')
    play(song)
    print('play end')
def exec_shell(cmd, ignore_err=False):
    """Run *cmd* through the shell and capture its output.

    Returns (stdout_bytes, stderr_bytes) on success or when ignore_err is
    True; otherwise returns the sentinel pair (-1000, error_message).
    """
    proc = subprocess.run(cmd, shell=True,
                          stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if ignore_err or proc.returncode == 0:
        return proc.stdout, proc.stderr
    return -1000, f'execute "{cmd}" failed'
if __name__=='__main__':
    # Interactive voice Q&A loop: record -> transcribe -> query LLM -> speak answer.
    while True:
        input('录音?')  # press Enter to start recording
        record_thread=Thread(target=rec.start,args=())
        record_thread.daemon=True
        record_thread.start()
        input('停止?')  # press Enter to stop recording
        st=time.time()
        wav_id,wav_duration=rec.finish()
        # Run Whisper in a child process so its model memory is released on join.
        p1=Process(target=transcribe,args=(f'{wav_id.replace("_","/")}.wav',))
        p1.start()
        p1.join()
        # Check whether a local LLM service is already listening on port 8080.
        # NOTE(review): relies on `grep` being available on this Windows setup — confirm.
        res,_=exec_shell('netstat -ano|grep 8080|grep -i listen',True)
        if len(res.decode())==0:
            gpt_running=False
        else:
            print('chatgpt服务正在运行')
            gpt_running=True
        p=None
        if not gpt_running:
            # Start the MiniCPM service ourselves and wait for it to load.
            p=Process(target=start_minicpm_service,args=())
            p.daemon=True
            p.start()
            time.sleep(60)  # crude fixed wait for model loading
        with open(f'{wav_id.replace("_","/")}.json','r',encoding='utf8') as f:
            result=json.load(f)
        client=openai.OpenAI(base_url='http://127.0.0.1:8080/v1',api_key='1')
        # Normalize the transcript to simplified Chinese.
        result=zhconv.convert(result['text'],'zh-hans')
        print(f'responcing to {result}')
        answer=answer2(result,client)
        print('answer',answer)
        if not gpt_running:
            # We started the service: look up its PID via netstat and kill it.
            output,_=exec_shell('netstat -ano|grep 8080|grep -i listen')
            pid=int(output.decode().strip().split('\n')[0].strip().split(' ')[-1])
            print('pid',pid)
            os.kill(pid,9)
            print('结束chatgpt服务')
        # Synthesize and play the answer in a child process.
        p2=Process(target=synthesize_answer_and_play,args=(answer,wav_id))
        p2.start()
        p2.join()
        # Append question/answer pair to the session log.
        with open('audio-robot-logs.txt','a',encoding='utf8') as f:
            f.write(f'{wav_id}\n{result}<SPLIT>\n{answer}<SPLIT>\n')
        et=time.time()
        print(f'this query costs {et-st}s')
生成熟肉视频
from multiprocessing import Process
import os
import subprocess
import sys
import psutil
import time
import whisper
import json
import openai
script_dir=os.path.dirname(os.path.realpath(sys.argv[0]))
def transcribe(file):
    """Extract the audio of a video *file* with ffmpeg, transcribe it as
    Japanese with Whisper, and save the result JSON next to the video.
    """
    print(f'transcribing {file}')
    stem = file.rsplit('.', 1)[0]
    # FIX: use a per-source temp name instead of the shared hard-coded
    # 'test.wav', so runs on different videos cannot clobber each other.
    wav_path = f'{stem}.tmp.wav'
    subprocess.run(f'ffmpeg -i "{file}" "{wav_path}" -y', shell=True)
    model = whisper.load_model('small')
    print('whisper model loaded')
    result = model.transcribe(wav_path, language='Japanese')
    print(result)
    with open(f'{stem}.json', 'w', encoding='utf8') as f:
        json.dump(result, f, ensure_ascii=False, indent=4)
    # Clean up the intermediate wav once the transcript is saved.
    os.remove(wav_path)
def start_jp2cn_service():
    """Launch the local Japanese->Chinese translation service; blocks until it exits.

    Meant to run inside its own multiprocessing.Process so the os.chdir()
    only affects that child.
    """
    service_dir = 'C:/Users/tellw/apps/python'
    os.chdir(service_dir)
    subprocess.run('python run_jp2zh_service.py', shell=True)
def translate_jp2zh(jp, client):
    """Translate Japanese text *jp* to Chinese via the local Sakura model.

    Returns *jp* unchanged when the request fails or times out; strips a
    trailing Chinese full stop ('。') from the translation.
    """
    try:
        completion = client.chat.completions.create(
            model='sakura-1.5b-qwen2.5-v1.0-fp16',
            messages=[{'role': 'system', 'content': 'You are a helpful assistant.'},
                      {'role': 'user', 'content': jp}],
            frequency_penalty=0.2,
            timeout=20,
        )
    except Exception as e:
        print(f'{e}')
        return jp
    text = completion.choices[0].message.content
    # BUG FIX: the original tested content[:-1]=='。' (everything *except* the
    # last char); the intent is to drop a trailing full stop, i.e. test the
    # LAST char. endswith() also handles an empty reply safely.
    if text.endswith('。'):
        return text[:-1]
    return text
def srt_time(t):
    """Format a time offset in seconds as an SRT timestamp 'HH:MM:SS,mmm'.

    FIX: the original hard-coded ',000' milliseconds and truncated seconds,
    throwing away the sub-second precision of Whisper's float timestamps and
    degrading subtitle sync.
    """
    total_ms = int(round(t * 1000))
    total_s, ms = divmod(total_ms, 1000)
    h, rem = divmod(total_s, 3600)
    m, s = divmod(rem, 60)
    return f'{h:02d}:{m:02d}:{s:02d},{ms:03d}'
def exec_shell(cmd, ignore_err=False):
    """Execute *cmd* in a shell, capturing stdout/stderr.

    On success (or when ignore_err is set) returns (stdout_bytes,
    stderr_bytes); on a non-zero exit returns (-1000, error_message).
    """
    proc = subprocess.run(cmd, shell=True,
                          stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if ignore_err or proc.returncode == 0:
        return proc.stdout, proc.stderr
    return -1000, f'execute "{cmd}" failed'
if __name__=='__main__':
    # Batch job: for every mp4/mkv under sys.argv[1], transcribe the Japanese
    # audio, translate each segment to Chinese, write an .srt, and burn it
    # into a '<name>_subtitle.mp4'.
    wd=sys.argv[1]
    os.chdir(wd)
    for file in os.listdir():
        # Only process mp4/mkv containers.
        if '.' not in file or file.rsplit('.',1)[1] not in ['mp4','mkv']:
            continue
        if not os.path.exists(f'{file.rsplit(".",1)[0]}.json'):
            # Transcribe in a child process so Whisper's memory is reclaimed on join.
            p1=Process(target=transcribe,args=(file,))
            p1.start()
            p1.join()
        if not os.path.exists(f'{file.rsplit(".",1)[0]}.srt'):
            # Start the local JP->ZH translation service and give it time to load.
            p=Process(target=start_jp2cn_service,args=())
            p.daemon=True
            p.start()
            time.sleep(30)  # crude fixed wait for the model to load
            with open(f'{file.rsplit(".",1)[0]}.json','r',encoding='utf8') as f:
                result=json.load(f)
            srt_txt=''
            client=openai.OpenAI(base_url='http://127.0.0.1:8080/v1',api_key='1')
            rsl=len(result["segments"])
            # Translate segment by segment, accumulating SRT entries.
            for i,segment in enumerate(result['segments']):
                print(f'translating {i/rsl*100}% {i}/{rsl} {segment["text"]} {time.strftime("%y%m%d%H%M%S")}')
                nc=f'{segment["id"]+1}\n{srt_time(segment["start"])} --> {srt_time(segment["end"])}\n{translate_jp2zh(segment["text"],client)}\n\n'
                print(nc)
                srt_txt+=nc
            with open(f'{file.rsplit(".",1)[0]}.srt','w',encoding='utf8') as f:
                f.write(srt_txt)
            # Tear down the translation service we started.
            # NOTE(review): assumes `grep` exists on this Windows box — confirm.
            output,_=exec_shell('netstat -ano|grep 8080|grep -i listen')
            pid=int(output.decode().strip().split('\n')[0].strip().split(' ')[-1])
            print(pid)
            os.kill(pid,9)
            print('结束日语翻译服务')
        print(f'generating subtitle video--{file.rsplit(".",1)[0]}_subtitle.mp4')
        # Burn the subtitles into a new mp4 with ffmpeg's subtitles filter.
        subprocess.run(f'ffmpeg -i "{file}" -vf subtitles="{file.rsplit(".",1)[0]}.srt" "{file.rsplit(".",1)[0]}_subtitle.mp4" -y')
    # Optionally power off the machine when the whole batch completes.
    if len(sys.argv)>=3 and sys.argv[2]=='shutdown_y':
        subprocess.run('shutdown -s -t 0',shell=True)
创建于2502051411,修改于2502051411
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 分享一个免费、快速、无限量使用的满血 DeepSeek R1 模型,支持深度思考和联网搜索!
· 基于 Docker 搭建 FRP 内网穿透开源项目(很简单哒)
· ollama系列01:轻松3步本地部署deepseek,普通电脑可用
· 25岁的心里话
· 按钮权限的设计及实现