一些Python与ffmpeg音频处理的实用程序和命令

ffmpeg 切分音频文件

ffmpeg -i 001.wav -ss 0 -t 520 001-slice-1.wav

对音频文件001.wav，从第0秒开始切出520秒，存为001-slice-1.wav

ffmpeg 切分视频文件（按关键帧切分，不重编码；速度快，但切点只能落在关键帧上，所以不够精确）

ffmpeg -ss 00:48:22 -i 录制-22637261-20221214-195102-698-【3D】嘉心糖心中的TOP5是？.mp4 -c copy -t 00:08:51 new.mp4

其中

-ss 00:48:22 表示从这一时间点开始

-t 00:08:51 表示切分的时长有这么长

-i 录制-22637261-20221214-195102-698-【3D】嘉心糖心中的TOP5是？.mp4 是输入视频

new.mp4 表示输出的视频片段文件

ffmpeg精确切分视频（重编码，速度慢但是精确，无视关键帧）

ffmpeg -ss [start] -t [duration] -i [in].mp4 -c:v libx264 -c:a aac -strict experimental -b:a 98k [out].mp4

ffmpeg从mp4中抽取wav音频、Python Spleeter库人声提取

下载ffmpeg进行音视频分离，安装spleeter库进行人声提取，命令如下：

【音视频分离】ffmpeg -i 001.mp4 -f wav -ar 44100 001.wav

【提取flac格式音乐】ffmpeg -i audio.xxx -c:a flac audio.flac

【aac格式】ffmpeg -i input.mp4 -vn -c:a copy output.aac

或者使用Python执行，更便于编辑和复用：

def get_wav_from_mve():
    """Extract the audio track of one recorded FLV file into a WAV file.

    Builds a single ffmpeg command line and runs it through ``os.system``.
    The input directory, the input file name and the output file name are
    fixed inside the function; the output is written into the same directory
    as the input. ``-ss 00:02:00`` makes ffmpeg start two minutes in.
    """
    # Author's note: with "-c copy" added, the output was nothing but noise;
    # without it the result is fine (cause unknown to the author).
    # Author's note: "dur 39:05" (presumably the length of the recording).
    record_dir = "E:/BASE-LIVE-RECORD/2022-11/221113D/"
    source_name = "录制-22637261-20221113-210944-179-【B限】今天聊点小秘密.flv"
    command = f"ffmpeg -i {record_dir}{source_name} -ss 00:02:00 -f wav {record_dir}221113d.wav"
    os.system(command)

【人声提取 / 去除背景声音】

方法1：pip install spleeter，然后

C:\PythonModels> python -m spleeter separate inputwavseparate/230211dsep/inputwav/002-1.wav -o inputwavseparate\230211dsep\wavseparates

方法2：也可以使用最强大的UVR5来提取人声，缺点是软件太大、对显存要求过高，不过功能也更丰富，效果也更极致：

开源仓库：https://github.com/Anjok07/ultimatevocalremovergui

最新发行版本：https://github.com/Anjok07/ultimatevocalremovergui/releases/tag/v5.5.0

音频类型转换

以aac文件到mp3文件为例：ffmpeg -i filenameaac.aac -acodec libmp3lame filenamemp3.mp3

MP3到ogg：ffmpeg -i "Stanton Lanier - Awaken the Dawn(Solo).mp3" -acodec libvorbis -ab 128k AwakentheDawn.ogg

参考博客：

ffmpeg实现视频和音频分离，并且将声音切片：https://blog.csdn.net/wujiesunlirong/article/details/122199917

Win10基于python，spleeter 人声提取工具安装和使用（全网最全，超详细）：https://blog.csdn.net/qq_44976743/article/details/108632618

【干货】混剪/配音必备神器，一键提取/去除人声 | 强大的Spleeter：https://www.bilibili.com/video/BV13p4y1y7vF

librosa

加载音频，查看长度和采样率

import librosa
# Path of the audio clip to inspect.
audio_data = 'data/Data_MGTV/angry/audio_1027.wav'
# Load the clip: `x` receives the sample data and `sr` the sampling rate.
# NOTE(review): no `sr=` argument is passed, so librosa's default applies —
# presumably the audio is resampled rather than kept at its native rate; confirm.
x , sr = librosa.load(audio_data)
# Show the shape of the sample array together with the sampling rate.
print(x.shape, sr)

音频时长：

import wave
# Compute the duration of the WAV file at `itempath` from its header:
# duration = number of frames / frames per second.
# The file handle has its own name (`wav_file`); previously it was called `f`
# and was overwritten by the sample-rate variable inside the `with` block.
with wave.open(itempath, 'r') as wav_file:
    params = wav_file.getparams()
    # print(params)
    # Total number of frames in the file.
    a = params.nframes
    # Sampling rate (frames per second).
    f = params.framerate
    # Duration of the audio.
    t = a / f
    print(" time length: ", t)

更简单的获取音频时长的方法：

import librosa 
# Ask librosa for the duration of the file directly (presumably in seconds — confirm).
# NOTE(review): newer librosa releases appear to prefer `path=` over `filename=`;
# check against the installed version. The name `time` would also hide the
# standard-library `time` module if that is imported in the same script.
time = librosa.get_duration(filename="raw_data/请检测出果皮/10.wav")

对一条长音频，按照音频停顿时长分割成多条短音频：

def remove_silence(file_path, silence_len=320, silence_thresh=-56,
                   keep_silence=100, min_chunks=60):
    """Split one long WAV file into short clips at its pauses.

    The clips are written as ``0000.wav``, ``0001.wav``, ... into a directory
    named after the input file with ``_chunks`` appended to its extension-less
    path (``a/b/x.wav`` -> ``a/b/x_chunks/``).

    Args:
        file_path: Path of the WAV file to split.
        silence_len: Passed to ``split_on_silence`` as ``min_silence_len``.
        silence_thresh: Passed to ``split_on_silence`` as ``silence_thresh``.
            Author's tuning notes: -72 could not split the audio at all; -64
            split it but still left many filler sounds; a higher (less
            negative) threshold was to be tried next.
        keep_silence: Passed to ``split_on_silence`` as ``keep_silence``.
        min_chunks: If fewer clips than this are produced, the split is
            treated as failed and nothing is written.

    Returns:
        None.
    """
    sound = AudioSegment.from_file(file_path, format='wav')
    chunks = split_on_silence(sound, min_silence_len=silence_len,
                              silence_thresh=silence_thresh,
                              keep_silence=keep_silence)
    if len(chunks) < min_chunks:
        print("片段太长，切割失败！")
        return
    print("切分成功，有"+str(len(chunks))+"条！")
    # splitext strips only the final extension; splitting on the first '.'
    # broke paths such as "./data/x.wav" (the result was just "_chunks/").
    chunks_path = os.path.splitext(file_path)[0] + "_chunks/"
    os.makedirs(chunks_path, exist_ok=True)
    for i, chunk in enumerate(chunks):
        chunk.export(chunks_path + '%04d.wav' % i, format='wav')
    print("remove silence end!")

把多条短语音，合成10s以内的长语音：

def merge_short(filename, st_idx):
    """Merge consecutive short clips of a folder into clips shorter than 9.9 s.

    Files in the folder ``filename`` are visited in sorted name order. Clips
    shorter than 9.9 s are concatenated while the running total stays below
    9.9 s; each merged clip is written to ``<filename>-output`` under a
    six-digit, zero-padded running number starting at ``st_idx``. Clips of
    9.9 s or more are moved (renamed) into the output folder unchanged.

    Differences from the earlier version of this function:
      * the last merged buffer is written after the loop (it used to be
        dropped silently);
      * the directory listing is sorted, so merge order is deterministic;
      * a pending buffer is written before a long clip is moved, so clips on
        either side of a long clip are no longer joined together;
      * the buffer's duration is computed as samples / sampling rate instead
        of a second ``librosa.get_duration`` call.

    Args:
        filename: Folder containing the short clips.
        st_idx: First number to use for the output file names.

    Returns:
        None.
    """
    max_dur = 9.9
    out_dir = filename + "-output"
    os.makedirs(out_dir, exist_ok=True)

    def _out_path(number):
        # Six-digit zero-padded name, e.g. 42 -> "<out_dir>/000042.wav".
        return out_dir + "/" + str(number).zfill(6) + ".wav"

    idx = st_idx
    tmp_wav = None  # samples merged so far, or None when nothing is pending
    sr = 0          # sampling rate of the pending samples

    for name in sorted(os.listdir(filename)):
        itempath = os.path.join(filename, name)
        item_dur = librosa.get_duration(filename=itempath)

        if item_dur >= max_dur:
            # Long clip: flush what is pending first to keep the output order,
            # then move the clip itself without re-encoding it.
            if tmp_wav is not None:
                soundfile.write(_out_path(idx), tmp_wav, sr)
                idx += 1
                tmp_wav = None
            os.rename(itempath, _out_path(idx))
            idx += 1
            continue

        new_wav, new_sr = librosa.load(itempath)
        if tmp_wav is None:
            tmp_wav, sr = new_wav, new_sr
        elif len(tmp_wav) / sr + item_dur < max_dur:
            tmp_wav = np.hstack((tmp_wav, new_wav))
        else:
            # Adding this clip would exceed the limit: write the buffer and
            # start a new one with the current clip.
            soundfile.write(_out_path(idx), tmp_wav, sr)
            idx += 1
            tmp_wav, sr = new_wav, new_sr

    # Write whatever is still pending once the listing is exhausted.
    if tmp_wav is not None:
        soundfile.write(_out_path(idx), tmp_wav, sr)

    print("merge short end!")

删掉文件夹下所有10s以内的语音：

def delete_short(root_path):
    """Delete every clip shorter than 9.9 s from one fixed chunk folder.

    The folder is ``root_path`` with the fixed directory name
    ``2_230218d-2_(Vocals)_chunks`` appended directly (no separator is
    inserted, so ``root_path`` has to end with one if it is a directory).

    Args:
        root_path: Prefix placed in front of the chunk folder's name.

    Returns:
        None.
    """
    chunk_dir = root_path+'2_230218d-2_(Vocals)_chunks'
    for entry in os.listdir(chunk_dir):
        clip_path = os.path.join(chunk_dir, entry)
        clip_seconds = librosa.get_duration(filename=clip_path)
        if not (clip_seconds < 9.9):
            continue
        os.remove(clip_path)
    print("delete short end!")

把文件夹下的某些文件按统一的命名格式重命名：

import os
def format_wav_files(folder='./230211dsep/dataset-230211seps', start=346):
    """Rename the short-named files of a folder to six-digit numbered WAV names.

    Every entry whose file name is shorter than 7 characters is renamed to
    ``<number>.wav``, where the number starts at ``start``, grows by one per
    renamed file and is zero-padded to six digits. The earlier version
    prefixed a fixed ``"000"``, which produced seven-digit names as soon as
    the counter reached 1000.

    Entries are processed in sorted name order so the numbering is
    reproducible.

    Args:
        folder: Directory whose files are renamed.
        start: Number given to the first renamed file.

    Raises:
        FileExistsError: If the target name already exists; raised instead of
            overwriting, which ``os.rename`` would do silently on some
            platforms.
    """
    st = start
    for index in sorted(os.listdir(folder)):
        # Names of 7+ characters are taken to be already formatted.
        if len(index) >= 7:
            continue
        path = folder + '/' + index
        print("old name:", path)
        new_path = folder + '/' + str(st).zfill(6) + '.' + "wav"
        print("new name:", new_path)
        if os.path.exists(new_path):
            raise FileExistsError(new_path)
        os.rename(path, new_path)
        st += 1

把某一文件夹下的所有音频，按照采样率16000读取，并重新存储到另一文件夹下（即批量修改采样率）

import librosa
import soundfile

def get_6_name(ind):
    """Return ``ind`` as text, left-padded with zeros to at least six characters.

    Used to build the file names of the 10000 numbered audio clips. Values
    that already need six or more characters are returned unpadded.

    Args:
        ind: The number (a natural number is expected) to format.

    Returns:
        The zero-padded string, e.g. ``1`` -> ``"000001"``.
    """
    return str(ind).rjust(6, "0")

if __name__ == '__main__':
    # Batch resampling: read clips 1..10000 from "baker_waves-22050/" with
    # librosa at 16000 Hz and write them under the same name to "baker_waves/".
    for i in range(1, 10001):
        wav_name = get_6_name(i)+".wav"
        audio_path = "baker_waves-22050/"+wav_name
        y, sr = librosa.load(audio_path, sr=16000)
        soundfile.write("baker_waves/"+wav_name, y, 16000)
        # Progress message after every 500 files.
        if not i % 500:
            print(i, "waves finished!")

参考：

python librosa 或 ffmpeg 改变音频采样率

https://blog.csdn.net/weixin_44493841/article/details/121287033

【Python】librosa音频处理教程

https://blog.csdn.net/fengdu78/article/details/124875505

winsound

字符串转换为摩斯电码及其音频：好玩的Python-摩斯码发报机：https://blog.csdn.net/juzicode00/article/details/106991289

【python】用Python写一段音乐(winsound)：https://blog.csdn.net/tc9527_/article/details/87653892

对标注好时间点的视频进行批量切分（按时间）

import os
import subprocess

import pandas as pd

# Cut every annotated "<id>_rgb.mp4" in the current directory into one clip
# per annotated action, using the times listed in annotation-time.CSV.
#
# Columns read from the CSV: "name" (numeric video id), "act_id" (-1 marks a
# row to skip) and "act_time" (start position handed to ffmpeg's -ss).

# The earlier call also passed 'r' positionally, which pandas takes as `sep`
# and which conflicts with `delimiter`; only the delimiter is given now.
df = pd.read_csv("annotation-time.CSV", delimiter=',')

filename = "./"
list_path = os.listdir(filename)
for index in list_path:
    # Only .mp4 files; splitext also copes with names that contain no dot.
    if os.path.splitext(index)[1] != ".mp4":
        continue
    input_name = index.split('_')[0]
    # The "name" column was converted to a numeric type for readability, so
    # the id part of the file name has to be converted to a number as well.
    if not input_name.isdigit():
        print("skip (non-numeric id):", index)
        continue
    print("-------->>"+input_name)

    # Copy the matching rows so a column can be added without touching `df`.
    df_item = df[df["name"] == int(input_name)].copy()
    # Length of each segment = difference between consecutive start times;
    # row i+1 therefore holds the length of the segment that starts at row i.
    df_item['act_len'] = df_item['act_time'].diff()

    cut_name = input_name+"_rgb"
    for i in range(len(df_item)-1):
        item = df_item.iloc[i]
        print("item:", item)
        if item['act_id'] == -1:
            continue
        st = item['act_time']
        le = df_item.iloc[i+1]['act_len']
        print(input_name, st, le)
        # Stream copy (no re-encoding). Arguments are passed as a list so no
        # shell quoting is involved.
        subprocess.run([
            "ffmpeg", "-hide_banner",
            "-ss", str(st), "-t", str(le),
            "-i", cut_name+".mp4",
            "-codec", "copy",
            cut_name+"-"+str(i)+".mp4",
        ])

对标注好帧号的视频进行批量切分（按帧号）

import os

import pandas as pd

# For every annotated "<id>_rgb.mp4" in the current directory, print one
# ffmpeg command per annotated action that selects the frames between two
# consecutive annotated frame numbers. This is a dry run: the commands are
# only printed, not executed.
#
# Columns read from the CSV: "name" (numeric video id), "act_id" (-1 marks a
# row to skip) and "time" (used as the frame number in the select filter).

# The earlier call also passed 'r' positionally, which pandas takes as `sep`
# and which conflicts with `delimiter`; only the delimiter is given now.
df = pd.read_csv("annotation.CSV", delimiter=',')

filename = "./"
list_path = os.listdir(filename)
for index in list_path:
    # Only .mp4 files; splitext also copes with names that contain no dot.
    if os.path.splitext(index)[1] != ".mp4":
        continue
    input_name = index.split('_')[0]
    # The "name" column was converted to a numeric type for readability, so
    # the id part of the file name has to be converted to a number as well.
    if not input_name.isdigit():
        print("skip (non-numeric id):", index)
        continue
    print("-------->>"+input_name)

    df_item = df[df["name"] == int(input_name)]

    cut_name = input_name+"_rgb"
    for i in range(len(df_item)-1):
        item = df_item.iloc[i]
        print("item:", item)
        if item['act_id'] == -1:
            continue
        st = item['time']
        en = df_item.iloc[i+1]['time']
        print(input_name, st, en)
        # Reference form of the command:
        #   ffmpeg -i ./input.mp4 -vf "select=between(n\,20\,200)" -y -acodec copy ./output.mp4
        # The filter expression is quoted so the parentheses survive a shell,
        # and the backslashes are written as "\\" (a bare "\," is an invalid
        # escape sequence in a Python string literal).
        print(f'ffmpeg -i {cut_name}.mp4 -vf "select=between(n\\,{st}\\,{en})" '
              f'-y -acodec copy {cut_name}-{i}.mp4')

posted @ 2022-12-26 22:11 倦鸟已归时 阅读(1457) 评论(0) 编辑 收藏 举报

会员力量，点亮园子希望

刷新页面返回顶部

倦鸟已归时

人能常清静，天地悉皆归。

一些Python与ffmpeg音频处理的实用程序和命令