Tensorflow 实现的sine函数,通过基频(f0)构建激励信号

本篇博客主要介绍如何用 TensorFlow 根据基频(fundamental frequency, f0)来构建激励信号

主要思路如下

  • 在有声(voiced)部分,激励信号是由基频 fundamental frequency (f0) 及其谐波 (harmonics) 对应的正弦波来构建
  • 在无声(unvoiced,即 f0 不大于阈值)的位置,激励信号通过高斯白噪声来构建

具体代码如下

import tensorflow as tf
import numpy as np

class TFSineGen(tf.keras.layers.Layer):
    """Keras layer that turns an F0 contour into a sine-based excitation.

    For every time step whose F0 is above ``voiced_threshold`` the output is
    a sine wave at F0 (plus ``harmonic_num`` overtones at integer multiples
    of F0) with a small amount of additive Gaussian noise.  For every other
    time step the sine is zeroed out and only Gaussian noise remains.
    """

    def __init__(self, samp_rate, harmonic_num=0,
                 sine_amp=0.1, noise_std=0.003,
                 voiced_threshold=0,
                 flag_for_pulse=True, **kwargs):
        """ Definition of sine generator
            TFSineGen(samp_rate, harmonic_num = 0,
                    sine_amp = 0.1, noise_std = 0.003,
                    voiced_threshold = 0,
                    flag_for_pulse = True)

            samp_rate: sampling rate in Hz
            harmonic_num: number of harmonic overtones (default 0)
            sine_amp: amplitude of sine-waveform (default 0.1)
            noise_std: std of Gaussian noise (default 0.003)
            voiced_threshold: F0 threshold for U/V classification (default 0)
            flag_for_pulse: stored on the layer (default True); no method of
                this class reads it, so it currently has no effect on the
                generated signal
            **kwargs: forwarded unchanged to tf.keras.layers.Layer
            """
        super().__init__(**kwargs)
        self.sine_amp = sine_amp
        self.noise_std = noise_std
        self.harmonic_num = harmonic_num
        # Number of output channels: the fundamental plus every overtone.
        self.dim = self.harmonic_num + 1
        self.sampling_rate = samp_rate
        self.voiced_threshold = voiced_threshold
        self.flag_for_pulse = flag_for_pulse
        # rad_values1 is overwritten by every _f02sine call; the other two
        # attributes are never assigned again inside this class and are kept
        # only so existing attribute access keeps working.
        self.rad_values1 = None
        self.rand_ini1 = None
        self.cumsum_shift = None

    def _f02uv(self, f0):
        """Return the voiced/unvoiced mask of ``f0``.

        The mask has the shape and dtype of ``f0``: 1 where
        ``f0 > voiced_threshold`` and 0 elsewhere.
        """
        f0 = tf.convert_to_tensor(f0)
        # Cast to the input dtype (not a fixed float32) so the mask can be
        # multiplied with tensors derived from f0 without a dtype mismatch.
        return tf.cast(f0 > self.voiced_threshold, dtype=f0.dtype)

    def _f02sine(self, f0_values):
        """ f0_values: (batchsize, length, dim)
            where dim indicates fundamental tone and overtones

            Returns the unit-amplitude sine waves, same shape as f0_values.
        """
        # convert to F0 in rad. The integer part n can be ignored
        # because 2 * np.pi * n doesn't affect phase
        rad_values = (f0_values / self.sampling_rate) % 1
        # Kept for backward compatibility: the per-step phase increments
        # (without the random initial phase) of the latest call.
        self.rad_values1 = rad_values
        dtype = rad_values.dtype

        # Dynamic shape, so unknown batch/length dimensions are supported.
        dyn_shape = tf.shape(f0_values)
        batch_size, num_tones = dyn_shape[0], dyn_shape[2]

        # Random initial phase per tone; the fundamental (column 0) always
        # starts at phase 0, only the overtones are randomised.
        rand_ini = tf.random.normal(shape=[batch_size, num_tones],
                                    mean=0.0, stddev=1.0, dtype=dtype)
        rand_ini = tf.concat(
            [tf.zeros([batch_size, 1], dtype=dtype), rand_ini[:, 1:]],
            axis=-1)
        # Add the initial phase to the first time step only.
        rad_with_phase = tf.concat(
            [rad_values[:, :1, :] + rand_ini[:, tf.newaxis, :],
             rad_values[:, 1:, :]],
            axis=1)

        # To keep the running sum passed to tf.cumsum small,
        # -1 is added whenever \sum_k=1^n rad_value_k wraps past 1.
        # wrapped marks the time steps at which to add -1.
        # This will not change F0 of sine because (x-1) * 2*pi = x * 2*pi
        tmp_over_one = tf.cumsum(rad_values, axis=1) % 1
        wrapped = (tmp_over_one[:, 1:, :] - tmp_over_one[:, :-1, :]) < 0
        cumsum_shift = tf.concat(
            [tf.zeros_like(rad_values[:, :1, :]),
             tf.cast(wrapped, dtype=dtype) * -1.0],
            axis=1)

        sines = tf.sin(
            tf.cumsum(rad_with_phase + cumsum_shift, axis=1) * 2 * np.pi)
        return sines

    def call(self, f0):
        """ sine_waves, uv, noise, noise_amp = call(f0)
        input F0: tensor(batchsize, length, dim=1); only channel 0 is read
                  f0 for unvoiced steps should be 0
        output sine_waves: tensor(batchsize, length, dim), the excitation
                  (masked sines plus noise)
        output uv: voiced/unvoiced mask with the shape of f0
        output noise: the additive noise, same shape as sine_waves
        output noise_amp: per-step noise scale, same shape as uv
        """
        fundamental = f0[:, :, 0]
        # Channel 0 is the fundamental, channel k is the (k + 1)-th harmonic.
        f0_buf_1 = [fundamental]
        f0_buf_1.extend(fundamental * multiple
                        for multiple in range(2, self.harmonic_num + 2))
        f0_buf = tf.stack(f0_buf_1, axis=-1)

        # generate sine waveforms
        sine_waves = self._f02sine(f0_buf) * self.sine_amp

        # generate uv signal
        uv = self._f02uv(f0)

        # Voiced steps get noise with std noise_std; unvoiced steps get
        # noise with std sine_amp / 3.
        noise_amp = uv * self.noise_std + (1 - uv) * self.sine_amp / 3
        noise = noise_amp * tf.random.normal(shape=tf.shape(sine_waves),
                                             dtype=sine_waves.dtype)

        # first: set the unvoiced part to 0 by uv
        # then: additive noise
        sine_waves = sine_waves * uv + noise
        return sine_waves, uv, noise, noise_amp

posted on 2022-09-22 14:37  吾知  阅读(157)  评论(0编辑  收藏  举报

导航