Tensorflow 实现的sine函数,通过基频(f0)构建激励信号
本篇博客主要是用tensorflow 来实现激励信号的构建,通过基础频率来构建
主要思路如下
- 在有声音的部分, 激励信号是有 fundamental frequency (f0) 和 harmonics的具体值来构建
- 在静音位置,通过高斯白噪声来构建
具体代码如下
import tensorflow as tf
import numpy as np
class TFSineGen(tf.keras.layers.Layer):
    """Keras layer that turns an F0 contour into a sine-based excitation.

    For every time step the layer emits one sine per harmonic (fundamental
    plus ``harmonic_num`` overtones), masks the sines with a voiced/unvoiced
    flag derived from F0, and adds Gaussian noise whose amplitude depends on
    that flag.
    """

    def __init__(self, samp_rate, harmonic_num=0,
                 sine_amp=0.1, noise_std=0.003,
                 voiced_threshold=0,
                 flag_for_pulse=True, **kwargs):
        """Configure the sine generator.

        Args:
            samp_rate: sampling rate in Hz.
            harmonic_num: number of harmonic overtones (default 0).
            sine_amp: amplitude of the sine waveform (default 0.1).
            noise_std: std of the Gaussian noise added on voiced steps
                (default 0.003).
            voiced_threshold: F0 threshold for U/V classification; a step is
                voiced when ``f0 > voiced_threshold`` (default 0).
            flag_for_pulse: stored on the instance (default True).
                NOTE(review): no method of this class reads the flag, so it
                currently has no effect on the generated signal.
            **kwargs: forwarded to ``tf.keras.layers.Layer``.
        """
        super().__init__(**kwargs)
        self.sine_amp = sine_amp
        self.noise_std = noise_std
        self.harmonic_num = harmonic_num
        # One output channel for the fundamental plus one per overtone.
        self.dim = self.harmonic_num + 1
        self.sampling_rate = samp_rate
        self.voiced_threshold = voiced_threshold
        self.flag_for_pulse = flag_for_pulse
        # rad_values1 is overwritten on every _f02sine call; the other two
        # attributes are kept for compatibility and are never assigned here.
        self.rad_values1 = None
        self.rand_ini1 = None
        self.cumsum_shift = None

    def _f02uv(self, f0):
        """Return a mask shaped like ``f0``: 1 where voiced, 0 elsewhere."""
        ones = tf.ones_like(f0)
        # Cast to the input's own dtype instead of a hard-coded float32 so
        # the multiplication cannot fail on a dtype mismatch.
        return ones * tf.cast(f0 > self.voiced_threshold, dtype=ones.dtype)

    def _f02sine(self, f0_values):
        """Convert per-harmonic F0 tracks into unit-amplitude sines.

        Args:
            f0_values: tensor (batchsize, length, dim) where dim indexes the
                fundamental tone and its overtones.

        Returns:
            Tensor with the same shape as ``f0_values`` holding
            ``sin(2 * pi * cumulative_phase)``.
        """
        # Per-step phase increment in cycles. The integer part n can be
        # dropped because 2 * pi * n does not affect the phase.
        rad_values = (f0_values / self.sampling_rate) % 1
        self.rad_values1 = rad_values
        base_rad = rad_values

        # Random initial phase per (batch, harmonic); column 0 (the
        # fundamental) is forced to zero. The shape is derived from the
        # tensor itself so no external shape helper is required.
        first_step = base_rad[:, 0, :]
        rand_ini = tf.random.normal(shape=tf.shape(first_step),
                                    mean=0.0, stddev=1.0,
                                    dtype=first_step.dtype)
        rand_ini = tf.concat(
            [tf.zeros_like(rand_ini[:, :1]), rand_ini[:, 1:]], axis=-1)
        rad_values = tf.concat(
            [tf.expand_dims(first_step + rand_ini, axis=1),
             base_rad[:, 1:, :]],
            axis=1)

        # Keep the running sum small: whenever the wrapped cumulative phase
        # decreases between consecutive steps it crossed an integer, so
        # subtract 1 at that step. This leaves the sine unchanged because
        # (x - 1) * 2 * pi and x * 2 * pi give the same sine value.
        wrapped = tf.cumsum(base_rad, 1) % 1
        crossed = (wrapped[:, 1:, :] - wrapped[:, :-1, :]) < 0
        cumsum_shift = tf.concat(
            [tf.zeros_like(base_rad[:, 0:1, :]),
             tf.cast(crossed, dtype=base_rad.dtype) * -1.0],
            axis=1)

        phase = tf.cumsum(rad_values + cumsum_shift, axis=1)
        return tf.sin(phase * 2 * np.pi)

    def call(self, f0):
        """Generate the excitation signal for an F0 contour.

        Args:
            f0: rank-3 tensor (batchsize, length, channels); only channel 0
                is used to build the harmonics. F0 at unvoiced steps is
                expected to be at or below ``voiced_threshold``.

        Returns:
            Tuple ``(sine_waves, uv, noise, noise_amp)``:
            sine_waves: (batchsize, length, harmonic_num + 1) masked sines
                plus noise.
            uv: voiced/unvoiced mask with the shape of ``f0``.
            noise: the additive Gaussian noise that was mixed in.
            noise_amp: per-step noise amplitude used to scale the noise.
        """
        fundamental = f0[:, :, 0]
        # Channel k holds (k + 1) * f0: the fundamental, then the overtones.
        harmonics = [fundamental] + [
            fundamental * multiple
            for multiple in range(2, self.harmonic_num + 2)
        ]
        f0_buf = tf.stack(harmonics, axis=-1)

        # generate sine waveforms
        sine_waves = self._f02sine(f0_buf) * self.sine_amp

        # generate uv signal
        uv = self._f02uv(f0)

        # Voiced steps get noise with std noise_std; unvoiced steps get
        # noise with std sine_amp / 3.
        noise_amp = uv * self.noise_std + (1 - uv) * self.sine_amp / 3
        noise = noise_amp * tf.random.normal(shape=tf.shape(sine_waves),
                                             dtype=sine_waves.dtype)

        # first: set the unvoiced part to 0 by uv
        # then: additive noise
        sine_waves = sine_waves * uv + noise
        return sine_waves, uv, noise, noise_amp