音频变速不变调研究

class WSOLA(object):
    """Time-scale modification of a waveform by overlap-add (WSOLA).

    The input is consumed in steps of ``shift * speech_rate`` samples while
    the output is produced in steps of ``shift`` samples, so the result is
    ``1 / speech_rate`` times as long as the input. For every output frame
    the read position is nudged by up to one shift in either direction so
    that the segment being appended correlates best with the natural
    continuation of the previously written segment.
    """

    def __init__(self, fs, speech_rate, shiftms=10):
        """Precompute frame geometry and the analysis window.

        Args:
            fs: Sampling frequency of the waveform in Hz (e.g. 16000).
            speech_rate: Speed factor; values above 1 shorten the output,
                values below 1 lengthen it (e.g. 1.2).
            shiftms: Frame shift in milliseconds.

        Raises:
            ValueError: If ``speech_rate`` is not positive, or if the
                parameters yield a shift or step of less than one sample
                (which would make the processing loop never advance).
        """
        if speech_rate <= 0:
            raise ValueError("speech_rate must be positive")

        self.fs = fs
        self.speech_rate = speech_rate

        self.shiftms = shiftms  # shift length [ms]
        # Number of samples in a shift (160 for fs=16000, shiftms=10).
        self.sl = int(self.fs * self.shiftms / 1000)
        # Number of samples in a frame: two shifts (320 in the example).
        self.fl = self.sl * 2
        # Input step per output shift (192 for speech_rate=1.2).
        self.epstep = int(self.sl * self.speech_rate)
        if self.sl < 1 or self.epstep < 1:
            raise ValueError(
                "fs, shiftms and speech_rate must give a shift and a step "
                "of at least one sample"
            )
        self.win = np.hanning(self.fl)  # window function for a frame

    def duration_modification(self, x):
        """Return a copy of ``x`` whose duration is scaled by 1/speech_rate.

        Args:
            x: One-dimensional waveform (array-like of samples).

        Returns:
            A float64 ``numpy.ndarray`` of length
            ``int(len(x) / speech_rate)``. Samples past the last frame that
            could be processed are left at zero.
        """
        x = np.asarray(x)
        wlen = len(x)
        wsolaed = np.zeros(int(wlen / self.speech_rate), dtype='d')

        # initialization
        sp = self.sl * 2       # start of the segment that fades out
        rp = sp + self.sl      # centre of the reference frame
        ep = sp + self.epstep  # nominal centre of the search region
        outp = self.sl         # write position in the output

        # Copy the first shift of the input unchanged. The length is clamped
        # so that very short inputs do not trigger a shape mismatch.
        head = min(outp, wlen, len(wsolaed))
        wsolaed[:head] = x[:head]

        while wlen > ep + self.fl:
            # Reference frame and the two-frame region searched for a match.
            ref = x[rp - self.sl:rp + self.sl]
            buff = x[ep - self.fl:ep + self.fl]

            # Offset (within +/- one shift) of the best-matching frame.
            delta = self._search_minimum_distance(ref, buff)
            epd = ep + delta

            # Overlap-add: the falling half of the window fades out the
            # current segment, the rising half fades in the matched one.
            spdata = x[sp:sp + self.sl] * self.win[self.sl:]
            epdata = x[epd - self.sl:epd] * self.win[:self.sl]
            # The output may have fewer than `sl` samples left (or none),
            # in which case both segments are truncated to fit.
            room = len(wsolaed[outp:outp + self.sl])
            wsolaed[outp:outp + room] = spdata[:room] + epdata[:room]

            outp += self.sl

            # transition to next frame
            sp = epd
            rp = sp + self.sl
            ep += self.epstep

        return wsolaed

    def _search_minimum_distance(self, ref, buff):
        """Locate the frame inside ``buff`` most similar to ``ref``.

        Despite the name, similarity is measured by maximising the
        cross-correlation of the windowed frames rather than by minimising
        a distance.

        Args:
            ref: Reference frame; zero-padded to ``self.fl`` samples if
                shorter.
            buff: Search region of at least ``self.fl`` samples.

        Returns:
            Index of the best-matching frame start in ``buff`` minus
            ``self.sl``, i.e. the offset of the best frame's centre from
            sample ``self.fl`` of ``buff``.
        """
        if len(ref) < self.fl:
            ref = np.r_[ref, np.zeros(self.fl - len(ref))]

        # Every length-`fl` frame of `buff`, one sample apart, windowed.
        frames = np.lib.stride_tricks.sliding_window_view(buff, self.fl)
        buffmat = frames * self.win
        refwin = np.array(ref * self.win).reshape(1, self.fl)
        # 'valid' mode leaves one correlation value per candidate frame.
        corr = signal.correlate2d(buffmat, refwin, mode='valid')
        return np.argmax(corr) - self.sl

https://blog.csdn.net/weixin_42476279/article/details/113566752
https://blog.csdn.net/qq_36002089/article/details/115630385
argmax

posted @ 2022-08-31 21:36 365/24/60 阅读(73) 评论(0) 编辑收藏举报

刷新页面返回顶部

Coding Poineer

Coding Poineer

Coding Poineer

Coding Poineer

Coding Poineer

Coding Poineer

Coding Poineer

Coding Poineer

Coding Poineer

Coding Poineer

Coding Poineer

365/24/60

音频变速不变调研究

Coding Poineer

Coding Poineer

Coding Poineer

Coding Poineer

Coding Poineer

Coding Poineer

Coding Poineer

Coding Poineer

Coding Poineer

Coding Poineer

Coding Poineer

365/24/60

音频变速不变调研究

音频变速不变调研究