MFCC特征提取理解

摘自：http://blog.sina.com.cn/s/blog_6a25068b0100m5m6.html

作者：幸福的理由

看了个稀里糊涂，一知半解，觉得以后又会忘记。珍藏下来。边理解边添加。

以下是mfcc_float.cpp的内容

#include "stdlib.h"
#include "stdio.h"
#include "math.h"
#include <limits.h>
#include "MFCC_float.h"

// Numeric constants used by the feature extractor.
// Each directive sits on its own line: when they shared one line, the
// replacement list of PI contained the text of the two following directives.
#define PI 3.14159265358979   // circle constant used for the window, FFT twiddles and DCT
#define PRE_EMP_FACTOR 0.98   // pre-emphasis coefficient a in y(n)=x(n)-a*x(n-1)
#define ENE_FLOOR 1.0         // lower bound applied to filterbank outputs before log()

//------------------------------------------------------------------------------------- //public functions

//constructor CMFCC::CMFCC(long aInfo, int aSampleRate, int aN, int aFl, int aFh, int aM, int aTypeOfFilterBank, int aP) { //general Info=aInfo; SampleRate=aSampleRate; //sample rate N=aN; M=aM; P=aP; //fft point number, filterbanks number, cepstrum order Fl=aFl; Fh=aFh; //the lowest and highest frequency (Hz) of all the filterbanks //pre-emphasize Alfa=PRE_EMP_FACTOR; xtmp=new double[N];

//FFT x=new COMPLEX[N]; X=new double[N]; HammingWin=NewHammingWin(N);

//triangular filterbanks if(aTypeOfFilterBank==MEL_SCALE) NewMelFilterBanks(SampleRate, Fl, Fh, N, M, FilterBanks, FilterBanksL, FilterBanksH); else NewLinFilterBanks(SampleRate, Fl, Fh, N, M, FilterBanks, FilterBanksL, FilterBanksH);

S=new double[M];

//DCT DctMatrix=NewDctMatrix(P,M);

//feature temp buffer pfFea=new double[P]; }

//destructor CMFCC::~CMFCC(void) { delete xtmp; delete x; delete X; delete HammingWin; delete FilterBanks[0]; delete FilterBanks; delete FilterBanksL; delete FilterBanksH; delete S; delete DctMatrix[0]; delete DctMatrix; delete pfFea; }

//print void CMFCC::Print(void) { int i,j; FILE *fp;

fp=fopen("HammingWin.txt", "wt"); for(i=0 ; i<N ; i++) fprintf(fp, "%.6f\n", HammingWin[i]); fclose(fp);

fp=fopen("FilterBanks.txt", "wt"); for(j=0 ; j<N/2 ; j++) { for(i=0 ; i<M ; i++) fprintf(fp, ".6f ", FilterBanks[i][j]); fprintf(fp, "\n"); } fclose(fp);

fp=fopen("FilterBanksLH.txt", "wt"); for(i=0 ; i<M ; i++) fprintf(fp, "%d\t%d\t%d\n", FilterBanksL[i], FilterBanksH[i], (FilterBanksH[i]-FilterBanksL[i]+1)); fclose(fp);

fp=fopen("DCTMatrix.txt", "wt"); for(i=0 ; i<P ; i++) { for(j=0 ; j<M ; j++) fprintf(fp, "%.6f\t", DctMatrix[i][j]); fprintf(fp, "\n"); } fclose(fp);

return; }

//extract one sentence of MFCC/LFCC int CMFCC::MFCCbySentence(short *pnWav, int SampleCount, float *&pfMFCC, int &FrmCount, int &Dim, int FrmWidth, int FrmOffst) { int i; if(FrmWidth>N || FrmOffst>FrmWidth) return FAILURE;

//compute frame count & dim of feature vector & allocate memory space FrmCount=(SampleCount-FrmWidth)/FrmOffst+1; if(FrmCount<=0) return FAILURE; if(Info&DCEPS) { if(FrmCount<=10) return FAILURE; }

Dim=P; if(Info&DCEPS) Dim+=P; if(Info&DDCEPS) Dim+=P; pfMFCC = new float[FrmCount*Dim];

//remove DC component RemoveDC(pnWav, SampleCount);

//extract one sentence of MFCC for(i=0 ; i<FrmCount ; i++) { MFCCbyFrame(&pnWav[i*FrmOffst], FrmWidth, &pfMFCC[i*Dim], P); }

//CMS if(Info&CMS) { CepsMeanSub(pfMFCC, FrmCount, Dim, P); }

//extract one sentence of DMFCC if(Info&DCEPS) { DMFCCbyFrame(&pfMFCC[0], &pfMFCC[0], &pfMFCC[0], &pfMFCC[1*Dim], &pfMFCC[2*Dim], &pfMFCC[P], P); DMFCCbyFrame(&pfMFCC[0], &pfMFCC[0], &pfMFCC[1*Dim], &pfMFCC[2*Dim], &pfMFCC[3*Dim], &pfMFCC[1*Dim+P], P); for(i=2 ; i<=FrmCount-3 ; i++) DMFCCbyFrame(&pfMFCC[(i-2)*Dim], &pfMFCC[(i-1)*Dim], &pfMFCC[i*Dim], &pfMFCC[(i+1)*Dim], &pfMFCC[(i+2)*Dim], &pfMFCC[i*Dim+P], P); DMFCCbyFrame(&pfMFCC[(FrmCount-4)*Dim], &pfMFCC[(FrmCount-3)*Dim], &pfMFCC[(FrmCount-2)*Dim], &pfMFCC[(FrmCount-1)*Dim], &pfMFCC[(FrmCount-1)*Dim], &pfMFCC[(FrmCount-2)*Dim+P], P); DMFCCbyFrame(&pfMFCC[(FrmCount-3)*Dim], &pfMFCC[(FrmCount-2)*Dim], &pfMFCC[(FrmCount-1)*Dim], &pfMFCC[(FrmCount-1)*Dim], &pfMFCC[(FrmCount-1)*Dim], &pfMFCC[(FrmCount-1)*Dim+P], P); }

//extract one sentence of DDMFCC if(Info&DDCEPS) { for(i=0 ; i<P ; i++) { pfMFCC[2*P+i] = pfMFCC[(FrmCount-1)*Dim+2*P+i] = 0; }

for(i=1 ; i<=FrmCount-2 ; i++) { DDMFCCbyFrame(&pfMFCC[(i-1)*Dim+P], &pfMFCC[i*Dim+P], &pfMFCC[(i+1)*Dim+P], &pfMFCC[i*Dim+2*P], P); } }

return SUCCESS; }

//compute one frame of MFCC/LFCC int CMFCC::MFCCbyFrame(short *pnWav, int FrmWid, float *pfMFCC, int CepsOrder) { int i;

if(FrmWid>N || CepsOrder!=P) return FAILURE;

//pre-emphasize 预加重 PreEmp(pnWav, FrmWid, xtmp, N, Alfa);

//apply hamming window 加窗(窗宽度， ApplyWindow(xtmp, HammingWin, N, xtmp); //FFT 做fft for(i=0 ; i<N ; i++) { x[i].real=xtmp[i]; //实部，虚部？？ x[i].image=0; } FFTAmp(x, X, N);

//apply triangular windows & ln ApplyFilterBanks(X, N, S, M, FilterBanks, FilterBanksL, FilterBanksH);

//DCT ApplyDCT(S, M, pfFea, P, DctMatrix);

//double -> float for(i=0 ; i<CepsOrder ; i++) pfMFCC[i] = (float)pfFea[i]; return SUCCESS; }

//extract one frame of DMFCC void CMFCC::DMFCCbyFrame(float *Prev2Fea, float *Prev1Fea, float *CurrFea, float *Next1Fea, float *Next2Fea, float *DFea, int P) { int i;

for(i=0 ; i<P ; i++) DFea[i] = (-0.632456f*Prev2Fea[i]-0.316228f*Prev1Fea[i]+ \ 0.316228f*Next1Fea[i]+0.632456f*Next2Fea[i]);

return; }

//extract one frame of DDMFCC void CMFCC::DDMFCCbyFrame(float *PrevDFea, float *CurrDFea, float *NextDFea, float *DDFea, int P) { int i;

for(i=0 ; i<P ; i++) DDFea[i] = 0.7071f*(NextDFea[i]-PrevDFea[i]);

return; }

//------------------------------------------------------------------------------------- //private functions

//====== Pre-emphasize ======// void CMFCC::PreEmp(short *pnWav, int FrmWid, double *x, int N, double Alfa) { int i; int s=(N-FrmWid)/2; //start position of data in COMPLEX x[N] s=(512-帧宽）/2

//clear buffer for(i=0 ; i<N ; i++) { x[i]=0; }

//compute the first sample x[s] = pnWav[0]-Alfa*pnWav[0]; //y(n)=x(0)-a*x(0); 第一个点

//compute the following samples in one frame for(i=1 ; i<FrmWid ; i++) x[i+s] = pnWav[i]-Alfa*pnWav[i-1]; //每个帧宽中的余下点，x(i+s)=后一个wav-前一个wav*a

return; }

//====== Hamming window ======// //generate a hamming window double* CMFCC::NewHammingWin(int Len) { double* Win=new double[Len]; for(int i=0; i<Len; i++) Win[i]=(0.54-0.46*cos(2*PI*i/Len)); return Win; }

//applying a window void CMFCC::ApplyWindow(double *In, double *Win, int Len, double *Out) { for(int i=0; i<Len; i++) Out[i]=In[i]*Win[i]; //对于音频，乘以窗函数 };

//====== FFT ======// //Func: FFT 2DIT //In: COMPLEX *input.real; //one frame of wave data // int n; //Point number of FFT //Out: COMPLEX *input; //real part and image part after FFT 实部和虚部 void FFT(COMPLEX *input, int n) { int x, i, nv2, j, k, le, l, le1, ip, nm1; COMPLEX t, u, w; //复数

int ntemp=n; for(x=0; n>1; x++) n/=2; n=ntemp;

nv2=n/2; nm1=n-1; j=1;

for(i=1; i<=nm1; i++) { if(i<j) //i是前一个点，j是后一个点 { t.real = input[i-1].real; t.image = input[i-1].image; input[i-1].real = input[j-1].real; input[i-1].image = input[j-1].image; input[j-1].real = t.real; input[j-1].image = t.image; //后一个点被前一个点代替 }

k=nv2;

while(k<j) { j-=k; k/=2; } j+=k; }

le=1; for(l=1; l<=x; l++) { le*=2; le1=le/2; u.real = 1.0f; u.image = 0.0f; w.real = (float)cos(PI/le1); w.image = (float)-sin(PI/le1);

for(j=1; j<=le1; j++) { for(i=j; i<=n; i+=le) { ip = i+le1; t.real = input[ip-1].real*u.real-input[ip-1].image*u.image; t.image = input[ip-1].real*u.image+input[ip-1].image*u.real; input[ip-1].real = input[i-1].real-t.real; input[ip-1].image = input[i-1].image-t.image; input[i-1].real = t.real+input[i-1].real; input[i-1].image = t.image+input[i-1].image; }

t.real = u.real*w.real-u.image*w.image; t.image = u.image*w.real+u.real*w.image; u.real = t.real; u.image = t.image; } }

return; }

//Func: FFTAmp, compute the amplitude of spectrum 急速频率谱的幅度 //In: COMPLEX *x.real; //one frame of wave data 一帧的wav // int N; //point number of FFT 做fft的点数 //Out: double *Amp; //spectral amplitude of this frame 这一帧的谱幅度 void CMFCC::FFTAmp(COMPLEX *x, double *Amp, int N) //实部和虚部的平方和开方，求模 { //FFT FFT(x, N);

//sqrt 开方 for(int i=0 ; i<=N/2 ; i++) Amp[i]=sqrt(x[i].real*x[i].real + x[i].image*x[i].image);

return; }

//====== DCT ======// //generate the matrix of DCT, excluding 0'th dimension double** CMFCC::NewDctMatrix(int nP, int nM) { int p,m; double **Matrix=new double*[nP]; Matrix[0]=new double[nP*nM]; for(p=1 ; p<nP ; p++) Matrix[p]=&Matrix[0][p*nM];

for(p=0 ; p<nP ; p++) { for(m=0 ; m<nM ; m++) { Matrix[p][m]=cos(PI*p*(m+0.5)/nM); //including c0 // Matrix[p][m]=cos(PI*(p+1)*(m+0.5)/nM); //excluding c0 } }

return Matrix; }

//applying a transformation matrix 逆傅里叶变换 void CMFCC::ApplyDCT(double *In, int InDim, double *Out, int OutDim, double **Matrix) { int p,m;

for(p=0 ; p<OutDim ; p++) { Out[p]=0; for(m=0 ; m<InDim ; m++) { Out[p]+=In[m]*Matrix[p][m]; } }

return; }

//====== Generating Filterbanks on Mel-Scale & Linear-Scale ======//
// FFT point to frequency in Hz: (n+0.5)*Fs/N.
// Returns 0 when n lies outside [0, N).
double n2f(int n, int Fs, int N)
{
    const bool inRange = (n>=0 && n<N);
    return inRange ? (n+0.5)*Fs/N : 0;
}

// Frequency in Hz to the nearest FFT point: (int)(f*N/Fs + 0.5).
// Returns 0 when f lies outside [0, Fs].
int f2n(double f, int Fs, int N)
{
    const bool inRange = !(f<0 || f>Fs);
    if(!inRange)
        return 0;

    const double exact = f*N/Fs;
    return static_cast<int>(exact+0.5);
}

// Frequency in Hz to the warped scale b = 1125*ln(1 + f/700).
// (The name says "bark", but this is the mel-scale mapping used for the
// mel filterbanks.) Negative frequencies map to 0.
double f2b(double f)
{
    return (f<0) ? 0 : 1125*log(1+f/700.0);
}

// Inverse of f2b: warped-scale value back to frequency in Hz,
// f = 700*(exp(b/1125) - 1). Negative inputs map to 0.
double b2f(double b)
{
    return (b<0) ? 0 : 700*(exp(b/1125)-1);
}

//compute the central point of each filterbank on mel-scale //for M filterbanks, the central point shoule be extended to M+2 int* MelCentralPoint(int Fl, int Fh, int M, int Fs, int N) { int m; double F; int *CentralPoint=new int[M+2]; for(m=0 ; m<=M+1 ; m++) { //central frequency 中心频率，高-低加低，乘以m数/点数 F=b2f(f2b(Fl)+(f2b(Fh)-f2b(Fl))*m/(M+1)); //central point CentralPoint[m]=f2n(F, Fs, N); } return CentralPoint; }

//compute the central point of each filterbank on linear-scale //for M filterbanks, the central frequency shoule be extended to M+2 int* LinCentralPoint(int Fl, int Fh, int M, int Fs, int N) { int m; double F; int *CentralPoint=new int[M+2]; for(m=0 ; m<=M+1 ; m++) { //central frequency F=Fl+(Fh-Fl)*m*1.0/(M+1); //central point CentralPoint[m]=f2n(F, Fs, N); } return CentralPoint; }

// Generates the weights of one triangular filter.
//   [ns,  nm1)  zero
//   [nm1, nm2)  rising edge, 0 at nm1 towards 1 at nm2
//   [nm2, nm3)  falling edge, 1 at nm2 towards 0 at nm3
//   [nm3, ne)   zero
// FilterBank[ne] and beyond are not written.
void GenTriangularFilter(double *FilterBank, int ns, int nm1, int nm2, int nm3, int ne)
{
    int n;

    // leading zeros
    n = ns;
    while(n < nm1)
    {
        FilterBank[n] = 0;
        ++n;
    }

    // rising edge
    n = nm1;
    while(n < nm2)
    {
        FilterBank[n] = static_cast<double>(n-nm1)/(nm2-nm1);
        ++n;
    }

    // falling edge
    n = nm2;
    while(n < nm3)
    {
        FilterBank[n] = static_cast<double>(nm3-n)/(nm3-nm2);
        ++n;
    }

    // trailing zeros
    n = nm3;
    while(n < ne)
    {
        FilterBank[n] = 0;
        ++n;
    }
}

//Generate a set of mel-scale filterbanks 一个mel滤波器 //In: int Fs; //sample rate采样率 // int Fl; //the lowest frequency of all the filterbanks最低频率 // int Fh; //the highest frequency of all the filterbanks // int N; //FFT point fft点数 // int M; //number of filterbanks 滤波器的个数 //Out: double **FilterBanks; //Triangular filters 三角滤波器 // int *FilterBanksL; //Lower point number of filters 低一级的滤波器 // int *FilterBanksH; //Higher point number of filters 高一级的滤波器 void CMFCC::NewMelFilterBanks(int Fs, int Fl, int Fh, int N, int M, double **&FilterBanks, int *&FilterBanksL, int *&FilterBanksH) { int m;

//allocate memory space for filters 为滤波器分配空间 FilterBanksL=new int[M]; FilterBanksH=new int[M]; FilterBanks=new double*[M]; FilterBanks[0]=new double[M*N]; for(m=1 ; m<M ; m++) FilterBanks[m]=&FilterBanks[0][m*N]; //每个滤波器做傅里叶变换的点数是n

//compute the central points of filterbanks, totall M+2 points 计算滤波器的中心点，m个滤波器，是m+2个点 int *MelCenPoints=MelCentralPoint(Fl,Fh,M,Fs,N); for(m=0 ; m<M ; m++) { FilterBanksL[m]=MelCenPoints[m]; FilterBanksH[m]=MelCenPoints[m+2]; GenTriangularFilter(FilterBanks[m], 0, MelCenPoints[m], MelCenPoints[m+1], MelCenPoints[m+2], N-1); }

delete MelCenPoints;

return; }

//Generate a set of linear-scale filterbanks void CMFCC::NewLinFilterBanks(int Fs, int Fl, int Fh, int N, int M, double **&FilterBanks, int *&FilterBanksL, int *&FilterBanksH) { int m;

//allocate memory space for filters FilterBanksL=new int[M]; FilterBanksH=new int[M]; FilterBanks=new double*[M]; FilterBanks[0]=new double[M*N]; for(m=1 ; m<M ; m++) FilterBanks[m]=&FilterBanks[0][m*N];

//compute the central points of filterbanks, totall M+2 points int *LinCenPoints=LinCentralPoint(Fl,Fh,M,Fs,N); for(m=0 ; m<M ; m++) { FilterBanksL[m]=LinCenPoints[m]; FilterBanksH[m]=LinCenPoints[m+2]; GenTriangularFilter(FilterBanks[m], 0, LinCenPoints[m], LinCenPoints[m+1], LinCenPoints[m+2], N-1); }

delete LinCenPoints;

return; }

//====== Applying Filterbanks & Log Compression ======// 加滤波器，对数化 void CMFCC::ApplyFilterBanks(double *X, int N, double *S, int M, double **FilterBanks, int *FilterBanksL, int *FilterBanksH) { int m,n; int nl, nh;

for(m=0 ; m<M ; m++) { //get the lowest and the highest point number of mth filterbank 得到第m个滤波器的最低和最高点 nl=FilterBanksL[m]; nh=FilterBanksH[m];

//error if(nl<0 || nh>=N || nl>=nh) { S[m]=0; continue; }

//filter the signal with mth filterbank S[m]=0; for(n=nl ; n<=nh ; n++) S[m]+=X[n]*FilterBanks[m][n];

//log if(S[m]<ENE_FLOOR) S[m]=ENE_FLOOR; S[m]=log(S[m]); }

return; }

//====== CMS by Sentence ======// 得到均值 void CMFCC::CepsMeanSub(float *pfMFCC, int FrmCount, int Dim, int CepsOrder) { int i,j;

float *Mean=new float[CepsOrder];

for(i=0 ; i<CepsOrder ; i++) { Mean[i]=0; for(j=0 ; j<FrmCount ; j++) Mean[i]+=pfMFCC[j*Dim+i]; Mean[i]/=FrmCount; }

for(i=0 ; i<CepsOrder ; i++) for(j=0 ; j<FrmCount ; j++) pfMFCC[j*Dim+i]-=Mean[i];

delete Mean;

return; }

//====== Remove DC component ======// void CMFCC::RemoveDC(short *pnWav, int SampleCount) { if(pnWav==NULL || SampleCount<=0) return;

int i; float sum=0, wav;

for (i=0 ; i<SampleCount ; i++) { sum += pnWav[i]; } sum /= SampleCount;

for (i=0 ; i<SampleCount ; i++) { wav = pnWav[i]-sum; pnWav[i] = (short)wav; }

return; }

以下是mfcc_float.h的内容

由此可看出，本程序是mfcc特征和差分mfcc的结合：先提取mfcc，再对提取到的特征向量做差分，进一步剥离特征，目的是为了得到说话人言语和速度的变化啊啥的巴拉巴拉。看了一遍程序，总容易各种分心，太多不知道所以看得很晕。这个盒子，总算打开了。

首先，是预加重，是说为了减少嘴唇和声带的效应影响，加重高频部分，理解来就是消除唇部的摩擦，对真实的语音的频率做补偿。方法是y(n)=x(n)-a*x(n-1).这里a是，alfa系数，在0.9到1之间，一般取0.98。

其次是加窗分帧。加窗，又涉及到语音的特征巴拉巴拉。语音在长范围内是不停变动的，没有固定的特性无法做处理，所以加个窗，窗外的值设定为0，这样就把要处理的部分固定在窗内，做处理。这样的窗就是分析帧，一般取10-30ms做为窗，为了避免窗边界对信号的遗漏，对帧做偏移的时候要有帧叠（相邻帧有重叠），所以一般取帧长的一半作为帧移。加的窗是汉明窗。公式是在加窗范围内，

w(n)=0.54-0.46*cos(2*pi*n/(N-1))，0≤n≤N-1，其中N为窗长（程序里NewHammingWin用的分母是窗长Len而不是Len-1）。用汉明窗是因为在时域，是信号乘以窗函数，所以它两端是平滑过渡到0，不像矩形窗变化剧烈，而在频域它能保留高频成分，保留细节信息。

再次就是提取特征了。步骤为先离散傅里叶变换，将信号变换到频域上。这里涉及到dft变换的过程，是实部虚部分别作处理。程序中DFT一段需要细细研究，没看懂。

离散傅里叶变换后得到信号的频谱，然后对它的幅度做平方，就是能量谱。

啥是能量谱？

在这一步步的追溯到太多问题了，查阅资料也各种明白了点，但更多的是一知半解。收集以后一点点明白。http://longer.spaces.eepw.com.cn/articles/article/item/71979

【对于能量信号，常用能量谱来描述。所谓的能量谱，也称为能量谱密度，是指用密度的概念表示信号能量在各频率点的分布情况。也即是说，对能量谱在频域上积分就可以得到信号的能量。能量谱是信号幅度谱的模的平方，其量纲是焦/赫。对于功率信号，常用功率谱来描述。所谓的功率谱，也称为功率谱密度，是指用密度的概念表示信号功率在各频率点的分布情况。也就是说，对功率谱在频域上积分就可以得到信号的功率。从理论上来说，功率谱是信号自相关函数的傅里叶变换。因为功率信号不满足傅里叶变换的条件，其频谱通常不存在，维纳-辛钦定理证明了自相关函数和傅里叶变换之间对应关系。在工程实际中，即便是功率信号，由于持续的时间有限，可以直接对信号进行傅里叶变换，然后对得到的幅度谱的模求平方，再除以持续时间来估计信号的功率谱。】又学习了。

啥是能量信号？

【当且仅当f(t)在所有时间上的能量不为0且有限时，该信号为能量信号，即(1)式中的 T 趋于无穷大的时候E为有限。典型的能量信号如方波信号、三角波信号等。但是有些信号不满足能量信号的条件，如周期信号和能量无限的随机信号，此时就需要用功率来描述这类信号。当且仅当x(t)在所有时间上的功率不为0且有限时，该信号为功率信号】一般来说，周期信号和随机信号是功率信号，而非周期的确定信号是能量信号。

因为语音信号是非周期的，且能量有限，所以它是能量信号。

啥是能量？啥是功率？

功率是单位时间做的功，单位时间的能量，能量是连续时间内的功，能量就是对功率在一段时间内做积分。

再回去，得到能量谱后，用一组mel尺度的三角形滤波器组对能量谱做滤波。滤波器的个数为24-40个。每个滤波器有个中心频率。m个数小的时候他们间隔小，m多的时候间隔大。他们有交叠，当前滤波器的中心频率是上一个的最高频率，也是下一个的最低频率。

这样得到每个滤波器的输出，滤波器的传递函数有公式。因为滤波器的中心频率是在mel尺度上取的，要在线性频率和mel频率之间来回转换，所以程序里有f2b（线性频率Hz→mel）和b2f（mel→线性频率Hz）两个函数。

每个滤波器的中心频率在mel尺度上是等间隔的。用这些滤波器来模拟人的听觉特性，将线性频率转化为mel频率的公式是B(f)=1125ln(1+f/700)。这样就可以计算出对应的mel频率滤波器界限，进而得到滤波器函数，乘以能量谱，再取对数就是滤波器的输出。

然后，对输出做dct变换，是离散余弦变换。

啥是离散余弦变换？dct？

看了百度，有点理解。说是形成一个dct矩阵，左上角是重要部分，右下角是非重要部分，甚至可以抛弃，这样对压缩很有用，捡重要部分来压缩。矩阵的求解有公式。

自此，mfcc特征提取完成了。

还有很多原理上的，编程上的疑问再解决。

问：傅里叶变换过程，滤波器，数字信号处理。。很多流程。。明天理解。

//----------------------------------------------------------------------------------------
// MFCC_float.h: declaration of the MFCC/LFCC feature extractor.
#ifndef __MFCCH_FLOAT_H__
#define __MFCCH_FLOAT_H__

// return codes
#define SUCCESS 1
#define FAILURE 0

//----------------------------------------------------------------------------------------
// type of filterbank (aTypeOfFilterBank)
#define MEL_SCALE 0
#define LIN_SCALE 1

// bit flags for long Info
#define CMS    0x0001   // apply cepstrum mean subtraction
#define DCEPS  0x0002   // append delta cepstrum
#define DDCEPS 0x0004   // append delta-delta cepstrum

//complex structure
struct COMPLEX
{
    double real;
    double image;
};

//MFCC extraction class
class CMFCC
{
public:
    //constructor, allocate memory & generate pre-set parameters.
    //Defaults: Info=7 (CMS|DCEPS|DDCEPS), 16 kHz sample rate, 512-point FFT,
    //filterbanks from 100 Hz to 7000 Hz, 24 filters, mel scale, cepstrum order 12.
    CMFCC(long aInfo=7, int aSampleRate=16000, int aN=512, int aFl=100, int aFh=7000, int aM=24, int aTypeOfFilterBank=0, int aP=12);
    //destructor, release memory space
    ~CMFCC(void);
    //print the pre-computed tables to text files
    void Print(void);

    //Func: extract one sentence of MFCC
    //In:  short *pnWav;     //wave data of the sentence
    //     int SampleCount;  //number of samples
    //     int FrmWidth;     //frame width in samples
    //     int FrmOffst;     //frame shift in samples
    //Out: float *&pfMFCC;   //FrmCount*Dim features, allocated by this call
    //     int &FrmCount;    //number of frames
    //     int &Dim;         //number of features per frame
    //Ret: flag of SUCCESS or FAILURE
    int MFCCbySentence(short *pnWav, int SampleCount, float *&pfMFCC, int &FrmCount, int &Dim, int FrmWidth=512, int FrmOffst=256);

    //Func: extract one frame of MFCC
    //In:  short *pnWav;     //one frame of wave data
    //     int FrmWid;       //frame width
    //Out: float *pfMFCC;    //one frame of MFCC
    //     int CepsOrder;    //order of cepstrum
    //Ret: flag of SUCCESS or FAILURE
    int MFCCbyFrame(short *pnWav, int FrmWid, float *pfMFCC, int CepsOrder);

    //Func: extract one frame of DMFCC
    //In:  float *Prev2Fea;  //feature vector of previous second frame
    //     float *Prev1Fea;  //feature vector of previous first frame
    //     float *CurrFea;   //feature vector of current frame
    //     float *Next1Fea;  //feature vector of next first frame
    //     float *Next2Fea;  //feature vector of next second frame
    //     int P;            //order of delta cepstrum
    //Out: float *DFea;      //delta cepstrum of current frame
    void DMFCCbyFrame(float *Prev2Fea, float *Prev1Fea, float *CurrFea, float *Next1Fea, float *Next2Fea, float *DFea, int P);

    //Func: extract one frame of DDMFCC
    //In:  float *PrevDFea;  //delta feature vector of previous frame
    //     float *CurrDFea;  //delta feature vector of current frame
    //     float *NextDFea;  //delta feature vector of next frame
    //     int P;            //order of delta delta cepstrum
    //Out: float *DDFea;     //delta delta cepstrum of current frame
    void DDMFCCbyFrame(float *PrevDFea, float *CurrDFea, float *NextDFea, float *DDFea, int P);

private:
    //The object owns raw heap buffers that the destructor frees. A
    //compiler-generated copy would share them and free them twice, so copying
    //is disabled: both members are declared but intentionally never defined.
    CMFCC(const CMFCC&);
    CMFCC& operator=(const CMFCC&);

    //--- general parameters ---//
    long Info;
    int SampleRate;      //sample rate
    int N, P;            //point number of FFT, order of cepstrum

    //--- pre-emphasize ---//
    double Alfa;         //factor of pre-emphasize
    double *xtmp;        //double xtmp[N], temp buffer for x[N]
    //pre-emphasize
    void PreEmp(short *pnWav, int FrmWid, double *x, int N, double Alfa);

    //--- FFT ---//
    double *HammingWin;  //double HammingWin[N], weights of hamming window

    //generate the weights of a hamming window
    double* NewHammingWin(int Len);
    //apply a hamming window
    void ApplyWindow(double *In, double *Win, int Len, double *Out);

    COMPLEX *x;          //COMPLEX x[N], x.real is the input for FFT
    double *X;           //double X[N], spectral amplitude of this frame; FFTAmp fills indices 0..N/2

    //compute spectral amplitude
    void FFTAmp(COMPLEX *x, double *Amp, int N);

    //--- triangular filterbanks & ln ---//
    int M;               //number of triangular filters
    int Fl, Fh;          //lowest and highest frequencies of all the filterbanks, in Hz
    double *S;           //double S[M], the output of filterbanks

    double **FilterBanks; //double FilterBanks[M][N], the weights of each point and each filterbank
    int *FilterBanksL;    //int FilterBanksL[M], the lower point of each filterbank
    int *FilterBanksH;    //int FilterBanksH[M], the higher point of each filterbank

    //generate a set of filterbanks on mel-scale
    void NewMelFilterBanks(int Fs, int Fl, int Fh, int N, int M, double **&FilterBanks, int *&FilterBanksL, int *&FilterBanksH);
    //generate a set of filterbanks on linear-scale
    void NewLinFilterBanks(int Fs, int Fl, int Fh, int N, int M, double **&FilterBanks, int *&FilterBanksL, int *&FilterBanksH);
    //apply triangular windows & ln
    void ApplyFilterBanks(double *X, int N, double *S, int M, double **FilterBanks, int *FilterBanksL, int *FilterBanksH);

    //--- DCT ---//
    double **DctMatrix;  //double DctMatrix[P][M], the matrix of DCT

    //generate the transform matrix of DCT
    double** NewDctMatrix(int nP, int nM);
    //apply the DCT matrix
    void ApplyDCT(double *In, int InDim, double *Out, int OutDim, double **Matrix);

    //--- CMS ---//
    //Cepstrum Mean Subtraction
    void CepsMeanSub(float *pfMFCC, int FrmCount, int Dim, int CepsOrder);

    //--- remove DC component ---//
    void RemoveDC(short *pnWav, int SampleCount);

    //--- Operations on a whole sentence ---//
    double *pfFea;       //double pfFea[P]; feature temp buffer
};

//----------------------------------------------------------------------------------------
#endif

感言：果然过了很多年，才慢慢觉得信号处理是件有意思的事情了

posted @ 2014-08-25 13:26 Ivy_LMM 阅读(1198) 评论(0) 编辑收藏举报

会员力量，点亮园子希望

刷新页面返回顶部

Ivy_LMM

MFCC特征提取理解

公告